diff --git a/.clang-format b/.clang-format index d8f273702c8..2da3911dced 100644 --- a/.clang-format +++ b/.clang-format @@ -21,7 +21,6 @@ ConstructorInitializerAllOnOneLineOrOnePerLine: true ExperimentalAutoDetectBinPacking: true UseTab: Never TabWidth: 4 -IndentWidth: 4 Standard: Cpp11 PointerAlignment: Middle MaxEmptyLinesToKeep: 2 diff --git a/.clang-tidy b/.clang-tidy index 4dd8b9859c9..85989d311a2 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -23,9 +23,12 @@ Checks: '*, -bugprone-implicit-widening-of-multiplication-result, -bugprone-narrowing-conversions, -bugprone-not-null-terminated-result, + -bugprone-reserved-identifier, # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged -bugprone-unchecked-optional-access, -cert-dcl16-c, + -cert-dcl37-c, + -cert-dcl51-cpp, -cert-err58-cpp, -cert-msc32-c, -cert-msc51-cpp, @@ -38,6 +41,8 @@ Checks: '*, -clang-analyzer-security.insecureAPI.strcpy, -cppcoreguidelines-avoid-c-arrays, + -cppcoreguidelines-avoid-const-or-ref-data-members, + -cppcoreguidelines-avoid-do-while, -cppcoreguidelines-avoid-goto, -cppcoreguidelines-avoid-magic-numbers, -cppcoreguidelines-avoid-non-const-global-variables, @@ -105,6 +110,8 @@ Checks: '*, -misc-const-correctness, -misc-no-recursion, -misc-non-private-member-variables-in-classes, + -misc-confusable-identifiers, # useful but slooow + -misc-use-anonymous-namespace, -modernize-avoid-c-arrays, -modernize-concat-nested-namespaces, @@ -125,10 +132,12 @@ Checks: '*, -portability-simd-intrinsics, -readability-braces-around-statements, + -readability-convert-member-functions-to-static, -readability-else-after-return, -readability-function-cognitive-complexity, -readability-function-size, -readability-identifier-length, + -readability-identifier-naming, # useful but too slow -readability-implicit-bool-conversion, -readability-isolate-declaration, -readability-magic-numbers, @@ -140,74 +149,32 @@ Checks: '*, -readability-uppercase-literal-suffix, -readability-use-anyofallof, - -zirkon-*, - - -misc-*, # temporarily disabled due to being too slow - # also disable checks in other categories which are aliases of checks in misc-*: - # https://releases.llvm.org/15.0.0/tools/clang/tools/extra/docs/clang-tidy/checks/list.html - -cert-dcl54-cpp, # alias of misc-new-delete-overloads - -hicpp-new-delete-operators, # alias of misc-new-delete-overloads - -cert-fio38-c, # alias of misc-non-copyable-objects - -cert-dcl03-c, # alias of misc-static-assert - -hicpp-static-assert, # alias of misc-static-assert - -cert-err09-cpp, # alias of misc-throw-by-value-catch-by-reference - -cert-err61-cpp, # alias of misc-throw-by-value-catch-by-reference - -cppcoreguidelines-c-copy-assignment-signature, # alias of misc-unconventional-assign-operator - -cppcoreguidelines-non-private-member-variables-in-classes, # alias of misc-non-private-member-variables-in-classes + -zircon-*, ' WarningsAsErrors: '*' -# TODO: use dictionary syntax for CheckOptions when minimum clang-tidy level rose to 15 -# some-check.SomeOption: 'some value' -# instead of -# - key: some-check.SomeOption -# value: 'some value' CheckOptions: - - key: readability-identifier-naming.ClassCase - value: CamelCase - - key: readability-identifier-naming.EnumCase - value: CamelCase - - key: readability-identifier-naming.LocalVariableCase - value: lower_case - - key: readability-identifier-naming.StaticConstantCase - value: aNy_CasE - - key: readability-identifier-naming.MemberCase - value: lower_case - - key: 
readability-identifier-naming.PrivateMemberPrefix - value: '' - - key: readability-identifier-naming.ProtectedMemberPrefix - value: '' - - key: readability-identifier-naming.PublicMemberCase - value: lower_case - - key: readability-identifier-naming.MethodCase - value: camelBack - - key: readability-identifier-naming.PrivateMethodPrefix - value: '' - - key: readability-identifier-naming.ProtectedMethodPrefix - value: '' - - key: readability-identifier-naming.ParameterPackCase - value: lower_case - - key: readability-identifier-naming.StructCase - value: CamelCase - - key: readability-identifier-naming.TemplateTemplateParameterCase - value: CamelCase - - key: readability-identifier-naming.TemplateUsingCase - value: lower_case - - key: readability-identifier-naming.TypeTemplateParameterCase - value: CamelCase - - key: readability-identifier-naming.TypedefCase - value: CamelCase - - key: readability-identifier-naming.UnionCase - value: CamelCase - - key: readability-identifier-naming.UsingCase - value: CamelCase - - key: modernize-loop-convert.UseCxx20ReverseRanges - value: false - - key: performance-move-const-arg.CheckTriviallyCopyableMove - value: false - # Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097 - - key: readability-identifier-naming.TypeTemplateParameterIgnoredRegexp - value: expr-type - - key: cppcoreguidelines-avoid-do-while.IgnoreMacros - value: true + readability-identifier-naming.ClassCase: CamelCase + readability-identifier-naming.EnumCase: CamelCase + readability-identifier-naming.LocalVariableCase: lower_case + readability-identifier-naming.StaticConstantCase: aNy_CasE + readability-identifier-naming.MemberCase: lower_case + readability-identifier-naming.PrivateMemberPrefix: '' + readability-identifier-naming.ProtectedMemberPrefix: '' + readability-identifier-naming.PublicMemberCase: lower_case + readability-identifier-naming.MethodCase: camelBack + readability-identifier-naming.PrivateMethodPrefix: '' + readability-identifier-naming.ProtectedMethodPrefix: '' + readability-identifier-naming.ParameterPackCase: lower_case + readability-identifier-naming.StructCase: CamelCase + readability-identifier-naming.TemplateTemplateParameterCase: CamelCase + readability-identifier-naming.TemplateParameterCase: lower_case + readability-identifier-naming.TypeTemplateParameterCase: CamelCase + readability-identifier-naming.TypedefCase: CamelCase + readability-identifier-naming.UnionCase: CamelCase + modernize-loop-convert.UseCxx20ReverseRanges: false + performance-move-const-arg.CheckTriviallyCopyableMove: false + # Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097 + readability-identifier-naming.TypeTemplateParameterIgnoredRegexp: expr-type + cppcoreguidelines-avoid-do-while.IgnoreMacros: true diff --git a/.clangd b/.clangd new file mode 100644 index 00000000000..ad471db8d8b --- /dev/null +++ b/.clangd @@ -0,0 +1,16 @@ +Diagnostics: + # clangd does parse .clang-tidy, but some checks are too slow to run in + # the clang-tidy build, so let's enable them explicitly for clangd at least. + ClangTidy: + # The following checks had been disabled due to slowness with C++23, + # for more details see [1]. + # + # [1]: https://github.com/llvm/llvm-project/issues/61418 + # + # But the code base was written in a style that was checked + # by this check, so let's at least enable it for clangd. + Add: [ + # configured in .clang-tidy + readability-identifier-naming, + bugprone-reserved-identifier, + ]
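Taken together, the two changes above mean readability-identifier-naming and bugprone-reserved-identifier no longer run in the CI clang-tidy pass but remain active for anyone using clangd. To confirm which checks a configuration actually enables for a file, clang-tidy can print the effective list; a minimal spot-check, assuming a local clang-tidy install (the source path is only used to locate the nearest .clang-tidy, and the trailing -- supplies empty compile flags so no compilation database is needed):

```bash
# List the checks in effect for one file under the repository's
# .clang-tidy, filtered to the two checks that moved to .clangd.
clang-tidy --list-checks src/Core/Field.h -- \
    | grep -E 'readability-identifier-naming|bugprone-reserved-identifier' \
    || echo 'not enabled for clang-tidy (clangd-only now)'
```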
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 5d09d3a9ef3..db170c3e28f 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -2,16 +2,16 @@ A technical comment, you are free to remove or leave it as it is when PR is created The following categories are used in the next scripts, update them accordingly utils/changelog/changelog.py -tests/ci/run_check.py +tests/ci/cancel_and_rerun_workflow_lambda/app.py --> ### Changelog category (leave one): - New Feature - Improvement -- Bug Fix (user-visible misbehavior in official stable or prestable release) - Performance Improvement - Backward Incompatible Change - Build/Testing/Packaging Improvement - Documentation (changelog entry is not required) +- Bug Fix (user-visible misbehavior in an official stable release) - Not for changelog (changelog entry is not required) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 110c06631c7..d69168b01ee 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -9,8 +9,22 @@ on: # yamllint disable-line rule:truthy branches: - 'backport/**' jobs: + CheckLabels: + runs-on: [self-hosted, style-checker] + # Run the first check always, even if the CI is cancelled + if: ${{ always() }} + steps: + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Labels check + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 run_check.py PythonUnitTests: runs-on: [self-hosted, style-checker] + needs: CheckLabels steps: - name: Check out repository code uses: ClickHouse/checkout@v1 @@ -22,6 +36,7 @@ jobs: python3 -m unittest discover -s . 
-p '*_test.py' DockerHubPushAarch64: runs-on: [self-hosted, style-checker-aarch64] + needs: CheckLabels steps: - name: Check out repository code uses: ClickHouse/checkout@v1 @@ -38,6 +53,7 @@ jobs: path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: runs-on: [self-hosted, style-checker] + needs: CheckLabels steps: - name: Check out repository code uses: ClickHouse/checkout@v1 @@ -79,7 +95,7 @@ jobs: with: name: changed_images path: ${{ runner.temp }}/changed_images.json - CompatibilityCheck: + CompatibilityCheckX86: needs: [BuilderDebRelease] runs-on: [self-hosted, style-checker] steps: @@ -98,12 +114,43 @@ jobs: uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: CompatibilityCheck + - name: CompatibilityCheckX86 run: | sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + CompatibilityCheckAarch64: + needs: [BuilderDebAarch64] + runs-on: [self-hosted, style-checker] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/compatibility_check + REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse + REPORTS_PATH=${{runner.temp}}/reports_dir + EOF + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: CompatibilityCheckAarch64 + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc - name: Cleanup if: always() run: | @@ -302,6 +349,13 @@ jobs: with: clear-repository: true submodules: true + - name: Apply sparse checkout for contrib # in order to check that it doesn't break build + run: | + rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed' + git -C "$GITHUB_WORKSPACE" checkout . 
&& echo 'restored' + "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK' + du -hs "$GITHUB_WORKSPACE/contrib" ||: + find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||: - name: Build run: | sudo rm -fr "$TEMP_PATH" @@ -421,8 +475,9 @@ jobs: - name: Check docker clickhouse/clickhouse-server building run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 docker_server.py --release-type head --no-push - python3 docker_server.py --release-type head --no-push --no-ubuntu \ + python3 docker_server.py --release-type head --no-push \ + --image-repo clickhouse/clickhouse-server --image-path docker/server + python3 docker_server.py --release-type head --no-push \ --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper - name: Cleanup if: always() @@ -741,7 +796,8 @@ jobs: - FunctionalStatefulTestDebug - StressTestTsan - IntegrationTestsRelease - - CompatibilityCheck + - CompatibilityCheckX86 + - CompatibilityCheckAarch64 runs-on: [self-hosted, style-checker] steps: - name: Check out repository code diff --git a/.github/workflows/cherry_pick.yml b/.github/workflows/cherry_pick.yml index 065e584182b..8d1e2055978 100644 --- a/.github/workflows/cherry_pick.yml +++ b/.github/workflows/cherry_pick.yml @@ -35,7 +35,6 @@ jobs: fetch-depth: 0 - name: Cherry pick run: | - sudo pip install GitPython cd "$GITHUB_WORKSPACE/tests/ci" python3 cherry_pick.py - name: Cleanup diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 7e045992dee..f0741b5465f 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -110,7 +110,7 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - CompatibilityCheck: + CompatibilityCheckX86: needs: [BuilderDebRelease] runs-on: [self-hosted, style-checker] steps: @@ -129,12 +129,43 @@ jobs: uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: CompatibilityCheck + - name: CompatibilityCheckX86 run: | sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + CompatibilityCheckAarch64: + needs: [BuilderDebAarch64] + runs-on: [self-hosted, style-checker] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/compatibility_check + REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse + REPORTS_PATH=${{runner.temp}}/reports_dir + EOF + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: CompatibilityCheckAarch64 + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc - name: Cleanup if: always() run: | @@ -456,6 +487,13 @@ jobs: with: clear-repository: true submodules: true + - name: Apply sparse checkout for contrib # in order to check that it doesn't break build + run: | + rm -rf 
"$GITHUB_WORKSPACE/contrib" && echo 'removed' + git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored' + "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK' + du -hs "$GITHUB_WORKSPACE/contrib" ||: + find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||: - name: Build run: | sudo rm -fr "$TEMP_PATH" @@ -829,8 +867,9 @@ jobs: - name: Check docker clickhouse/clickhouse-server building run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 docker_server.py --release-type head - python3 docker_server.py --release-type head --no-ubuntu \ + python3 docker_server.py --release-type head \ + --image-repo clickhouse/clickhouse-server --image-path docker/server + python3 docker_server.py --release-type head \ --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper - name: Cleanup if: always() @@ -1099,7 +1138,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_database_replicated/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=2 + RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports uses: actions/download-artifact@v3 @@ -1135,6 +1174,114 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_database_replicated/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestReleaseDatabaseReplicated2: + needs: [BuilderDebRelease] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_database_replicated + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (release, DatabaseReplicated) + REPO_COPY=${{runner.temp}}/stateless_database_replicated/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestReleaseDatabaseReplicated3: + needs: [BuilderDebRelease] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_database_replicated + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (release, DatabaseReplicated) + REPO_COPY=${{runner.temp}}/stateless_database_replicated/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ 
env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestReleaseS3_0: + needs: [BuilderDebRelease] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_s3_storage + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (release, s3 storage) + REPO_COPY=${{runner.temp}}/stateless_s3_storage/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=0 RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports @@ -1158,7 +1305,7 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - FunctionalStatelessTestReleaseS3: + FunctionalStatelessTestReleaseS3_1: needs: [BuilderDebRelease] runs-on: [self-hosted, func-tester] steps: @@ -1170,6 +1317,42 @@ jobs: CHECK_NAME=Stateless tests (release, s3 storage) REPO_COPY=${{runner.temp}}/stateless_s3_storage/ClickHouse KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=2 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestReleaseAnalyzer: + needs: [BuilderDebRelease] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_analyzer + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (release, analyzer) + REPO_COPY=${{runner.temp}}/stateless_analyzer/ClickHouse + KILL_TIMEOUT=10800 EOF - name: Download json reports uses: actions/download-artifact@v3 @@ -1239,7 +1422,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=2 + RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports uses: actions/download-artifact@v3 @@ -1275,7 +1458,79 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=1 - RUN_BY_HASH_TOTAL=2 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs 
--no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestAsan2: + needs: [BuilderDebAsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (asan) + REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestAsan3: + needs: [BuilderDebAsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (asan) + REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports uses: actions/download-artifact@v3 @@ -1311,7 +1566,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports uses: actions/download-artifact@v3 @@ -1347,7 +1602,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=1 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports uses: actions/download-artifact@v3 @@ -1383,7 +1638,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=2 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports uses: actions/download-artifact@v3 @@ -1406,7 +1661,79 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" - FunctionalStatelessTestUBsan: + FunctionalStatelessTestTsan3: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (tsan) + REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=5 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty 
docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestTsan4: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (tsan) + REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=4 + RUN_BY_HASH_TOTAL=5 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestUBsan0: needs: [BuilderDebUBsan] runs-on: [self-hosted, func-tester] steps: @@ -1418,6 +1745,44 @@ jobs: CHECK_NAME=Stateless tests (ubsan) REPO_COPY=${{runner.temp}}/stateless_ubsan/ClickHouse KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=0 + RUN_BY_HASH_TOTAL=2 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestUBsan1: + needs: [BuilderDebUBsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_ubsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (ubsan) + REPO_COPY=${{runner.temp}}/stateless_ubsan/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports uses: actions/download-artifact@v3 @@ -1453,7 +1818,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v3 @@ -1489,7 +1854,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=1 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v3 @@ -1525,7 +1890,115 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=2 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - 
name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestMsan3: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_memory + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (msan) + REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestMsan4: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_memory + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (msan) + REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=4 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestMsan5: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_memory + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (msan) + REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=5 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v3 @@ -1561,7 +2034,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports uses: actions/download-artifact@v3 @@ -1597,7 +2070,7 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=1 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports uses: actions/download-artifact@v3 @@ -1633,7 +2106,79 @@ jobs: REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT=10800 RUN_BY_HASH_NUM=2 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=5 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: 
ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestDebug3: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (debug) + REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=5 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestDebug4: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (debug) + REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=4 + RUN_BY_HASH_TOTAL=5 EOF - name: Download json reports uses: actions/download-artifact@v3 @@ -2084,7 +2629,7 @@ jobs: CHECK_NAME=Integration tests (asan) REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v3 @@ -2119,7 +2664,7 @@ jobs: CHECK_NAME=Integration tests (asan) REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse RUN_BY_HASH_NUM=1 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v3 @@ -2154,7 +2699,112 @@ jobs: CHECK_NAME=Integration tests (asan) REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse RUN_BY_HASH_NUM=2 - RUN_BY_HASH_TOTAL=3 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAsan3: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + 
REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAsan4: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=4 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAsan5: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=5 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v3 @@ -2189,7 +2839,7 @@ jobs: CHECK_NAME=Integration tests (tsan) REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=4 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v3 @@ -2224,7 +2874,7 @@ jobs: CHECK_NAME=Integration tests (tsan) REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse RUN_BY_HASH_NUM=1 - RUN_BY_HASH_TOTAL=4 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v3 @@ -2259,7 +2909,7 @@ jobs: CHECK_NAME=Integration tests (tsan) REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse RUN_BY_HASH_NUM=2 - RUN_BY_HASH_TOTAL=4 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v3 @@ -2294,7 +2944,77 @@ jobs: CHECK_NAME=Integration tests (tsan) REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse RUN_BY_HASH_NUM=3 - RUN_BY_HASH_TOTAL=4 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp 
-r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsTsan4: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (tsan) + REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM=4 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsTsan5: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (tsan) + REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM=5 + RUN_BY_HASH_TOTAL=6 EOF - name: Download json reports uses: actions/download-artifact@v3 @@ -2329,7 +3049,7 @@ jobs: CHECK_NAME=Integration tests (release) REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse RUN_BY_HASH_NUM=0 - RUN_BY_HASH_TOTAL=2 + RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports uses: actions/download-artifact@v3 @@ -2364,7 +3084,77 @@ jobs: CHECK_NAME=Integration tests (release) REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse RUN_BY_HASH_NUM=1 - RUN_BY_HASH_TOTAL=2 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsRelease2: + needs: [BuilderDebRelease] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_release + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (release) + REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse + RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr 
"$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsRelease3: + needs: [BuilderDebRelease] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_release + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (release) + REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports uses: actions/download-artifact@v3 @@ -3084,23 +3874,36 @@ jobs: - FunctionalStatelessTestDebug0 - FunctionalStatelessTestDebug1 - FunctionalStatelessTestDebug2 + - FunctionalStatelessTestDebug3 + - FunctionalStatelessTestDebug4 - FunctionalStatelessTestRelease - FunctionalStatelessTestReleaseDatabaseOrdinary - FunctionalStatelessTestReleaseDatabaseReplicated0 - FunctionalStatelessTestReleaseDatabaseReplicated1 + - FunctionalStatelessTestReleaseDatabaseReplicated2 + - FunctionalStatelessTestReleaseDatabaseReplicated3 - FunctionalStatelessTestAarch64 - FunctionalStatelessTestAsan0 - FunctionalStatelessTestAsan1 + - FunctionalStatelessTestAsan2 + - FunctionalStatelessTestAsan3 - FunctionalStatelessTestTsan0 - FunctionalStatelessTestTsan1 - FunctionalStatelessTestTsan2 + - FunctionalStatelessTestTsan3 + - FunctionalStatelessTestTsan4 - FunctionalStatelessTestMsan0 - FunctionalStatelessTestMsan1 - FunctionalStatelessTestMsan2 - - FunctionalStatelessTestUBsan + - FunctionalStatelessTestMsan3 + - FunctionalStatelessTestMsan4 + - FunctionalStatelessTestMsan5 + - FunctionalStatelessTestUBsan0 + - FunctionalStatelessTestUBsan1 - FunctionalStatefulTestDebug - FunctionalStatefulTestRelease - - FunctionalStatelessTestReleaseS3 + - FunctionalStatelessTestReleaseS3_0 + - FunctionalStatelessTestReleaseS3_1 - FunctionalStatefulTestAarch64 - FunctionalStatefulTestAsan - FunctionalStatefulTestTsan @@ -3114,17 +3917,25 @@ jobs: - IntegrationTestsAsan0 - IntegrationTestsAsan1 - IntegrationTestsAsan2 + - IntegrationTestsAsan3 + - IntegrationTestsAsan4 + - IntegrationTestsAsan5 - IntegrationTestsRelease0 - IntegrationTestsRelease1 + - IntegrationTestsRelease2 + - IntegrationTestsRelease3 - IntegrationTestsTsan0 - IntegrationTestsTsan1 - IntegrationTestsTsan2 - IntegrationTestsTsan3 + - IntegrationTestsTsan4 + - IntegrationTestsTsan5 - PerformanceComparisonX86-0 - PerformanceComparisonX86-1 - PerformanceComparisonX86-2 - PerformanceComparisonX86-3 - - CompatibilityCheck + - CompatibilityCheckX86 + - CompatibilityCheckAarch64 - ASTFuzzerTestDebug - ASTFuzzerTestAsan - ASTFuzzerTestTsan diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index f6d6d192f48..acf6bbe8f6a 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -72,6 +72,9 @@ jobs: with: name: changed_images path: ${{ runner.temp }}/changed_images.json + Codebrowser: + needs: [DockerHubPush] + uses: ./.github/workflows/woboq.yml BuilderCoverity: needs: DockerHubPush runs-on: [self-hosted, builder] @@ -118,13 +121,15 @@ jobs: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" SonarCloud: + # TODO: Remove if: whenever SonarCloud supports c++23 + if: ${{ false }} 
runs-on: [self-hosted, builder] env: - SONAR_SCANNER_VERSION: 4.7.0.2747 + SONAR_SCANNER_VERSION: 4.8.0.2856 SONAR_SERVER_URL: "https://sonarcloud.io" BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed - CC: clang-15 - CXX: clang++-15 + CC: clang-16 + CXX: clang++-16 steps: - name: Check out repository code uses: ClickHouse/checkout@v1 @@ -173,4 +178,4 @@ jobs: --define sonar.cfamily.build-wrapper-output="${{ env.BUILD_WRAPPER_OUT_DIR }}" \ --define sonar.projectKey="ClickHouse_ClickHouse" \ --define sonar.organization="clickhouse-java" \ - --define sonar.exclusions="**/*.java,**/*.ts,**/*.js,**/*.css,**/*.sql" + --define sonar.exclusions="**/*.java,**/*.ts,**/*.js,**/*.css,**/*.sql" \ diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 7d410f833c5..506ed451b6d 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -37,7 +37,6 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 run_check.py PythonUnitTests: - needs: CheckLabels runs-on: [self-hosted, style-checker] steps: - name: Check out repository code @@ -174,7 +173,7 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" - CompatibilityCheck: + CompatibilityCheckX86: needs: [BuilderDebRelease] runs-on: [self-hosted, style-checker] steps: @@ -193,12 +192,43 @@ jobs: uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: CompatibilityCheck + - name: CompatibilityCheckX86 run: | sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + CompatibilityCheckAarch64: + needs: [BuilderDebAarch64] + runs-on: [self-hosted, style-checker] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/compatibility_check + REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse + REPORTS_PATH=${{runner.temp}}/reports_dir + EOF + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: CompatibilityCheckAarch64 + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc - name: Cleanup if: always() run: | @@ -520,6 +550,13 @@ jobs: with: clear-repository: true submodules: true + - name: Apply sparse checkout for contrib # in order to check that it doesn't break build + run: | + rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed' + git -C "$GITHUB_WORKSPACE" checkout . 
&& echo 'restored' + "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK' + du -hs "$GITHUB_WORKSPACE/contrib" ||: + find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||: - name: Build run: | sudo rm -fr "$TEMP_PATH" @@ -886,8 +923,9 @@ jobs: - name: Check docker clickhouse/clickhouse-server building run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 docker_server.py --release-type head --no-push - python3 docker_server.py --release-type head --no-push --no-ubuntu \ + python3 docker_server.py --release-type head --no-push \ + --image-repo clickhouse/clickhouse-server --image-path docker/server + python3 docker_server.py --release-type head --no-push \ --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper - name: Cleanup if: always() @@ -1270,6 +1308,40 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestReleaseAnalyzer: + needs: [BuilderDebRelease] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_analyzer + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (release, analyzer) + REPO_COPY=${{runner.temp}}/stateless_analyzer/ClickHouse + KILL_TIMEOUT=10800 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestReleaseS3_0: needs: [BuilderDebRelease] runs-on: [self-hosted, func-tester] @@ -4717,6 +4789,7 @@ jobs: - FunctionalStatelessTestReleaseDatabaseReplicated2 - FunctionalStatelessTestReleaseDatabaseReplicated3 - FunctionalStatelessTestReleaseWideParts + - FunctionalStatelessTestReleaseAnalyzer - FunctionalStatelessTestAarch64 - FunctionalStatelessTestAsan0 - FunctionalStatelessTestAsan1 @@ -4792,7 +4865,8 @@ jobs: - UnitTestsMsan - UnitTestsUBsan - UnitTestsReleaseClang - - CompatibilityCheck + - CompatibilityCheckX86 + - CompatibilityCheckAarch64 - IntegrationTestsFlakyCheck - SQLancerTestRelease - SQLancerTestDebug @@ -4807,3 +4881,41 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py python3 merge_pr.py --check-approved +############################################################################################## +########################### SQLLOGIC TEST ################################################### +############################################################################################## + SQLLogicTestRelease: + needs: [BuilderDebRelease] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/sqllogic_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Sqllogic test (release) + REPO_COPY=${{runner.temp}}/sqllogic_debug/ClickHouse + KILL_TIMEOUT=10800 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm 
-fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Sqllogic test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 sqllogic_test.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2ef05fe989b..0742ebfd449 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -7,15 +7,28 @@ on: # yamllint disable-line rule:truthy release: types: - published + workflow_dispatch: + inputs: + tag: + description: 'Release tag' + required: true + type: string jobs: ReleasePublish: runs-on: [self-hosted, style-checker] steps: + - name: Set tag from input + if: github.event_name == 'workflow_dispatch' + run: | + echo "GITHUB_TAG=${{ github.event.inputs.tag }}" >> "$GITHUB_ENV" + - name: Set tag from REF + if: github.event_name == 'release' + run: | + echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV" - name: Deploy packages and assets run: | - GITHUB_TAG="${GITHUB_REF#refs/tags/}" - curl --silent --data '' \ + curl --silent --data '' --no-buffer \ '${{ secrets.PACKAGES_RELEASE_URL }}/release/'"${GITHUB_TAG}"'?binary=binary_darwin&binary=binary_darwin_aarch64&sync=true' ############################################################################################ ##################################### Docker images ####################################### @@ -23,16 +36,26 @@ jobs: DockerServerImages: runs-on: [self-hosted, style-checker] steps: + - name: Set tag from input + if: github.event_name == 'workflow_dispatch' + run: | + echo "GITHUB_TAG=${{ github.event.inputs.tag }}" >> "$GITHUB_ENV" + - name: Set tag from REF + if: github.event_name == 'release' + run: | + echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV" - name: Check out repository code uses: ClickHouse/checkout@v1 with: clear-repository: true fetch-depth: 0 # otherwise we will have no version info + ref: ${{ env.GITHUB_TAG }} - name: Check docker clickhouse/clickhouse-server building run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 docker_server.py --release-type auto --version "${{ github.ref }}" - python3 docker_server.py --release-type auto --version "${{ github.ref }}" --no-ubuntu \ + python3 docker_server.py --release-type auto --version "$GITHUB_TAG" \ + --image-repo clickhouse/clickhouse-server --image-path docker/server + python3 docker_server.py --release-type auto --version "$GITHUB_TAG" \ --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper - name: Cleanup if: always() diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 4d2a99c2106..21284815583 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -71,7 +71,7 @@ jobs: with: name: changed_images path: ${{ runner.temp }}/changed_images.json - CompatibilityCheck: + CompatibilityCheckX86: needs: [BuilderDebRelease] runs-on: [self-hosted, style-checker] steps: @@ -90,12 +90,43 @@ jobs: uses: actions/download-artifact@v3 with: path: ${{ env.REPORTS_PATH }} - - name: CompatibilityCheck + - name: CompatibilityCheckX86 run: | sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd 
"$REPO_COPY/tests/ci" && python3 compatibility_check.py + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + CompatibilityCheckAarch64: + needs: [BuilderDebAarch64] + runs-on: [self-hosted, style-checker] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/compatibility_check + REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse + REPORTS_PATH=${{runner.temp}}/reports_dir + EOF + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: CompatibilityCheckAarch64 + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc - name: Cleanup if: always() run: | @@ -375,6 +406,13 @@ jobs: with: clear-repository: true submodules: true + - name: Apply sparse checkout for contrib # in order to check that it doesn't break build + run: | + rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed' + git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored' + "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK' + du -hs "$GITHUB_WORKSPACE/contrib" ||: + find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||: - name: Build run: | sudo rm -fr "$TEMP_PATH" @@ -494,8 +532,9 @@ jobs: - name: Check docker clickhouse/clickhouse-server building run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 docker_server.py --release-type head --no-push - python3 docker_server.py --release-type head --no-push --no-ubuntu \ + python3 docker_server.py --release-type head --no-push \ + --image-repo clickhouse/clickhouse-server --image-path docker/server + python3 docker_server.py --release-type head --no-push \ --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper - name: Cleanup if: always() @@ -1947,7 +1986,8 @@ jobs: - IntegrationTestsTsan1 - IntegrationTestsTsan2 - IntegrationTestsTsan3 - - CompatibilityCheck + - CompatibilityCheckX86 + - CompatibilityCheckAarch64 runs-on: [self-hosted, style-checker] steps: - name: Check out repository code diff --git a/.github/workflows/woboq.yml b/.github/workflows/woboq.yml index 363652c9f33..bdfbc8fef9c 100644 --- a/.github/workflows/woboq.yml +++ b/.github/workflows/woboq.yml @@ -6,9 +6,8 @@ env: concurrency: group: woboq on: # yamllint disable-line rule:truthy - schedule: - - cron: '0 */18 * * *' workflow_dispatch: + workflow_call: jobs: # don't use dockerhub push because this image updates so rarely WoboqCodebrowser: @@ -26,6 +25,10 @@ jobs: with: clear-repository: true submodules: 'true' + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.IMAGES_PATH }} - name: Codebrowser run: | sudo rm -fr "$TEMP_PATH" diff --git a/.gitignore b/.gitignore index 14b860244c2..a04c60d5ca3 100644 --- a/.gitignore +++ b/.gitignore @@ -129,7 +129,6 @@ website/package-lock.json /.ccls-cache # clangd cache -/.clangd /.cache /compile_commands.json diff --git a/.gitmodules b/.gitmodules index ca55281e643..f0984fec4db 100644 --- a/.gitmodules +++ b/.gitmodules @@ -267,7 +267,7 @@ url = 
https://github.com/ClickHouse/nats.c [submodule "contrib/vectorscan"] path = contrib/vectorscan - url = https://github.com/VectorCamp/vectorscan + url = https://github.com/VectorCamp/vectorscan.git [submodule "contrib/c-ares"] path = contrib/c-ares url = https://github.com/ClickHouse/c-ares @@ -296,6 +296,9 @@ [submodule "contrib/libdivide"] path = contrib/libdivide url = https://github.com/ridiculousfish/libdivide +[submodule "contrib/libbcrypt"] + path = contrib/libbcrypt + url = https://github.com/rg3/libbcrypt.git [submodule "contrib/ulid-c"] path = contrib/ulid-c url = https://github.com/ClickHouse/ulid-c.git @@ -335,3 +338,9 @@ [submodule "contrib/liburing"] path = contrib/liburing url = https://github.com/axboe/liburing +[submodule "contrib/libfiu"] + path = contrib/libfiu + url = https://github.com/ClickHouse/libfiu.git +[submodule "contrib/isa-l"] + path = contrib/isa-l + url = https://github.com/ClickHouse/isa-l.git diff --git a/CHANGELOG.md b/CHANGELOG.md index e22377e2332..1ccd4f9846d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,343 @@ ### Table of Contents +**[ClickHouse release v23.4, 2023-04-26](#234)**
+**[ClickHouse release v23.3 LTS, 2023-03-30](#233)**
**[ClickHouse release v23.2, 2023-02-23](#232)**
**[ClickHouse release v23.1, 2023-01-25](#231)**
**[Changelog for 2022](https://clickhouse.com/docs/en/whats-new/changelog/2022/)**
# 2023 Changelog +### ClickHouse release 23.4, 2023-04-26 + +#### Backward Incompatible Change +* Formatter '%M' in function formatDateTime() now prints the month name instead of the minutes. This makes the behavior consistent with MySQL. The previous behavior can be restored using the setting "formatdatetime_parsedatetime_m_is_month_name = 0". [#47246](https://github.com/ClickHouse/ClickHouse/pull/47246) ([Robert Schulze](https://github.com/rschu1ze)). +* This change makes sense only if you are using the virtual filesystem cache. If `path` in the virtual filesystem cache configuration is not empty and is not an absolute path, then it will be put in `<clickhouse server working directory>/caches/<path>`. [#48784](https://github.com/ClickHouse/ClickHouse/pull/48784) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Primary/secondary indices and sorting keys with identical expressions are now rejected. This behavior can be disabled using the setting `allow_suspicious_indices`. [#48536](https://github.com/ClickHouse/ClickHouse/pull/48536) ([凌涛](https://github.com/lingtaolf)). + +#### New Feature +* Support new aggregate functions `quantileGK`/`quantilesGK`, like [approx_percentile](https://spark.apache.org/docs/latest/api/sql/index.html#approx_percentile) in Spark (see the sketch below). For the Greenwald-Khanna algorithm, refer to http://infolab.stanford.edu/~datar/courses/cs361a/papers/quantiles.pdf. [#46428](https://github.com/ClickHouse/ClickHouse/pull/46428) ([李扬](https://github.com/taiyang-li)). +* Add a statement `SHOW COLUMNS` that shows distilled information from system.columns. [#48017](https://github.com/ClickHouse/ClickHouse/pull/48017) ([Robert Schulze](https://github.com/rschu1ze)). +* Added `LIGHTWEIGHT` and `PULL` modifiers for the `SYSTEM SYNC REPLICA` query. The `LIGHTWEIGHT` version waits for fetches and drop-ranges only (merges and mutations are ignored). The `PULL` version pulls new entries from ZooKeeper and does not wait for them. Fixes [#47794](https://github.com/ClickHouse/ClickHouse/issues/47794). [#48085](https://github.com/ClickHouse/ClickHouse/pull/48085) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add `kafkaMurmurHash` function for compatibility with Kafka DefaultPartitioner. Closes [#47834](https://github.com/ClickHouse/ClickHouse/issues/47834). [#48185](https://github.com/ClickHouse/ClickHouse/pull/48185) ([Nikolay Degterinsky](https://github.com/evillique)). +* Allow easily creating a user with the same grants as the current user by using `GRANT CURRENT GRANTS`. [#48262](https://github.com/ClickHouse/ClickHouse/pull/48262) ([pufit](https://github.com/pufit)). +* Add statistical aggregate function `kolmogorovSmirnovTest`. Close [#48228](https://github.com/ClickHouse/ClickHouse/issues/48228). [#48325](https://github.com/ClickHouse/ClickHouse/pull/48325) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). +* Added a `lost_part_count` column to the `system.replicas` table. The column value shows the total number of lost parts in the corresponding table. The value is stored in ZooKeeper and can be used instead of the non-persistent `ReplicatedDataLoss` profile event for monitoring. [#48526](https://github.com/ClickHouse/ClickHouse/pull/48526) ([Sergei Trifonov](https://github.com/serxa)). +* Add `soundex` function for compatibility. Closes [#39880](https://github.com/ClickHouse/ClickHouse/issues/39880). [#48567](https://github.com/ClickHouse/ClickHouse/pull/48567) ([FriendLey](https://github.com/FriendLey)). +* Support `Map` type for JSONExtract. [#48629](https://github.com/ClickHouse/ClickHouse/pull/48629) ([李扬](https://github.com/taiyang-li)).
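As a quick, hedged sketch of two of the additions above (the `quantileGK(accuracy, level)(expr)` parameterization and the `Map` return type for `JSONExtract` follow the entries; the sample data is invented):

```sql
-- Greenwald-Khanna quantile estimation: quantileGK(accuracy, level)(expr).
SELECT quantileGK(100, 0.95)(number) FROM numbers(1000);

-- JSONExtract can now return a Map directly.
SELECT JSONExtract('{"a": 1, "b": 2}', 'Map(String, Int64)') AS m;
```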
+* Add `PrettyJSONEachRow` format to output pretty JSON with new line delimiters and 4 space indents. [#48898](https://github.com/ClickHouse/ClickHouse/pull/48898) ([Kruglov Pavel](https://github.com/Avogar)). +* Add `ParquetMetadata` input format to read Parquet file metadata. [#48911](https://github.com/ClickHouse/ClickHouse/pull/48911) ([Kruglov Pavel](https://github.com/Avogar)). +* Add `extractKeyValuePairs` function to extract key-value pairs from strings. Input strings might contain noise (e.g. log files that are not 100% formatted as key-value pairs); the algorithm will look for key-value pairs matching the arguments passed to the function. As of now, the function accepts the following arguments: `data_column` (mandatory), `key_value_pair_delimiter` (defaults to `:`), `pair_delimiters` (defaults to `\space \, \;`) and `quoting_character` (defaults to double quotes). [#43606](https://github.com/ClickHouse/ClickHouse/pull/43606) ([Arthur Passos](https://github.com/arthurpassos)). +* Functions replaceOne(), replaceAll(), replaceRegexpOne() and replaceRegexpAll() can now be called with non-const pattern and replacement arguments. [#46589](https://github.com/ClickHouse/ClickHouse/pull/46589) ([Robert Schulze](https://github.com/rschu1ze)). +* Added functions to work with columns of type `Map`: `mapConcat`, `mapSort`, `mapExists`. [#48071](https://github.com/ClickHouse/ClickHouse/pull/48071) ([Anton Popov](https://github.com/CurtizJ)). + +#### Performance Improvement +* Reading files in `Parquet` format is now much faster. IO and decoding are parallelized (controlled by the `max_threads` setting), and only required data ranges are read. [#47964](https://github.com/ClickHouse/ClickHouse/pull/47964) ([Michael Kolupaev](https://github.com/al13n321)). +* If we run a mutation with IN (subquery) like this: `ALTER TABLE t UPDATE col='new value' WHERE id IN (SELECT id FROM huge_table)` and the table `t` has multiple parts, then for each part a set for the subquery `SELECT id FROM huge_table` is built in memory. If there are many parts, this might consume a lot of memory (and lead to an OOM) and CPU. The solution is to introduce a short-lived cache of sets that are currently being built by mutation tasks. If another task of the same mutation is executed concurrently, it can look up the set in the cache, wait for it to be built and reuse it. [#46835](https://github.com/ClickHouse/ClickHouse/pull/46835) ([Alexander Gololobov](https://github.com/davenger)). +* Only check dependencies if necessary when applying `ALTER TABLE` queries. [#48062](https://github.com/ClickHouse/ClickHouse/pull/48062) ([Raúl Marín](https://github.com/Algunenano)). +* Optimize function `mapUpdate`. [#48118](https://github.com/ClickHouse/ClickHouse/pull/48118) ([Anton Popov](https://github.com/CurtizJ)). +* Now an internal query to the local replica is sent explicitly, and data from it is received through the loopback interface. The setting `prefer_localhost_replica` is not respected for parallel replicas. This is needed for better scheduling and makes the code cleaner: the initiator is only responsible for coordinating the reading process and merging results, continuously answering requests while all the secondary queries read the data. Note: using the loopback interface is not as performant, but otherwise some replicas could starve for tasks, which could lead to even slower query execution and not utilizing all possible resources. The initialization of the coordinator is now even lazier.
All incoming requests contain information about the reading algorithm, and the coordinator is initialized with it when the first request comes. If any replica decides to read with a different algorithm, an exception will be thrown and the query will be aborted. [#48246](https://github.com/ClickHouse/ClickHouse/pull/48246) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Do not build a set for the right side of an `IN` clause with a subquery when it is used only for the analysis of skip indexes and they are disabled by the setting `use_skip_indexes=0`. Previously it might affect the performance of queries. [#48299](https://github.com/ClickHouse/ClickHouse/pull/48299) ([Anton Popov](https://github.com/CurtizJ)). +* Query processing is parallelized right after reading `FROM file(...)`. Related to [#38755](https://github.com/ClickHouse/ClickHouse/issues/38755). [#48525](https://github.com/ClickHouse/ClickHouse/pull/48525) ([Igor Nikonov](https://github.com/devcrafter)). Query processing is parallelized right after reading from any data source. Affected data sources are mostly simple or external storages like table functions `url`, `file`. [#48727](https://github.com/ClickHouse/ClickHouse/pull/48727) ([Igor Nikonov](https://github.com/devcrafter)). This is controlled by the setting `parallelize_output_from_storages`, which is not enabled by default. +* Lowered contention of the ThreadPool mutex (may increase performance for a huge number of small jobs). [#48750](https://github.com/ClickHouse/ClickHouse/pull/48750) ([Sergei Trifonov](https://github.com/serxa)). +* Reduce memory usage for multiple `ALTER DELETE` mutations. [#48522](https://github.com/ClickHouse/ClickHouse/pull/48522) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Remove the excessive connection attempts if the `skip_unavailable_shards` setting is enabled. [#48771](https://github.com/ClickHouse/ClickHouse/pull/48771) ([Azat Khuzhin](https://github.com/azat)). + +#### Experimental Feature +* Entries in the query cache are now squashed to max_block_size and compressed. [#45912](https://github.com/ClickHouse/ClickHouse/pull/45912) ([Robert Schulze](https://github.com/rschu1ze)). +* It is now possible to define per-user quotas in the query cache. [#48284](https://github.com/ClickHouse/ClickHouse/pull/48284) ([Robert Schulze](https://github.com/rschu1ze)). +* Some fixes for parallel replicas. [#48433](https://github.com/ClickHouse/ClickHouse/pull/48433) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Implement zero-copy replication (an experimental feature) on encrypted disks. [#48741](https://github.com/ClickHouse/ClickHouse/pull/48741) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Improvement +* Increase the default value for `connect_timeout_with_failover_ms` to 1000 ms (because of adding async connections in https://github.com/ClickHouse/ClickHouse/pull/47229). Closes [#5188](https://github.com/ClickHouse/ClickHouse/issues/5188). [#49009](https://github.com/ClickHouse/ClickHouse/pull/49009) ([Kruglov Pavel](https://github.com/Avogar)). +* Several improvements around data lakes: - Make `Iceberg` work with non-partitioned data.
- Support `Iceberg` format version v2 (previously only v1 was supported) - Support reading partitioned data for `DeltaLake`/`Hudi` - Faster reading of `DeltaLake` metadata by using Delta's checkpoint files - Fixed incorrect `Hudi` reads: previously it incorrectly chose which data to read and therefore could read correctly only small tables - Made these engines pick up updates of changed data (previously the state was set on table creation) - Make proper testing for `Iceberg`/`DeltaLake`/`Hudi` using Spark. [#47307](https://github.com/ClickHouse/ClickHouse/pull/47307) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add async connection to socket and async writing to socket. Make creating connections and sending query/external tables async across shards. Refactor code with fibers. Closes [#46931](https://github.com/ClickHouse/ClickHouse/issues/46931). We will be able to increase `connect_timeout_with_failover_ms` by default after this PR (https://github.com/ClickHouse/ClickHouse/issues/5188). [#47229](https://github.com/ClickHouse/ClickHouse/pull/47229) ([Kruglov Pavel](https://github.com/Avogar)). +* Support config sections `keeper`/`keeper_server` as an alternative to `zookeeper`. Close [#34766](https://github.com/ClickHouse/ClickHouse/issues/34766), [#34767](https://github.com/ClickHouse/ClickHouse/issues/34767). [#35113](https://github.com/ClickHouse/ClickHouse/pull/35113) ([李扬](https://github.com/taiyang-li)). +* It is possible to set the _secure_ flag in named_collections for a dictionary with a ClickHouse table source. Addresses [#38450](https://github.com/ClickHouse/ClickHouse/issues/38450). [#46323](https://github.com/ClickHouse/ClickHouse/pull/46323) ([Ilya Golshtein](https://github.com/ilejn)). +* The `bitCount` function now supports the `FixedString` and `String` data types. [#49044](https://github.com/ClickHouse/ClickHouse/pull/49044) ([flynn](https://github.com/ucasfl)). +* Added configurable retries for all operations with [Zoo]Keeper for Backup queries. [#47224](https://github.com/ClickHouse/ClickHouse/pull/47224) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Enable `use_environment_credentials` for S3 by default, so the entire provider chain is constructed by default. [#47397](https://github.com/ClickHouse/ClickHouse/pull/47397) ([Antonio Andelic](https://github.com/antonio2368)). +* The JSON_VALUE function is now similar to Spark's get_json_object function, which supports getting a value from a JSON string by a path like '$.key'. There are still two differences (see the example below): 1. Spark's get_json_object returns null when the path does not exist, while JSON_VALUE returns an empty string; 2. Spark's get_json_object returns complex values, such as a JSON object/array, while JSON_VALUE returns an empty string. [#47494](https://github.com/ClickHouse/ClickHouse/pull/47494) ([KevinyhZou](https://github.com/KevinyhZou)). +* For `use_structure_from_insertion_table_in_table_functions`, more flexible propagation of the insert table structure to table functions. Fixed an issue with name mapping and using virtual columns. There is no more need for the 'auto' setting. [#47962](https://github.com/ClickHouse/ClickHouse/pull/47962) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Do not continue retrying to connect to Keeper if the query is killed or over limits. [#47985](https://github.com/ClickHouse/ClickHouse/pull/47985) ([Raúl Marín](https://github.com/Algunenano)).
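To make the JSON_VALUE behavior described above concrete, a small sketch (semantics as stated in the entry; all literals are invented):

```sql
SELECT JSON_VALUE('{"a": {"b": 1}}', '$.a.b'); -- '1': scalar values are returned as strings
SELECT JSON_VALUE('{"a": 1}', '$.missing');    -- '': Spark's get_json_object returns null here
SELECT JSON_VALUE('{"a": {"b": 1}}', '$.a');   -- '': Spark returns the complex value here
```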
+* Support Enum output/input in `BSONEachRow`, allow all map key types and avoid extra calculations on output. [#48122](https://github.com/ClickHouse/ClickHouse/pull/48122) ([Kruglov Pavel](https://github.com/Avogar)). +* Support more ClickHouse types in `ORC`/`Arrow`/`Parquet` formats: Enum(8|16), (U)Int(128|256), Decimal256 (for ORC), allow reading IPv4 from Int32 values (ORC outputs IPv4 as Int32, and we couldn't read it back), fix reading Nullable(IPv6) from binary data for `ORC`. [#48126](https://github.com/ClickHouse/ClickHouse/pull/48126) ([Kruglov Pavel](https://github.com/Avogar)). +* Add columns `perform_ttl_move_on_insert`, `load_balancing` to the table `system.storage_policies`, modify the type of the column `volume_type` to `Enum8`. [#48167](https://github.com/ClickHouse/ClickHouse/pull/48167) ([lizhuoyu5](https://github.com/lzydmxy)). +* Added support for the `BACKUP ALL` command, which backs up all tables and databases, including temporary and system ones. [#48189](https://github.com/ClickHouse/ClickHouse/pull/48189) ([Vitaly Baranov](https://github.com/vitlibar)). +* Function mapFromArrays supports `Map` type as an input. [#48207](https://github.com/ClickHouse/ClickHouse/pull/48207) ([李扬](https://github.com/taiyang-li)). +* The output of `SHOW PROCESSLIST` is now sorted. [#48241](https://github.com/ClickHouse/ClickHouse/pull/48241) ([Robert Schulze](https://github.com/rschu1ze)). +* Per-query/per-server throttling for remote IO/local IO/BACKUPs (server settings: `max_remote_read_network_bandwidth_for_server`, `max_remote_write_network_bandwidth_for_server`, `max_local_read_bandwidth_for_server`, `max_local_write_bandwidth_for_server`, `max_backup_bandwidth_for_server`; settings: `max_remote_read_network_bandwidth`, `max_remote_write_network_bandwidth`, `max_local_read_bandwidth`, `max_local_write_bandwidth`, `max_backup_bandwidth`). [#48242](https://github.com/ClickHouse/ClickHouse/pull/48242) ([Azat Khuzhin](https://github.com/azat)). +* Support more types in the `CapnProto` format: Map, (U)Int(128|256), Decimal(128|256). Allow integer conversions during input/output. [#48257](https://github.com/ClickHouse/ClickHouse/pull/48257) ([Kruglov Pavel](https://github.com/Avogar)). +* Don't throw CURRENT_WRITE_BUFFER_IS_EXHAUSTED for normal behaviour. [#48288](https://github.com/ClickHouse/ClickHouse/pull/48288) ([Raúl Marín](https://github.com/Algunenano)). +* Add a new setting `keeper_map_strict_mode` which enforces extra guarantees on operations made on top of `KeeperMap` tables. [#48293](https://github.com/ClickHouse/ClickHouse/pull/48293) ([Antonio Andelic](https://github.com/antonio2368)). +* Check that the primary key type for a simple dictionary is a native unsigned integer type. Added the setting `check_dictionary_primary_key` for compatibility (set `check_dictionary_primary_key = false` to disable the check). [#48335](https://github.com/ClickHouse/ClickHouse/pull/48335) ([lizhuoyu5](https://github.com/lzydmxy)). +* Don't replicate mutations for `KeeperMap` because it's unnecessary. [#48354](https://github.com/ClickHouse/ClickHouse/pull/48354) ([Antonio Andelic](https://github.com/antonio2368)). +* Allow writing/reading an unnamed tuple as a nested Message in the Protobuf format. Tuple elements and Message fields are matched by position. [#48390](https://github.com/ClickHouse/ClickHouse/pull/48390) ([Kruglov Pavel](https://github.com/Avogar)). +* Support `additional_table_filters` and `additional_result_filter` settings in the new planner. Also, add a documentation entry for `additional_result_filter`.
[#48405](https://github.com/ClickHouse/ClickHouse/pull/48405) ([Dmitry Novik](https://github.com/novikd)). +* `parseDateTime` now understands format string '%f' (fractional seconds). [#48420](https://github.com/ClickHouse/ClickHouse/pull/48420) ([Robert Schulze](https://github.com/rschu1ze)). +* Format string "%f" in formatDateTime() now prints "000000" if the formatted value has no fractional seconds; the previous behavior (single zero) can be restored using the setting "formatdatetime_f_prints_single_zero = 1". [#48422](https://github.com/ClickHouse/ClickHouse/pull/48422) ([Robert Schulze](https://github.com/rschu1ze)). +* Don't replicate DELETE and TRUNCATE for KeeperMap. [#48434](https://github.com/ClickHouse/ClickHouse/pull/48434) ([Antonio Andelic](https://github.com/antonio2368)). +* Generate valid Decimals and Bools in the generateRandom function. [#48436](https://github.com/ClickHouse/ClickHouse/pull/48436) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow trailing commas in the expression list of a SELECT query, for example `SELECT a, b, c, FROM table`. Closes [#37802](https://github.com/ClickHouse/ClickHouse/issues/37802). [#48438](https://github.com/ClickHouse/ClickHouse/pull/48438) ([Nikolay Degterinsky](https://github.com/evillique)). +* Override the `CLICKHOUSE_USER` and `CLICKHOUSE_PASSWORD` environment variables with the `--user` and `--password` client parameters. Closes [#38909](https://github.com/ClickHouse/ClickHouse/issues/38909). [#48440](https://github.com/ClickHouse/ClickHouse/pull/48440) ([Nikolay Degterinsky](https://github.com/evillique)). +* Added retries to the loading of data parts in `MergeTree` tables in case of retryable errors. [#48442](https://github.com/ClickHouse/ClickHouse/pull/48442) ([Anton Popov](https://github.com/CurtizJ)). +* Add support for the `Date`, `Date32`, `DateTime`, `DateTime64` data types to the `arrayMin`, `arrayMax`, `arrayDifference` functions. Closes [#21645](https://github.com/ClickHouse/ClickHouse/issues/21645). [#48445](https://github.com/ClickHouse/ClickHouse/pull/48445) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add support for the `{server_uuid}` macro. It is useful for identifying replicas in autoscaled clusters when new replicas are constantly added and removed at runtime. This closes [#48554](https://github.com/ClickHouse/ClickHouse/issues/48554). [#48563](https://github.com/ClickHouse/ClickHouse/pull/48563) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The installation script will create a hard link instead of copying if it is possible. [#48578](https://github.com/ClickHouse/ClickHouse/pull/48578) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support `SHOW TABLE` syntax meaning the same as `SHOW CREATE TABLE`. Closes [#48580](https://github.com/ClickHouse/ClickHouse/issues/48580). [#48591](https://github.com/ClickHouse/ClickHouse/pull/48591) ([flynn](https://github.com/ucasfl)). +* HTTP temporary buffers now support working by evicting data from the virtual filesystem cache. [#48664](https://github.com/ClickHouse/ClickHouse/pull/48664) ([Vladimir C](https://github.com/vdimir)). +* Make schema inference work for `CREATE AS SELECT`. Closes [#47599](https://github.com/ClickHouse/ClickHouse/issues/47599). [#48679](https://github.com/ClickHouse/ClickHouse/pull/48679) ([flynn](https://github.com/ucasfl)). +* Added a `replicated_max_mutations_in_one_entry` setting for `ReplicatedMergeTree` that allows limiting the number of mutation commands per one `MUTATE_PART` entry (default is 10000).
[#48731](https://github.com/ClickHouse/ClickHouse/pull/48731) ([Alexander Tokmakov](https://github.com/tavplubix)). +* In AggregateFunction types, don't count unused arena bytes as `read_bytes`. [#48745](https://github.com/ClickHouse/ClickHouse/pull/48745) ([Raúl Marín](https://github.com/Algunenano)). +* Fix some MySQL-related settings not being handled with the MySQL dictionary source + named collection. Closes [#48402](https://github.com/ClickHouse/ClickHouse/issues/48402). [#48759](https://github.com/ClickHouse/ClickHouse/pull/48759) ([Kseniia Sumarokova](https://github.com/kssenii)). +* If a user set `max_single_part_upload_size` to a very large value, it could lead to a crash due to a bug in the AWS S3 SDK. This fixes [#47679](https://github.com/ClickHouse/ClickHouse/issues/47679). [#48816](https://github.com/ClickHouse/ClickHouse/pull/48816) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix data race in `RabbitMQ` ([report](https://pastila.nl/?004f7100/de1505289ab5bb355e67ebe6c7cc8707)), refactor the code. [#48845](https://github.com/ClickHouse/ClickHouse/pull/48845) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add aliases `name` and `part_name` for `system.parts` and `system.part_log`. Closes [#48718](https://github.com/ClickHouse/ClickHouse/issues/48718). [#48850](https://github.com/ClickHouse/ClickHouse/pull/48850) ([sichenzhao](https://github.com/sichenzhao)). +* Functions "arrayDifference()", "arrayCumSum()" and "arrayCumSumNonNegative()" now support input arrays of wide integer types (U)Int128/256. [#48866](https://github.com/ClickHouse/ClickHouse/pull/48866) ([cluster](https://github.com/infdahai)). +* Multi-line history in clickhouse-client is now no longer padded. This makes pasting more natural. [#48870](https://github.com/ClickHouse/ClickHouse/pull/48870) ([Joanna Hulboj](https://github.com/jh0x)). +* Implement a slight improvement for the rare case when ClickHouse is run inside LXC and LXCFS is used. LXCFS has an issue: sometimes it returns the error "Transport endpoint is not connected" on reading from a file inside `/proc`. This error was correctly logged into ClickHouse's server log. We have additionally worked around this issue by reopening the file. This is a minuscule change. [#48922](https://github.com/ClickHouse/ClickHouse/pull/48922) ([Real](https://github.com/RunningXie)). +* Improve memory accounting for prefetches. Randomise prefetch settings in CI. [#48973](https://github.com/ClickHouse/ClickHouse/pull/48973) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Correctly set headers for native copy operations on GCS. [#48981](https://github.com/ClickHouse/ClickHouse/pull/48981) ([Antonio Andelic](https://github.com/antonio2368)). +* Add support for specifying setting names in the command line with dashes instead of underscores, for example, `--max-threads` instead of `--max_threads`. Additionally, support Unicode dash characters like `—` instead of `--`: this is useful when you communicate with a team in another company, and a manager from that team copy-pasted code from MS Word. [#48985](https://github.com/ClickHouse/ClickHouse/pull/48985) ([alekseygolub](https://github.com/alekseygolub)). +* Add fallback to password authentication when authentication with an SSL user certificate has failed. Closes [#48974](https://github.com/ClickHouse/ClickHouse/issues/48974). [#48989](https://github.com/ClickHouse/ClickHouse/pull/48989) ([Nikolay Degterinsky](https://github.com/evillique)). +* Improve the embedded dashboard.
Close [#46671](https://github.com/ClickHouse/ClickHouse/issues/46671). [#49036](https://github.com/ClickHouse/ClickHouse/pull/49036) ([Kevin Zhang](https://github.com/Kinzeng)). +* Add profile events for log messages, so you can easily see the count of log messages by severity (a query sketch follows below). [#49042](https://github.com/ClickHouse/ClickHouse/pull/49042) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* In previous versions, the `LineAsString` format worked inconsistently depending on whether parallel parsing was enabled, in the presence of DOS or macOS Classic line breaks. This closes [#49039](https://github.com/ClickHouse/ClickHouse/issues/49039). [#49052](https://github.com/ClickHouse/ClickHouse/pull/49052) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The exception message about the unparsed query parameter will also mention the name of the parameter. Reimplement [#48878](https://github.com/ClickHouse/ClickHouse/issues/48878). Close [#48772](https://github.com/ClickHouse/ClickHouse/issues/48772). [#49061](https://github.com/ClickHouse/ClickHouse/pull/49061) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Build/Testing/Packaging Improvement +* Update time zones. The following were updated: Africa/Cairo, Africa/Casablanca, Africa/El_Aaiun, America/Bogota, America/Cambridge_Bay, America/Ciudad_Juarez, America/Godthab, America/Inuvik, America/Iqaluit, America/Nuuk, America/Ojinaga, America/Pangnirtung, America/Rankin_Inlet, America/Resolute, America/Whitehorse, America/Yellowknife, Asia/Gaza, Asia/Hebron, Asia/Kuala_Lumpur, Asia/Singapore, Canada/Yukon, Egypt, Europe/Kirov, Europe/Volgograd, Singapore. [#48572](https://github.com/ClickHouse/ClickHouse/pull/48572) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Reduce the number of dependencies in the header files to speed up the build. [#47984](https://github.com/ClickHouse/ClickHouse/pull/47984) ([Dmitry Novik](https://github.com/novikd)). +* Randomize compression of marks and indices in tests. [#48286](https://github.com/ClickHouse/ClickHouse/pull/48286) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Bump internal ZSTD from 1.5.4 to 1.5.5. [#46797](https://github.com/ClickHouse/ClickHouse/pull/46797) ([Robert Schulze](https://github.com/rschu1ze)). +* Randomize vertical merges from compact to wide parts in tests. [#48287](https://github.com/ClickHouse/ClickHouse/pull/48287) ([Raúl Marín](https://github.com/Algunenano)). +* Support for the CRC32 checksum in HDFS. Fix performance issues. [#48614](https://github.com/ClickHouse/ClickHouse/pull/48614) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove remainders of GCC support. [#48671](https://github.com/ClickHouse/ClickHouse/pull/48671) ([Robert Schulze](https://github.com/rschu1ze)). +* Add a CI run with the new analyzer infrastructure enabled. [#48719](https://github.com/ClickHouse/ClickHouse/pull/48719) ([Dmitry Novik](https://github.com/novikd)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix system.query_views_log for MVs that are pushed from background threads [#46668](https://github.com/ClickHouse/ClickHouse/pull/46668) ([Azat Khuzhin](https://github.com/azat)). +* Fix several `RENAME COLUMN` bugs [#46946](https://github.com/ClickHouse/ClickHouse/pull/46946) ([alesapin](https://github.com/alesapin)). +* Fix minor highlighting issues in clickhouse-format [#47610](https://github.com/ClickHouse/ClickHouse/pull/47610) ([Natasha Murashkina](https://github.com/murfel)).
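The per-severity log-message counters mentioned above should be queryable like any other profile event; a sketch, assuming they surface in `system.events` under names starting with `Log` (the exact event names are an assumption):

```sql
-- Count of log messages by severity since server start (event names assumed).
SELECT event, value
FROM system.events
WHERE event LIKE 'Log%'
ORDER BY value DESC;
```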
+* Fix a bug in LLVM's libc++ leading to a crash when uploading parts to S3 whose size is greater than INT_MAX [#47693](https://github.com/ClickHouse/ClickHouse/pull/47693) ([Azat Khuzhin](https://github.com/azat)). +* Fix overflow in the `sparkbar` function [#48121](https://github.com/ClickHouse/ClickHouse/pull/48121) ([Vladimir C](https://github.com/vdimir)). +* Fix race in S3 [#48190](https://github.com/ClickHouse/ClickHouse/pull/48190) ([Anton Popov](https://github.com/CurtizJ)). +* Disable JIT for aggregate functions due to inconsistent behavior [#48195](https://github.com/ClickHouse/ClickHouse/pull/48195) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix alter formatting (minor) [#48289](https://github.com/ClickHouse/ClickHouse/pull/48289) ([Natasha Murashkina](https://github.com/murfel)). +* Fix CPU usage in RabbitMQ (was worsened in 23.2 after [#44404](https://github.com/ClickHouse/ClickHouse/issues/44404)) [#48311](https://github.com/ClickHouse/ClickHouse/pull/48311) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix crash in EXPLAIN PIPELINE for Merge over Distributed [#48320](https://github.com/ClickHouse/ClickHouse/pull/48320) ([Azat Khuzhin](https://github.com/azat)). +* Fix serializing LowCardinality as Arrow dictionary [#48361](https://github.com/ClickHouse/ClickHouse/pull/48361) ([Kruglov Pavel](https://github.com/Avogar)). +* Reset downloader for cache file segment in TemporaryFileStream [#48386](https://github.com/ClickHouse/ClickHouse/pull/48386) ([Vladimir C](https://github.com/vdimir)). +* Fix possible SYSTEM SYNC REPLICA getting stuck in case of DROP/REPLACE PARTITION [#48391](https://github.com/ClickHouse/ClickHouse/pull/48391) ([Azat Khuzhin](https://github.com/azat)). +* Fix a startup error when loading a distributed table that depends on a dictionary [#48419](https://github.com/ClickHouse/ClickHouse/pull/48419) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Don't check dependencies when renaming system tables automatically [#48431](https://github.com/ClickHouse/ClickHouse/pull/48431) ([Raúl Marín](https://github.com/Algunenano)). +* Update only affected rows in KeeperMap storage [#48435](https://github.com/ClickHouse/ClickHouse/pull/48435) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix possible segfault in the VFS cache [#48469](https://github.com/ClickHouse/ClickHouse/pull/48469) ([Kseniia Sumarokova](https://github.com/kssenii)). +* The `toTimeZone` function now throws an error when no constant string is provided [#48471](https://github.com/ClickHouse/ClickHouse/pull/48471) ([Jordi Villar](https://github.com/jrdi)). +* Fix logical error with IPv4 in Protobuf, add support for Date32 [#48486](https://github.com/ClickHouse/ClickHouse/pull/48486) ([Kruglov Pavel](https://github.com/Avogar)). +* The "changed" flag in system.settings was calculated incorrectly for settings with multiple values [#48516](https://github.com/ClickHouse/ClickHouse/pull/48516) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Fix storage `Memory` with enabled compression [#48517](https://github.com/ClickHouse/ClickHouse/pull/48517) ([Anton Popov](https://github.com/CurtizJ)). +* Fix bracketed-paste mode messing up password input in the event of client reconnection [#48528](https://github.com/ClickHouse/ClickHouse/pull/48528) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix nested map for keys of IP and UUID types [#48556](https://github.com/ClickHouse/ClickHouse/pull/48556) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Fix an uncaught exception in case of parallel loader for hashed dictionaries [#48571](https://github.com/ClickHouse/ClickHouse/pull/48571) ([Azat Khuzhin](https://github.com/azat)). +* The `groupArray` aggregate function now works correctly for an empty result over nullable types [#48593](https://github.com/ClickHouse/ClickHouse/pull/48593) ([lgbo](https://github.com/lgbo-ustc)). +* Fix a bug in Keeper when a node was sometimes not created with scheme `auth` in its ACL. [#48595](https://github.com/ClickHouse/ClickHouse/pull/48595) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Allow IPv4 comparison operators with UInt [#48611](https://github.com/ClickHouse/ClickHouse/pull/48611) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix possible error from cache [#48636](https://github.com/ClickHouse/ClickHouse/pull/48636) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Async inserts with empty data will no longer throw an exception. [#48663](https://github.com/ClickHouse/ClickHouse/pull/48663) ([Anton Popov](https://github.com/CurtizJ)). +* Fix table dependencies in case of failed RENAME TABLE [#48683](https://github.com/ClickHouse/ClickHouse/pull/48683) ([Azat Khuzhin](https://github.com/azat)). +* If the primary key has duplicate columns (which is only possible for projections), in previous versions it might lead to a bug [#48838](https://github.com/ClickHouse/ClickHouse/pull/48838) ([Amos Bird](https://github.com/amosbird)). +* Fix for a race condition in ZooKeeper when joining send_thread/receive_thread [#48849](https://github.com/ClickHouse/ClickHouse/pull/48849) ([Alexander Gololobov](https://github.com/davenger)). +* Fix unexpected part name error when trying to drop an ignored detached part with zero copy replication [#48862](https://github.com/ClickHouse/ClickHouse/pull/48862) ([Michael Lex](https://github.com/mlex)). +* Fix reading a `Date32` Parquet/Arrow column into a non-`Date32` column [#48864](https://github.com/ClickHouse/ClickHouse/pull/48864) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix `UNKNOWN_IDENTIFIER` error while selecting from a table with a row policy and a column with dots [#48976](https://github.com/ClickHouse/ClickHouse/pull/48976) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix aggregation by empty nullable strings [#48999](https://github.com/ClickHouse/ClickHouse/pull/48999) ([LiuNeng](https://github.com/liuneng1994)). + +### ClickHouse release 23.3 LTS, 2023-03-30 + +#### Upgrade Notes +* Lightweight DELETEs are production ready and enabled by default. The `DELETE` query for MergeTree tables is now available by default (see the example below). +* The behavior of the `*domain*RFC` and `netloc` functions is slightly changed: relaxed the set of symbols that are allowed in the URL authority for better conformance. [#46841](https://github.com/ClickHouse/ClickHouse/pull/46841) ([Azat Khuzhin](https://github.com/azat)). +* Prohibited creating tables based on KafkaEngine with DEFAULT/EPHEMERAL/ALIAS/MATERIALIZED statements for columns. [#47138](https://github.com/ClickHouse/ClickHouse/pull/47138) ([Aleksandr Musorin](https://github.com/AVMusorin)). +* The "asynchronous connection drain" feature is removed. Related settings and metrics are removed as well. It was an internal feature, so the removal should not affect users who had never heard about that feature. [#47486](https://github.com/ClickHouse/ClickHouse/pull/47486) ([Alexander Tokmakov](https://github.com/tavplubix)).
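A minimal sketch of the lightweight DELETE from the first upgrade note; `orders` stands for a hypothetical MergeTree table:

```sql
-- Lightweight DELETE marks rows as deleted instead of rewriting whole parts;
-- it is now enabled by default for MergeTree tables.
DELETE FROM orders WHERE status = 'cancelled';
```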
+* Support the 256-bit Decimal data type (more than 38 digits) in `arraySum`/`Min`/`Max`/`Avg`/`Product`, `arrayCumSum`/`CumSumNonNegative`, `arrayDifference`, array construction, the IN operator, query parameters, `groupArrayMovingSum`, statistical functions, `min`/`max`/`any`/`argMin`/`argMax`, the PostgreSQL wire protocol, the MySQL table engine and function, `sumMap`, `mapAdd`, `mapSubtract`, `arrayIntersect`. Add support for big integers in `arrayIntersect`. Statistical aggregate functions involving moments (such as `corr` or various `TTest`s) will use `Float64` as their internal representation (they were using `Decimal128` before this change, but it was pointless), and these functions can return `nan` instead of `inf` in case of infinite variance. Some functions were allowed on `Decimal256` data types but returned `Decimal128` in previous versions; now it is fixed. This closes [#47569](https://github.com/ClickHouse/ClickHouse/issues/47569). This closes [#44864](https://github.com/ClickHouse/ClickHouse/issues/44864). This closes [#28335](https://github.com/ClickHouse/ClickHouse/issues/28335). [#47594](https://github.com/ClickHouse/ClickHouse/pull/47594) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Make backup_threads/restore_threads server settings (instead of user settings). [#47881](https://github.com/ClickHouse/ClickHouse/pull/47881) ([Azat Khuzhin](https://github.com/azat)). +* Do not allow const and non-deterministic secondary indices [#46839](https://github.com/ClickHouse/ClickHouse/pull/46839) ([Anton Popov](https://github.com/CurtizJ)). + +#### New Feature +* Add a new mode for splitting the work on replicas using the settings `parallel_replicas_custom_key` and `parallel_replicas_custom_key_filter_type`. If the cluster consists of a single shard with multiple replicas, up to `max_parallel_replicas` will be randomly picked and turned into shards. For each shard, a corresponding filter is added to the query on the initiator before being sent to the shard. If the cluster consists of multiple shards, it will behave the same as `sample_key` but with the possibility to define an arbitrary key. [#45108](https://github.com/ClickHouse/ClickHouse/pull/45108) ([Antonio Andelic](https://github.com/antonio2368)). +* An option to display a partial result on cancel: Added query setting `partial_result_on_first_cancel` allowing a canceled query (e.g. due to Ctrl-C) to return a partial result. [#45689](https://github.com/ClickHouse/ClickHouse/pull/45689) ([Alexey Perevyshin](https://github.com/alexX512)). +* Added support for arbitrary table engines for temporary tables (except for Replicated and KeeperMap engines). Close [#31497](https://github.com/ClickHouse/ClickHouse/issues/31497). [#46071](https://github.com/ClickHouse/ClickHouse/pull/46071) ([Roman Vasin](https://github.com/rvasin)). +* Add support for replication of user-defined SQL functions using centralized storage in Keeper. [#46085](https://github.com/ClickHouse/ClickHouse/pull/46085) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Implement `system.server_settings` (similar to `system.settings`), which will contain server configurations. [#46550](https://github.com/ClickHouse/ClickHouse/pull/46550) ([pufit](https://github.com/pufit)). +* Support for the `UNDROP TABLE` query. Closes [#46811](https://github.com/ClickHouse/ClickHouse/issues/46811). [#47241](https://github.com/ClickHouse/ClickHouse/pull/47241) ([chen](https://github.com/xiedeyantu)). +* Allow separate grants for named collections (e.g.
to be able to give `SHOW/CREATE/ALTER/DROP named collection` access only to certain collections, instead of all at once). Closes [#40894](https://github.com/ClickHouse/ClickHouse/issues/40894). Added a new access type `NAMED_COLLECTION_CONTROL`, which is not given to the `default` user unless explicitly added to the user config (it is required to be able to do `GRANT ALL`). Also, `show_named_collections` no longer has to be specified manually for the `default` user to have full access rights, as was the case in 23.2. [#46241](https://github.com/ClickHouse/ClickHouse/pull/46241) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow nested custom disks. Previously custom disks supported only a flat disk structure. [#47106](https://github.com/ClickHouse/ClickHouse/pull/47106) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Introduce a function `widthBucket` (with a `WIDTH_BUCKET` alias for compatibility; see the sketch below). [#42974](https://github.com/ClickHouse/ClickHouse/issues/42974). [#46790](https://github.com/ClickHouse/ClickHouse/pull/46790) ([avoiderboi](https://github.com/avoiderboi)). +* Add new functions `parseDateTime`/`parseDateTimeInJodaSyntax` that parse a string according to a specified format string: parseDateTime parses String to DateTime in MySQL syntax, parseDateTimeInJodaSyntax parses in Joda syntax. [#46815](https://github.com/ClickHouse/ClickHouse/pull/46815) ([李扬](https://github.com/taiyang-li)). +* Use `dummy UInt8` for the default structure of table function `null`. Closes [#46930](https://github.com/ClickHouse/ClickHouse/issues/46930). [#47006](https://github.com/ClickHouse/ClickHouse/pull/47006) ([flynn](https://github.com/ucasfl)). +* Support for a date format with a comma, like `Dec 15, 2021`, in the `parseDateTimeBestEffort` function. Closes [#46816](https://github.com/ClickHouse/ClickHouse/issues/46816). [#47071](https://github.com/ClickHouse/ClickHouse/pull/47071) ([chen](https://github.com/xiedeyantu)). +* Add settings `http_wait_end_of_query` and `http_response_buffer_size` that correspond to the URL params `wait_end_of_query` and `buffer_size` for the HTTP interface. This allows changing these settings in the profiles. [#47108](https://github.com/ClickHouse/ClickHouse/pull/47108) ([Vladimir C](https://github.com/vdimir)). +* Add a `system.dropped_tables` table that shows tables that were dropped from `Atomic` databases but were not completely removed yet. [#47364](https://github.com/ClickHouse/ClickHouse/pull/47364) ([chen](https://github.com/xiedeyantu)). +* Add `INSTR` as an alias of `positionCaseInsensitive` for MySQL compatibility. Closes [#47529](https://github.com/ClickHouse/ClickHouse/issues/47529). [#47535](https://github.com/ClickHouse/ClickHouse/pull/47535) ([flynn](https://github.com/ucasfl)). +* Added a `toDecimalString` function allowing conversion of numbers to a string with fixed precision. [#47838](https://github.com/ClickHouse/ClickHouse/pull/47838) ([Andrey Zvonov](https://github.com/zvonand)). +* Add a merge tree setting `max_number_of_mutations_for_replica`. It limits the number of part mutations per replica to the specified amount. Zero means no limit on the number of mutations per replica (the execution can still be constrained by other settings). [#48047](https://github.com/ClickHouse/ClickHouse/pull/48047) ([Vladimir C](https://github.com/vdimir)). +* Add the Map-related function `mapFromArrays`, which allows the creation of a map from a pair of arrays. [#31125](https://github.com/ClickHouse/ClickHouse/pull/31125) ([李扬](https://github.com/taiyang-li)).
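A short sketch of two of the functions above, with invented literals (`widthBucket(operand, low, high, count)` follows the compatibility alias named in the entry):

```sql
-- Build a Map from parallel key and value arrays.
SELECT mapFromArrays(['k1', 'k2'], [1, 2]) AS m;  -- {'k1':1,'k2':2}

-- Equal-width histogram bucket number: 10.15 falls into the 3rd of 4 buckets over [0, 20).
SELECT widthBucket(10.15, 0, 20, 4);              -- 3
```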
+* Allow control of compression in Parquet/ORC/Arrow output formats, and add support for more compression input formats. This closes [#13541](https://github.com/ClickHouse/ClickHouse/issues/13541). [#47114](https://github.com/ClickHouse/ClickHouse/pull/47114) ([Kruglov Pavel](https://github.com/Avogar)). +* Add SSL User Certificate authentication to the native protocol. Closes [#47077](https://github.com/ClickHouse/ClickHouse/issues/47077). [#47596](https://github.com/ClickHouse/ClickHouse/pull/47596) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add *OrNull() and *OrZero() variants for `parseDateTime`, add alias `str_to_date` for MySQL parity. [#48000](https://github.com/ClickHouse/ClickHouse/pull/48000) ([Robert Schulze](https://github.com/rschu1ze)). +* Added operator `REGEXP` (similar to operators "LIKE", "IN", "MOD" etc.) for better compatibility with MySQL [#47869](https://github.com/ClickHouse/ClickHouse/pull/47869) ([Robert Schulze](https://github.com/rschu1ze)). + +#### Performance Improvement +* Marks in memory are now compressed, using 3-6x less memory. [#47290](https://github.com/ClickHouse/ClickHouse/pull/47290) ([Michael Kolupaev](https://github.com/al13n321)). +* Backups for large numbers of files were unbelievably slow in previous versions. Not anymore. Now they are unbelievably fast. [#47251](https://github.com/ClickHouse/ClickHouse/pull/47251) ([Alexey Milovidov](https://github.com/alexey-milovidov)). Introduced a separate thread pool for backup IO operations. This will allow scaling it independently of other pools and increase performance. [#47174](https://github.com/ClickHouse/ClickHouse/pull/47174) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). Use MultiRead request and retries for collecting metadata at the final stage of backup processing. [#47243](https://github.com/ClickHouse/ClickHouse/pull/47243) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). If the backup and the restored data are both in S3, then server-side copy is used from now on. [#47546](https://github.com/ClickHouse/ClickHouse/pull/47546) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fixed excessive reading in queries with `FINAL`. [#47801](https://github.com/ClickHouse/ClickHouse/pull/47801) ([Nikita Taranov](https://github.com/nickitat)). +* The setting `max_final_threads` is now set to the number of cores at server startup (by the same algorithm as used for `max_threads`). This improves the concurrency of `final` execution on servers with a high number of CPUs. [#47915](https://github.com/ClickHouse/ClickHouse/pull/47915) ([Nikita Taranov](https://github.com/nickitat)). +* Allow executing the reading pipeline for a DIRECT dictionary with a CLICKHOUSE source in multiple threads. To enable, set `dictionary_use_async_executor=1` in the `SETTINGS` section for the source in the `CREATE DICTIONARY` statement. [#47986](https://github.com/ClickHouse/ClickHouse/pull/47986) ([Vladimir C](https://github.com/vdimir)). +* Optimize the performance of aggregation with a single nullable key. [#45772](https://github.com/ClickHouse/ClickHouse/pull/45772) ([LiuNeng](https://github.com/liuneng1994)). +* Implemented lowercase `tokenbf_v1` index utilization for `hasTokenOrNull`, `hasTokenCaseInsensitive` and `hasTokenCaseInsensitiveOrNull`. [#46252](https://github.com/ClickHouse/ClickHouse/pull/46252) ([ltrk2](https://github.com/ltrk2)). +* Optimize functions `position` and `LIKE` by searching the first two chars using SIMD.
[#46289](https://github.com/ClickHouse/ClickHouse/pull/46289) ([Jiebin Sun](https://github.com/jiebinn)). +* Optimize queries from `system.detached_parts`, which could be significantly large. Added several sources with respect to the block size limitation; in each block, an IO thread pool is used to calculate the part size, i.e. to make syscalls in parallel. [#46624](https://github.com/ClickHouse/ClickHouse/pull/46624) ([Sema Checherinda](https://github.com/CheSema)). +* Increase the default value of `max_replicated_merges_in_queue` for ReplicatedMergeTree tables from 16 to 1000. It allows faster background merge operations on clusters with a very large number of replicas, such as clusters with shared storage in ClickHouse Cloud. [#47050](https://github.com/ClickHouse/ClickHouse/pull/47050) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Updated `clickhouse-copier` to use `GROUP BY` instead of `DISTINCT` to get the list of partitions. For large tables, this reduced the select time from over 500s to under 1s. [#47386](https://github.com/ClickHouse/ClickHouse/pull/47386) ([Clayton McClure](https://github.com/cmcclure-twilio)). +* Fix performance degradation in `ASOF JOIN`. [#47544](https://github.com/ClickHouse/ClickHouse/pull/47544) ([Ongkong](https://github.com/ongkong)). +* Even more batching in Keeper. Improve performance by avoiding breaking batches on read requests. [#47978](https://github.com/ClickHouse/ClickHouse/pull/47978) ([Antonio Andelic](https://github.com/antonio2368)). +* Allow PREWHERE for Merge with different DEFAULT expressions for columns. [#46831](https://github.com/ClickHouse/ClickHouse/pull/46831) ([Azat Khuzhin](https://github.com/azat)). + +#### Experimental Feature +* Parallel replicas: Improved the overall performance by better utilizing the local replica, and forbid reading with parallel replicas from non-replicated MergeTree by default. [#47858](https://github.com/ClickHouse/ClickHouse/pull/47858) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Support filter push-down to the left table for JOIN with `Join`, `Dictionary` and `EmbeddedRocksDB` tables if the experimental Analyzer is enabled. [#47280](https://github.com/ClickHouse/ClickHouse/pull/47280) ([Maksim Kita](https://github.com/kitaisreal)). +* Now ReplicatedMergeTree with zero copy replication puts less load on Keeper. [#47676](https://github.com/ClickHouse/ClickHouse/pull/47676) ([alesapin](https://github.com/alesapin)). +* Fix create materialized view with MaterializedPostgreSQL [#40807](https://github.com/ClickHouse/ClickHouse/pull/40807) ([Maksim Buren](https://github.com/maks-buren630501)). + +#### Improvement +* Enable `input_format_json_ignore_unknown_keys_in_named_tuple` by default. [#46742](https://github.com/ClickHouse/ClickHouse/pull/46742) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow errors to be ignored while pushing to a MATERIALIZED VIEW (added new setting `materialized_views_ignore_errors`, `false` by default, but it is set to `true` for flushing logs to `system.*_log` tables unconditionally). [#46658](https://github.com/ClickHouse/ClickHouse/pull/46658) ([Azat Khuzhin](https://github.com/azat)). +* Track the file queue of distributed sends in memory. [#45491](https://github.com/ClickHouse/ClickHouse/pull/45491) ([Azat Khuzhin](https://github.com/azat)). +* Now the `X-ClickHouse-Query-Id` and `X-ClickHouse-Timezone` headers are added to responses in all queries via the HTTP protocol. Previously it was done only for `SELECT` queries.
[#46364](https://github.com/ClickHouse/ClickHouse/pull/46364) ([Anton Popov](https://github.com/CurtizJ)). +* External tables from `MongoDB`: support for connection to a replica set via a URI with a host:port enum and support for the readPreference option in MongoDB dictionaries. Example URI: mongodb://db0.example.com:27017,db1.example.com:27017,db2.example.com:27017/?replicaSet=myRepl&readPreference=primary. [#46524](https://github.com/ClickHouse/ClickHouse/pull/46524) ([artem-yadr](https://github.com/artem-yadr)). +* This improvement should be invisible to users. Re-implement projection analysis on top of the query plan. Added setting `query_plan_optimize_projection=1` to switch between the old and new versions. Fixes [#44963](https://github.com/ClickHouse/ClickHouse/issues/44963). [#46537](https://github.com/ClickHouse/ClickHouse/pull/46537) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Use Parquet format v2 instead of v1 in the output format by default. Add setting `output_format_parquet_version` to control the parquet version; possible values are `1.0`, `2.4`, `2.6`, `2.latest` (default). [#46617](https://github.com/ClickHouse/ClickHouse/pull/46617) ([Kruglov Pavel](https://github.com/Avogar)). +* It is now possible to use the new configuration syntax to configure Kafka topics with periods (`.`) in their name. [#46752](https://github.com/ClickHouse/ClickHouse/pull/46752) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix heuristics that check hyperscan patterns for problematic repeats. [#46819](https://github.com/ClickHouse/ClickHouse/pull/46819) ([Robert Schulze](https://github.com/rschu1ze)). +* Don't report "ZK node exists" to system.errors when a block was created concurrently by a different replica. [#46820](https://github.com/ClickHouse/ClickHouse/pull/46820) ([Raúl Marín](https://github.com/Algunenano)). +* Increase the limit for opened files in `clickhouse-local`. It will be able to read from `web` tables on servers with a huge number of CPU cores. Do not back off reading from the URL table engine in case of too many opened files. This closes [#46852](https://github.com/ClickHouse/ClickHouse/issues/46852). [#46853](https://github.com/ClickHouse/ClickHouse/pull/46853) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Exceptions thrown when numbers cannot be parsed now have an easier-to-read exception message. [#46917](https://github.com/ClickHouse/ClickHouse/pull/46917) ([Robert Schulze](https://github.com/rschu1ze)). +* Added an update of `system.backups` after every processed task to track the progress of backups. [#46989](https://github.com/ClickHouse/ClickHouse/pull/46989) ([Aleksandr Musorin](https://github.com/AVMusorin)). +* Allow type conversions in the Native input format. Added setting `input_format_native_allow_types_conversion` that controls it (enabled by default). [#46990](https://github.com/ClickHouse/ClickHouse/pull/46990) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow IPv4 in the `range` function to generate IP ranges. [#46995](https://github.com/ClickHouse/ClickHouse/pull/46995) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Improve the exception message when it's impossible to move a part from one volume/disk to another. [#47032](https://github.com/ClickHouse/ClickHouse/pull/47032) ([alesapin](https://github.com/alesapin)). +* Support the `Bool` type in the `JSONType` function. Previously the `Null` type was mistakenly returned for bool values. [#47046](https://github.com/ClickHouse/ClickHouse/pull/47046) ([Anton Popov](https://github.com/CurtizJ)).
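A quick sketch of the `JSONType` fix above (before the change, these returned 'Null' for boolean values):

```sql
SELECT JSONType('{"a": true}', 'a'); -- 'Bool'
SELECT JSONType('true');             -- 'Bool'
```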
+* Use the `_request_body` parameter to configure predefined HTTP queries. [#47086](https://github.com/ClickHouse/ClickHouse/pull/47086) ([Constantine Peresypkin](https://github.com/pkit)).
+* Automatic indentation in the built-in UI SQL editor when Enter is pressed. [#47113](https://github.com/ClickHouse/ClickHouse/pull/47113) ([Alexey Korepanov](https://github.com/alexkorep)).
+* Self-extraction with 'sudo' will attempt to set the uid and gid of extracted files to the running user. [#47116](https://github.com/ClickHouse/ClickHouse/pull/47116) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Previously, the `repeat` function's second argument only accepted an unsigned integer type, so it could not accept values such as -1. This behavior differed from that of the Spark function. In this update, `repeat` has been modified to match the behavior of the Spark function: it now accepts the same types of inputs, including negative integers. Extensive testing has been performed to verify the correctness of the updated implementation. [#47134](https://github.com/ClickHouse/ClickHouse/pull/47134) ([KevinyhZou](https://github.com/KevinyhZou)). Note: the changelog entry was rewritten by ChatGPT.
+* Remove the `::__1` part from stacktraces. Display `std::basic_string
ClickHouse release 23.2, 2023-02-23
#### Backward Incompatible Change
@@ -140,7 +473,7 @@
* Upgrade Intel QPL from v0.3.0 to v1.0.0, and build libaccel-config and link it statically to the QPL library instead of dynamically. [#45809](https://github.com/ClickHouse/ClickHouse/pull/45809) ([jasperzhu](https://github.com/jinjunzh)).
-#### Bug Fix (user-visible misbehavior in official stable or prestable release)
+#### Bug Fix (user-visible misbehavior in official stable release)
* Flush data exactly by `rabbitmq_flush_interval_ms` or by `rabbitmq_max_block_size` in `StorageRabbitMQ`. Closes [#42389](https://github.com/ClickHouse/ClickHouse/issues/42389). Closes [#45160](https://github.com/ClickHouse/ClickHouse/issues/45160). [#44404](https://github.com/ClickHouse/ClickHouse/pull/44404) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Use PODArray to render in the sparkBar function, so we can control the memory usage. Close [#44467](https://github.com/ClickHouse/ClickHouse/issues/44467). [#44489](https://github.com/ClickHouse/ClickHouse/pull/44489) ([Duc Canh Le](https://github.com/canhld94)).
diff --git a/CMakeLists.txt b/CMakeLists.txt
index cbb666b81c3..5d6ed75bb29 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -57,8 +57,8 @@ if (ENABLE_CHECK_HEAVY_BUILDS)
     # set CPU time limit to 1000 seconds
     set (RLIMIT_CPU 1000)
-    # gcc10/gcc10/clang -fsanitize=memory is too heavy
-    if (SANITIZE STREQUAL "memory" OR COMPILER_GCC)
+    # -fsanitize=memory is too heavy
+    if (SANITIZE STREQUAL "memory")
         set (RLIMIT_DATA 10000000000) # 10G
     endif()
@@ -102,6 +102,17 @@ if (ENABLE_FUZZING)
     set (ENABLE_PROTOBUF 1)
 endif()
+option (ENABLE_WOBOQ_CODEBROWSER "Build for woboq codebrowser" OFF)
+
+if (ENABLE_WOBOQ_CODEBROWSER)
+    set (ENABLE_EMBEDDED_COMPILER 0)
+    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-poison-system-directories")
+    # woboq codebrowser uses clang tooling, which may add the default system
+    # clang include paths; clang will later warn about the includes
+    # it added itself.
+ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-poison-system-directories") +endif() + # Global libraries # See: # - default_libs.cmake @@ -121,6 +132,7 @@ if (ENABLE_COLORED_BUILD AND CMAKE_GENERATOR STREQUAL "Ninja") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-color=always") # ... such manually setting of flags can be removed once CMake supports a variable to # activate colors in *all* build systems: https://gitlab.kitware.com/cmake/cmake/-/issues/15502 + # --> available since CMake 3.24: https://stackoverflow.com/a/73349744 endif () include (cmake/check_flags.cmake) @@ -134,24 +146,15 @@ if (COMPILER_CLANG) set(COMPILER_FLAGS "${COMPILER_FLAGS} -gdwarf-aranges") endif () - if (HAS_USE_CTOR_HOMING) - # For more info see https://blog.llvm.org/posts/2021-04-05-constructor-homing-for-debug-info/ - if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO") - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xclang -fuse-ctor-homing") - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Xclang -fuse-ctor-homing") - endif() + # See https://blog.llvm.org/posts/2021-04-05-constructor-homing-for-debug-info/ + if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xclang -fuse-ctor-homing") + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Xclang -fuse-ctor-homing") endif() no_warning(enum-constexpr-conversion) # breaks Protobuf in clang-16 endif () -# If compiler has support for -Wreserved-identifier. It is difficult to detect by clang version, -# because there are two different branches of clang: clang and AppleClang. -# (AppleClang is not supported by ClickHouse, but some developers have misfortune to use it). -if (HAS_RESERVED_IDENTIFIER) - add_compile_definitions (HAS_RESERVED_IDENTIFIER) -endif () - option(ENABLE_TESTS "Provide unit_test_dbms target with Google.Test unit tests" ON) option(ENABLE_EXAMPLES "Build all example programs in 'examples' subdirectories" OFF) option(ENABLE_BENCHMARKS "Build all benchmark programs in 'benchmarks' subdirectories" OFF) @@ -178,32 +181,11 @@ else () set(NO_WHOLE_ARCHIVE --no-whole-archive) endif () -option(ENABLE_CURL_BUILD "Enable curl, azure, sentry build on by default except MacOS." ON) -if (OS_DARWIN) - # Disable the curl, azure, senry build on MacOS - set (ENABLE_CURL_BUILD OFF) -endif () - -# Ignored if `lld` is used -option(ADD_GDB_INDEX_FOR_GOLD "Add .gdb-index to resulting binaries for gold linker.") - if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE") # Can be lld or ld-lld or lld-13 or /path/to/lld. 
- if (LINKER_NAME MATCHES "lld" AND OS_LINUX) + if (LINKER_NAME MATCHES "lld") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gdb-index") - set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--gdb-index") message (STATUS "Adding .gdb-index via --gdb-index linker option.") - # we use another tool for gdb-index, because gold linker removes section .debug_aranges, which used inside clickhouse stacktraces - # http://sourceware-org.1504.n7.nabble.com/gold-No-debug-aranges-section-when-linking-with-gdb-index-td540965.html#a556932 - elseif (LINKER_NAME MATCHES "gold$" AND ADD_GDB_INDEX_FOR_GOLD) - find_program (GDB_ADD_INDEX_EXE NAMES "gdb-add-index" DOC "Path to gdb-add-index executable") - if (NOT GDB_ADD_INDEX_EXE) - set (USE_GDB_ADD_INDEX 0) - message (WARNING "Cannot add gdb index to binaries, because gold linker is used, but gdb-add-index executable not found.") - else() - set (USE_GDB_ADD_INDEX 1) - message (STATUS "gdb-add-index found: ${GDB_ADD_INDEX_EXE}") - endif() endif () endif() @@ -235,7 +217,7 @@ endif () # Create BuildID when using lld. For other linkers it is created by default. # (NOTE: LINKER_NAME can be either path or name, and in different variants) -if (LINKER_NAME MATCHES "lld" AND OS_LINUX) +if (LINKER_NAME MATCHES "lld") # SHA1 is not cryptographically secure but it is the best what lld is offering. set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--build-id=sha1") endif () @@ -288,8 +270,8 @@ endif () option (ENABLE_BUILD_PATH_MAPPING "Enable remapping of file source paths in debug info, predefined preprocessor macros, and __builtin_FILE(). It's used to generate reproducible builds. See https://reproducible-builds.org/docs/build-path" ${ENABLE_BUILD_PATH_MAPPING_DEFAULT}) if (ENABLE_BUILD_PATH_MAPPING) - set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.") - set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.") + set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffile-prefix-map=${PROJECT_SOURCE_DIR}=.") + set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -ffile-prefix-map=${PROJECT_SOURCE_DIR}=.") endif () option (ENABLE_BUILD_PROFILING "Enable profiling of build time" OFF) @@ -301,59 +283,39 @@ if (ENABLE_BUILD_PROFILING) endif () endif () -set (CMAKE_CXX_STANDARD 20) -set (CMAKE_CXX_EXTENSIONS ON) # Same as gnu++2a (ON) vs c++2a (OFF): https://cmake.org/cmake/help/latest/prop_tgt/CXX_EXTENSIONS.html +set (CMAKE_CXX_STANDARD 23) +set (CMAKE_CXX_EXTENSIONS OFF) set (CMAKE_CXX_STANDARD_REQUIRED ON) set (CMAKE_C_STANDARD 11) -set (CMAKE_C_EXTENSIONS ON) +set (CMAKE_C_EXTENSIONS ON) # required by most contribs written in C set (CMAKE_C_STANDARD_REQUIRED ON) -if (COMPILER_GCC OR COMPILER_CLANG) - # Enable C++14 sized global deallocation functions. It should be enabled by setting -std=c++14 but I'm not sure. - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsized-deallocation") -endif () - -# falign-functions=32 prevents from random performance regressions with the code change. Thus, providing more stable -# benchmarks. -if (COMPILER_GCC OR COMPILER_CLANG) - set(COMPILER_FLAGS "${COMPILER_FLAGS} -falign-functions=32") -endif () - -if (ARCH_AMD64) - # align branches within a 32-Byte boundary to avoid the potential performance loss when code layout change, - # which makes benchmark results more stable. 
-        set(BRANCHES_WITHIN_32B_BOUNDARIES "-mbranches-within-32B-boundaries")
-        if (COMPILER_GCC)
-            # gcc is in assembler, need to add "-Wa," prefix
-            set(BRANCHES_WITHIN_32B_BOUNDARIES "-Wa,${BRANCHES_WITHIN_32B_BOUNDARIES}")
-        endif()
-
-        include(CheckCXXCompilerFlag)
-        check_cxx_compiler_flag("${BRANCHES_WITHIN_32B_BOUNDARIES}" HAS_BRANCHES_WITHIN_32B_BOUNDARIES)
-        if (HAS_BRANCHES_WITHIN_32B_BOUNDARIES)
-            set(COMPILER_FLAGS "${COMPILER_FLAGS} ${BRANCHES_WITHIN_32B_BOUNDARIES}")
-        endif()
-endif()
-
-if (COMPILER_GCC)
-    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fcoroutines")
-endif ()
-
-# Compiler-specific coverage flags e.g. -fcoverage-mapping for gcc
+# Compiler-specific coverage flags e.g. -fcoverage-mapping
 option(WITH_COVERAGE "Profile the resulting binary/binaries" OFF)
-if (WITH_COVERAGE AND COMPILER_CLANG)
-    set(COMPILER_FLAGS "${COMPILER_FLAGS} -fprofile-instr-generate -fcoverage-mapping")
-    # If we want to disable coverage for specific translation units
-    set(WITHOUT_COVERAGE "-fno-profile-instr-generate -fno-coverage-mapping")
-endif()
+if (COMPILER_CLANG)
+    # Enable C++14 sized global deallocation functions. It should be enabled by setting -std=c++14 but I'm not sure.
+    # See https://reviews.llvm.org/D112921
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsized-deallocation")
-if (WITH_COVERAGE AND COMPILER_GCC)
-    set(COMPILER_FLAGS "${COMPILER_FLAGS} -fprofile-arcs -ftest-coverage")
-    set(COVERAGE_OPTION "-lgcov")
-    set(WITHOUT_COVERAGE "-fno-profile-arcs -fno-test-coverage")
-endif()
+    # -falign-functions=32 prevents random performance regressions when the code changes, thus providing more
+    # stable benchmarks.
+    set(COMPILER_FLAGS "${COMPILER_FLAGS} -falign-functions=32")
+
+    if (ARCH_AMD64)
+        # Align branches within a 32-byte boundary to avoid potential performance loss when the code layout changes,
+        # which makes benchmark results more stable.
+        set(BRANCHES_WITHIN_32B_BOUNDARIES "-mbranches-within-32B-boundaries")
+        set(COMPILER_FLAGS "${COMPILER_FLAGS} ${BRANCHES_WITHIN_32B_BOUNDARIES}")
+    endif()
+
+    if (WITH_COVERAGE)
+        set(COMPILER_FLAGS "${COMPILER_FLAGS} -fprofile-instr-generate -fcoverage-mapping")
+        # If we want to disable coverage for specific translation units
+        set(WITHOUT_COVERAGE "-fno-profile-instr-generate -fno-coverage-mapping")
+    endif()
+endif ()
 set (COMPILER_FLAGS "${COMPILER_FLAGS}")
@@ -391,13 +353,6 @@ if (COMPILER_CLANG)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-vtable-pointers")
-    if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16)
-        # Set new experimental pass manager, it's a performance, build time and binary size win.
-        # Can be removed after https://reviews.llvm.org/D66490 merged and released to at least two versions of clang.
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexperimental-new-pass-manager")
-        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexperimental-new-pass-manager")
-    endif ()
-
     # We cannot afford to use LTO when compiling unit tests, and it's not enough
     # to only supply -fno-lto at the final linking stage. So we disable it
     # completely.
@@ -436,15 +391,22 @@ else()
 endif ()
 option (ENABLE_GWP_ASAN "Enable Gwp-Asan" ON)
-if (NOT OS_LINUX AND NOT OS_ANDROID)
+# We use mmap for allocations more heavily in debug builds,
+# but GWP-ASan also wants to use mmap frequently,
+# and due to the large number of memory mappings,
+# the two do not work well together.
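+# (GWP-ASan is a sampling allocator that serves sampled allocations from dedicated mmap-ed guard pages, hence the conflict.)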
+if ((NOT OS_LINUX AND NOT OS_ANDROID) OR (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG"))
    set(ENABLE_GWP_ASAN OFF)
 endif ()
+option (ENABLE_FIU "Enable Fiu" ON)
+
 option(WERROR "Enable -Werror compiler option" ON)
 if (WERROR)
     # Don't pollute CMAKE_CXX_FLAGS with -Werror as it will break some CMake checks.
     # Instead, adopt modern cmake usage requirement.
+    # TODO: Set CMAKE_COMPILE_WARNING_AS_ERROR (cmake 3.24)
     target_compile_options(global-group INTERFACE "-Werror")
 endif ()
@@ -459,8 +421,11 @@ endif ()
 set (CMAKE_POSTFIX_VARIABLE "CMAKE_${CMAKE_BUILD_TYPE_UC}_POSTFIX")
-set (CMAKE_POSITION_INDEPENDENT_CODE OFF)
-if (OS_LINUX AND NOT (ARCH_AARCH64 OR ARCH_S390X))
+if (NOT SANITIZE)
+    set (CMAKE_POSITION_INDEPENDENT_CODE OFF)
+endif()
+
+if (OS_LINUX AND NOT (ARCH_AARCH64 OR ARCH_S390X) AND NOT SANITIZE)
     # Slightly more efficient code can be generated
     # It's disabled for ARM because otherwise ClickHouse cannot run on Android.
     set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-pie")
@@ -563,6 +528,26 @@ include (cmake/print_flags.cmake)
 if (ENABLE_RUST)
     add_subdirectory (rust)
+
+    # With LTO, Rust adds a few symbols with global visibility, the most common
+    # being rust_eh_personality, and this leads to linking errors because
+    # multiple Rust libraries contain the same symbol.
+    #
+    # If these were shared libraries, we could use a linker version script to
+    # hide these symbols, but the libraries are static.
+    #
+    # In theory, we could compile everything into one library, but that would
+    # be a mess.
+    #
+    # This should be OK, since CI has lots of other builds that are done
+    # without LTO, and they will catch multiple definitions if there are any.
+    #
+    # More information about this behaviour in Rust can be found here
+    # - https://github.com/rust-lang/rust/issues/44322
+    # - https://alanwu.space/post/symbol-hygiene/
+    if (ENABLE_THINLTO)
+        set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--allow-multiple-definition")
+    endif()
 endif()
 add_subdirectory (base)
@@ -583,7 +568,7 @@ if (NATIVE_BUILD_TARGETS
    )
     message (STATUS "Building native targets...")
-    set (NATIVE_BUILD_DIR "${CMAKE_BINARY_DIR}/native")
+    set (NATIVE_BUILD_DIR "${PROJECT_BINARY_DIR}/native")
     execute_process(
         COMMAND ${CMAKE_COMMAND} -E make_directory "${NATIVE_BUILD_DIR}"
@@ -593,11 +578,11 @@ if (NATIVE_BUILD_TARGETS
         COMMAND ${CMAKE_COMMAND}
             "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}"
             "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
-            "-DENABLE_CCACHE=${ENABLE_CCACHE}"
+            "-DCOMPILER_CACHE=${COMPILER_CACHE}"
             # Avoid overriding .cargo/config.toml with native toolchain.
"-DENABLE_RUST=OFF" "-DENABLE_CLICKHOUSE_SELF_EXTRACTING=${ENABLE_CLICKHOUSE_SELF_EXTRACTING}" - ${CMAKE_SOURCE_DIR} + ${PROJECT_SOURCE_DIR} WORKING_DIRECTORY "${NATIVE_BUILD_DIR}" COMMAND_ECHO STDOUT) diff --git a/PreLoad.cmake b/PreLoad.cmake index 0e1ee70fc8f..b456c724cc6 100644 --- a/PreLoad.cmake +++ b/PreLoad.cmake @@ -19,8 +19,8 @@ endif() if (NOT "$ENV{CFLAGS}" STREQUAL "" OR NOT "$ENV{CXXFLAGS}" STREQUAL "" OR NOT "$ENV{LDFLAGS}" STREQUAL "" - OR CMAKE_C_FLAGS OR CMAKE_CXX_FLAGS OR CMAKE_EXE_LINKER_FLAGS OR CMAKE_SHARED_LINKER_FLAGS OR CMAKE_MODULE_LINKER_FLAGS - OR CMAKE_C_FLAGS_INIT OR CMAKE_CXX_FLAGS_INIT OR CMAKE_EXE_LINKER_FLAGS_INIT OR CMAKE_SHARED_LINKER_FLAGS_INIT OR CMAKE_MODULE_LINKER_FLAGS_INIT) + OR CMAKE_C_FLAGS OR CMAKE_CXX_FLAGS OR CMAKE_EXE_LINKER_FLAGS OR CMAKE_MODULE_LINKER_FLAGS + OR CMAKE_C_FLAGS_INIT OR CMAKE_CXX_FLAGS_INIT OR CMAKE_EXE_LINKER_FLAGS_INIT OR CMAKE_MODULE_LINKER_FLAGS_INIT) # if $ENV message("CFLAGS: $ENV{CFLAGS}") @@ -36,7 +36,6 @@ if (NOT "$ENV{CFLAGS}" STREQUAL "" message("CMAKE_C_FLAGS_INIT: ${CMAKE_C_FLAGS_INIT}") message("CMAKE_CXX_FLAGS_INIT: ${CMAKE_CXX_FLAGS_INIT}") message("CMAKE_EXE_LINKER_FLAGS_INIT: ${CMAKE_EXE_LINKER_FLAGS_INIT}") - message("CMAKE_SHARED_LINKER_FLAGS_INIT: ${CMAKE_SHARED_LINKER_FLAGS_INIT}") message("CMAKE_MODULE_LINKER_FLAGS_INIT: ${CMAKE_MODULE_LINKER_FLAGS_INIT}") message(FATAL_ERROR " diff --git a/README.md b/README.md index fcbe65e8223..bbedea364fc 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[![ClickHouse — open source distributed column-oriented DBMS](https://github.com/ClickHouse/clickhouse-presentations/raw/master/images/logo-400x240.png)](https://clickhouse.com) +[ClickHouse — open source distributed column-oriented DBMS](https://clickhouse.com?utm_source=github) ClickHouse® is an open-source column-oriented database management system that allows generating analytical data reports in real-time. @@ -14,18 +14,32 @@ curl https://clickhouse.com/ | sh * [Tutorial](https://clickhouse.com/docs/en/getting_started/tutorial/) shows how to set up and query a small ClickHouse cluster. * [Documentation](https://clickhouse.com/docs/en/) provides more in-depth information. * [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format. -* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-1gh9ds7f4-PgDhJAaF8ad5RbWBAAjzFg) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time. +* [Slack](https://clickhouse.com/slack) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time. * [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events. * [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation. * [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev. * [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any. ## Upcoming Events -* [**v23.2 Release Webinar**](https://clickhouse.com/company/events/v23-2-release-webinar?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-02) - Feb 23 - 23.2 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release. 
-* [**ClickHouse Meetup in Amsterdam**](https://www.meetup.com/clickhouse-netherlands-user-group/events/291485868/) - Mar 9 - The first ClickHouse Amsterdam Meetup of 2023 is here! 🎉 Join us for short lightning talks and long discussions. Food, drinks & good times on us.
-* [**ClickHouse Meetup in SF Bay Area**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/291490121/) - Mar 14 - A night to meet with ClickHouse team in the San Francisco area! Food and drink are a given...but networking is the primary focus.
-* [**ClickHouse Meetup in Austin**](https://www.meetup.com/clickhouse-austin-user-group/events/291486654/) - Mar 16 - The first ClickHouse Meetup in Austin is happening soon! Interested in speaking, let us know!
+
+* [**v23.5 Release Webinar**](https://clickhouse.com/company/events/v23-5-release-webinar?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-05) - May 31 - 23.5 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
+* [**ClickHouse Meetup in Barcelona**](https://www.meetup.com/clickhouse-barcelona-user-group/events/292892669) - May 25
+* [**ClickHouse Meetup in London**](https://www.meetup.com/clickhouse-london-user-group/events/292892824) - May 25
+* [**ClickHouse Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/293426725/) - Jun 7
+* [**ClickHouse Meetup in Stockholm**](https://www.meetup.com/clickhouse-berlin-user-group/events/292892466) - Jun 13
+
+Also, keep an eye out for upcoming meetups in Amsterdam, Boston, NYC, Beijing, and Toronto. Somewhere else you want us to be? Please feel free to reach out to tyler clickhouse com.

 ## Recent Recordings
-* **FOSDEM 2023**: In the "Fast and Streaming Data" room Alexey gave a talk entitled "Building Analytical Apps With ClickHouse" that looks at the landscape of data tools, an interesting data set, and how you can interact with data quickly. Check out the recording on **[YouTube](https://www.youtube.com/watch?v=JlcI2Vfz_uk)**.
-* **Recording available**: [**v23.1 Release Webinar**](https://www.youtube.com/watch?v=zYSZXBnTMSE) 23.1 is the ClickHouse New Year release. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release. Inverted indices, query cache, and so -- very -- much more.
+* **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible, recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Currently featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments".
+* **Recording available**: [**v23.4 Release Webinar**](https://www.youtube.com/watch?v=4rrf6bk_mOg) Faster Parquet Reading, Asynchronous Connections to Replicas, Trailing Comma before FROM, extractKeyValuePairs, integration updates, and so much more! Watch it now!
+* **All release webinar recordings**: [YouTube playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3jAlSy1JxyP8zluvXaN3nxU)
+
+## Interested in joining ClickHouse and making it your full-time job?
+
+We are a globally diverse and distributed team, united behind a common goal of creating industry-leading, real-time analytics.
Here, you will have an opportunity to solve some of the most cutting-edge technical challenges and have direct ownership of your work and vision. If you are a contributor by nature, a thinker as well as a doer - we’ll definitely click!
+
+Check out our **current openings** here: https://clickhouse.com/company/careers
+
+Can't find what you are looking for, but want to let us know you are interested in joining ClickHouse? Email careers@clickhouse.com!
diff --git a/SECURITY.md b/SECURITY.md
index 7c6648c70eb..75c1a9d7d6a 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -13,20 +13,16 @@ The following versions of ClickHouse server are currently being supported with s
 | Version | Supported |
 |:-|:-|
+| 23.4 | ✔️ |
+| 23.3 | ✔️ |
 | 23.2 | ✔️ |
-| 23.1 | ✔️ |
-| 22.12 | ✔️ |
+| 23.1 | ❌ |
+| 22.12 | ❌ |
 | 22.11 | ❌ |
 | 22.10 | ❌ |
 | 22.9 | ❌ |
 | 22.8 | ✔️ |
-| 22.7 | ❌ |
-| 22.6 | ❌ |
-| 22.5 | ❌ |
-| 22.4 | ❌ |
-| 22.3 | ✔️ |
-| 22.2 | ❌ |
-| 22.1 | ❌ |
+| 22.* | ❌ |
 | 21.* | ❌ |
 | 20.* | ❌ |
 | 19.* | ❌ |
diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt
index 64785d575c5..8ab3c8a0711 100644
--- a/base/base/CMakeLists.txt
+++ b/base/base/CMakeLists.txt
@@ -2,6 +2,10 @@ if (USE_CLANG_TIDY)
     set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
 endif ()
+# TODO: Remove this. We like to compile with C++23 (set by top-level CMakeLists) but Clang crashes with our libcxx
+# when instantiated from JSON.cpp. Try again when libcxx(abi) and Clang are upgraded to 16.
+set (CMAKE_CXX_STANDARD 20)
+
 set (SRCS
     argsToConfig.cpp
     coverage.cpp
diff --git a/base/base/Decimal.h b/base/base/Decimal.h
index 22cb577b1b2..2405ba9ca0d 100644
--- a/base/base/Decimal.h
+++ b/base/base/Decimal.h
@@ -1,5 +1,6 @@
 #pragma once
 #include <base/extended_types.h>
+#include <base/Decimal_fwd.h>
 #if !defined(NO_SANITIZE_UNDEFINED)
 #if defined(__clang__)
@@ -19,23 +20,6 @@ using Decimal64 = Decimal<Int64>;
 using Decimal128 = Decimal<Int128>;
 using Decimal256 = Decimal<Int256>;
-template <typename T>
-concept is_decimal =
-    std::is_same_v<T, Decimal32>
-    || std::is_same_v<T, Decimal64>
-    || std::is_same_v<T, Decimal128>
-    || std::is_same_v<T, Decimal256>
-    || std::is_same_v<T, DateTime64>;
-
-template <typename T>
-concept is_over_big_int =
-    std::is_same_v<T, Int128>
-    || std::is_same_v<T, UInt128>
-    || std::is_same_v<T, Int256>
-    || std::is_same_v<T, UInt256>
-    || std::is_same_v<T, Decimal128>
-    || std::is_same_v<T, Decimal256>;
-
 template <typename T> struct NativeTypeT { using Type = T; };
 template <is_decimal T> struct NativeTypeT<T> { using Type = typename T::NativeType; };
 template <typename T> using NativeType = typename NativeTypeT<T>::Type;
diff --git a/base/base/Decimal_fwd.h b/base/base/Decimal_fwd.h
new file mode 100644
index 00000000000..589d6224917
--- /dev/null
+++ b/base/base/Decimal_fwd.h
@@ -0,0 +1,46 @@
+#pragma once
+
+#include <base/types.h>
+
+namespace wide
+{
+
+template <size_t Bits, typename Signed>
+class integer;
+
+}
+
+using Int128 = wide::integer<128, signed>;
+using UInt128 = wide::integer<128, unsigned>;
+using Int256 = wide::integer<256, signed>;
+using UInt256 = wide::integer<256, unsigned>;
+
+namespace DB
+{
+
+template <typename T> struct Decimal;
+
+using Decimal32 = Decimal<Int32>;
+using Decimal64 = Decimal<Int64>;
+using Decimal128 = Decimal<Int128>;
+using Decimal256 = Decimal<Int256>;
+
+class DateTime64;
+
+template <typename T>
+concept is_decimal =
+    std::is_same_v<T, Decimal32>
+    || std::is_same_v<T, Decimal64>
+    || std::is_same_v<T, Decimal128>
+    || std::is_same_v<T, Decimal256>
+    || std::is_same_v<T, DateTime64>;
+
+template <typename T>
+concept is_over_big_int =
+    std::is_same_v<T, Int128>
+    || std::is_same_v<T, UInt128>
+    || std::is_same_v<T, Int256>
+    || std::is_same_v<T, UInt256>
+    || std::is_same_v<T, Decimal128>
+    || std::is_same_v<T, Decimal256>;
+}
diff --git a/base/base/IPv4andIPv6.h b/base/base/IPv4andIPv6.h
index 0e97d83b07e..7b745ec7b84 100644
--- a/base/base/IPv4andIPv6.h
+++ b/base/base/IPv4andIPv6.h
@@ -51,3 +51,15 @@ namespace DB
     };
 }
+
+namespace std
+{
+    template <>
+    struct hash<DB::IPv6>
+    {
+        size_t
operator()(const DB::IPv6 & x) const + { + return std::hash()(x.toUnderType()); + } + }; +} diff --git a/base/base/JSON.cpp b/base/base/JSON.cpp index 315bcce38da..4c6d97b4444 100644 --- a/base/base/JSON.cpp +++ b/base/base/JSON.cpp @@ -466,9 +466,8 @@ JSON::Pos JSON::searchField(const char * data, size_t size) const { if (!it->hasEscapes()) { - if (static_cast(size) + 2 > it->dataEnd() - it->data()) - continue; - if (!strncmp(data, it->data() + 1, size)) + const auto current_name = it->getRawName(); + if (current_name.size() == size && 0 == memcmp(current_name.data(), data, size)) break; } else diff --git a/base/base/StringRef.h b/base/base/StringRef.h index a3e32ff5058..f428b7c747f 100644 --- a/base/base/StringRef.h +++ b/base/base/StringRef.h @@ -3,6 +3,7 @@ #include #include // for std::logic_error #include +#include #include #include #include @@ -326,5 +327,16 @@ namespace ZeroTraits inline void set(StringRef & x) { x.size = 0; } } +namespace PackedZeroTraits +{ + template class PackedPairNoInit> + inline bool check(const PackedPairNoInit p) + { return 0 == p.key.size; } + + template class PackedPairNoInit> + inline void set(PackedPairNoInit & p) + { p.key.size = 0; } +} + std::ostream & operator<<(std::ostream & os, const StringRef & str); diff --git a/base/base/TypeList.h b/base/base/TypeList.h index 244403b1c6b..310f0c0c586 100644 --- a/base/base/TypeList.h +++ b/base/base/TypeList.h @@ -4,7 +4,6 @@ #include #include #include "defines.h" -#include "TypePair.h" /// General-purpose typelist. Easy on compilation times as it does not use recursion. template @@ -28,7 +27,7 @@ namespace TypeListUtils /// In some contexts it's more handy to use functions in constexpr Root changeRoot(TypeList) { return {}; } template - constexpr void forEach(TypeList, F && f) { (std::forward(f)(Id{}), ...); } + constexpr void forEach(TypeList, F && f) { (std::forward(f)(TypeList{}), ...); } } template diff --git a/base/base/TypePair.h b/base/base/TypePair.h deleted file mode 100644 index 8c2f380618c..00000000000 --- a/base/base/TypePair.h +++ /dev/null @@ -1,4 +0,0 @@ -#pragma once - -template struct TypePair {}; -template struct Id {}; diff --git a/base/base/argsToConfig.cpp b/base/base/argsToConfig.cpp index d7983779d2d..faa1462218d 100644 --- a/base/base/argsToConfig.cpp +++ b/base/base/argsToConfig.cpp @@ -3,13 +3,29 @@ #include #include - -void argsToConfig(const Poco::Util::Application::ArgVec & argv, Poco::Util::LayeredConfiguration & config, int priority) +void argsToConfig(const Poco::Util::Application::ArgVec & argv, + Poco::Util::LayeredConfiguration & config, + int priority, + const std::unordered_set* alias_names) { /// Parsing all args and converting to config layer /// Test: -- --1=1 --1=2 --3 5 7 8 -9 10 -11=12 14= 15== --16==17 --=18 --19= --20 21 22 --23 --24 25 --26 -27 28 ---29=30 -- ----31 32 --33 3-4 Poco::AutoPtr map_config = new Poco::Util::MapConfiguration; std::string key; + + auto add_arg = [&map_config, &alias_names](const std::string & k, const std::string & v) + { + map_config->setString(k, v); + + if (alias_names && !alias_names->contains(k)) + { + std::string alias_key = k; + std::replace(alias_key.begin(), alias_key.end(), '-', '_'); + if (alias_names->contains(alias_key)) + map_config->setString(alias_key, v); + } + }; + for (const auto & arg : argv) { auto key_start = arg.find_first_not_of('-'); @@ -19,7 +35,7 @@ void argsToConfig(const Poco::Util::Application::ArgVec & argv, Poco::Util::Laye // old saved '--key', will set to some true value "1" if (!key.empty() && 
pos_minus != std::string::npos && pos_minus < key_start) { - map_config->setString(key, "1"); + add_arg(key, "1"); key = ""; } @@ -29,7 +45,7 @@ void argsToConfig(const Poco::Util::Application::ArgVec & argv, Poco::Util::Laye { if (pos_minus == std::string::npos || pos_minus > key_start) { - map_config->setString(key, arg); + add_arg(key, arg); } key = ""; } @@ -55,7 +71,7 @@ void argsToConfig(const Poco::Util::Application::ArgVec & argv, Poco::Util::Laye if (arg.size() > pos_eq) value = arg.substr(pos_eq + 1); - map_config->setString(key, value); + add_arg(key, value); key = ""; } diff --git a/base/base/argsToConfig.h b/base/base/argsToConfig.h index 9b7b44b7b7f..ef34a8a2145 100644 --- a/base/base/argsToConfig.h +++ b/base/base/argsToConfig.h @@ -1,6 +1,8 @@ #pragma once #include +#include +#include namespace Poco::Util { @@ -8,4 +10,7 @@ class LayeredConfiguration; // NOLINT(cppcoreguidelines-virtual-class-destructor } /// Import extra command line arguments to configuration. These are command line arguments after --. -void argsToConfig(const Poco::Util::Application::ArgVec & argv, Poco::Util::LayeredConfiguration & config, int priority); +void argsToConfig(const Poco::Util::Application::ArgVec & argv, + Poco::Util::LayeredConfiguration & config, + int priority, + const std::unordered_set* registered_alias_names = nullptr); diff --git a/base/base/coverage.cpp b/base/base/coverage.cpp index 043f97f9593..1027638be3d 100644 --- a/base/base/coverage.cpp +++ b/base/base/coverage.cpp @@ -2,6 +2,8 @@ #if WITH_COVERAGE +#pragma GCC diagnostic ignored "-Wreserved-identifier" + # include # include diff --git a/base/base/defines.h b/base/base/defines.h index 91c35dc28b6..6abf8155b95 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -73,18 +73,6 @@ # endif #endif -#if defined(ADDRESS_SANITIZER) -# define BOOST_USE_ASAN 1 -# define BOOST_USE_UCONTEXT 1 -#endif - -#if defined(THREAD_SANITIZER) -# define BOOST_USE_TSAN 1 -# define BOOST_USE_UCONTEXT 1 -#endif - -/// TODO: Strange enough, there is no way to detect UB sanitizer. - /// Explicitly allow undefined behaviour for certain functions. Use it as a function attribute. /// It is useful in case when compiler cannot see (and exploit) it, but UBSan can. /// Example: multiplication of signed integers with possibility of overflow when both sides are from user input. diff --git a/base/base/find_symbols.h b/base/base/find_symbols.h index 83f53773ae7..a8747ecc9b7 100644 --- a/base/base/find_symbols.h +++ b/base/base/find_symbols.h @@ -34,9 +34,51 @@ * If no such characters, returns nullptr. 
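 * For example, find_first_symbols<',', ';'>(begin, end) returns a pointer to the first ',' or ';' in [begin, end), or end if there is none (the *_or_null variants return nullptr instead).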
*/ +struct SearchSymbols +{ + static constexpr auto BUFFER_SIZE = 16; + + SearchSymbols() = default; + + explicit SearchSymbols(std::string in) + : str(std::move(in)) + { +#if defined(__SSE4_2__) + if (str.size() > BUFFER_SIZE) + { + throw std::runtime_error("SearchSymbols can contain at most " + std::to_string(BUFFER_SIZE) + " symbols and " + std::to_string(str.size()) + " was provided\n"); + } + + char tmp_safety_buffer[BUFFER_SIZE] = {0}; + + memcpy(tmp_safety_buffer, str.data(), str.size()); + + simd_vector = _mm_loadu_si128(reinterpret_cast(tmp_safety_buffer)); +#endif + } + +#if defined(__SSE4_2__) + __m128i simd_vector; +#endif + std::string str; +}; + namespace detail { -template constexpr bool is_in(char x) { return ((x == chars) || ...); } +template constexpr bool is_in(char x) { return ((x == chars) || ...); } // NOLINT(misc-redundant-expression) + +static bool is_in(char c, const char * symbols, size_t num_chars) +{ + for (size_t i = 0u; i < num_chars; ++i) + { + if (c == symbols[i]) + { + return true; + } + } + + return false; +} #if defined(__SSE2__) template @@ -53,6 +95,43 @@ inline __m128i mm_is_in(__m128i bytes) __m128i eq = mm_is_in(bytes); return _mm_or_si128(eq0, eq); } + +inline __m128i mm_is_in(__m128i bytes, const char * symbols, size_t num_chars) +{ + __m128i accumulator = _mm_setzero_si128(); + for (size_t i = 0; i < num_chars; ++i) + { + __m128i eq = _mm_cmpeq_epi8(bytes, _mm_set1_epi8(symbols[i])); + accumulator = _mm_or_si128(accumulator, eq); + } + + return accumulator; +} + +inline std::array<__m128i, 16u> mm_is_in_prepare(const char * symbols, size_t num_chars) +{ + std::array<__m128i, 16u> result {}; + + for (size_t i = 0; i < num_chars; ++i) + { + result[i] = _mm_set1_epi8(symbols[i]); + } + + return result; +} + +inline __m128i mm_is_in_execute(__m128i bytes, const std::array<__m128i, 16u> & needles) +{ + __m128i accumulator = _mm_setzero_si128(); + + for (const auto & needle : needles) + { + __m128i eq = _mm_cmpeq_epi8(bytes, needle); + accumulator = _mm_or_si128(accumulator, eq); + } + + return accumulator; +} #endif template @@ -99,6 +178,32 @@ inline const char * find_first_symbols_sse2(const char * const begin, const char return return_mode == ReturnMode::End ? end : nullptr; } +template +inline const char * find_first_symbols_sse2(const char * const begin, const char * const end, const char * symbols, size_t num_chars) +{ + const char * pos = begin; + +#if defined(__SSE2__) + const auto needles = mm_is_in_prepare(symbols, num_chars); + for (; pos + 15 < end; pos += 16) + { + __m128i bytes = _mm_loadu_si128(reinterpret_cast(pos)); + + __m128i eq = mm_is_in_execute(bytes, needles); + + uint16_t bit_mask = maybe_negate(uint16_t(_mm_movemask_epi8(eq))); + if (bit_mask) + return pos + __builtin_ctz(bit_mask); + } +#endif + + for (; pos < end; ++pos) + if (maybe_negate(is_in(*pos, symbols, num_chars))) + return pos; + + return return_mode == ReturnMode::End ? 
end : nullptr; +} + template inline const char * find_last_symbols_sse2(const char * const begin, const char * const end) @@ -159,26 +264,61 @@ inline const char * find_first_symbols_sse42(const char * const begin, const cha #endif for (; pos < end; ++pos) - if ( (num_chars >= 1 && maybe_negate(*pos == c01)) - || (num_chars >= 2 && maybe_negate(*pos == c02)) - || (num_chars >= 3 && maybe_negate(*pos == c03)) - || (num_chars >= 4 && maybe_negate(*pos == c04)) - || (num_chars >= 5 && maybe_negate(*pos == c05)) - || (num_chars >= 6 && maybe_negate(*pos == c06)) - || (num_chars >= 7 && maybe_negate(*pos == c07)) - || (num_chars >= 8 && maybe_negate(*pos == c08)) - || (num_chars >= 9 && maybe_negate(*pos == c09)) - || (num_chars >= 10 && maybe_negate(*pos == c10)) - || (num_chars >= 11 && maybe_negate(*pos == c11)) - || (num_chars >= 12 && maybe_negate(*pos == c12)) - || (num_chars >= 13 && maybe_negate(*pos == c13)) - || (num_chars >= 14 && maybe_negate(*pos == c14)) - || (num_chars >= 15 && maybe_negate(*pos == c15)) - || (num_chars >= 16 && maybe_negate(*pos == c16))) + if ( (num_chars == 1 && maybe_negate(is_in(*pos))) + || (num_chars == 2 && maybe_negate(is_in(*pos))) + || (num_chars == 3 && maybe_negate(is_in(*pos))) + || (num_chars == 4 && maybe_negate(is_in(*pos))) + || (num_chars == 5 && maybe_negate(is_in(*pos))) + || (num_chars == 6 && maybe_negate(is_in(*pos))) + || (num_chars == 7 && maybe_negate(is_in(*pos))) + || (num_chars == 8 && maybe_negate(is_in(*pos))) + || (num_chars == 9 && maybe_negate(is_in(*pos))) + || (num_chars == 10 && maybe_negate(is_in(*pos))) + || (num_chars == 11 && maybe_negate(is_in(*pos))) + || (num_chars == 12 && maybe_negate(is_in(*pos))) + || (num_chars == 13 && maybe_negate(is_in(*pos))) + || (num_chars == 14 && maybe_negate(is_in(*pos))) + || (num_chars == 15 && maybe_negate(is_in(*pos))) + || (num_chars == 16 && maybe_negate(is_in(*pos)))) return pos; return return_mode == ReturnMode::End ? end : nullptr; } +template +inline const char * find_first_symbols_sse42(const char * const begin, const char * const end, const SearchSymbols & symbols) +{ + const char * pos = begin; + + const auto num_chars = symbols.str.size(); + +#if defined(__SSE4_2__) + constexpr int mode = _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT; + + const __m128i set = symbols.simd_vector; + + for (; pos + 15 < end; pos += 16) + { + __m128i bytes = _mm_loadu_si128(reinterpret_cast(pos)); + + if constexpr (positive) + { + if (_mm_cmpestrc(set, num_chars, bytes, 16, mode)) + return pos + _mm_cmpestri(set, num_chars, bytes, 16, mode); + } + else + { + if (_mm_cmpestrc(set, num_chars, bytes, 16, mode | _SIDD_NEGATIVE_POLARITY)) + return pos + _mm_cmpestri(set, num_chars, bytes, 16, mode | _SIDD_NEGATIVE_POLARITY); + } + } +#endif + + for (; pos < end; ++pos) + if (maybe_negate(is_in(*pos, symbols.str.data(), num_chars))) + return pos; + + return return_mode == ReturnMode::End ? end : nullptr; +} /// NOTE No SSE 4.2 implementation for find_last_symbols_or_null. Not worth to do. 
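As a minimal usage sketch (my own, not part of the patch; the include path is an assumption), the `SearchSymbols` machinery above pairs with the `std::string_view` front-end overloads declared just below:

```cpp
#include <cassert>
#include <string_view>

#include <base/find_symbols.h> // assumed include path for this header

int main()
{
    // Build the needle set once; on SSE4.2 builds the constructor packs
    // the (at most 16) symbols into a single __m128i register.
    static const SearchSymbols delimiters(";|");

    constexpr std::string_view haystack = "a=1;b=2|c=3";

    // The std::string_view overload dispatches to the SSE4.2 path for
    // 5 or more symbols and to the SSE2 helper otherwise; it returns a
    // pointer to the end of the haystack when no symbol is found.
    const char * pos = find_first_symbols(haystack, delimiters);

    assert(pos != haystack.data() + haystack.size());
    assert(*pos == ';'); // the first delimiter in the input
    return 0;
}
```

With only two needles this goes through the SSE2 helper; the `_mm_cmpestri`-based SSE4.2 path kicks in only for five or more symbols, matching the dispatch rule below.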
@@ -194,6 +334,17 @@ inline const char * find_first_symbols_dispatch(const char * begin, const char * return find_first_symbols_sse2(begin, end); } +template +inline const char * find_first_symbols_dispatch(const std::string_view haystack, const SearchSymbols & symbols) +{ +#if defined(__SSE4_2__) + if (symbols.str.size() >= 5) + return find_first_symbols_sse42(haystack.begin(), haystack.end(), symbols); + else +#endif + return find_first_symbols_sse2(haystack.begin(), haystack.end(), symbols.str.data(), symbols.str.size()); +} + } @@ -211,6 +362,11 @@ inline char * find_first_symbols(char * begin, char * end) return const_cast(detail::find_first_symbols_dispatch(begin, end)); } +inline const char * find_first_symbols(std::string_view haystack, const SearchSymbols & symbols) +{ + return detail::find_first_symbols_dispatch(haystack, symbols); +} + template inline const char * find_first_not_symbols(const char * begin, const char * end) { @@ -223,6 +379,11 @@ inline char * find_first_not_symbols(char * begin, char * end) return const_cast(detail::find_first_symbols_dispatch(begin, end)); } +inline const char * find_first_not_symbols(std::string_view haystack, const SearchSymbols & symbols) +{ + return detail::find_first_symbols_dispatch(haystack, symbols); +} + template inline const char * find_first_symbols_or_null(const char * begin, const char * end) { @@ -235,6 +396,11 @@ inline char * find_first_symbols_or_null(char * begin, char * end) return const_cast(detail::find_first_symbols_dispatch(begin, end)); } +inline const char * find_first_symbols_or_null(std::string_view haystack, const SearchSymbols & symbols) +{ + return detail::find_first_symbols_dispatch(haystack, symbols); +} + template inline const char * find_first_not_symbols_or_null(const char * begin, const char * end) { @@ -247,6 +413,10 @@ inline char * find_first_not_symbols_or_null(char * begin, char * end) return const_cast(detail::find_first_symbols_dispatch(begin, end)); } +inline const char * find_first_not_symbols_or_null(std::string_view haystack, const SearchSymbols & symbols) +{ + return detail::find_first_symbols_dispatch(haystack, symbols); +} template inline const char * find_last_symbols_or_null(const char * begin, const char * end) diff --git a/base/base/hex.h b/base/base/hex.h new file mode 100644 index 00000000000..b8cf95db893 --- /dev/null +++ b/base/base/hex.h @@ -0,0 +1,215 @@ +#pragma once + +#include +#include +#include "types.h" + +/// Maps 0..15 to 0..9A..F or 0..9a..f correspondingly. 
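+/// For example, hexDigitUppercase(10) == 'A' and hexDigitLowercase(10) == 'a' (see the tables and helpers just below).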
+ +constexpr inline std::string_view hex_digit_to_char_uppercase_table = "0123456789ABCDEF"; +constexpr inline std::string_view hex_digit_to_char_lowercase_table = "0123456789abcdef"; + +constexpr char hexDigitUppercase(unsigned char c) +{ + return hex_digit_to_char_uppercase_table[c]; +} +constexpr char hexDigitLowercase(unsigned char c) +{ + return hex_digit_to_char_lowercase_table[c]; +} + +/// Maps 0..255 to 00..FF or 00..ff correspondingly + +constexpr inline std::string_view hex_byte_to_char_uppercase_table = // + "000102030405060708090A0B0C0D0E0F" + "101112131415161718191A1B1C1D1E1F" + "202122232425262728292A2B2C2D2E2F" + "303132333435363738393A3B3C3D3E3F" + "404142434445464748494A4B4C4D4E4F" + "505152535455565758595A5B5C5D5E5F" + "606162636465666768696A6B6C6D6E6F" + "707172737475767778797A7B7C7D7E7F" + "808182838485868788898A8B8C8D8E8F" + "909192939495969798999A9B9C9D9E9F" + "A0A1A2A3A4A5A6A7A8A9AAABACADAEAF" + "B0B1B2B3B4B5B6B7B8B9BABBBCBDBEBF" + "C0C1C2C3C4C5C6C7C8C9CACBCCCDCECF" + "D0D1D2D3D4D5D6D7D8D9DADBDCDDDEDF" + "E0E1E2E3E4E5E6E7E8E9EAEBECEDEEEF" + "F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF"; + +constexpr inline std::string_view hex_byte_to_char_lowercase_table = // + "000102030405060708090a0b0c0d0e0f" + "101112131415161718191a1b1c1d1e1f" + "202122232425262728292a2b2c2d2e2f" + "303132333435363738393a3b3c3d3e3f" + "404142434445464748494a4b4c4d4e4f" + "505152535455565758595a5b5c5d5e5f" + "606162636465666768696a6b6c6d6e6f" + "707172737475767778797a7b7c7d7e7f" + "808182838485868788898a8b8c8d8e8f" + "909192939495969798999a9b9c9d9e9f" + "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf" + "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf" + "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf" + "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf" + "e0e1e2e3e4e5e6e7e8e9eaebecedeeef" + "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff"; + +inline void writeHexByteUppercase(UInt8 byte, void * out) +{ + memcpy(out, &hex_byte_to_char_uppercase_table[static_cast(byte) * 2], 2); +} + +inline void writeHexByteLowercase(UInt8 byte, void * out) +{ + memcpy(out, &hex_byte_to_char_lowercase_table[static_cast(byte) * 2], 2); +} + +constexpr inline std::string_view bin_byte_to_char_table = // + "0000000000000001000000100000001100000100000001010000011000000111" + "0000100000001001000010100000101100001100000011010000111000001111" + "0001000000010001000100100001001100010100000101010001011000010111" + "0001100000011001000110100001101100011100000111010001111000011111" + "0010000000100001001000100010001100100100001001010010011000100111" + "0010100000101001001010100010101100101100001011010010111000101111" + "0011000000110001001100100011001100110100001101010011011000110111" + "0011100000111001001110100011101100111100001111010011111000111111" + "0100000001000001010000100100001101000100010001010100011001000111" + "0100100001001001010010100100101101001100010011010100111001001111" + "0101000001010001010100100101001101010100010101010101011001010111" + "0101100001011001010110100101101101011100010111010101111001011111" + "0110000001100001011000100110001101100100011001010110011001100111" + "0110100001101001011010100110101101101100011011010110111001101111" + "0111000001110001011100100111001101110100011101010111011001110111" + "0111100001111001011110100111101101111100011111010111111001111111" + "1000000010000001100000101000001110000100100001011000011010000111" + "1000100010001001100010101000101110001100100011011000111010001111" + "1001000010010001100100101001001110010100100101011001011010010111" + "1001100010011001100110101001101110011100100111011001111010011111" + 
"1010000010100001101000101010001110100100101001011010011010100111" + "1010100010101001101010101010101110101100101011011010111010101111" + "1011000010110001101100101011001110110100101101011011011010110111" + "1011100010111001101110101011101110111100101111011011111010111111" + "1100000011000001110000101100001111000100110001011100011011000111" + "1100100011001001110010101100101111001100110011011100111011001111" + "1101000011010001110100101101001111010100110101011101011011010111" + "1101100011011001110110101101101111011100110111011101111011011111" + "1110000011100001111000101110001111100100111001011110011011100111" + "1110100011101001111010101110101111101100111011011110111011101111" + "1111000011110001111100101111001111110100111101011111011011110111" + "1111100011111001111110101111101111111100111111011111111011111111"; + +inline void writeBinByte(UInt8 byte, void * out) +{ + memcpy(out, &bin_byte_to_char_table[static_cast(byte) * 8], 8); +} + +/// Produces hex representation of an unsigned int with leading zeros (for checksums) +template +inline void writeHexUIntImpl(TUInt uint_, char * out, std::string_view table) +{ + union + { + TUInt value; + UInt8 uint8[sizeof(TUInt)]; + }; + + value = uint_; + + for (size_t i = 0; i < sizeof(TUInt); ++i) + { + if constexpr (std::endian::native == std::endian::little) + memcpy(out + i * 2, &table[static_cast(uint8[sizeof(TUInt) - 1 - i]) * 2], 2); + else + memcpy(out + i * 2, &table[static_cast(uint8[i]) * 2], 2); + } +} + +template +inline void writeHexUIntUppercase(TUInt uint_, char * out) +{ + writeHexUIntImpl(uint_, out, hex_byte_to_char_uppercase_table); +} + +template +inline void writeHexUIntLowercase(TUInt uint_, char * out) +{ + writeHexUIntImpl(uint_, out, hex_byte_to_char_lowercase_table); +} + +template +std::string getHexUIntUppercase(TUInt uint_) +{ + std::string res(sizeof(TUInt) * 2, '\0'); + writeHexUIntUppercase(uint_, res.data()); + return res; +} + +template +std::string getHexUIntLowercase(TUInt uint_) +{ + std::string res(sizeof(TUInt) * 2, '\0'); + writeHexUIntLowercase(uint_, res.data()); + return res; +} + +/// Maps 0..9, A..F, a..f to 0..15. Other chars are mapped to implementation specific value. 
+ +constexpr inline std::string_view hex_char_to_digit_table + = {"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" //0-9 + "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //A-Z + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //a-z + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff", + 256}; + +constexpr UInt8 unhex(char c) +{ + return hex_char_to_digit_table[static_cast(c)]; +} + +constexpr UInt8 unhex2(const char * data) +{ + return static_cast(unhex(data[0])) * 0x10 + static_cast(unhex(data[1])); +} + +constexpr UInt16 unhex4(const char * data) +{ + return static_cast(unhex(data[0])) * 0x1000 + static_cast(unhex(data[1])) * 0x100 + + static_cast(unhex(data[2])) * 0x10 + static_cast(unhex(data[3])); +} + +template +constexpr TUInt unhexUInt(const char * data) +{ + TUInt res = 0; + if constexpr ((sizeof(TUInt) <= 8) || ((sizeof(TUInt) % 8) != 0)) + { + for (size_t i = 0; i < sizeof(TUInt) * 2; ++i, ++data) + { + res <<= 4; + res += unhex(*data); + } + } + else + { + for (size_t i = 0; i < sizeof(TUInt) / 8; ++i, data += 16) + { + res <<= 64; + res += unhexUInt(data); + } + } + return res; +} diff --git a/base/base/interpolate.h b/base/base/interpolate.h new file mode 100644 index 00000000000..1d4fc0b6257 --- /dev/null +++ b/base/base/interpolate.h @@ -0,0 +1,13 @@ +#pragma once +#include +#include + +/** Linear interpolation in logarithmic coordinates. + * Exponential interpolation is related to linear interpolation + * exactly in same way as geometric mean is related to arithmetic mean. 
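+ * For example, interpolateExponential(1.0, 100.0, 0.5) returns 10.0, the geometric mean of 1 and 100.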
+ */ +constexpr double interpolateExponential(double min, double max, double ratio) +{ + assert(min > 0 && ratio >= 0 && ratio <= 1); + return min * std::pow(max / min, ratio); +} diff --git a/base/base/phdr_cache.cpp b/base/base/phdr_cache.cpp index c3d7fed2d3f..7d37f01b560 100644 --- a/base/base/phdr_cache.cpp +++ b/base/base/phdr_cache.cpp @@ -1,6 +1,4 @@ -#ifdef HAS_RESERVED_IDENTIFIER #pragma clang diagnostic ignored "-Wreserved-identifier" -#endif /// This code was based on the code by Fedor Korotkiy https://www.linkedin.com/in/fedor-korotkiy-659a1838/ diff --git a/base/base/strong_typedef.h b/base/base/strong_typedef.h index 2ddea6412f5..b3b8bced688 100644 --- a/base/base/strong_typedef.h +++ b/base/base/strong_typedef.h @@ -35,7 +35,7 @@ public: Self & operator=(T && rhs) { t = std::move(rhs); return *this;} // NOLINTBEGIN(google-explicit-constructor) - operator const T & () const { return t; } + constexpr operator const T & () const { return t; } operator T & () { return t; } // NOLINTEND(google-explicit-constructor) diff --git a/base/base/unaligned.h b/base/base/unaligned.h index fcaaa38f2fe..3ab25c803bb 100644 --- a/base/base/unaligned.h +++ b/base/base/unaligned.h @@ -5,44 +5,6 @@ #include -inline void reverseMemcpy(void * dst, const void * src, size_t size) -{ - uint8_t * uint_dst = reinterpret_cast(dst); - const uint8_t * uint_src = reinterpret_cast(src); - - uint_dst += size; - while (size) - { - --uint_dst; - *uint_dst = *uint_src; - ++uint_src; - --size; - } -} - -template -inline T unalignedLoadLE(const void * address) -{ - T res {}; - if constexpr (std::endian::native == std::endian::little) - memcpy(&res, address, sizeof(res)); - else - reverseMemcpy(&res, address, sizeof(res)); - return res; -} - - -template -inline void unalignedStoreLE(void * address, - const typename std::enable_if::type & src) -{ - static_assert(std::is_trivially_copyable_v); - if constexpr (std::endian::native == std::endian::little) - memcpy(address, &src, sizeof(src)); - else - reverseMemcpy(address, &src, sizeof(src)); -} - template inline T unalignedLoad(const void * address) { @@ -62,3 +24,70 @@ inline void unalignedStore(void * address, static_assert(std::is_trivially_copyable_v); memcpy(address, &src, sizeof(src)); } + + +inline void reverseMemcpy(void * dst, const void * src, size_t size) +{ + uint8_t * uint_dst = reinterpret_cast(dst); + const uint8_t * uint_src = reinterpret_cast(src); + + uint_dst += size; + while (size) + { + --uint_dst; + *uint_dst = *uint_src; + ++uint_src; + --size; + } +} + +template +inline T unalignedLoadEndian(const void * address) +{ + T res {}; + if constexpr (std::endian::native == endian) + memcpy(&res, address, sizeof(res)); + else + reverseMemcpy(&res, address, sizeof(res)); + return res; +} + + +template +inline void unalignedStoreEndian(void * address, T & src) +{ + static_assert(std::is_trivially_copyable_v); + if constexpr (std::endian::native == endian) + memcpy(address, &src, sizeof(src)); + else + reverseMemcpy(address, &src, sizeof(src)); +} + + +template +inline T unalignedLoadLittleEndian(const void * address) +{ + return unalignedLoadEndian(address); +} + + +template +inline void unalignedStoreLittleEndian(void * address, + const typename std::enable_if::type & src) +{ + unalignedStoreEndian(address, src); +} + +template +inline T unalignedLoadBigEndian(const void * address) +{ + return unalignedLoadEndian(address); +} + + +template +inline void unalignedStoreBigEndian(void * address, + const typename std::enable_if::type & src) +{ + 
unalignedStoreEndian(address, src); +} diff --git a/base/base/unit.h b/base/base/unit.h index 1fb530be1f0..0fc314af479 100644 --- a/base/base/unit.h +++ b/base/base/unit.h @@ -5,10 +5,8 @@ constexpr size_t KiB = 1024; constexpr size_t MiB = 1024 * KiB; constexpr size_t GiB = 1024 * MiB; -#ifdef HAS_RESERVED_IDENTIFIER -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wreserved-identifier" -#endif +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wreserved-identifier" // NOLINTBEGIN(google-runtime-int) constexpr size_t operator"" _KiB(unsigned long long val) { return val * KiB; } @@ -16,6 +14,4 @@ constexpr size_t operator"" _MiB(unsigned long long val) { return val * MiB; } constexpr size_t operator"" _GiB(unsigned long long val) { return val * GiB; } // NOLINTEND(google-runtime-int) -#ifdef HAS_RESERVED_IDENTIFIER -# pragma clang diagnostic pop -#endif +#pragma clang diagnostic pop diff --git a/base/base/wide_integer_impl.h b/base/base/wide_integer_impl.h index 4a54c0fb2a4..4a80c176829 100644 --- a/base/base/wide_integer_impl.h +++ b/base/base/wide_integer_impl.h @@ -155,13 +155,13 @@ struct common_type, Arithmetic> std::is_floating_point_v, Arithmetic, std::conditional_t< - sizeof(Arithmetic) < Bits * sizeof(long), + sizeof(Arithmetic) * 8 < Bits, wide::integer, std::conditional_t< - Bits * sizeof(long) < sizeof(Arithmetic), + Bits < sizeof(Arithmetic) * 8, Arithmetic, std::conditional_t< - Bits * sizeof(long) == sizeof(Arithmetic) && (std::is_same_v || std::is_signed_v), + Bits == sizeof(Arithmetic) * 8 && (std::is_same_v || std::is_signed_v), Arithmetic, wide::integer>>>>; }; @@ -314,7 +314,14 @@ struct integer::_impl const T alpha = t / static_cast(max_int); - if (alpha <= static_cast(max_int)) + /** Here we have to use strict comparison. + * The max_int is 2^64 - 1. + * When casted to floating point type, it will be rounded to the closest representable number, + * which is 2^64. + * But 2^64 is not representable in uint64_t, + * so the maximum representable number will be strictly less. + */ + if (alpha < static_cast(max_int)) self = static_cast(alpha); else // max(double) / 2^64 will surely contain less than 52 precision bits, so speed up computations. 
set_multiplier(self, static_cast(alpha)); @@ -732,9 +739,10 @@ public: if (std::numeric_limits::is_signed && (is_negative(lhs) != is_negative(rhs))) return is_negative(rhs); + integer t = rhs; for (unsigned i = 0; i < item_count; ++i) { - base_type rhs_item = get_item(rhs, big(i)); + base_type rhs_item = get_item(t, big(i)); if (lhs.items[big(i)] != rhs_item) return lhs.items[big(i)] > rhs_item; @@ -757,9 +765,10 @@ public: if (std::numeric_limits::is_signed && (is_negative(lhs) != is_negative(rhs))) return is_negative(lhs); + integer t = rhs; for (unsigned i = 0; i < item_count; ++i) { - base_type rhs_item = get_item(rhs, big(i)); + base_type rhs_item = get_item(t, big(i)); if (lhs.items[big(i)] != rhs_item) return lhs.items[big(i)] < rhs_item; @@ -779,9 +788,10 @@ public: { if constexpr (should_keep_size()) { + integer t = rhs; for (unsigned i = 0; i < item_count; ++i) { - base_type rhs_item = get_item(rhs, any(i)); + base_type rhs_item = get_item(t, any(i)); if (lhs.items[any(i)] != rhs_item) return false; @@ -1239,7 +1249,7 @@ constexpr integer::operator long double() const noexcept for (unsigned i = 0; i < _impl::item_count; ++i) { long double t = res; - res *= std::numeric_limits::max(); + res *= static_cast(std::numeric_limits::max()); res += t; res += tmp.items[_impl::big(i)]; } diff --git a/base/base/wide_integer_to_string.h b/base/base/wide_integer_to_string.h index 160bf599516..c2cbe8d82e3 100644 --- a/base/base/wide_integer_to_string.h +++ b/base/base/wide_integer_to_string.h @@ -64,6 +64,6 @@ struct fmt::formatter> template auto format(const wide::integer & value, FormatContext & ctx) { - return format_to(ctx.out(), "{}", to_string(value)); + return fmt::format_to(ctx.out(), "{}", to_string(value)); } }; diff --git a/base/glibc-compatibility/glibc-compatibility.c b/base/glibc-compatibility/glibc-compatibility.c index 7e8ea5051d7..49bb81a58be 100644 --- a/base/glibc-compatibility/glibc-compatibility.c +++ b/base/glibc-compatibility/glibc-compatibility.c @@ -235,6 +235,17 @@ ssize_t getrandom(void *buf, size_t buflen, unsigned flags) return syscall(SYS_getrandom, buf, buflen, flags); } +/* Structure for scatter/gather I/O. */ +struct iovec +{ + void *iov_base; /* Pointer to data. */ + size_t iov_len; /* Length of data. */ +}; + +ssize_t preadv(int __fd, const struct iovec *__iovec, int __count, __off_t __offset) +{ + return syscall(SYS_preadv, __fd, __iovec, __count, (long)(__offset), (long)(__offset>>32)); +} #include #include diff --git a/base/glibc-compatibility/musl/expf.c b/base/glibc-compatibility/musl/expf.c new file mode 100644 index 00000000000..0a59236d1c0 --- /dev/null +++ b/base/glibc-compatibility/musl/expf.c @@ -0,0 +1,81 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_expf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. 
+ * ==================================================== + */ + +#include "libm.h" + +static const float + half[2] = {0.5,-0.5}, + ln2hi = 6.9314575195e-1f, /* 0x3f317200 */ + ln2lo = 1.4286067653e-6f, /* 0x35bfbe8e */ + invln2 = 1.4426950216e+0f, /* 0x3fb8aa3b */ + /* + * Domain [-0.34568, 0.34568], range ~[-4.278e-9, 4.447e-9]: + * |x*(exp(x)+1)/(exp(x)-1) - p(x)| < 2**-27.74 + */ + P1 = 1.6666625440e-1f, /* 0xaaaa8f.0p-26 */ + P2 = -2.7667332906e-3f; /* -0xb55215.0p-32 */ + +float expf(float x) +{ + float_t hi, lo, c, xx, y; + int k, sign; + uint32_t hx; + + GET_FLOAT_WORD(hx, x); + sign = hx >> 31; /* sign bit of x */ + hx &= 0x7fffffff; /* high word of |x| */ + + /* special cases */ + if (hx >= 0x42aeac50) { /* if |x| >= -87.33655f or NaN */ + if (hx >= 0x42b17218 && !sign) { /* x >= 88.722839f */ + /* overflow */ + x *= 0x1p127f; + return x; + } + if (sign) { + /* underflow */ + FORCE_EVAL(-0x1p-149f/x); + if (hx >= 0x42cff1b5) /* x <= -103.972084f */ + return 0; + } + } + + /* argument reduction */ + if (hx > 0x3eb17218) { /* if |x| > 0.5 ln2 */ + if (hx > 0x3f851592) /* if |x| > 1.5 ln2 */ + k = invln2*x + half[sign]; + else + k = 1 - sign - sign; + hi = x - k*ln2hi; /* k*ln2hi is exact here */ + lo = k*ln2lo; + x = hi - lo; + } else if (hx > 0x39000000) { /* |x| > 2**-14 */ + k = 0; + hi = x; + lo = 0; + } else { + /* raise inexact */ + FORCE_EVAL(0x1p127f + x); + return 1 + x; + } + + /* x is now in primary range */ + xx = x*x; + c = x - xx*(P1+xx*P2); + y = 1 + (x*c/(2-c) - lo + hi); + if (k == 0) + return y; + return scalbnf(y, k); +} \ No newline at end of file diff --git a/base/glibc-compatibility/musl/logf.c b/base/glibc-compatibility/musl/logf.c index 7ee5d7fe623..e4c2237caa2 100644 --- a/base/glibc-compatibility/musl/logf.c +++ b/base/glibc-compatibility/musl/logf.c @@ -53,7 +53,7 @@ float logf(float x) tmp = ix - OFF; i = (tmp >> (23 - LOGF_TABLE_BITS)) % N; k = (int32_t)tmp >> 23; /* arithmetic shift */ - iz = ix - (tmp & 0x1ff << 23); + iz = ix - (tmp & 0xff800000); invc = T[i].invc; logc = T[i].logc; z = (double_t)asfloat(iz); diff --git a/base/glibc-compatibility/musl/scalbnf.c b/base/glibc-compatibility/musl/scalbnf.c new file mode 100644 index 00000000000..cf56cacfb5f --- /dev/null +++ b/base/glibc-compatibility/musl/scalbnf.c @@ -0,0 +1,31 @@ +#include +#include + +float scalbnf(float x, int n) +{ + union {float f; uint32_t i;} u; + float_t y = x; + + if (n > 127) { + y *= 0x1p127f; + n -= 127; + if (n > 127) { + y *= 0x1p127f; + n -= 127; + if (n > 127) + n = 127; + } + } else if (n < -126) { + y *= 0x1p-126f; + n += 126; + if (n < -126) { + y *= 0x1p-126f; + n += 126; + if (n < -126) + n = -126; + } + } + u.i = (uint32_t)(0x7f+n)<<23; + x = y * u.f; + return x; +} diff --git a/base/harmful/harmful.c b/base/harmful/harmful.c index 6112f9a339c..78796ca0c05 100644 --- a/base/harmful/harmful.c +++ b/base/harmful/harmful.c @@ -31,7 +31,8 @@ TRAP(argp_state_help) TRAP(argp_usage) TRAP(asctime) TRAP(clearenv) -TRAP(crypt) +// Redefined at contrib/libbcrypt/crypt_blowfish/wrapper.c:186 +// TRAP(crypt) TRAP(ctime) TRAP(cuserid) TRAP(drand48) diff --git a/base/poco/Crypto/include/Poco/Crypto/ECKeyImpl.h b/base/poco/Crypto/include/Poco/Crypto/ECKeyImpl.h index 2a72861a84e..d051ef1b768 100644 --- a/base/poco/Crypto/include/Poco/Crypto/ECKeyImpl.h +++ b/base/poco/Crypto/include/Poco/Crypto/ECKeyImpl.h @@ -90,20 +90,6 @@ namespace Crypto std::string groupName() const; /// Returns the EC key group name. 
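On the one-line logf.c change above: `0x1ff << 23` and `0xff800000` denote the same 9-bit mask (sign plus the 8-bit exponent field of a float), but the former shifts a signed int past INT_MAX, which is undefined behaviour in C; presumably that is what the rewrite avoids. A quick standalone check of the equivalence (editor's sketch, not from the diff):

#include <cassert>
#include <cstdint>

int main()
{
    // Done in unsigned arithmetic, the shift yields the same bit pattern
    // as the literal used in the fixed line.
    const uint32_t mask = 0xff800000u;
    assert(mask == (UINT32_C(0x1ff) << 23));

    // For any word, masking with either expression keeps the top 9 bits and
    // clears the low 23; only the signed-overflow hazard differs.
    const uint32_t tmp = 0x3f9d70a4u; // arbitrary example bits
    assert((tmp & mask) == (tmp & (UINT32_C(0x1ff) << 23)));
    return 0;
}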
- void save(const std::string & publicKeyFile, const std::string & privateKeyFile = "", const std::string & privateKeyPassphrase = "") - const; - /// Exports the public and private keys to the given files. - /// - /// If an empty filename is specified, the corresponding key - /// is not exported. - - void - save(std::ostream * pPublicKeyStream, std::ostream * pPrivateKeyStream = 0, const std::string & privateKeyPassphrase = "") const; - /// Exports the public and private key to the given streams. - /// - /// If a null pointer is passed for a stream, the corresponding - /// key is not exported. - static std::string getCurveName(int nid = -1); /// Returns elliptical curve name corresponding to /// the given nid; if nid is not found, returns @@ -150,22 +136,6 @@ namespace Crypto { return OBJ_nid2sn(groupId()); } - - - inline void - ECKeyImpl::save(const std::string & publicKeyFile, const std::string & privateKeyFile, const std::string & privateKeyPassphrase) const - { - EVPPKey(_pEC).save(publicKeyFile, privateKeyFile, privateKeyPassphrase); - } - - - inline void - ECKeyImpl::save(std::ostream * pPublicKeyStream, std::ostream * pPrivateKeyStream, const std::string & privateKeyPassphrase) const - { - EVPPKey(_pEC).save(pPublicKeyStream, pPrivateKeyStream, privateKeyPassphrase); - } - - } } // namespace Poco::Crypto diff --git a/base/poco/Crypto/include/Poco/Crypto/KeyPair.h b/base/poco/Crypto/include/Poco/Crypto/KeyPair.h index 36adbec6a4d..291a0f8b749 100644 --- a/base/poco/Crypto/include/Poco/Crypto/KeyPair.h +++ b/base/poco/Crypto/include/Poco/Crypto/KeyPair.h @@ -56,24 +56,6 @@ namespace Crypto virtual int size() const; /// Returns the RSA modulus size. - virtual void save( - const std::string & publicKeyPairFile, - const std::string & privateKeyPairFile = "", - const std::string & privateKeyPairPassphrase = "") const; - /// Exports the public and private keys to the given files. - /// - /// If an empty filename is specified, the corresponding key - /// is not exported. - - virtual void save( - std::ostream * pPublicKeyPairStream, - std::ostream * pPrivateKeyPairStream = 0, - const std::string & privateKeyPairPassphrase = "") const; - /// Exports the public and private key to the given streams. - /// - /// If a null pointer is passed for a stream, the corresponding - /// key is not exported. - KeyPairImpl::Ptr impl() const; /// Returns the impl object. @@ -97,21 +79,6 @@ namespace Crypto return _pImpl->size(); } - - inline void - KeyPair::save(const std::string & publicKeyFile, const std::string & privateKeyFile, const std::string & privateKeyPassphrase) const - { - _pImpl->save(publicKeyFile, privateKeyFile, privateKeyPassphrase); - } - - - inline void - KeyPair::save(std::ostream * pPublicKeyStream, std::ostream * pPrivateKeyStream, const std::string & privateKeyPassphrase) const - { - _pImpl->save(pPublicKeyStream, pPrivateKeyStream, privateKeyPassphrase); - } - - inline const std::string & KeyPair::name() const { return _pImpl->name(); diff --git a/base/poco/Crypto/include/Poco/Crypto/KeyPairImpl.h b/base/poco/Crypto/include/Poco/Crypto/KeyPairImpl.h index 155efd20b9c..ecafbef0241 100644 --- a/base/poco/Crypto/include/Poco/Crypto/KeyPairImpl.h +++ b/base/poco/Crypto/include/Poco/Crypto/KeyPairImpl.h @@ -55,22 +55,6 @@ namespace Crypto virtual int size() const = 0; /// Returns the key size. 
- virtual void save( - const std::string & publicKeyFile, - const std::string & privateKeyFile = "", - const std::string & privateKeyPassphrase = "") const = 0; - /// Exports the public and private keys to the given files. - /// - /// If an empty filename is specified, the corresponding key - /// is not exported. - - virtual void save( - std::ostream * pPublicKeyStream, std::ostream * pPrivateKeyStream = 0, const std::string & privateKeyPassphrase = "") const = 0; - /// Exports the public and private key to the given streams. - /// - /// If a null pointer is passed for a stream, the corresponding - /// key is not exported. - const std::string & name() const; /// Returns key pair name diff --git a/base/poco/Crypto/include/Poco/Crypto/RSAKeyImpl.h b/base/poco/Crypto/include/Poco/Crypto/RSAKeyImpl.h index 4ccbb324c06..010c68bacd7 100644 --- a/base/poco/Crypto/include/Poco/Crypto/RSAKeyImpl.h +++ b/base/poco/Crypto/include/Poco/Crypto/RSAKeyImpl.h @@ -96,20 +96,6 @@ namespace Crypto ByteVec decryptionExponent() const; /// Returns the RSA decryption exponent. - void save(const std::string & publicKeyFile, const std::string & privateKeyFile = "", const std::string & privateKeyPassphrase = "") - const; - /// Exports the public and private keys to the given files. - /// - /// If an empty filename is specified, the corresponding key - /// is not exported. - - void - save(std::ostream * pPublicKeyStream, std::ostream * pPrivateKeyStream = 0, const std::string & privateKeyPassphrase = "") const; - /// Exports the public and private key to the given streams. - /// - /// If a null pointer is passed for a stream, the corresponding - /// key is not exported. - private: RSAKeyImpl(); @@ -139,4 +125,4 @@ namespace Crypto } // namespace Poco::Crypto -#endif // Crypto_RSAKeyImplImpl_INCLUDED \ No newline at end of file +#endif // Crypto_RSAKeyImplImpl_INCLUDED diff --git a/base/poco/Crypto/src/RSAKeyImpl.cpp b/base/poco/Crypto/src/RSAKeyImpl.cpp index eb6e758343a..229a3bce828 100644 --- a/base/poco/Crypto/src/RSAKeyImpl.cpp +++ b/base/poco/Crypto/src/RSAKeyImpl.cpp @@ -269,103 +269,6 @@ RSAKeyImpl::ByteVec RSAKeyImpl::decryptionExponent() const } -void RSAKeyImpl::save(const std::string& publicKeyFile, - const std::string& privateKeyFile, - const std::string& privateKeyPassphrase) const -{ - if (!publicKeyFile.empty()) - { - BIO* bio = BIO_new(BIO_s_file()); - if (!bio) throw Poco::IOException("Cannot create BIO for writing public key file", publicKeyFile); - try - { - if (BIO_write_filename(bio, const_cast(publicKeyFile.c_str()))) - { - if (!PEM_write_bio_RSAPublicKey(bio, _pRSA)) - throw Poco::WriteFileException("Failed to write public key to file", publicKeyFile); - } - else throw Poco::CreateFileException("Cannot create public key file"); - } - catch (...) 
- { - BIO_free(bio); - throw; - } - BIO_free(bio); - } - - if (!privateKeyFile.empty()) - { - BIO* bio = BIO_new(BIO_s_file()); - if (!bio) throw Poco::IOException("Cannot create BIO for writing private key file", privateKeyFile); - try - { - if (BIO_write_filename(bio, const_cast(privateKeyFile.c_str()))) - { - int rc = 0; - if (privateKeyPassphrase.empty()) - rc = PEM_write_bio_RSAPrivateKey(bio, _pRSA, 0, 0, 0, 0, 0); - else - rc = PEM_write_bio_RSAPrivateKey(bio, _pRSA, EVP_des_ede3_cbc(), - reinterpret_cast(const_cast(privateKeyPassphrase.c_str())), - static_cast(privateKeyPassphrase.length()), 0, 0); - if (!rc) throw Poco::FileException("Failed to write private key to file", privateKeyFile); - } - else throw Poco::CreateFileException("Cannot create private key file", privateKeyFile); - } - catch (...) - { - BIO_free(bio); - throw; - } - BIO_free(bio); - } -} - - -void RSAKeyImpl::save(std::ostream* pPublicKeyStream, - std::ostream* pPrivateKeyStream, - const std::string& privateKeyPassphrase) const -{ - if (pPublicKeyStream) - { - BIO* bio = BIO_new(BIO_s_mem()); - if (!bio) throw Poco::IOException("Cannot create BIO for writing public key"); - if (!PEM_write_bio_RSAPublicKey(bio, _pRSA)) - { - BIO_free(bio); - throw Poco::WriteFileException("Failed to write public key to stream"); - } - char* pData; - long size = BIO_get_mem_data(bio, &pData); - pPublicKeyStream->write(pData, static_cast(size)); - BIO_free(bio); - } - - if (pPrivateKeyStream) - { - BIO* bio = BIO_new(BIO_s_mem()); - if (!bio) throw Poco::IOException("Cannot create BIO for writing public key"); - int rc = 0; - if (privateKeyPassphrase.empty()) - rc = PEM_write_bio_RSAPrivateKey(bio, _pRSA, 0, 0, 0, 0, 0); - else - rc = PEM_write_bio_RSAPrivateKey(bio, _pRSA, EVP_des_ede3_cbc(), - reinterpret_cast(const_cast(privateKeyPassphrase.c_str())), - static_cast(privateKeyPassphrase.length()), 0, 0); - if (!rc) - { - BIO_free(bio); - throw Poco::FileException("Failed to write private key to stream"); - } - char* pData; - long size = BIO_get_mem_data(bio, &pData); - pPrivateKeyStream->write(pData, static_cast(size)); - BIO_free(bio); - } -} - - RSAKeyImpl::ByteVec RSAKeyImpl::convertToByteVec(const BIGNUM* bn) { int numBytes = BN_num_bytes(bn); @@ -383,4 +286,4 @@ RSAKeyImpl::ByteVec RSAKeyImpl::convertToByteVec(const BIGNUM* bn) } -} } // namespace Poco::Crypto \ No newline at end of file +} } // namespace Poco::Crypto diff --git a/base/poco/Data/ODBC/include/Poco/Data/ODBC/Extractor.h b/base/poco/Data/ODBC/include/Poco/Data/ODBC/Extractor.h index 82e2f895638..3914f33df76 100644 --- a/base/poco/Data/ODBC/include/Poco/Data/ODBC/Extractor.h +++ b/base/poco/Data/ODBC/include/Poco/Data/ODBC/Extractor.h @@ -466,7 +466,7 @@ namespace Data bool extractManualImpl(std::size_t pos, T & val, SQLSMALLINT cType) { SQLRETURN rc = 0; - T value = (T)0; + T value; resizeLengths(pos); diff --git a/base/poco/Data/ODBC/include/Poco/Data/ODBC/Unicode_WIN32.h b/base/poco/Data/ODBC/include/Poco/Data/ODBC/Unicode_WIN32.h deleted file mode 100644 index 06af853e443..00000000000 --- a/base/poco/Data/ODBC/include/Poco/Data/ODBC/Unicode_WIN32.h +++ /dev/null @@ -1,62 +0,0 @@ -// -// Unicode.h -// -// Library: Data/ODBC -// Package: ODBC -// Module: Unicode -// -// Definition of Unicode_WIN32. -// -// Copyright (c) 2006, Applied Informatics Software Engineering GmbH. -// and Contributors. 
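Regarding the Extractor.h change earlier in this hunk (`T value = (T)0;` becoming `T value;`): a C-style cast from 0 requires T to be constructible from an integer literal, which fails to compile for plain struct binding types and silently misbehaves for others, while default-initialization compiles for any T (the buffer is presumably filled by the subsequent ODBC call before use). A hedged illustration with a hypothetical stand-in struct (not a real ODBC type):

#include <string>

struct Timestamp { int year = 0, month = 0, day = 0; }; // stand-in for an ODBC struct type

template <typename T>
T buffer_via_cast()
{
    T value = (T)0; // needs a conversion from an integer literal
    return value;
}

template <typename T>
T buffer_via_default()
{
    T value{}; // works for any default-constructible T
    return value;
}

int main()
{
    buffer_via_default<int>();
    buffer_via_default<Timestamp>();   // fine
    // buffer_via_cast<Timestamp>();   // would not compile: no conversion from int
    // buffer_via_cast<std::string>(); // compiles, but means string(nullptr): undefined behaviour
    return 0;
}

Note that the diff uses plain `T value;` rather than `T value{};`, leaving arithmetic types uninitialized until the driver writes them, which avoids a redundant store.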
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Data_ODBC_Unicode_WIN32_INCLUDED -#define Data_ODBC_Unicode_WIN32_INCLUDED - - -namespace Poco -{ -namespace Data -{ - namespace ODBC - { - - - inline void makeUTF16(SQLCHAR * pSQLChar, SQLINTEGER length, std::wstring & target) - /// Utility function for conversion from UTF-8 to UTF-16 - { - int len = length; - if (SQL_NTS == len) - len = (int)std::strlen((const char *)pSQLChar); - - UnicodeConverter::toUTF16((const char *)pSQLChar, len, target); - } - - - inline void makeUTF8(Poco::Buffer & buffer, SQLINTEGER length, SQLPOINTER pTarget, SQLINTEGER targetLength) - /// Utility function for conversion from UTF-16 to UTF-8. Length is in bytes. - { - if (buffer.sizeBytes() < length) - throw InvalidArgumentException("Specified length exceeds available length."); - else if ((length % 2) != 0) - throw InvalidArgumentException("Length must be an even number."); - - length /= sizeof(wchar_t); - std::string result; - UnicodeConverter::toUTF8(buffer.begin(), length, result); - - std::memset(pTarget, 0, targetLength); - std::strncpy((char *)pTarget, result.c_str(), result.size() < targetLength ? result.size() : targetLength); - } - - - } -} -} // namespace Poco::Data::ODBC - - -#endif // Data_ODBC_Unicode_WIN32_INCLUDED diff --git a/base/poco/Data/ODBC/src/Unicode_WIN32.cpp b/base/poco/Data/ODBC/src/Unicode_WIN32.cpp deleted file mode 100644 index fe637e49b3d..00000000000 --- a/base/poco/Data/ODBC/src/Unicode_WIN32.cpp +++ /dev/null @@ -1,761 +0,0 @@ -// -// Unicode.cpp -// -// Library: Data/ODBC -// Package: ODBC -// Module: Unicode -// -// Copyright (c) 2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Data/ODBC/ODBC.h" -#include "Poco/Data/ODBC/Utility.h" -#include "Poco/Data/ODBC/Unicode_WIN32.h" -#include "Poco/Buffer.h" -#include "Poco/Exception.h" - - -using Poco::Buffer; -using Poco::InvalidArgumentException; -using Poco::NotImplementedException; - - -namespace Poco { -namespace Data { -namespace ODBC { - - -SQLRETURN SQLColAttribute(SQLHSTMT hstmt, - SQLUSMALLINT iCol, - SQLUSMALLINT iField, - SQLPOINTER pCharAttr, - SQLSMALLINT cbCharAttrMax, - SQLSMALLINT* pcbCharAttr, - NumAttrPtrType pNumAttr) -{ - if (isString(pCharAttr, cbCharAttrMax)) - { - Buffer buffer(stringLength(pCharAttr, cbCharAttrMax)); - - SQLRETURN rc = SQLColAttributeW(hstmt, - iCol, - iField, - buffer.begin(), - (SQLSMALLINT) buffer.sizeBytes(), - pcbCharAttr, - pNumAttr); - - makeUTF8(buffer, *pcbCharAttr, pCharAttr, cbCharAttrMax); - return rc; - } - - return SQLColAttributeW(hstmt, - iCol, - iField, - pCharAttr, - cbCharAttrMax, - pcbCharAttr, - pNumAttr); -} - - -SQLRETURN SQLColAttributes(SQLHSTMT hstmt, - SQLUSMALLINT icol, - SQLUSMALLINT fDescType, - SQLPOINTER rgbDesc, - SQLSMALLINT cbDescMax, - SQLSMALLINT* pcbDesc, - SQLLEN* pfDesc) -{ - return SQLColAttribute(hstmt, - icol, - fDescType, - rgbDesc, - cbDescMax, - pcbDesc, - pfDesc); -} - - -SQLRETURN SQLConnect(SQLHDBC hdbc, - SQLCHAR* szDSN, - SQLSMALLINT cbDSN, - SQLCHAR* szUID, - SQLSMALLINT cbUID, - SQLCHAR* szAuthStr, - SQLSMALLINT cbAuthStr) -{ - std::wstring sqlDSN; - makeUTF16(szDSN, cbDSN, sqlDSN); - - std::wstring sqlUID; - makeUTF16(szUID, cbUID, sqlUID); - - std::wstring sqlPWD; - makeUTF16(szAuthStr, cbAuthStr, sqlPWD); - - return SQLConnectW(hdbc, - (SQLWCHAR*) sqlDSN.c_str(), - (SQLSMALLINT) sqlDSN.size(), - (SQLWCHAR*) sqlUID.c_str(), - (SQLSMALLINT) sqlUID.size(), - (SQLWCHAR*) sqlPWD.c_str(), - 
(SQLSMALLINT) sqlPWD.size()); -} - - -SQLRETURN SQLDescribeCol(SQLHSTMT hstmt, - SQLUSMALLINT icol, - SQLCHAR* szColName, - SQLSMALLINT cbColNameMax, - SQLSMALLINT* pcbColName, - SQLSMALLINT* pfSqlType, - SQLULEN* pcbColDef, - SQLSMALLINT* pibScale, - SQLSMALLINT* pfNullable) -{ - Buffer buffer(cbColNameMax); - SQLRETURN rc = SQLDescribeColW(hstmt, - icol, - (SQLWCHAR*) buffer.begin(), - (SQLSMALLINT) buffer.size(), - pcbColName, - pfSqlType, - pcbColDef, - pibScale, - pfNullable); - - makeUTF8(buffer, *pcbColName * sizeof(wchar_t), szColName, cbColNameMax); - return rc; -} - - -SQLRETURN SQLError(SQLHENV henv, - SQLHDBC hdbc, - SQLHSTMT hstmt, - SQLCHAR* szSqlState, - SQLINTEGER* pfNativeError, - SQLCHAR* szErrorMsg, - SQLSMALLINT cbErrorMsgMax, - SQLSMALLINT* pcbErrorMsg) -{ - throw NotImplementedException("SQLError is obsolete. " - "Use SQLGetDiagRec instead."); -} - - -SQLRETURN SQLExecDirect(SQLHSTMT hstmt, - SQLCHAR* szSqlStr, - SQLINTEGER cbSqlStr) -{ - std::wstring sqlStr; - makeUTF16(szSqlStr, cbSqlStr, sqlStr); - - return SQLExecDirectW(hstmt, - (SQLWCHAR*) sqlStr.c_str(), - (SQLINTEGER) sqlStr.size()); -} - - -SQLRETURN SQLGetConnectAttr(SQLHDBC hdbc, - SQLINTEGER fAttribute, - SQLPOINTER rgbValue, - SQLINTEGER cbValueMax, - SQLINTEGER* pcbValue) -{ - if (isString(rgbValue, cbValueMax)) - { - Buffer buffer(stringLength(rgbValue, cbValueMax)); - - SQLRETURN rc = SQLGetConnectAttrW(hdbc, - fAttribute, - buffer.begin(), - (SQLINTEGER) buffer.sizeBytes(), - pcbValue); - - makeUTF8(buffer, *pcbValue, rgbValue, cbValueMax); - return rc; - } - - - return SQLGetConnectAttrW(hdbc, - fAttribute, - rgbValue, - cbValueMax, - pcbValue); -} - - -SQLRETURN SQLGetCursorName(SQLHSTMT hstmt, - SQLCHAR* szCursor, - SQLSMALLINT cbCursorMax, - SQLSMALLINT* pcbCursor) -{ - throw NotImplementedException("Not implemented"); -} - - -SQLRETURN SQLSetDescField(SQLHDESC hdesc, - SQLSMALLINT iRecord, - SQLSMALLINT iField, - SQLPOINTER rgbValue, - SQLINTEGER cbValueMax) -{ - if (isString(rgbValue, cbValueMax)) - { - std::wstring str; - makeUTF16((SQLCHAR*) rgbValue, cbValueMax, str); - - SQLRETURN rc = SQLSetDescFieldW(hdesc, - iRecord, - iField, - (SQLPOINTER) str.c_str(), - (SQLINTEGER) str.size() * sizeof(std::wstring::value_type)); - - return rc; - } - - return SQLSetDescFieldW(hdesc, - iRecord, - iField, - rgbValue, - cbValueMax); -} - - -SQLRETURN SQLGetDescField(SQLHDESC hdesc, - SQLSMALLINT iRecord, - SQLSMALLINT iField, - SQLPOINTER rgbValue, - SQLINTEGER cbValueMax, - SQLINTEGER* pcbValue) -{ - if (isString(rgbValue, cbValueMax)) - { - Buffer buffer(stringLength(rgbValue, cbValueMax)); - - SQLRETURN rc = SQLGetDescFieldW(hdesc, - iRecord, - iField, - buffer.begin(), - (SQLINTEGER) buffer.sizeBytes(), - pcbValue); - - makeUTF8(buffer, *pcbValue, rgbValue, cbValueMax); - return rc; - } - - return SQLGetDescFieldW(hdesc, - iRecord, - iField, - rgbValue, - cbValueMax, - pcbValue); -} - - -SQLRETURN SQLGetDescRec(SQLHDESC hdesc, - SQLSMALLINT iRecord, - SQLCHAR* szName, - SQLSMALLINT cbNameMax, - SQLSMALLINT* pcbName, - SQLSMALLINT* pfType, - SQLSMALLINT* pfSubType, - SQLLEN* pLength, - SQLSMALLINT* pPrecision, - SQLSMALLINT* pScale, - SQLSMALLINT* pNullable) -{ - throw NotImplementedException(); -} - - -SQLRETURN SQLGetDiagField(SQLSMALLINT fHandleType, - SQLHANDLE handle, - SQLSMALLINT iRecord, - SQLSMALLINT fDiagField, - SQLPOINTER rgbDiagInfo, - SQLSMALLINT cbDiagInfoMax, - SQLSMALLINT* pcbDiagInfo) -{ - if (isString(rgbDiagInfo, cbDiagInfoMax)) - { - Buffer buffer(stringLength(rgbDiagInfo, 
cbDiagInfoMax)); - - SQLRETURN rc = SQLGetDiagFieldW(fHandleType, - handle, - iRecord, - fDiagField, - buffer.begin(), - (SQLSMALLINT) buffer.sizeBytes(), - pcbDiagInfo); - - makeUTF8(buffer, *pcbDiagInfo, rgbDiagInfo, cbDiagInfoMax); - return rc; - } - - return SQLGetDiagFieldW(fHandleType, - handle, - iRecord, - fDiagField, - rgbDiagInfo, - cbDiagInfoMax, - pcbDiagInfo); -} - - -SQLRETURN SQLGetDiagRec(SQLSMALLINT fHandleType, - SQLHANDLE handle, - SQLSMALLINT iRecord, - SQLCHAR* szSqlState, - SQLINTEGER* pfNativeError, - SQLCHAR* szErrorMsg, - SQLSMALLINT cbErrorMsgMax, - SQLSMALLINT* pcbErrorMsg) -{ - const SQLINTEGER stateLen = SQL_SQLSTATE_SIZE + 1; - Buffer bufState(stateLen); - Buffer bufErr(cbErrorMsgMax); - - SQLRETURN rc = SQLGetDiagRecW(fHandleType, - handle, - iRecord, - bufState.begin(), - pfNativeError, - bufErr.begin(), - (SQLSMALLINT) bufErr.size(), - pcbErrorMsg); - - makeUTF8(bufState, stateLen * sizeof(wchar_t), szSqlState, stateLen); - makeUTF8(bufErr, *pcbErrorMsg * sizeof(wchar_t), szErrorMsg, cbErrorMsgMax); - - return rc; -} - - -SQLRETURN SQLPrepare(SQLHSTMT hstmt, - SQLCHAR* szSqlStr, - SQLINTEGER cbSqlStr) -{ - std::wstring sqlStr; - makeUTF16(szSqlStr, cbSqlStr, sqlStr); - - return SQLPrepareW(hstmt, - (SQLWCHAR*) sqlStr.c_str(), - (SQLINTEGER) sqlStr.size()); -} - - -SQLRETURN SQLSetConnectAttr(SQLHDBC hdbc, - SQLINTEGER fAttribute, - SQLPOINTER rgbValue, - SQLINTEGER cbValue) -{ - if (isString(rgbValue, cbValue)) - { - std::wstring str; - makeUTF16((SQLCHAR*) rgbValue, cbValue, str); - - return SQLSetConnectAttrW(hdbc, - fAttribute, - (SQLWCHAR*) str.c_str(), - (SQLINTEGER) str.size() * sizeof(std::wstring::value_type)); - } - - return SQLSetConnectAttrW(hdbc, - fAttribute, - rgbValue, - cbValue); -} - - -SQLRETURN SQLSetCursorName(SQLHSTMT hstmt, - SQLCHAR* szCursor, - SQLSMALLINT cbCursor) -{ - throw NotImplementedException("Not implemented"); -} - - -SQLRETURN SQLSetStmtAttr(SQLHSTMT hstmt, - SQLINTEGER fAttribute, - SQLPOINTER rgbValue, - SQLINTEGER cbValueMax) -{ - if (isString(rgbValue, cbValueMax)) - { - std::wstring str; - makeUTF16((SQLCHAR*) rgbValue, cbValueMax, str); - - return SQLSetStmtAttrW(hstmt, - fAttribute, - (SQLPOINTER) str.c_str(), - (SQLINTEGER) str.size()); - } - - return SQLSetStmtAttrW(hstmt, - fAttribute, - rgbValue, - cbValueMax); -} - - -SQLRETURN SQLGetStmtAttr(SQLHSTMT hstmt, - SQLINTEGER fAttribute, - SQLPOINTER rgbValue, - SQLINTEGER cbValueMax, - SQLINTEGER* pcbValue) -{ - if (isString(rgbValue, cbValueMax)) - { - Buffer buffer(stringLength(rgbValue, cbValueMax)); - - return SQLGetStmtAttrW(hstmt, - fAttribute, - (SQLPOINTER) buffer.begin(), - (SQLINTEGER) buffer.sizeBytes(), - pcbValue); - } - - return SQLGetStmtAttrW(hstmt, - fAttribute, - rgbValue, - cbValueMax, - pcbValue); -} - - -SQLRETURN SQLColumns(SQLHSTMT hstmt, - SQLCHAR* szCatalogName, - SQLSMALLINT cbCatalogName, - SQLCHAR* szSchemaName, - SQLSMALLINT cbSchemaName, - SQLCHAR* szTableName, - SQLSMALLINT cbTableName, - SQLCHAR* szColumnName, - SQLSMALLINT cbColumnName) -{ - throw NotImplementedException(); -} - - -SQLRETURN SQLGetConnectOption(SQLHDBC hdbc, - SQLUSMALLINT fOption, - SQLPOINTER pvParam) -{ - throw NotImplementedException(); -} - - -SQLRETURN SQLGetInfo(SQLHDBC hdbc, - SQLUSMALLINT fInfoType, - SQLPOINTER rgbInfoValue, - SQLSMALLINT cbInfoValueMax, - SQLSMALLINT* pcbInfoValue) -{ - if (cbInfoValueMax) - { - Buffer buffer(cbInfoValueMax); - - SQLRETURN rc = SQLGetInfoW(hdbc, - fInfoType, - (SQLPOINTER) buffer.begin(), - (SQLSMALLINT) 
buffer.sizeBytes(), - pcbInfoValue); - - makeUTF8(buffer, *pcbInfoValue, rgbInfoValue, cbInfoValueMax); - - return rc; - } - - return SQLGetInfoW(hdbc, - fInfoType, - rgbInfoValue, - cbInfoValueMax, - pcbInfoValue); -} - - -SQLRETURN SQLGetTypeInfo(SQLHSTMT StatementHandle, SQLSMALLINT DataType) -{ - return SQLGetTypeInfoW(StatementHandle, DataType); -} - - -SQLRETURN SQLSetConnectOption(SQLHDBC hdbc, - SQLUSMALLINT fOption, - SQLULEN vParam) -{ - throw NotImplementedException(); -} - - -SQLRETURN SQLSpecialColumns(SQLHSTMT hstmt, - SQLUSMALLINT fColType, - SQLCHAR* szCatalogName, - SQLSMALLINT cbCatalogName, - SQLCHAR* szSchemaName, - SQLSMALLINT cbSchemaName, - SQLCHAR* szTableName, - SQLSMALLINT cbTableName, - SQLUSMALLINT fScope, - SQLUSMALLINT fNullable) -{ - throw NotImplementedException(); -} - - -SQLRETURN SQLStatistics(SQLHSTMT hstmt, - SQLCHAR* szCatalogName, - SQLSMALLINT cbCatalogName, - SQLCHAR* szSchemaName, - SQLSMALLINT cbSchemaName, - SQLCHAR* szTableName, - SQLSMALLINT cbTableName, - SQLUSMALLINT fUnique, - SQLUSMALLINT fAccuracy) -{ - throw NotImplementedException(); -} - - -SQLRETURN SQLTables(SQLHSTMT hstmt, - SQLCHAR* szCatalogName, - SQLSMALLINT cbCatalogName, - SQLCHAR* szSchemaName, - SQLSMALLINT cbSchemaName, - SQLCHAR* szTableName, - SQLSMALLINT cbTableName, - SQLCHAR* szTableType, - SQLSMALLINT cbTableType) -{ - throw NotImplementedException(); -} - - -SQLRETURN SQLDataSources(SQLHENV henv, - SQLUSMALLINT fDirection, - SQLCHAR* szDSN, - SQLSMALLINT cbDSNMax, - SQLSMALLINT* pcbDSN, - SQLCHAR* szDesc, - SQLSMALLINT cbDescMax, - SQLSMALLINT* pcbDesc) -{ - Buffer bufDSN(cbDSNMax); - Buffer bufDesc(cbDescMax); - - SQLRETURN rc = SQLDataSourcesW(henv, - fDirection, - bufDSN.begin(), - (SQLSMALLINT) bufDSN.size(), - pcbDSN, - bufDesc.begin(), - (SQLSMALLINT) bufDesc.size(), - pcbDesc); - - makeUTF8(bufDSN, *pcbDSN * sizeof(wchar_t), szDSN, cbDSNMax); - makeUTF8(bufDesc, *pcbDesc * sizeof(wchar_t), szDesc, cbDescMax); - - return rc; -} - - -SQLRETURN SQLDriverConnect(SQLHDBC hdbc, - SQLHWND hwnd, - SQLCHAR* szConnStrIn, - SQLSMALLINT cbConnStrIn, - SQLCHAR* szConnStrOut, - SQLSMALLINT cbConnStrOutMax, - SQLSMALLINT* pcbConnStrOut, - SQLUSMALLINT fDriverCompletion) -{ - std::wstring connStrIn; - int len = cbConnStrIn; - if (SQL_NTS == len) - len = (int) std::strlen((const char*) szConnStrIn); - - Poco::UnicodeConverter::toUTF16((const char *) szConnStrIn, len, connStrIn); - - Buffer bufOut(cbConnStrOutMax); - SQLRETURN rc = SQLDriverConnectW(hdbc, - hwnd, - (SQLWCHAR*) connStrIn.c_str(), - (SQLSMALLINT) connStrIn.size(), - bufOut.begin(), - (SQLSMALLINT) bufOut.size(), - pcbConnStrOut, - fDriverCompletion); - - if (!Utility::isError(rc)) - makeUTF8(bufOut, *pcbConnStrOut * sizeof(wchar_t), szConnStrOut, cbConnStrOutMax); - - return rc; -} - - -SQLRETURN SQLBrowseConnect(SQLHDBC hdbc, - SQLCHAR* szConnStrIn, - SQLSMALLINT cbConnStrIn, - SQLCHAR* szConnStrOut, - SQLSMALLINT cbConnStrOutMax, - SQLSMALLINT* pcbConnStrOut) -{ - std::wstring str; - makeUTF16(szConnStrIn, cbConnStrIn, str); - - Buffer bufConnStrOut(cbConnStrOutMax); - - SQLRETURN rc = SQLBrowseConnectW(hdbc, - (SQLWCHAR*) str.c_str(), - (SQLSMALLINT) str.size(), - bufConnStrOut.begin(), - (SQLSMALLINT) bufConnStrOut.size(), - pcbConnStrOut); - - makeUTF8(bufConnStrOut, *pcbConnStrOut * sizeof(wchar_t), szConnStrOut, cbConnStrOutMax); - - return rc; -} - - -SQLRETURN SQLColumnPrivileges(SQLHSTMT hstmt, - SQLCHAR* szCatalogName, - SQLSMALLINT cbCatalogName, - SQLCHAR* szSchemaName, - SQLSMALLINT cbSchemaName, - 
SQLCHAR* szTableName, - SQLSMALLINT cbTableName, - SQLCHAR* szColumnName, - SQLSMALLINT cbColumnName) -{ - throw NotImplementedException(); -} - - -SQLRETURN SQLForeignKeys(SQLHSTMT hstmt, - SQLCHAR* szPkCatalogName, - SQLSMALLINT cbPkCatalogName, - SQLCHAR* szPkSchemaName, - SQLSMALLINT cbPkSchemaName, - SQLCHAR* szPkTableName, - SQLSMALLINT cbPkTableName, - SQLCHAR* szFkCatalogName, - SQLSMALLINT cbFkCatalogName, - SQLCHAR* szFkSchemaName, - SQLSMALLINT cbFkSchemaName, - SQLCHAR* szFkTableName, - SQLSMALLINT cbFkTableName) -{ - throw NotImplementedException(); -} - - -SQLRETURN SQLNativeSql(SQLHDBC hdbc, - SQLCHAR* szSqlStrIn, - SQLINTEGER cbSqlStrIn, - SQLCHAR* szSqlStr, - SQLINTEGER cbSqlStrMax, - SQLINTEGER* pcbSqlStr) -{ - std::wstring str; - makeUTF16(szSqlStrIn, cbSqlStrIn, str); - - Buffer bufSQLOut(cbSqlStrMax); - - SQLRETURN rc = SQLNativeSqlW(hdbc, - (SQLWCHAR*) str.c_str(), - (SQLINTEGER) str.size(), - bufSQLOut.begin(), - (SQLINTEGER) bufSQLOut.size(), - pcbSqlStr); - - makeUTF8(bufSQLOut, *pcbSqlStr * sizeof(wchar_t), szSqlStr, cbSqlStrMax); - - return rc; -} - - -SQLRETURN SQLPrimaryKeys(SQLHSTMT hstmt, - SQLCHAR* szCatalogName, - SQLSMALLINT cbCatalogName, - SQLCHAR* szSchemaName, - SQLSMALLINT cbSchemaName, - SQLCHAR* szTableName, - SQLSMALLINT cbTableName) -{ - throw NotImplementedException(); -} - - -SQLRETURN SQLProcedureColumns(SQLHSTMT hstmt, - SQLCHAR* szCatalogName, - SQLSMALLINT cbCatalogName, - SQLCHAR* szSchemaName, - SQLSMALLINT cbSchemaName, - SQLCHAR* szProcName, - SQLSMALLINT cbProcName, - SQLCHAR* szColumnName, - SQLSMALLINT cbColumnName) -{ - throw NotImplementedException(); -} - - -SQLRETURN SQLProcedures(SQLHSTMT hstmt, - SQLCHAR* szCatalogName, - SQLSMALLINT cbCatalogName, - SQLCHAR* szSchemaName, - SQLSMALLINT cbSchemaName, - SQLCHAR* szProcName, - SQLSMALLINT cbProcName) -{ - throw NotImplementedException(); -} - - -SQLRETURN SQLTablePrivileges(SQLHSTMT hstmt, - SQLCHAR* szCatalogName, - SQLSMALLINT cbCatalogName, - SQLCHAR* szSchemaName, - SQLSMALLINT cbSchemaName, - SQLCHAR* szTableName, - SQLSMALLINT cbTableName) -{ - throw NotImplementedException(); -} - - -SQLRETURN SQLDrivers(SQLHENV henv, - SQLUSMALLINT fDirection, - SQLCHAR* szDriverDesc, - SQLSMALLINT cbDriverDescMax, - SQLSMALLINT* pcbDriverDesc, - SQLCHAR* szDriverAttributes, - SQLSMALLINT cbDrvrAttrMax, - SQLSMALLINT* pcbDrvrAttr) -{ - Buffer bufDriverDesc(cbDriverDescMax); - Buffer bufDriverAttr(cbDrvrAttrMax); - - SQLRETURN rc = SQLDriversW(henv, - fDirection, - bufDriverDesc.begin(), - (SQLSMALLINT) bufDriverDesc.size(), - pcbDriverDesc, - bufDriverAttr.begin(), - (SQLSMALLINT) bufDriverAttr.size(), - pcbDrvrAttr); - - makeUTF8(bufDriverDesc, *pcbDriverDesc * sizeof(wchar_t), szDriverDesc, cbDriverDescMax); - makeUTF8(bufDriverAttr, *pcbDrvrAttr * sizeof(wchar_t), szDriverAttributes, cbDrvrAttrMax); - - return rc; -} - - -} } } // namespace Poco::Data::ODBC diff --git a/base/poco/Data/include/Poco/Data/AutoTransaction.h b/base/poco/Data/include/Poco/Data/AutoTransaction.h deleted file mode 100644 index a222bd27afe..00000000000 --- a/base/poco/Data/include/Poco/Data/AutoTransaction.h +++ /dev/null @@ -1,37 +0,0 @@ -// -// AutoTransaction.h -// -// Library: Data -// Package: DataCore -// Module: AutoTransaction -// -// Forward header for the Transaction class. -// -// Copyright (c) 2006, Applied Informatics Software Engineering GmbH. -// and Contributors. 
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Data_AutoTransaction_INCLUDED -#define Data_AutoTransaction_INCLUDED - - -#include "Poco/Data/Transaction.h" - - -namespace Poco -{ -namespace Data -{ - - - typedef Transaction AutoTransaction; - - -} -} // namespace Poco::Data - - -#endif // Data_AutoTransaction_INCLUDED diff --git a/base/poco/Data/include/Poco/Data/DynamicLOB.h b/base/poco/Data/include/Poco/Data/DynamicLOB.h deleted file mode 100644 index 749b269ffac..00000000000 --- a/base/poco/Data/include/Poco/Data/DynamicLOB.h +++ /dev/null @@ -1,54 +0,0 @@ -// -// DynamicLOB.h -// -// Library: Data -// Package: DataCore -// Module: DynamicLOB -// -// Definition of the Poco::Dynamic::Var LOB cast operators. -// -// Copyright (c) 2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Data_DynamicLOB_INCLUDED -#define Data_DynamicLOB_INCLUDED - - -#include "Poco/Data/Data.h" -#include "Poco/Data/LOB.h" -#include "Poco/Dynamic/Var.h" - - -namespace Poco -{ -namespace Data -{ - - template - class LOB; - typedef LOB BLOB; - typedef LOB CLOB; - -} -} // namespace Poco::Data - - -namespace Poco -{ -namespace Dynamic -{ - - template <> - Data_API Var::operator Poco::Data::CLOB() const; - template <> - Data_API Var::operator Poco::Data::BLOB() const; - -} -} // namespace Poco::Dynamic - - -#endif // Data_DynamicLOB_INCLUDED diff --git a/base/poco/Data/include/Poco/Data/LOBStream.h b/base/poco/Data/include/Poco/Data/LOBStream.h deleted file mode 100644 index 23346224c0f..00000000000 --- a/base/poco/Data/include/Poco/Data/LOBStream.h +++ /dev/null @@ -1,149 +0,0 @@ -// -// LOBStream.h -// -// Library: Data -// Package: DataCore -// Module: LOBStream -// -// Definition of the LOBStream class. -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Data_LOBStream_INCLUDED -#define Data_LOBStream_INCLUDED - - -#include -#include -#include "Poco/Data/LOB.h" -#include "Poco/Foundation.h" -#include "Poco/UnbufferedStreamBuf.h" - - -namespace Poco -{ -namespace Data -{ - - - template - class LOBStreamBuf : public BasicUnbufferedStreamBuf> - /// This is the streambuf class used for reading from and writing to a LOB. - { - public: - LOBStreamBuf(LOB & lob) : _lob(lob), _it(_lob.begin()) - /// Creates LOBStreamBuf. - { - } - - - ~LOBStreamBuf() - /// Destroys LOBStreamBuf. - { - } - - protected: - typedef std::char_traits TraitsType; - typedef BasicUnbufferedStreamBuf BaseType; - - typename BaseType::int_type readFromDevice() - { - if (_it != _lob.end()) - return BaseType::charToInt(*_it++); - else - return -1; - } - - typename BaseType::int_type writeToDevice(T c) - { - _lob.appendRaw(&c, 1); - return 1; - } - - private: - LOB & _lob; - typename LOB::Iterator _it; - }; - - - template - class LOBIOS : public virtual std::ios - /// The base class for LOBInputStream and - /// LOBOutputStream. - /// - /// This class is needed to ensure the correct initialization - /// order of the stream buffer and base classes. - { - public: - LOBIOS(LOB & lob, openmode mode) : _buf(lob) - /// Creates the LOBIOS with the given LOB. - { - poco_ios_init(&_buf); - } - - ~LOBIOS() - /// Destroys the LOBIOS. - { - } - - LOBStreamBuf * rdbuf() - /// Returns a pointer to the internal LOBStreamBuf. 
- { - return &_buf; - } - - protected: - LOBStreamBuf _buf; - }; - - - template - class LOBOutputStream : public LOBIOS, public std::basic_ostream> - /// An output stream for writing to a LOB. - { - public: - LOBOutputStream(LOB & lob) : LOBIOS(lob, std::ios::out), std::ostream(LOBIOS::rdbuf()) - /// Creates the LOBOutputStream with the given LOB. - { - } - - ~LOBOutputStream() - /// Destroys the LOBOutputStream. - { - } - }; - - - template - class LOBInputStream : public LOBIOS, public std::basic_istream> - /// An input stream for reading from a LOB. - { - public: - LOBInputStream(LOB & lob) : LOBIOS(lob, std::ios::in), std::istream(LOBIOS::rdbuf()) - /// Creates the LOBInputStream with the given LOB. - { - } - - ~LOBInputStream() - /// Destroys the LOBInputStream. - { - } - }; - - - typedef LOBOutputStream BLOBOutputStream; - typedef LOBOutputStream CLOBOutputStream; - - typedef LOBInputStream BLOBInputStream; - typedef LOBInputStream CLOBInputStream; - -} -} // namespace Poco::Data - - -#endif // Data_LOBStream_INCLUDED diff --git a/base/poco/Data/src/DynamicLOB.cpp b/base/poco/Data/src/DynamicLOB.cpp deleted file mode 100644 index 5dfe3df8574..00000000000 --- a/base/poco/Data/src/DynamicLOB.cpp +++ /dev/null @@ -1,74 +0,0 @@ -// -// DynamicLOB.cpp -// -// Library: Data -// Package: DataCore -// Module: DynamicLOB -// -// Copyright (c) 2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifdef __GNUC__ -// TODO: determine g++ version able to do the right thing without these specializations - -#include "Poco/Data/DynamicLOB.h" -#include "Poco/Data/LOB.h" -#include "Poco/Dynamic/Var.h" - - -namespace Poco { -namespace Dynamic { - - -using Poco::Data::CLOB; -using Poco::Data::BLOB; - - -template <> -Var::operator CLOB () const -{ - VarHolder* pHolder = content(); - - if (!pHolder) - throw InvalidAccessException("Can not convert empty value."); - - if (typeid(CLOB) == pHolder->type()) - return extract(); - else - { - std::string result; - pHolder->convert(result); - return CLOB(result); - } -} - - -template <> -Var::operator BLOB () const -{ - VarHolder* pHolder = content(); - - if (!pHolder) - throw InvalidAccessException("Can not convert empty value."); - - if (typeid(BLOB) == pHolder->type()) - return extract(); - else - { - std::string result; - pHolder->convert(result); - return BLOB(reinterpret_cast(result.data()), - result.size()); - } -} - - -} } // namespace Poco::Data - - -#endif // __GNUC__ - diff --git a/base/poco/Foundation/CMakeLists.txt b/base/poco/Foundation/CMakeLists.txt index dceb18e68cc..358f49ed055 100644 --- a/base/poco/Foundation/CMakeLists.txt +++ b/base/poco/Foundation/CMakeLists.txt @@ -31,8 +31,6 @@ set (SRCS src/ASCIIEncoding.cpp src/AsyncChannel.cpp src/AtomicCounter.cpp - src/Base32Decoder.cpp - src/Base32Encoder.cpp src/Base64Decoder.cpp src/Base64Encoder.cpp src/BinaryReader.cpp @@ -81,9 +79,6 @@ set (SRCS src/HexBinaryEncoder.cpp src/InflatingStream.cpp src/JSONString.cpp - src/Latin1Encoding.cpp - src/Latin2Encoding.cpp - src/Latin9Encoding.cpp src/LineEndingConverter.cpp src/LocalDateTime.cpp src/LogFile.cpp @@ -91,8 +86,6 @@ set (SRCS src/LoggingFactory.cpp src/LoggingRegistry.cpp src/LogStream.cpp - src/Manifest.cpp - src/MD4Engine.cpp src/MD5Engine.cpp src/MemoryPool.cpp src/MemoryStream.cpp @@ -113,7 +106,6 @@ set (SRCS src/PatternFormatter.cpp src/Pipe.cpp src/PipeImpl.cpp - src/PipeStream.cpp src/PriorityNotificationQueue.cpp src/Process.cpp src/PurgeStrategy.cpp @@ -136,10 
+128,8 @@ set (SRCS src/StreamChannel.cpp src/StreamConverter.cpp src/StreamCopier.cpp - src/StreamTokenizer.cpp src/String.cpp src/StringTokenizer.cpp - src/SynchronizedObject.cpp src/SyslogChannel.cpp src/Task.cpp src/TaskManager.cpp @@ -175,9 +165,6 @@ set (SRCS src/VarHolder.cpp src/VarIterator.cpp src/Void.cpp - src/Windows1250Encoding.cpp - src/Windows1251Encoding.cpp - src/Windows1252Encoding.cpp ) add_library (_poco_foundation ${SRCS}) @@ -233,7 +220,8 @@ target_link_libraries (_poco_foundation PRIVATE Poco::Foundation::PCRE ch_contrib::zlib - ch_contrib::lz4) + ch_contrib::lz4 + ch_contrib::double_conversion) if(OS_DARWIN AND ARCH_AARCH64) target_compile_definitions (_poco_foundation diff --git a/base/poco/Foundation/include/Poco/Base32Decoder.h b/base/poco/Foundation/include/Poco/Base32Decoder.h deleted file mode 100644 index 96b6f013db8..00000000000 --- a/base/poco/Foundation/include/Poco/Base32Decoder.h +++ /dev/null @@ -1,105 +0,0 @@ -// -// Base32Decoder.h -// -// Library: Foundation -// Package: Streams -// Module: Base32 -// -// Definition of class Base32Decoder. -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_Base32Decoder_INCLUDED -#define Foundation_Base32Decoder_INCLUDED - - -#include -#include "Poco/Foundation.h" -#include "Poco/UnbufferedStreamBuf.h" - - -namespace Poco -{ - - -class Foundation_API Base32DecoderBuf : public UnbufferedStreamBuf -/// This streambuf base32-decodes all data read -/// from the istream connected to it. -/// -/// Note: For performance reasons, the characters -/// are read directly from the given istream's -/// underlying streambuf, so the state -/// of the istream will not reflect that of -/// its streambuf. -{ -public: - Base32DecoderBuf(std::istream & istr); - ~Base32DecoderBuf(); - -private: - int readFromDevice(); - int readOne(); - - unsigned char _group[8]; - int _groupLength; - int _groupIndex; - std::streambuf & _buf; - - static unsigned char IN_ENCODING[256]; - static bool IN_ENCODING_INIT; - -private: - Base32DecoderBuf(const Base32DecoderBuf &); - Base32DecoderBuf & operator=(const Base32DecoderBuf &); -}; - - -class Foundation_API Base32DecoderIOS : public virtual std::ios -/// The base class for Base32Decoder. -/// -/// This class is needed to ensure the correct initialization -/// order of the stream buffer and base classes. -{ -public: - Base32DecoderIOS(std::istream & istr); - ~Base32DecoderIOS(); - Base32DecoderBuf * rdbuf(); - -protected: - Base32DecoderBuf _buf; - -private: - Base32DecoderIOS(const Base32DecoderIOS &); - Base32DecoderIOS & operator=(const Base32DecoderIOS &); -}; - - -class Foundation_API Base32Decoder : public Base32DecoderIOS, public std::istream -/// This istream base32-decodes all data -/// read from the istream connected to it. -/// -/// Note: For performance reasons, the characters -/// are read directly from the given istream's -/// underlying streambuf, so the state -/// of the istream will not reflect that of -/// its streambuf. 
-{ -public: - Base32Decoder(std::istream & istr); - ~Base32Decoder(); - -private: - Base32Decoder(const Base32Decoder &); - Base32Decoder & operator=(const Base32Decoder &); -}; - - -} // namespace Poco - - -#endif // Foundation_Base32Decoder_INCLUDED diff --git a/base/poco/Foundation/include/Poco/Base32Encoder.h b/base/poco/Foundation/include/Poco/Base32Encoder.h deleted file mode 100644 index ced0dd6f3bb..00000000000 --- a/base/poco/Foundation/include/Poco/Base32Encoder.h +++ /dev/null @@ -1,111 +0,0 @@ -// -// Base32Encoder.h -// -// Library: Foundation -// Package: Streams -// Module: Base32 -// -// Definition of class Base32Encoder. -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_Base32Encoder_INCLUDED -#define Foundation_Base32Encoder_INCLUDED - - -#include -#include "Poco/Foundation.h" -#include "Poco/UnbufferedStreamBuf.h" - - -namespace Poco -{ - - -class Foundation_API Base32EncoderBuf : public UnbufferedStreamBuf -/// This streambuf base32-encodes all data written -/// to it and forwards it to a connected -/// ostream. -/// -/// Note: The characters are directly written -/// to the ostream's streambuf, thus bypassing -/// the ostream. The ostream's state is therefore -/// not updated to match the buffer's state. -{ -public: - Base32EncoderBuf(std::ostream & ostr, bool padding = true); - ~Base32EncoderBuf(); - - int close(); - /// Closes the stream buffer. - -private: - int writeToDevice(char c); - - unsigned char _group[5]; - int _groupLength; - std::streambuf & _buf; - bool _doPadding; - - static const unsigned char OUT_ENCODING[32]; - - friend class Base32DecoderBuf; - - Base32EncoderBuf(const Base32EncoderBuf &); - Base32EncoderBuf & operator=(const Base32EncoderBuf &); -}; - - -class Foundation_API Base32EncoderIOS : public virtual std::ios -/// The base class for Base32Encoder. -/// -/// This class is needed to ensure the correct initialization -/// order of the stream buffer and base classes. -{ -public: - Base32EncoderIOS(std::ostream & ostr, bool padding = true); - ~Base32EncoderIOS(); - int close(); - Base32EncoderBuf * rdbuf(); - -protected: - Base32EncoderBuf _buf; - -private: - Base32EncoderIOS(const Base32EncoderIOS &); - Base32EncoderIOS & operator=(const Base32EncoderIOS &); -}; - - -class Foundation_API Base32Encoder : public Base32EncoderIOS, public std::ostream -/// This ostream base32-encodes all data -/// written to it and forwards it to -/// a connected ostream. -/// Always call close() when done -/// writing data, to ensure proper -/// completion of the encoding operation. -/// -/// Note: The characters are directly written -/// to the ostream's streambuf, thus bypassing -/// the ostream. The ostream's state is therefore -/// not updated to match the buffer's state. -{ -public: - Base32Encoder(std::ostream & ostr, bool padding = true); - ~Base32Encoder(); - -private: - Base32Encoder(const Base32Encoder &); - Base32Encoder & operator=(const Base32Encoder &); -}; - - -} // namespace Poco - - -#endif // Foundation_Base32Encoder_INCLUDED diff --git a/base/poco/Foundation/include/Poco/ClassLibrary.h b/base/poco/Foundation/include/Poco/ClassLibrary.h deleted file mode 100644 index deb43f26297..00000000000 --- a/base/poco/Foundation/include/Poco/ClassLibrary.h +++ /dev/null @@ -1,92 +0,0 @@ -// -// ClassLibrary.h -// -// Library: Foundation -// Package: SharedLibrary -// Module: ClassLoader -// -// Definitions for class libraries. 
-// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_ClassLibrary_INCLUDED -#define Foundation_ClassLibrary_INCLUDED - - -#include -#include "Poco/Foundation.h" -#include "Poco/Manifest.h" - - -# define POCO_LIBRARY_API - - -// -// the entry points for every class library -// -extern "C" { -bool POCO_LIBRARY_API pocoBuildManifest(Poco::ManifestBase * pManifest); -void POCO_LIBRARY_API pocoInitializeLibrary(); -void POCO_LIBRARY_API pocoUninitializeLibrary(); -} - - -// -// additional support for named manifests -// -#define POCO_DECLARE_NAMED_MANIFEST(name) \ - extern "C" { \ - bool POCO_LIBRARY_API POCO_JOIN(pocoBuildManifest, name)(Poco::ManifestBase * pManifest); \ - } - - -// -// Macros to automatically implement pocoBuildManifest -// -// usage: -// -// POCO_BEGIN_MANIFEST(MyBaseClass) -// POCO_EXPORT_CLASS(MyFirstClass) -// POCO_EXPORT_CLASS(MySecondClass) -// ... -// POCO_END_MANIFEST -// -#define POCO_BEGIN_MANIFEST_IMPL(fnName, base) \ - bool fnName(Poco::ManifestBase * pManifest_) \ - { \ - typedef base _Base; \ - typedef Poco::Manifest<_Base> _Manifest; \ - std::string requiredType(typeid(_Manifest).name()); \ - std::string actualType(pManifest_->className()); \ - if (requiredType == actualType) \ - { \ - Poco::Manifest<_Base> * pManifest = static_cast<_Manifest *>(pManifest_); - - -#define POCO_BEGIN_MANIFEST(base) POCO_BEGIN_MANIFEST_IMPL(pocoBuildManifest, base) - - -#define POCO_BEGIN_NAMED_MANIFEST(name, base) \ - POCO_DECLARE_NAMED_MANIFEST(name) \ - POCO_BEGIN_MANIFEST_IMPL(POCO_JOIN(pocoBuildManifest, name), base) - - -#define POCO_END_MANIFEST \ - return true; \ - } \ - else return false; \ - } - - -#define POCO_EXPORT_CLASS(cls) pManifest->insert(new Poco::MetaObject(#cls)); - - -#define POCO_EXPORT_SINGLETON(cls) pManifest->insert(new Poco::MetaSingleton(#cls)); - - -#endif // Foundation_ClassLibrary_INCLUDED diff --git a/base/poco/Foundation/include/Poco/ClassLoader.h b/base/poco/Foundation/include/Poco/ClassLoader.h deleted file mode 100644 index 6752a6e7ecd..00000000000 --- a/base/poco/Foundation/include/Poco/ClassLoader.h +++ /dev/null @@ -1,355 +0,0 @@ -// -// ClassLoader.h -// -// Library: Foundation -// Package: SharedLibrary -// Module: ClassLoader -// -// Definition of the ClassLoader class. -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_ClassLoader_INCLUDED -#define Foundation_ClassLoader_INCLUDED - - -#include -#include "Poco/Exception.h" -#include "Poco/Foundation.h" -#include "Poco/Manifest.h" -#include "Poco/MetaObject.h" -#include "Poco/Mutex.h" -#include "Poco/SharedLibrary.h" - - -namespace Poco -{ - - -template -class ClassLoader -/// The ClassLoader loads C++ classes from shared libraries -/// at runtime. It must be instantiated with a root class -/// of the loadable classes. -/// For a class to be loadable from a library, the library -/// must provide a Manifest of all the classes it contains. -/// The Manifest for a shared library can be easily built -/// with the help of the macros in the header file -/// "Foundation/ClassLibrary.h". -/// -/// Starting with POCO release 1.3, a class library can -/// export multiple manifests. In addition to the default -/// (unnamed) manifest, multiple named manifests can -/// be exported, each having a different base class. 
-/// -/// There is one important restriction: one instance of -/// ClassLoader can only load one manifest from a class -/// library. -{ -public: - typedef AbstractMetaObject Meta; - typedef Manifest Manif; - typedef void (*InitializeLibraryFunc)(); - typedef void (*UninitializeLibraryFunc)(); - typedef bool (*BuildManifestFunc)(ManifestBase *); - - struct LibraryInfo - { - SharedLibrary * pLibrary; - const Manif * pManifest; - int refCount; - }; - typedef std::map LibraryMap; - - class Iterator - /// The ClassLoader's very own iterator class. - { - public: - typedef std::pair Pair; - - Iterator(const typename LibraryMap::const_iterator & it) { _it = it; } - Iterator(const Iterator & it) { _it = it._it; } - ~Iterator() { } - Iterator & operator=(const Iterator & it) - { - _it = it._it; - return *this; - } - inline bool operator==(const Iterator & it) const { return _it == it._it; } - inline bool operator!=(const Iterator & it) const { return _it != it._it; } - Iterator & operator++() // prefix - { - ++_it; - return *this; - } - Iterator operator++(int) // postfix - { - Iterator result(_it); - ++_it; - return result; - } - inline const Pair * operator*() const - { - _pair.first = _it->first; - _pair.second = _it->second.pManifest; - return &_pair; - } - inline const Pair * operator->() const - { - _pair.first = _it->first; - _pair.second = _it->second.pManifest; - return &_pair; - } - - private: - typename LibraryMap::const_iterator _it; - mutable Pair _pair; - }; - - ClassLoader() - /// Creates the ClassLoader. - { - } - - virtual ~ClassLoader() - /// Destroys the ClassLoader. - { - for (typename LibraryMap::const_iterator it = _map.begin(); it != _map.end(); ++it) - { - delete it->second.pLibrary; - delete it->second.pManifest; - } - } - - void loadLibrary(const std::string & path, const std::string & manifest) - /// Loads a library from the given path, using the given manifest. - /// Does nothing if the library is already loaded. - /// Throws a LibraryLoadException if the library - /// cannot be loaded or does not have a Manifest. - /// If the library exports a function named "pocoInitializeLibrary", - /// this function is executed. - /// If called multiple times for the same library, - /// the number of calls to unloadLibrary() must be the same - /// for the library to become unloaded. - { - FastMutex::ScopedLock lock(_mutex); - - typename LibraryMap::iterator it = _map.find(path); - if (it == _map.end()) - { - LibraryInfo li; - li.pLibrary = 0; - li.pManifest = 0; - li.refCount = 1; - try - { - li.pLibrary = new SharedLibrary(path); - li.pManifest = new Manif(); - std::string pocoBuildManifestSymbol("pocoBuildManifest"); - pocoBuildManifestSymbol.append(manifest); - if (li.pLibrary->hasSymbol("pocoInitializeLibrary")) - { - InitializeLibraryFunc initializeLibrary = (InitializeLibraryFunc)li.pLibrary->getSymbol("pocoInitializeLibrary"); - initializeLibrary(); - } - if (li.pLibrary->hasSymbol(pocoBuildManifestSymbol)) - { - BuildManifestFunc buildManifest = (BuildManifestFunc)li.pLibrary->getSymbol(pocoBuildManifestSymbol); - if (buildManifest(const_cast(li.pManifest))) - _map[path] = li; - else - throw LibraryLoadException(std::string("Manifest class mismatch in ") + path, manifest); - } - else - throw LibraryLoadException(std::string("No manifest in ") + path, manifest); - } - catch (...) - { - delete li.pLibrary; - delete li.pManifest; - throw; - } - } - else - { - ++it->second.refCount; - } - } - - void loadLibrary(const std::string & path) - /// Loads a library from the given path. 
Does nothing - /// if the library is already loaded. - /// Throws a LibraryLoadException if the library - /// cannot be loaded or does not have a Manifest. - /// If the library exports a function named "pocoInitializeLibrary", - /// this function is executed. - /// If called multiple times for the same library, - /// the number of calls to unloadLibrary() must be the same - /// for the library to become unloaded. - /// - /// Equivalent to loadLibrary(path, ""). - { - loadLibrary(path, ""); - } - - void unloadLibrary(const std::string & path) - /// Unloads the given library. - /// Be extremely cautious when unloading shared libraries. - /// If objects from the library are still referenced somewhere, - /// a total crash is very likely. - /// If the library exports a function named "pocoUninitializeLibrary", - /// this function is executed before it is unloaded. - /// If loadLibrary() has been called multiple times for the same - /// library, the number of calls to unloadLibrary() must be the same - /// for the library to become unloaded. - { - FastMutex::ScopedLock lock(_mutex); - - typename LibraryMap::iterator it = _map.find(path); - if (it != _map.end()) - { - if (--it->second.refCount == 0) - { - if (it->second.pLibrary->hasSymbol("pocoUninitializeLibrary")) - { - UninitializeLibraryFunc uninitializeLibrary - = (UninitializeLibraryFunc)it->second.pLibrary->getSymbol("pocoUninitializeLibrary"); - uninitializeLibrary(); - } - delete it->second.pManifest; - it->second.pLibrary->unload(); - delete it->second.pLibrary; - _map.erase(it); - } - } - else - throw NotFoundException(path); - } - - const Meta * findClass(const std::string & className) const - /// Returns a pointer to the MetaObject for the given - /// class, or a null pointer if the class is not known. - { - FastMutex::ScopedLock lock(_mutex); - - for (typename LibraryMap::const_iterator it = _map.begin(); it != _map.end(); ++it) - { - const Manif * pManif = it->second.pManifest; - typename Manif::Iterator itm = pManif->find(className); - if (itm != pManif->end()) - return *itm; - } - return 0; - } - - const Meta & classFor(const std::string & className) const - /// Returns a reference to the MetaObject for the given - /// class. Throws a NotFoundException if the class - /// is not known. - { - const Meta * pMeta = findClass(className); - if (pMeta) - return *pMeta; - else - throw NotFoundException(className); - } - - Base * create(const std::string & className) const - /// Creates an instance of the given class. - /// Throws a NotFoundException if the class - /// is not known. - { - return classFor(className).create(); - } - - Base & instance(const std::string & className) const - /// Returns a reference to the sole instance of - /// the given class. The class must be a singleton, - /// otherwise an InvalidAccessException will be thrown. - /// Throws a NotFoundException if the class - /// is not known. - { - return classFor(className).instance(); - } - - bool canCreate(const std::string & className) const - /// Returns true if create() can create new instances - /// of the class. - { - return classFor(className).canCreate(); - } - - void destroy(const std::string & className, Base * pObject) const - /// Destroys the object pObject points to. - /// Does nothing if object is not found. - { - classFor(className).destroy(pObject); - } - - bool isAutoDelete(const std::string & className, Base * pObject) const - /// Returns true if the object is automatically - /// deleted by its meta object. 
- { - return classFor(className).isAutoDelete(pObject); - } - - const Manif * findManifest(const std::string & path) const - /// Returns a pointer to the Manifest for the given - /// library, or a null pointer if the library has not been loaded. - { - FastMutex::ScopedLock lock(_mutex); - - typename LibraryMap::const_iterator it = _map.find(path); - if (it != _map.end()) - return it->second.pManifest; - else - return 0; - } - - const Manif & manifestFor(const std::string & path) const - /// Returns a reference to the Manifest for the given library - /// Throws a NotFoundException if the library has not been loaded. - { - const Manif * pManif = findManifest(path); - if (pManif) - return *pManif; - else - throw NotFoundException(path); - } - - bool isLibraryLoaded(const std::string & path) const - /// Returns true if the library with the given name - /// has already been loaded. - { - return findManifest(path) != 0; - } - - Iterator begin() const - { - FastMutex::ScopedLock lock(_mutex); - - return Iterator(_map.begin()); - } - - Iterator end() const - { - FastMutex::ScopedLock lock(_mutex); - - return Iterator(_map.end()); - } - -private: - LibraryMap _map; - mutable FastMutex _mutex; -}; - - -} // namespace Poco - - -#endif // Foundation_ClassLoader_INCLUDED diff --git a/base/poco/Foundation/include/Poco/Config.h b/base/poco/Foundation/include/Poco/Config.h index e3fcb9d91cd..83d2c89553a 100644 --- a/base/poco/Foundation/include/Poco/Config.h +++ b/base/poco/Foundation/include/Poco/Config.h @@ -53,7 +53,7 @@ // Define if no header is available (such as on WinCE) -// #define POCO_NO_LOCALE +#define POCO_NO_LOCALE // Define to desired default thread stack size diff --git a/base/poco/Foundation/include/Poco/EventLogChannel.h b/base/poco/Foundation/include/Poco/EventLogChannel.h deleted file mode 100644 index e6a79088204..00000000000 --- a/base/poco/Foundation/include/Poco/EventLogChannel.h +++ /dev/null @@ -1,102 +0,0 @@ -// -// EventLogChannel.h -// -// Library: Foundation -// Package: Logging -// Module: EventLogChannel -// -// Definition of the EventLogChannel class specific to WIN32. -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_EventLogChannel_INCLUDED -#define Foundation_EventLogChannel_INCLUDED - - -#include "Poco/Channel.h" -#include "Poco/Foundation.h" -#include "Poco/UnWindows.h" - - -namespace Poco -{ - - -class Foundation_API EventLogChannel : public Channel -/// This Windows-only channel works with the Windows NT Event Log -/// service. -/// -/// To work properly, the EventLogChannel class requires that either -/// the PocoFoundation.dll or the PocoMsg.dll Dynamic Link Library -/// containing the message definition resources can be found in $PATH. -{ -public: - EventLogChannel(); - /// Creates the EventLogChannel. - /// The name of the current application (or more correctly, - /// the name of its executable) is taken as event source name. - - EventLogChannel(const std::string & name); - /// Creates the EventLogChannel with the given event source name. - - EventLogChannel(const std::string & name, const std::string & host); - /// Creates an EventLogChannel with the given event source - /// name that routes messages to the given host. - - void open(); - /// Opens the EventLogChannel. If necessary, the - /// required registry entries to register a - /// message resource DLL are made. - - void close(); - /// Closes the EventLogChannel. 
-
-    void log(const Message & msg);
-    /// Logs the given message to the Windows Event Log.
-    ///
-    /// The message type and priority are mapped to
-    /// appropriate values for Event Log type and category.
-
-    void setProperty(const std::string & name, const std::string & value);
-    /// Sets or changes a configuration property.
-    ///
-    /// The following properties are supported:
-    ///
-    /// * name: The name of the event source.
-    /// * loghost: The name of the host where the Event Log service is running.
-    /// The default is "localhost".
-    /// * host: same as host.
-    /// * logfile: The name of the log file. The default is "Application".
-
-    std::string getProperty(const std::string & name) const;
-    /// Returns the value of the given property.
-
-    static const std::string PROP_NAME;
-    static const std::string PROP_HOST;
-    static const std::string PROP_LOGHOST;
-    static const std::string PROP_LOGFILE;
-
-protected:
-    ~EventLogChannel();
-    static int getType(const Message & msg);
-    static int getCategory(const Message & msg);
-    void setUpRegistry() const;
-    static std::string findLibrary(const char * name);
-
-private:
-    std::string _name;
-    std::string _host;
-    std::string _logFile;
-    HANDLE _h;
-};
-
-
-} // namespace Poco
-
-
-#endif // Foundation_EventLogChannel_INCLUDED
diff --git a/base/poco/Foundation/include/Poco/FPEnvironment_DUMMY.h b/base/poco/Foundation/include/Poco/FPEnvironment_DUMMY.h
deleted file mode 100644
index 3d8f3612a7a..00000000000
--- a/base/poco/Foundation/include/Poco/FPEnvironment_DUMMY.h
+++ /dev/null
@@ -1,126 +0,0 @@
-//
-// FPEnvironment_DUMMY.h
-//
-// Library: Foundation
-// Package: Core
-// Module: FPEnvironment
-//
-// Definition of class FPEnvironmentImpl for platforms that do not
-// support IEEE 754 extensions.
-//
-// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_FPEnvironment_DUMMY_INCLUDED -#define Foundation_FPEnvironment_DUMMY_INCLUDED - - -#include -#include "Poco/Foundation.h" - - -namespace Poco -{ - - -class Foundation_API FPEnvironmentImpl -{ -protected: - enum RoundingModeImpl - { - FP_ROUND_DOWNWARD_IMPL, - FP_ROUND_UPWARD_IMPL, - FP_ROUND_TONEAREST_IMPL, - FP_ROUND_TOWARDZERO_IMPL - }; - enum FlagImpl - { - FP_DIVIDE_BY_ZERO_IMPL, - FP_INEXACT_IMPL, - FP_OVERFLOW_IMPL, - FP_UNDERFLOW_IMPL, - FP_INVALID_IMPL - }; - FPEnvironmentImpl(); - FPEnvironmentImpl(const FPEnvironmentImpl & env); - ~FPEnvironmentImpl(); - FPEnvironmentImpl & operator=(const FPEnvironmentImpl & env); - void keepCurrentImpl(); - static void clearFlagsImpl(); - static bool isFlagImpl(FlagImpl flag); - static void setRoundingModeImpl(RoundingModeImpl mode); - static RoundingModeImpl getRoundingModeImpl(); - static bool isInfiniteImpl(float value); - static bool isInfiniteImpl(double value); - static bool isInfiniteImpl(long double value); - static bool isNaNImpl(float value); - static bool isNaNImpl(double value); - static bool isNaNImpl(long double value); - static float copySignImpl(float target, float source); - static double copySignImpl(double target, double source); - static long double copySignImpl(long double target, long double source); - -private: - static RoundingModeImpl _roundingMode; -}; - - -// -// inlines -// -inline bool FPEnvironmentImpl::isInfiniteImpl(float value) -{ - return std::isinf(value) != 0; -} - - -inline bool FPEnvironmentImpl::isInfiniteImpl(double value) -{ - return std::isinf(value) != 0; -} - - -inline bool FPEnvironmentImpl::isInfiniteImpl(long double value) -{ - return std::isinf((double)value) != 0; -} - - -inline bool FPEnvironmentImpl::isNaNImpl(float value) -{ - return std::isnan(value) != 0; -} - - -inline bool FPEnvironmentImpl::isNaNImpl(double value) -{ - return std::isnan(value) != 0; -} - - -inline bool FPEnvironmentImpl::isNaNImpl(long double value) -{ - return std::isnan((double)value) != 0; -} - - -inline float FPEnvironmentImpl::copySignImpl(float target, float source) -{ - return copysignf(target, source); -} - - -inline double FPEnvironmentImpl::copySignImpl(double target, double source) -{ - return copysign(target, source); -} - - -} // namespace Poco - - -#endif // Foundation_FPEnvironment_DUMMY_INCLUDED diff --git a/base/poco/Foundation/include/Poco/FileStream_WIN32.h b/base/poco/Foundation/include/Poco/FileStream_WIN32.h deleted file mode 100644 index 7d6670b9315..00000000000 --- a/base/poco/Foundation/include/Poco/FileStream_WIN32.h +++ /dev/null @@ -1,72 +0,0 @@ -// -// FileStream_WIN32.h -// -// Library: Foundation -// Package: Streams -// Module: FileStream -// -// Definition of the FileStreamBuf, FileInputStream and FileOutputStream classes. -// -// Copyright (c) 2007, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_FileStream_WIN32_INCLUDED -#define Foundation_FileStream_WIN32_INCLUDED - - -#include "Poco/BufferedBidirectionalStreamBuf.h" -#include "Poco/Foundation.h" -#include "Poco/UnWindows.h" - - -namespace Poco -{ - - -class Foundation_API FileStreamBuf : public BufferedBidirectionalStreamBuf -/// This stream buffer handles Fileio -{ -public: - FileStreamBuf(); - /// Creates a FileStreamBuf. - - ~FileStreamBuf(); - /// Destroys the FileStream. 
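The FPEnvironment_DUMMY fallback shown above reduces all IEEE 754 queries to plain <cmath> calls, while rounding-mode and flag control become no-ops. A minimal sketch of the semantics the fallback still guarantees:

    #include <cassert>
    #include <cmath>
    #include <limits>

    int main()
    {
        const double inf = std::numeric_limits<double>::infinity();
        const double nan = std::numeric_limits<double>::quiet_NaN();

        assert(std::isinf(inf)); // what isInfiniteImpl() delegates to
        assert(std::isnan(nan)); // what isNaNImpl() delegates to

        // copySignImpl(): magnitude of the first argument, sign of the second.
        assert(std::copysign(3.0, -1.0) == -3.0);
        return 0;
    }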
- - void open(const std::string & path, std::ios::openmode mode); - /// Opens the given file in the given mode. - - bool close(); - /// Closes the File stream buffer. Returns true if successful, - /// false otherwise. - - std::streampos seekoff(std::streamoff off, std::ios::seekdir dir, std::ios::openmode mode = std::ios::in | std::ios::out); - /// change position by offset, according to way and mode - - std::streampos seekpos(std::streampos pos, std::ios::openmode mode = std::ios::in | std::ios::out); - /// change to specified position, according to mode - -protected: - enum - { - BUFFER_SIZE = 4096 - }; - - int readFromDevice(char * buffer, std::streamsize length); - int writeToDevice(const char * buffer, std::streamsize length); - -private: - std::string _path; - HANDLE _handle; - UInt64 _pos; -}; - - -} // namespace Poco - - -#endif // Foundation_FileStream_WIN32_INCLUDED diff --git a/base/poco/Foundation/include/Poco/HashSet.h b/base/poco/Foundation/include/Poco/HashSet.h deleted file mode 100644 index 8082b2813f4..00000000000 --- a/base/poco/Foundation/include/Poco/HashSet.h +++ /dev/null @@ -1,176 +0,0 @@ -// -// HashSet.h -// -// Library: Foundation -// Package: Hashing -// Module: HashSet -// -// Definition of the HashSet class. -// -// Copyright (c) 2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_HashSet_INCLUDED -#define Foundation_HashSet_INCLUDED - - -#include "Poco/Foundation.h" -#include "Poco/LinearHashTable.h" - - -namespace Poco -{ - - -template > -class HashSet -/// This class implements a set using a LinearHashTable. -/// -/// A HashSet can be used just like a std::set. -{ -public: - typedef Value ValueType; - typedef Value & Reference; - typedef const Value & ConstReference; - typedef Value * Pointer; - typedef const Value * ConstPointer; - typedef HashFunc Hash; - - typedef LinearHashTable HashTable; - - typedef typename HashTable::Iterator Iterator; - typedef typename HashTable::ConstIterator ConstIterator; - - HashSet() - /// Creates an empty HashSet. - { - } - - HashSet(std::size_t initialReserve) : _table(initialReserve) - /// Creates the HashSet, using the given initialReserve. - { - } - - HashSet(const HashSet & set) : _table(set._table) - /// Creates the HashSet by copying another one. - { - } - - ~HashSet() - /// Destroys the HashSet. - { - } - - HashSet & operator=(const HashSet & table) - /// Assigns another HashSet. - { - HashSet tmp(table); - swap(tmp); - return *this; - } - - void swap(HashSet & set) - /// Swaps the HashSet with another one. - { - _table.swap(set._table); - } - - ConstIterator begin() const - /// Returns an iterator pointing to the first entry, if one exists. - { - return _table.begin(); - } - - ConstIterator end() const - /// Returns an iterator pointing to the end of the table. - { - return _table.end(); - } - - Iterator begin() - /// Returns an iterator pointing to the first entry, if one exists. - { - return _table.begin(); - } - - Iterator end() - /// Returns an iterator pointing to the end of the table. - { - return _table.end(); - } - - ConstIterator find(const ValueType & value) const - /// Finds an entry in the table. - { - return _table.find(value); - } - - Iterator find(const ValueType & value) - /// Finds an entry in the table. - { - return _table.find(value); - } - - std::size_t count(const ValueType & value) const - /// Returns the number of elements with the given - /// value, with is either 1 or 0. 
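HashSet above deliberately mirrors the std::set interface on top of LinearHashTable. A minimal usage sketch; the default hash template argument (Hash<Value> from Poco/Hash.h) is assumed:

    #include "Poco/HashSet.h"
    #include <cassert>
    #include <string>

    int main()
    {
        Poco::HashSet<std::string> set;

        assert(set.insert("alpha").second);  // pair(iterator, true): newly inserted
        assert(!set.insert("alpha").second); // pair(iterator, false): already present

        assert(set.count("alpha") == 1);     // count() is always 1 or 0
        assert(set.find("beta") == set.end());

        set.erase("alpha");
        assert(set.empty());
        return 0;
    }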
- { - return _table.count(value); - } - - std::pair insert(const ValueType & value) - /// Inserts an element into the set. - /// - /// If the element already exists in the set, - /// a pair(iterator, false) with iterator pointing to the - /// existing element is returned. - /// Otherwise, the element is inserted an a - /// pair(iterator, true) with iterator - /// pointing to the new element is returned. - { - return _table.insert(value); - } - - void erase(Iterator it) - /// Erases the element pointed to by it. - { - _table.erase(it); - } - - void erase(const ValueType & value) - /// Erases the element with the given value, if it exists. - { - _table.erase(value); - } - - void clear() - /// Erases all elements. - { - _table.clear(); - } - - std::size_t size() const - /// Returns the number of elements in the table. - { - return _table.size(); - } - - bool empty() const - /// Returns true iff the table is empty. - { - return _table.empty(); - } - -private: - HashTable _table; -}; - - -} // namespace Poco - - -#endif // Foundation_HashSet_INCLUDED diff --git a/base/poco/Foundation/include/Poco/HashTable.h b/base/poco/Foundation/include/Poco/HashTable.h deleted file mode 100644 index b160c9c2d6b..00000000000 --- a/base/poco/Foundation/include/Poco/HashTable.h +++ /dev/null @@ -1,352 +0,0 @@ -// -// HashTable.h -// -// Library: Foundation -// Package: Hashing -// Module: HashTable -// -// Definition of the HashTable class. -// -// Copyright (c) 2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_HashTable_INCLUDED -#define Foundation_HashTable_INCLUDED - - -#include -#include -#include -#include -#include "Poco/Exception.h" -#include "Poco/Foundation.h" -#include "Poco/HashFunction.h" -#include "Poco/HashStatistic.h" - - -namespace Poco -{ - - -//@ deprecated -template > -class HashTable -/// A HashTable stores a key value pair that can be looked up via a hashed key. -/// -/// Collision handling is done via overflow maps(!). With small hash tables performance of this -/// data struct will be closer to that a map than a hash table, i.e. slower. On the plus side, -/// this class offers remove operations. Also HashTable full errors are not possible. If a fast -/// HashTable implementation is needed and the remove operation is not required, use SimpleHashTable -/// instead. -/// -/// This class is NOT thread safe. -{ -public: - typedef std::map HashEntryMap; - typedef HashEntryMap ** HashTableVector; - - typedef typename HashEntryMap::const_iterator ConstIterator; - typedef typename HashEntryMap::iterator Iterator; - - HashTable(UInt32 initialSize = 251) : _entries(0), _size(0), _maxCapacity(initialSize) - /// Creates the HashTable. - { - _entries = new HashEntryMap *[initialSize]; - memset(_entries, '\0', sizeof(HashEntryMap *) * initialSize); - } - - HashTable(const HashTable & ht) : _entries(new HashEntryMap *[ht._maxCapacity]), _size(ht._size), _maxCapacity(ht._maxCapacity) - { - for (UInt32 i = 0; i < _maxCapacity; ++i) - { - if (ht._entries[i]) - _entries[i] = new HashEntryMap(ht._entries[i]->begin(), ht._entries[i]->end()); - else - _entries[i] = 0; - } - } - - ~HashTable() - /// Destroys the HashTable. 
- { - clear(); - } - - HashTable & operator=(const HashTable & ht) - { - if (this != &ht) - { - clear(); - _maxCapacity = ht._maxCapacity; - poco_assert_dbg(_entries == 0); - _entries = new HashEntryMap *[_maxCapacity]; - _size = ht._size; - - for (UInt32 i = 0; i < _maxCapacity; ++i) - { - if (ht._entries[i]) - _entries[i] = new HashEntryMap(ht._entries[i]->begin(), ht._entries[i]->end()); - else - _entries[i] = 0; - } - } - return *this; - } - - void clear() - { - if (!_entries) - return; - for (UInt32 i = 0; i < _maxCapacity; ++i) - { - delete _entries[i]; - } - delete[] _entries; - _entries = 0; - _size = 0; - _maxCapacity = 0; - } - - UInt32 insert(const Key & key, const Value & value) - /// Returns the hash value of the inserted item. - /// Throws an exception if the entry was already inserted - { - UInt32 hsh = hash(key); - insertRaw(key, hsh, value); - return hsh; - } - - Value & insertRaw(const Key & key, UInt32 hsh, const Value & value) - /// Returns the hash value of the inserted item. - /// Throws an exception if the entry was already inserted - { - if (!_entries[hsh]) - _entries[hsh] = new HashEntryMap(); - std::pair res(_entries[hsh]->insert(std::make_pair(key, value))); - if (!res.second) - throw InvalidArgumentException("HashTable::insert, key already exists."); - _size++; - return res.first->second; - } - - UInt32 update(const Key & key, const Value & value) - /// Returns the hash value of the inserted item. - /// Replaces an existing entry if it finds one - { - UInt32 hsh = hash(key); - updateRaw(key, hsh, value); - return hsh; - } - - void updateRaw(const Key & key, UInt32 hsh, const Value & value) - /// Returns the hash value of the inserted item. - /// Replaces an existing entry if it finds one - { - if (!_entries[hsh]) - _entries[hsh] = new HashEntryMap(); - std::pair res = _entries[hsh]->insert(std::make_pair(key, value)); - if (res.second == false) - res.first->second = value; - else - _size++; - } - - void remove(const Key & key) - { - UInt32 hsh = hash(key); - removeRaw(key, hsh); - } - - void removeRaw(const Key & key, UInt32 hsh) - /// Performance version, allows to specify the hash value - { - if (_entries[hsh]) - { - _size -= _entries[hsh]->erase(key); - } - } - - UInt32 hash(const Key & key) const { return _hash(key, _maxCapacity); } - - const Value & get(const Key & key) const - /// Throws an exception if the value does not exist - { - UInt32 hsh = hash(key); - return getRaw(key, hsh); - } - - const Value & getRaw(const Key & key, UInt32 hsh) const - /// Throws an exception if the value does not exist - { - if (!_entries[hsh]) - throw InvalidArgumentException("key not found"); - - ConstIterator it = _entries[hsh]->find(key); - if (it == _entries[hsh]->end()) - throw InvalidArgumentException("key not found"); - - return it->second; - } - - Value & get(const Key & key) - /// Throws an exception if the value does not exist - { - UInt32 hsh = hash(key); - return const_cast(getRaw(key, hsh)); - } - - const Value & operator[](const Key & key) const { return get(key); } - - Value & operator[](const Key & key) - { - UInt32 hsh = hash(key); - - if (!_entries[hsh]) - return insertRaw(key, hsh, Value()); - - ConstIterator it = _entries[hsh]->find(key); - if (it == _entries[hsh]->end()) - return insertRaw(key, hsh, Value()); - - return it->second; - } - - const Key & getKeyRaw(const Key & key, UInt32 hsh) - /// Throws an exception if the key does not exist. returns a reference to the internally - /// stored key. 
Useful when someone does an insert and wants for performance reason only to store - /// a pointer to the key in another collection - { - if (!_entries[hsh]) - throw InvalidArgumentException("key not found"); - ConstIterator it = _entries[hsh]->find(key); - if (it == _entries[hsh]->end()) - throw InvalidArgumentException("key not found"); - return it->first; - } - - bool get(const Key & key, Value & v) const - /// Sets v to the found value, returns false if no value was found - { - UInt32 hsh = hash(key); - return getRaw(key, hsh, v); - } - - bool getRaw(const Key & key, UInt32 hsh, Value & v) const - /// Sets v to the found value, returns false if no value was found - { - if (!_entries[hsh]) - return false; - - ConstIterator it = _entries[hsh]->find(key); - if (it == _entries[hsh]->end()) - return false; - - v = it->second; - return true; - } - - bool exists(const Key & key) - { - UInt32 hsh = hash(key); - return existsRaw(key, hsh); - } - - bool existsRaw(const Key & key, UInt32 hsh) { return _entries[hsh] && (_entries[hsh]->end() != _entries[hsh]->find(key)); } - - std::size_t size() const - /// Returns the number of elements already inserted into the HashTable - { - return _size; - } - - UInt32 maxCapacity() const { return _maxCapacity; } - - void resize(UInt32 newSize) - /// Resizes the hashtable, rehashes all existing entries. Expensive! - { - if (_maxCapacity != newSize) - { - HashTableVector cpy = _entries; - _entries = 0; - UInt32 oldSize = _maxCapacity; - _maxCapacity = newSize; - _entries = new HashEntryMap *[_maxCapacity]; - memset(_entries, '\0', sizeof(HashEntryMap *) * _maxCapacity); - - if (_size == 0) - { - // no data was yet inserted - delete[] cpy; - return; - } - _size = 0; - for (UInt32 i = 0; i < oldSize; ++i) - { - if (cpy[i]) - { - ConstIterator it = cpy[i]->begin(); - ConstIterator itEnd = cpy[i]->end(); - for (; it != itEnd; ++it) - { - insert(it->first, it->second); - } - delete cpy[i]; - } - } - delete[] cpy; - } - } - - HashStatistic currentState(bool details = false) const - /// Returns the current internal state - { - UInt32 numberOfEntries = (UInt32)_size; - UInt32 numZeroEntries = 0; - UInt32 maxEntriesPerHash = 0; - std::vector detailedEntriesPerHash; -#ifdef _DEBUG - UInt32 totalSize = 0; -#endif - for (UInt32 i = 0; i < _maxCapacity; ++i) - { - if (_entries[i]) - { - UInt32 size = (UInt32)_entries[i]->size(); - poco_assert_dbg(size != 0); - if (size > maxEntriesPerHash) - maxEntriesPerHash = size; - if (details) - detailedEntriesPerHash.push_back(size); -#ifdef _DEBUG - totalSize += size; -#endif - } - else - { - numZeroEntries++; - if (details) - detailedEntriesPerHash.push_back(0); - } - } -#ifdef _DEBUG - poco_assert_dbg(totalSize == numberOfEntries); -#endif - return HashStatistic(_maxCapacity, numberOfEntries, numZeroEntries, maxEntriesPerHash, detailedEntriesPerHash); - } - -private: - HashTableVector _entries; - std::size_t _size; - UInt32 _maxCapacity; - KeyHashFunction _hash; -}; - - -} // namespace Poco - - -#endif // Foundation_HashTable_INCLUDED diff --git a/base/poco/Foundation/include/Poco/Latin1Encoding.h b/base/poco/Foundation/include/Poco/Latin1Encoding.h deleted file mode 100644 index 279ecd477f4..00000000000 --- a/base/poco/Foundation/include/Poco/Latin1Encoding.h +++ /dev/null @@ -1,52 +0,0 @@ -// -// Latin1Encoding.h -// -// Library: Foundation -// Package: Text -// Module: Latin1Encoding -// -// Definition of the Latin1Encoding class. -// -// Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH. 
-// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_Latin1Encoding_INCLUDED -#define Foundation_Latin1Encoding_INCLUDED - - -#include "Poco/Foundation.h" -#include "Poco/TextEncoding.h" - - -namespace Poco -{ - - -class Foundation_API Latin1Encoding : public TextEncoding -/// ISO Latin-1 (8859-1) text encoding. -{ -public: - Latin1Encoding(); - ~Latin1Encoding(); - const char * canonicalName() const; - bool isA(const std::string & encodingName) const; - const CharacterMap & characterMap() const; - int convert(const unsigned char * bytes) const; - int convert(int ch, unsigned char * bytes, int length) const; - int queryConvert(const unsigned char * bytes, int length) const; - int sequenceLength(const unsigned char * bytes, int length) const; - -private: - static const char * _names[]; - static const CharacterMap _charMap; -}; - - -} // namespace Poco - - -#endif // Foundation_Latin1Encoding_INCLUDED diff --git a/base/poco/Foundation/include/Poco/Latin2Encoding.h b/base/poco/Foundation/include/Poco/Latin2Encoding.h deleted file mode 100644 index b86c183de17..00000000000 --- a/base/poco/Foundation/include/Poco/Latin2Encoding.h +++ /dev/null @@ -1,55 +0,0 @@ -// -// Latin2Encoding.h -// -// Library: Foundation -// Package: Text -// Module: Latin2Encoding -// -// Definition of the Latin2Encoding class. -// -// Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_Latin2Encoding_INCLUDED -#define Foundation_Latin2Encoding_INCLUDED - - -#include "Poco/Foundation.h" -#include "Poco/TextEncoding.h" - - -namespace Poco -{ - - -class Foundation_API Latin2Encoding : public TextEncoding -/// ISO Latin-2 (8859-2) text encoding. -/// -/// Latin-2 is basically Latin-1 with the EURO sign plus -/// some other minor changes. -{ -public: - Latin2Encoding(); - virtual ~Latin2Encoding(); - const char * canonicalName() const; - bool isA(const std::string & encodingName) const; - const CharacterMap & characterMap() const; - int convert(const unsigned char * bytes) const; - int convert(int ch, unsigned char * bytes, int length) const; - int queryConvert(const unsigned char * bytes, int length) const; - int sequenceLength(const unsigned char * bytes, int length) const; - -private: - static const char * _names[]; - static const CharacterMap _charMap; -}; - - -} // namespace Poco - - -#endif // Foundation_Latin2Encoding_INCLUDED diff --git a/base/poco/Foundation/include/Poco/Latin9Encoding.h b/base/poco/Foundation/include/Poco/Latin9Encoding.h deleted file mode 100644 index db672e15bba..00000000000 --- a/base/poco/Foundation/include/Poco/Latin9Encoding.h +++ /dev/null @@ -1,55 +0,0 @@ -// -// Latin9Encoding.h -// -// Library: Foundation -// Package: Text -// Module: Latin9Encoding -// -// Definition of the Latin9Encoding class. -// -// Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_Latin9Encoding_INCLUDED -#define Foundation_Latin9Encoding_INCLUDED - - -#include "Poco/Foundation.h" -#include "Poco/TextEncoding.h" - - -namespace Poco -{ - - -class Foundation_API Latin9Encoding : public TextEncoding -/// ISO Latin-9 (8859-15) text encoding. -/// -/// Latin-9 is basically Latin-1 with the EURO sign plus -/// some other minor changes. 
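Each encoding class above implements the same TextEncoding interface: convert() maps a byte sequence to a Unicode code point and back, while queryConvert() and sequenceLength() support incremental decoding. A minimal sketch against Latin1Encoding; the canonical name "ISO-8859-1" is assumed from POCO's registration:

    #include "Poco/Latin1Encoding.h"
    #include <cassert>
    #include <cstring>

    int main()
    {
        Poco::Latin1Encoding enc;
        assert(std::strcmp(enc.canonicalName(), "ISO-8859-1") == 0);

        const unsigned char byte = 0xE9;      // 'e' with acute accent in Latin-1
        assert(enc.convert(&byte) == 0x00E9); // byte -> Unicode code point

        unsigned char out[4];
        int n = enc.convert(0x00E9, out, sizeof(out)); // code point -> bytes
        assert(n == 1 && out[0] == 0xE9);
        return 0;
    }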
-{ -public: - Latin9Encoding(); - ~Latin9Encoding(); - const char * canonicalName() const; - bool isA(const std::string & encodingName) const; - const CharacterMap & characterMap() const; - int convert(const unsigned char * bytes) const; - int convert(int ch, unsigned char * bytes, int length) const; - int queryConvert(const unsigned char * bytes, int length) const; - int sequenceLength(const unsigned char * bytes, int length) const; - -private: - static const char * _names[]; - static const CharacterMap _charMap; -}; - - -} // namespace Poco - - -#endif // Foundation_Latin9Encoding_INCLUDED diff --git a/base/poco/Foundation/include/Poco/MD4Engine.h b/base/poco/Foundation/include/Poco/MD4Engine.h deleted file mode 100644 index a772726955e..00000000000 --- a/base/poco/Foundation/include/Poco/MD4Engine.h +++ /dev/null @@ -1,96 +0,0 @@ -// -// MD4Engine.h -// -// Library: Foundation -// Package: Crypt -// Module: MD4Engine -// -// Definition of class MD4Engine. -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// -// -// MD4 (RFC 1320) algorithm: -// Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All -// rights reserved. -// -// License to copy and use this software is granted provided that it -// is identified as the "RSA Data Security, Inc. MD4 Message-Digest -// Algorithm" in all material mentioning or referencing this software -// or this function. -// -// License is also granted to make and use derivative works provided -// that such works are identified as "derived from the RSA Data -// Security, Inc. MD4 Message-Digest Algorithm" in all material -// mentioning or referencing the derived work. -// -// RSA Data Security, Inc. makes no representations concerning either -// the merchantability of this software or the suitability of this -// software for any particular purpose. It is provided "as is" -// without express or implied warranty of any kind. -// -// These notices must be retained in any copies of any part of this -// documentation and/or software. -// - - -#ifndef Foundation_MD4Engine_INCLUDED -#define Foundation_MD4Engine_INCLUDED - - -#include "Poco/DigestEngine.h" -#include "Poco/Foundation.h" - - -namespace Poco -{ - - -class Foundation_API MD4Engine : public DigestEngine -/// This class implements the MD4 message digest algorithm, -/// described in RFC 1320. 
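MD4Engine above follows the generic Poco::DigestEngine protocol, so hashing is the usual update()/digest() sequence. A minimal sketch, checked against the RFC 1320 test vector for "abc":

    #include "Poco/DigestEngine.h"
    #include "Poco/MD4Engine.h"
    #include <cassert>
    #include <string>

    int main()
    {
        Poco::MD4Engine engine;
        engine.update("abc"); // update() overloads come from DigestEngine

        // digest() finalizes the computation and resets the engine.
        const Poco::DigestEngine::Digest & d = engine.digest();
        assert(Poco::DigestEngine::digestToHex(d) == "a448017aaf21d8525fc10ae87aa6729d");
        return 0;
    }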
-{ -public: - enum - { - BLOCK_SIZE = 64, - DIGEST_SIZE = 16 - }; - - MD4Engine(); - ~MD4Engine(); - - std::size_t digestLength() const; - void reset(); - const DigestEngine::Digest & digest(); - -protected: - void updateImpl(const void * data, std::size_t length); - -private: - static void transform(UInt32 state[4], const unsigned char block[64]); - static void encode(unsigned char * output, const UInt32 * input, std::size_t len); - static void decode(UInt32 * output, const unsigned char * input, std::size_t len); - - struct Context - { - UInt32 state[4]; // state (ABCD) - UInt32 count[2]; // number of bits, modulo 2^64 (lsb first) - unsigned char buffer[64]; // input buffer - }; - - Context _context; - DigestEngine::Digest _digest; - - MD4Engine(const MD4Engine &); - MD4Engine & operator=(const MD4Engine &); -}; - - -} // namespace Poco - - -#endif // Foundation_MD5Engine_INCLUDED diff --git a/base/poco/Foundation/include/Poco/Manifest.h b/base/poco/Foundation/include/Poco/Manifest.h deleted file mode 100644 index 1835d6e64ce..00000000000 --- a/base/poco/Foundation/include/Poco/Manifest.h +++ /dev/null @@ -1,152 +0,0 @@ -// -// Manifest.h -// -// Library: Foundation -// Package: SharedLibrary -// Module: ClassLoader -// -// Definition of the Manifest class. -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_Manifest_INCLUDED -#define Foundation_Manifest_INCLUDED - - -#include -#include -#include "Poco/Foundation.h" -#include "Poco/MetaObject.h" - - -namespace Poco -{ - - -class Foundation_API ManifestBase -/// ManifestBase is a common base class for -/// all instantiations of Manifest. -{ -public: - ManifestBase(); - virtual ~ManifestBase(); - - virtual const char * className() const = 0; - /// Returns the type name of the manifest's class. -}; - - -template -class Manifest : public ManifestBase -/// A Manifest maintains a list of all classes -/// contained in a dynamically loadable class -/// library. -/// Internally, the information is held -/// in a map. An iterator is provided to -/// iterate over all the classes in a Manifest. -{ -public: - typedef AbstractMetaObject Meta; - typedef std::map MetaMap; - - class Iterator - /// The Manifest's very own iterator class. - { - public: - Iterator(const typename MetaMap::const_iterator & it) { _it = it; } - Iterator(const Iterator & it) { _it = it._it; } - ~Iterator() { } - Iterator & operator=(const Iterator & it) - { - _it = it._it; - return *this; - } - inline bool operator==(const Iterator & it) const { return _it == it._it; } - inline bool operator!=(const Iterator & it) const { return _it != it._it; } - Iterator & operator++() // prefix - { - ++_it; - return *this; - } - Iterator operator++(int) // postfix - { - Iterator result(_it); - ++_it; - return result; - } - inline const Meta * operator*() const { return _it->second; } - inline const Meta * operator->() const { return _it->second; } - - private: - typename MetaMap::const_iterator _it; - }; - - Manifest() - /// Creates an empty Manifest. - { - } - - virtual ~Manifest() - /// Destroys the Manifest. - { - clear(); - } - - Iterator find(const std::string & className) const - /// Returns an iterator pointing to the MetaObject - /// for the given class. If the MetaObject cannot - /// be found, the iterator points to end(). 
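A Manifest above is just a name-to-MetaObject map with its own iterator. A minimal sketch that lists every class exported by an already-loaded library, reusing the hypothetical AbstractPlugin interface from the ClassLoader sketch further up:

    #include "Poco/ClassLoader.h"
    #include "Poco/Manifest.h"
    #include <iostream>
    #include <string>

    class AbstractPlugin; // the hypothetical plugin interface from the sketch above

    void dumpManifest(const Poco::ClassLoader<AbstractPlugin> & loader, const std::string & path)
    {
        typedef Poco::Manifest<AbstractPlugin> Manif;
        const Manif & manifest = loader.manifestFor(path); // throws NotFoundException if not loaded
        for (Manif::Iterator it = manifest.begin(); it != manifest.end(); ++it)
            std::cout << (*it)->name() << std::endl; // each entry is an AbstractMetaObject
    }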
- { - return Iterator(_metaMap.find(className)); - } - - Iterator begin() const { return Iterator(_metaMap.begin()); } - - Iterator end() const { return Iterator(_metaMap.end()); } - - bool insert(const Meta * pMeta) - /// Inserts a MetaObject. Returns true if insertion - /// was successful, false if a class with the same - /// name already exists. - { - return _metaMap.insert(typename MetaMap::value_type(pMeta->name(), pMeta)).second; - } - - void clear() - /// Removes all MetaObjects from the manifest. - { - for (typename MetaMap::iterator it = _metaMap.begin(); it != _metaMap.end(); ++it) - { - delete it->second; - } - _metaMap.clear(); - } - - int size() const - /// Returns the number of MetaObjects in the Manifest. - { - return int(_metaMap.size()); - } - - bool empty() const - /// Returns true iff the Manifest does not contain any MetaObjects. - { - return _metaMap.empty(); - } - - const char * className() const { return typeid(*this).name(); } - -private: - MetaMap _metaMap; -}; - - -} // namespace Poco - - -#endif // Foundation_Manifest_INCLUDED diff --git a/base/poco/Foundation/include/Poco/Message.h b/base/poco/Foundation/include/Poco/Message.h index f78b3ebb6a1..e8f04888ab4 100644 --- a/base/poco/Foundation/include/Poco/Message.h +++ b/base/poco/Foundation/include/Poco/Message.h @@ -105,6 +105,8 @@ public: const std::string & getText() const; /// Returns the text of the message. + void appendText(const std::string & text); + void setPriority(Priority prio); /// Sets the priority of the message. diff --git a/base/poco/Foundation/include/Poco/NumericString.h b/base/poco/Foundation/include/Poco/NumericString.h index 7c9628c7f16..cc5c011e6ed 100644 --- a/base/poco/Foundation/include/Poco/NumericString.h +++ b/base/poco/Foundation/include/Poco/NumericString.h @@ -30,9 +30,6 @@ #include #include #include -#if !defined(POCO_NO_LOCALE) -# include -#endif // binary numbers are supported, thus 64 (bits) + 1 (string terminating zero) @@ -53,11 +50,7 @@ inline char decimalSeparator() /// Returns decimal separator from global locale or /// default '.' for platforms where locale is unavailable. { -#if !defined(POCO_NO_LOCALE) - return std::use_facet>(std::locale()).decimal_point(); -#else return '.'; -#endif } @@ -65,11 +58,7 @@ inline char thousandSeparator() /// Returns thousand separator from global locale or /// default ',' for platforms where locale is unavailable. { -#if !defined(POCO_NO_LOCALE) - return std::use_facet>(std::locale()).thousands_sep(); -#else return ','; -#endif } diff --git a/base/poco/Foundation/include/Poco/PipeImpl_DUMMY.h b/base/poco/Foundation/include/Poco/PipeImpl_DUMMY.h deleted file mode 100644 index c707e5f6f0f..00000000000 --- a/base/poco/Foundation/include/Poco/PipeImpl_DUMMY.h +++ /dev/null @@ -1,50 +0,0 @@ -// -// PipeImpl_DUMMY.h -// -// Library: Foundation -// Package: Processes -// Module: PipeImpl -// -// Definition of the PipeImpl_DUMMY class. -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_PipeImpl_DUMMY_INCLUDED -#define Foundation_PipeImpl_DUMMY_INCLUDED - - -#include "Poco/Foundation.h" -#include "Poco/RefCountedObject.h" - - -namespace Poco -{ - - -class Foundation_API PipeImpl : public RefCountedObject -/// A dummy implementation of PipeImpl for platforms -/// that do not support pipes. 
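The NumericString.h hunk above is the counterpart of defining POCO_NO_LOCALE in Config.h: the separator helpers stop consulting std::locale and collapse to constants, making numeric formatting and parsing locale-independent. A minimal sketch of the resulting behavior:

    #include "Poco/NumericString.h"
    #include <cassert>

    int main()
    {
        // With POCO_NO_LOCALE in force these no longer depend on the global
        // C++ locale, so the result is the same in every environment.
        assert(Poco::decimalSeparator() == '.');
        assert(Poco::thousandSeparator() == ',');
        return 0;
    }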
-{ -public: - typedef int Handle; - - PipeImpl(); - ~PipeImpl(); - int writeBytes(const void * buffer, int length); - int readBytes(void * buffer, int length); - Handle readHandle() const; - Handle writeHandle() const; - void closeRead(); - void closeWrite(); -}; - - -} // namespace Poco - - -#endif // Foundation_PipeImpl_DUMMY_INCLUDED diff --git a/base/poco/Foundation/include/Poco/PipeStream.h b/base/poco/Foundation/include/Poco/PipeStream.h deleted file mode 100644 index a797cade010..00000000000 --- a/base/poco/Foundation/include/Poco/PipeStream.h +++ /dev/null @@ -1,121 +0,0 @@ -// -// PipeStream.h -// -// Library: Foundation -// Package: Processes -// Module: PipeStream -// -// Definition of the PipeStream class. -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_PipeStream_INCLUDED -#define Foundation_PipeStream_INCLUDED - - -#include -#include -#include "Poco/BufferedStreamBuf.h" -#include "Poco/Foundation.h" -#include "Poco/Pipe.h" - - -namespace Poco -{ - - -class Foundation_API PipeStreamBuf : public BufferedStreamBuf -/// This is the streambuf class used for reading from and writing to a Pipe. -{ -public: - typedef BufferedStreamBuf::openmode openmode; - - PipeStreamBuf(const Pipe & pipe, openmode mode); - /// Creates a PipeStreamBuf with the given Pipe. - - ~PipeStreamBuf(); - /// Destroys the PipeStreamBuf. - - void close(); - /// Closes the pipe. - -protected: - int readFromDevice(char * buffer, std::streamsize length); - int writeToDevice(const char * buffer, std::streamsize length); - -private: - enum - { - STREAM_BUFFER_SIZE = 1024 - }; - - Pipe _pipe; -}; - - -class Foundation_API PipeIOS : public virtual std::ios -/// The base class for PipeInputStream and -/// PipeOutputStream. -/// -/// This class is needed to ensure the correct initialization -/// order of the stream buffer and base classes. -{ -public: - PipeIOS(const Pipe & pipe, openmode mode); - /// Creates the PipeIOS with the given Pipe. - - ~PipeIOS(); - /// Destroys the PipeIOS. - /// - /// Flushes the buffer, but does not close the pipe. - - PipeStreamBuf * rdbuf(); - /// Returns a pointer to the internal PipeStreamBuf. - - void close(); - /// Flushes the stream and closes the pipe. - -protected: - PipeStreamBuf _buf; -}; - - -class Foundation_API PipeOutputStream : public PipeIOS, public std::ostream -/// An output stream for writing to a Pipe. -{ -public: - PipeOutputStream(const Pipe & pipe); - /// Creates the PipeOutputStream with the given Pipe. - - ~PipeOutputStream(); - /// Destroys the PipeOutputStream. - /// - /// Flushes the buffer, but does not close the pipe. -}; - - -class Foundation_API PipeInputStream : public PipeIOS, public std::istream -/// An input stream for reading from a Pipe. -/// -/// Using formatted input from a PipeInputStream -/// is not recommended, due to the read-ahead behavior of -/// istream with formatted reads. -{ -public: - PipeInputStream(const Pipe & pipe); - /// Creates the PipeInputStream with the given Pipe. - - ~PipeInputStream(); - /// Destroys the PipeInputStream. 
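PipeInputStream and PipeOutputStream above are the iostream face of Poco::Pipe; the classic pairing is with Poco::Process, so a child's output can be consumed with ordinary stream operations. A minimal sketch, assuming a POSIX /bin/echo:

    #include "Poco/Pipe.h"
    #include "Poco/PipeStream.h"
    #include "Poco/Process.h"
    #include <iostream>
    #include <string>

    int main()
    {
        Poco::Pipe outPipe;
        Poco::Process::Args args;
        args.push_back("hello");

        // Launch "echo hello" with stdout redirected into outPipe.
        Poco::ProcessHandle ph = Poco::Process::launch("/bin/echo", args, 0, &outPipe, 0);

        Poco::PipeInputStream istr(outPipe); // reads from the pipe's read end
        std::string line;
        std::getline(istr, line);            // line == "hello"
        std::cout << line << std::endl;

        ph.wait(); // reap the child
        return 0;
    }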
-}; - - -} // namespace Poco - - -#endif // Foundation_PipeStream_INCLUDED diff --git a/base/poco/Foundation/include/Poco/SharedMemory_DUMMY.h b/base/poco/Foundation/include/Poco/SharedMemory_DUMMY.h deleted file mode 100644 index da5269dabad..00000000000 --- a/base/poco/Foundation/include/Poco/SharedMemory_DUMMY.h +++ /dev/null @@ -1,89 +0,0 @@ -// -// SharedMemoryImpl.h -// -// Library: Foundation -// Package: Processes -// Module: SharedMemoryImpl -// -// Definition of the SharedMemoryImpl class. -// -// Copyright (c) 2007, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_SharedMemoryImpl_INCLUDED -#define Foundation_SharedMemoryImpl_INCLUDED - - -#include "Poco/Foundation.h" -#include "Poco/RefCountedObject.h" -#include "Poco/SharedMemory.h" - - -namespace Poco -{ - - -class Foundation_API SharedMemoryImpl : public RefCountedObject -/// A dummy implementation of shared memory, for systems -/// that do not have shared memory support. -{ -public: - SharedMemoryImpl(const std::string & id, std::size_t size, SharedMemory::AccessMode mode, const void * addr, bool server); - /// Creates or connects to a shared memory object with the given name. - /// - /// For maximum portability, name should be a valid Unix filename and not - /// contain any slashes or backslashes. - /// - /// An address hint can be passed to the system, specifying the desired - /// start address of the shared memory area. Whether the hint - /// is actually honored is, however, up to the system. Windows platform - /// will generally ignore the hint. - - SharedMemoryImpl(const Poco::File & aFile, SharedMemory::AccessMode mode, const void * addr); - /// Maps the entire contents of file into a shared memory segment. - /// - /// An address hint can be passed to the system, specifying the desired - /// start address of the shared memory area. Whether the hint - /// is actually honored is, however, up to the system. Windows platform - /// will generally ignore the hint. - - char * begin() const; - /// Returns the start address of the shared memory segment. - - char * end() const; - /// Returns the one-past-end end address of the shared memory segment. - -protected: - ~SharedMemoryImpl(); - /// Destroys the SharedMemoryImpl. - -private: - SharedMemoryImpl(); - SharedMemoryImpl(const SharedMemoryImpl &); - SharedMemoryImpl & operator=(const SharedMemoryImpl &); -}; - - -// -// inlines -// -inline char * SharedMemoryImpl::begin() const -{ - return 0; -} - - -inline char * SharedMemoryImpl::end() const -{ - return 0; -} - - -} // namespace Poco - - -#endif // Foundation_SharedMemoryImpl_INCLUDED diff --git a/base/poco/Foundation/include/Poco/SimpleHashTable.h b/base/poco/Foundation/include/Poco/SimpleHashTable.h deleted file mode 100644 index b0fcd99aafa..00000000000 --- a/base/poco/Foundation/include/Poco/SimpleHashTable.h +++ /dev/null @@ -1,387 +0,0 @@ -// -// SimpleHashTable.h -// -// Library: Foundation -// Package: Hashing -// Module: SimpleHashTable -// -// Definition of the SimpleHashTable class. -// -// Copyright (c) 2006, Applied Informatics Software Engineering GmbH. -// and Contributors. 
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_SimpleHashTable_INCLUDED -#define Foundation_SimpleHashTable_INCLUDED - - -#include -#include -#include -#include -#include "Poco/Exception.h" -#include "Poco/Foundation.h" -#include "Poco/HashFunction.h" -#include "Poco/HashStatistic.h" - - -namespace Poco -{ - - -//@ deprecated -template > -class SimpleHashTable -/// A SimpleHashTable stores a key value pair that can be looked up via a hashed key. -/// -/// In comparison to a HashTable, this class handles collisions by sequentially searching the next -/// free location. This also means that the maximum size of this table is limited, i.e. if the hash table -/// is full, it will throw an exception and that this class does not support remove operations. -/// On the plus side it is faster than the HashTable. -/// -/// This class is NOT thread safe. -{ -public: - class HashEntry - { - public: - Key key; - Value value; - HashEntry(const Key k, const Value v) : key(k), value(v) { } - }; - - typedef std::vector HashTableVector; - - SimpleHashTable(UInt32 capacity = 251) : _entries(capacity, 0), _size(0), _capacity(capacity) - /// Creates the SimpleHashTable. - { - } - - SimpleHashTable(const SimpleHashTable & ht) : _size(ht._size), _capacity(ht._capacity) - { - _entries.reserve(ht._capacity); - for (typename HashTableVector::iterator it = ht._entries.begin(); it != ht._entries.end(); ++it) - { - if (*it) - _entries.push_back(new HashEntry(*it)); - else - _entries.push_back(0); - } - } - - ~SimpleHashTable() - /// Destroys the SimpleHashTable. - { - clear(); - } - - SimpleHashTable & operator=(const SimpleHashTable & ht) - { - if (this != &ht) - { - SimpleHashTable tmp(ht); - swap(tmp); - } - return *this; - } - - void swap(SimpleHashTable & ht) - { - using std::swap; - swap(_entries, ht._entries); - swap(_size, ht._size); - swap(_capacity, ht._capacity); - } - - void clear() - { - for (typename HashTableVector::iterator it = _entries.begin(); it != _entries.end(); ++it) - { - delete *it; - *it = 0; - } - _size = 0; - } - - UInt32 insert(const Key & key, const Value & value) - /// Returns the hash value of the inserted item. - /// Throws an exception if the entry was already inserted - { - UInt32 hsh = hash(key); - insertRaw(key, hsh, value); - return hsh; - } - - Value & insertRaw(const Key & key, UInt32 hsh, const Value & value) - /// Returns the hash value of the inserted item. - /// Throws an exception if the entry was already inserted - { - UInt32 pos = hsh; - if (!_entries[pos]) - _entries[pos] = new HashEntry(key, value); - else - { - UInt32 origHash = hsh; - while (_entries[hsh % _capacity]) - { - if (_entries[hsh % _capacity]->key == key) - throw ExistsException(); - if (hsh - origHash > _capacity) - throw PoolOverflowException("SimpleHashTable full"); - hsh++; - } - pos = hsh % _capacity; - _entries[pos] = new HashEntry(key, value); - } - _size++; - return _entries[pos]->value; - } - - UInt32 update(const Key & key, const Value & value) - /// Returns the hash value of the inserted item. - /// Replaces an existing entry if it finds one - { - UInt32 hsh = hash(key); - updateRaw(key, hsh, value); - return hsh; - } - - void updateRaw(const Key & key, UInt32 hsh, const Value & value) - /// Returns the hash value of the inserted item. 
- /// Replaces an existing entry if it finds one - { - if (!_entries[hsh]) - _entries[hsh] = new HashEntry(key, value); - else - { - UInt32 origHash = hsh; - while (_entries[hsh % _capacity]) - { - if (_entries[hsh % _capacity]->key == key) - { - _entries[hsh % _capacity]->value = value; - return; - } - if (hsh - origHash > _capacity) - throw PoolOverflowException("SimpleHashTable full"); - hsh++; - } - _entries[hsh % _capacity] = new HashEntry(key, value); - } - _size++; - } - - UInt32 hash(const Key & key) const { return _hash(key, _capacity); } - - const Value & get(const Key & key) const - /// Throws an exception if the value does not exist - { - UInt32 hsh = hash(key); - return getRaw(key, hsh); - } - - const Value & getRaw(const Key & key, UInt32 hsh) const - /// Throws an exception if the value does not exist - { - UInt32 origHash = hsh; - while (true) - { - if (_entries[hsh % _capacity]) - { - if (_entries[hsh % _capacity]->key == key) - { - return _entries[hsh % _capacity]->value; - } - } - else - throw InvalidArgumentException("value not found"); - if (hsh - origHash > _capacity) - throw InvalidArgumentException("value not found"); - hsh++; - } - } - - Value & get(const Key & key) - /// Throws an exception if the value does not exist - { - UInt32 hsh = hash(key); - return const_cast(getRaw(key, hsh)); - } - - const Value & operator[](const Key & key) const { return get(key); } - - Value & operator[](const Key & key) - { - UInt32 hsh = hash(key); - UInt32 origHash = hsh; - while (true) - { - if (_entries[hsh % _capacity]) - { - if (_entries[hsh % _capacity]->key == key) - { - return _entries[hsh % _capacity]->value; - } - } - else - return insertRaw(key, hsh, Value()); - if (hsh - origHash > _capacity) - return insertRaw(key, hsh, Value()); - hsh++; - } - } - - const Key & getKeyRaw(const Key & key, UInt32 hsh) - /// Throws an exception if the key does not exist. returns a reference to the internally - /// stored key. 
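SimpleHashTable above resolves collisions by linear probing in a fixed-capacity vector, so unlike HashTable it can genuinely fill up and offers no remove operation. A minimal sketch of that failure mode; the default KeyHashFunction (HashFunction<Key> from Poco/HashFunction.h) is assumed:

    #include "Poco/Exception.h"
    #include "Poco/SimpleHashTable.h"
    #include <cassert>
    #include <string>

    int main()
    {
        Poco::SimpleHashTable<std::string, int> table(3); // fixed capacity of 3
        table.insert("a", 1);
        table.insert("b", 2);
        table.insert("c", 3);
        assert(table.size() == 3 && table.exists("b"));

        try
        {
            table.insert("d", 4); // every slot occupied: probing wraps past capacity
            assert(false);        // not reached
        }
        catch (const Poco::PoolOverflowException &)
        {
            // expected: "SimpleHashTable full"
        }
        return 0;
    }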
Useful when someone does an insert and wants for performance reason only to store - /// a pointer to the key in another collection - { - UInt32 origHash = hsh; - while (true) - { - if (_entries[hsh % _capacity]) - { - if (_entries[hsh % _capacity]->key == key) - { - return _entries[hsh % _capacity]->key; - } - } - else - throw InvalidArgumentException("key not found"); - - if (hsh - origHash > _capacity) - throw InvalidArgumentException("key not found"); - hsh++; - } - } - - bool get(const Key & key, Value & v) const - /// Sets v to the found value, returns false if no value was found - { - UInt32 hsh = hash(key); - return getRaw(key, hsh, v); - } - - bool getRaw(const Key & key, UInt32 hsh, Value & v) const - /// Sets v to the found value, returns false if no value was found - { - UInt32 origHash = hsh; - while (true) - { - if (_entries[hsh % _capacity]) - { - if (_entries[hsh % _capacity]->key == key) - { - v = _entries[hsh % _capacity]->value; - return true; - } - } - else - return false; - if (hsh - origHash > _capacity) - return false; - hsh++; - } - } - - bool exists(const Key & key) const - { - UInt32 hsh = hash(key); - return existsRaw(key, hsh); - } - - bool existsRaw(const Key & key, UInt32 hsh) const - { - UInt32 origHash = hsh; - while (true) - { - if (_entries[hsh % _capacity]) - { - if (_entries[hsh % _capacity]->key == key) - { - return true; - } - } - else - return false; - if (hsh - origHash > _capacity) - return false; - hsh++; - } - } - - std::size_t size() const - /// Returns the number of elements already inserted into the SimpleHashTable - { - return _size; - } - - UInt32 capacity() const { return _capacity; } - - void resize(UInt32 newSize) - /// Resizes the hashtable, rehashes all existing entries. Expensive! - { - if (_capacity != newSize) - { - SimpleHashTable tmp(newSize); - swap(tmp); - for (typename HashTableVector::const_iterator it = tmp._entries.begin(); it != tmp._entries.end(); ++it) - { - if (*it) - { - insertRaw((*it)->key, hash((*it)->key), (*it)->value); - } - } - } - } - - HashStatistic currentState(bool details = false) const - /// Returns the current internal state - { - UInt32 numberOfEntries = (UInt32)_size; - UInt32 numZeroEntries = 0; - UInt32 maxEntriesPerHash = 0; - std::vector detailedEntriesPerHash; -#ifdef _DEBUG - UInt32 totalSize = 0; -#endif - for (int i = 0; i < _capacity; ++i) - { - if (_entries[i]) - { - maxEntriesPerHash = 1; - UInt32 size = 1; - if (details) - detailedEntriesPerHash.push_back(size); -#ifdef _DEBUG - totalSize += size; -#endif - } - else - { - numZeroEntries++; - if (details) - detailedEntriesPerHash.push_back(0); - } - } -#ifdef _DEBUG - poco_assert_dbg(totalSize == numberOfEntries); -#endif - return HashStatistic(_capacity, numberOfEntries, numZeroEntries, maxEntriesPerHash, detailedEntriesPerHash); - } - -private: - HashTableVector _entries; - std::size_t _size; - UInt32 _capacity; - KeyHashFunction _hash; -}; - - -} // namespace Poco - - -#endif // Foundation_HashTable_INCLUDED diff --git a/base/poco/Foundation/include/Poco/StreamTokenizer.h b/base/poco/Foundation/include/Poco/StreamTokenizer.h deleted file mode 100644 index bb08e71f81a..00000000000 --- a/base/poco/Foundation/include/Poco/StreamTokenizer.h +++ /dev/null @@ -1,98 +0,0 @@ -// -// StreamTokenizer.h -// -// Library: Foundation -// Package: Streams -// Module: StreamTokenizer -// -// Definition of the StreamTokenizer class. -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. 
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_StreamTokenizer_INCLUDED -#define Foundation_StreamTokenizer_INCLUDED - - -#include -#include -#include "Poco/Foundation.h" -#include "Poco/Token.h" - - -namespace Poco -{ - - -class Foundation_API StreamTokenizer -/// A stream tokenizer splits an input stream -/// into a sequence of tokens of different kinds. -/// Various token kinds can be registered with -/// the tokenizer. -{ -public: - StreamTokenizer(); - /// Creates a StreamTokenizer with no attached stream. - - StreamTokenizer(std::istream & istr); - /// Creates a StreamTokenizer with no attached stream. - - virtual ~StreamTokenizer(); - /// Destroys the StreamTokenizer and deletes all - /// registered tokens. - - void attachToStream(std::istream & istr); - /// Attaches the tokenizer to an input stream. - - void addToken(Token * pToken); - /// Adds a token class to the tokenizer. The - /// tokenizer takes ownership of the token and - /// deletes it when no longer needed. Comment - /// and whitespace tokens will be marked as - /// ignorable, which means that next() will not - /// return them. - - void addToken(Token * pToken, bool ignore); - /// Adds a token class to the tokenizer. The - /// tokenizer takes ownership of the token and - /// deletes it when no longer needed. - /// If ignore is true, the token will be marked - /// as ignorable, which means that next() will - /// not return it. - - const Token * next(); - /// Extracts the next token from the input stream. - /// Returns a pointer to an EOFToken if there are - /// no more characters to read. - /// Returns a pointer to an InvalidToken if an - /// invalid character is encountered. - /// If a token is marked as ignorable, it will not - /// be returned, and the next token will be - /// examined. - /// Never returns a NULL pointer. - /// You must not delete the token returned by next(). - -private: - struct TokenInfo - { - Token * pToken; - bool ignore; - }; - - typedef std::vector TokenVec; - - TokenVec _tokens; - std::istream * _pIstr; - InvalidToken _invalidToken; - EOFToken _eofToken; -}; - - -} // namespace Poco - - -#endif // Foundation_StreamTokenizer_INCLUDED diff --git a/base/poco/Foundation/include/Poco/SynchronizedObject.h b/base/poco/Foundation/include/Poco/SynchronizedObject.h deleted file mode 100644 index f0d4311b107..00000000000 --- a/base/poco/Foundation/include/Poco/SynchronizedObject.h +++ /dev/null @@ -1,132 +0,0 @@ -// -// SynchronizedObject.h -// -// Library: Foundation -// Package: Threading -// Module: SynchronizedObject -// -// Definition of the SynchronizedObject class. -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_SynchronizedObject_INCLUDED -#define Foundation_SynchronizedObject_INCLUDED - - -#include "Poco/Event.h" -#include "Poco/Foundation.h" -#include "Poco/Mutex.h" - - -namespace Poco -{ - - -class Foundation_API SynchronizedObject -/// This class aggregates a Mutex and an Event -/// and can act as a base class for all objects -/// requiring synchronization in a multithreaded -/// scenario. -{ -public: - typedef Poco::ScopedLock ScopedLock; - - SynchronizedObject(); - /// Creates the object. - - virtual ~SynchronizedObject(); - /// Destroys the object. - - void lock() const; - /// Locks the object. Blocks if the object - /// is locked by another thread. - - bool tryLock() const; - /// Tries to lock the object. 
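StreamTokenizer above owns its registered Token instances and hands out non-owning pointers from next(). A minimal sketch with a hypothetical WordToken; it assumes Token exposes its protected _value buffer to subclasses, as the CppParser-style tokens do:

    #include "Poco/StreamTokenizer.h"
    #include "Poco/Token.h"
    #include <cctype>
    #include <iostream>
    #include <sstream>

    // Hypothetical token class: a run of alphabetic characters.
    class WordToken : public Poco::Token
    {
    public:
        Class tokenClass() const override { return Poco::Token::USER_TOKEN; }

        bool start(char c, std::istream & istr) override
        {
            if (!std::isalpha(static_cast<unsigned char>(c)))
                return false;
            _value = c; // _value is Token's protected text buffer
            return true;
        }

        void finish(std::istream & istr) override
        {
            while (std::isalpha(istr.peek()))
                _value += static_cast<char>(istr.get());
        }
    };

    int main()
    {
        std::istringstream istr("foo bar baz");
        Poco::StreamTokenizer tokenizer(istr);

        // The tokenizer takes ownership of registered tokens and deletes them.
        tokenizer.addToken(new WordToken());                   // returned by next()
        tokenizer.addToken(new Poco::WhitespaceToken(), true); // ignorable: skipped

        for (const Poco::Token * t = tokenizer.next(); t->tokenClass() != Poco::Token::EOF_TOKEN;
             t = tokenizer.next())
            std::cout << t->tokenString() << std::endl; // never delete the returned token
        return 0;
    }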
Returns false immediately - /// if the object is already locked by another thread - /// Returns true if the object was successfully locked. - - void unlock() const; - /// Unlocks the object so that it can be locked by - /// other threads. - - void notify() const; - /// Signals the object. - /// Exactly only one thread waiting for the object - /// can resume execution. - - void wait() const; - /// Waits for the object to become signalled. - - void wait(long milliseconds) const; - /// Waits for the object to become signalled. - /// Throws a TimeoutException if the object - /// does not become signalled within the specified - /// time interval. - - bool tryWait(long milliseconds) const; - /// Waits for the object to become signalled. - /// Returns true if the object - /// became signalled within the specified - /// time interval, false otherwise. - -private: - mutable Mutex _mutex; - mutable Event _event; -}; - - -// -// inlines -// -inline void SynchronizedObject::lock() const -{ - _mutex.lock(); -} - - -inline bool SynchronizedObject::tryLock() const -{ - return _mutex.tryLock(); -} - - -inline void SynchronizedObject::unlock() const -{ - _mutex.unlock(); -} - - -inline void SynchronizedObject::notify() const -{ - _event.set(); -} - - -inline void SynchronizedObject::wait() const -{ - _event.wait(); -} - - -inline void SynchronizedObject::wait(long milliseconds) const -{ - _event.wait(milliseconds); -} - - -inline bool SynchronizedObject::tryWait(long milliseconds) const -{ - return _event.tryWait(milliseconds); -} - - -} // namespace Poco - - -#endif // Foundation_SynchronizedObject_INCLUDED diff --git a/base/poco/Foundation/include/Poco/UnWindows.h b/base/poco/Foundation/include/Poco/UnWindows.h deleted file mode 100644 index 1f3835b8af5..00000000000 --- a/base/poco/Foundation/include/Poco/UnWindows.h +++ /dev/null @@ -1,135 +0,0 @@ -// -// UnWindows.h -// -// Library: Foundation -// Package: Core -// Module: UnWindows -// -// A wrapper around the header file that #undef's some -// of the macros for function names defined by that -// are a frequent source of conflicts (e.g., GetUserName). -// -// Remember, that most of the WIN32 API functions come in two variants, -// an Unicode variant (e.g., GetUserNameA) and an ASCII variant (GetUserNameW). -// There is also a macro (GetUserName) that's either defined to be the Unicode -// name or the ASCII name, depending on whether the UNICODE macro is #define'd -// or not. POCO always calls the Unicode or ASCII functions directly (depending -// on whether POCO_WIN32_UTF8 is #define'd or not), so the macros are not ignored. -// -// These macro definitions are a frequent case of problems and naming conflicts, -// especially for C++ programmers. Say, you define a class with a member function named -// GetUserName. Depending on whether "Poco/UnWindows.h" has been included by a particular -// translation unit or not, this might be changed to GetUserNameA/GetUserNameW, or not. -// While, due to naming conventions used, this is less of a problem in POCO, some -// of the users of POCO might use a different naming convention where this can become -// a problem. -// -// To disable the #undef's, compile POCO with the POCO_NO_UNWINDOWS macro #define'd. -// -// Copyright (c) 2007, Applied Informatics Software Engineering GmbH. -// and Contributors. 
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_UnWindows_INCLUDED -#define Foundation_UnWindows_INCLUDED - - -// Reduce bloat - - -// Microsoft Visual C++ includes copies of the Windows header files -// that were current at the time Visual C++ was released. -// The Windows header files use macros to indicate which versions -// of Windows support many programming elements. Therefore, you must -// define these macros to use new functionality introduced in each -// major operating system release. (Individual header files may use -// different macros; therefore, if compilation problems occur, check -// the header file that contains the definition for conditional -// definitions.) For more information, see SdkDdkVer.h. - - -# if defined(_WIN32_WINNT) -# if (_WIN32_WINNT < 0x0502) -# error Unsupported Windows version. -# endif -# elif defined(NTDDI_VERSION) -# if (NTDDI_VERSION < 0x05020000) -# error Unsupported Windows version. -# endif -# elif !defined(_WIN32_WINNT) -// Define minimum supported version. -// This can be changed, if needed. -// If allowed (see POCO_MIN_WINDOWS_OS_SUPPORT -// below), Platform_WIN32.h will do its -// best to determine the appropriate values -// and may redefine these. See Platform_WIN32.h -// for details. -# define _WIN32_WINNT 0x0502 -# define NTDDI_VERSION 0x05020000 -# endif - - -// To prevent Platform_WIN32.h to modify version defines, -// uncomment this, otherwise versions will be automatically -// discovered in Platform_WIN32.h. -// #define POCO_FORCE_MIN_WINDOWS_OS_SUPPORT - - -#include - - -#if !defined(POCO_NO_UNWINDOWS) -// A list of annoying macros to #undef. -// Extend as required. -# undef GetBinaryType -# undef GetShortPathName -# undef GetLongPathName -# undef GetEnvironmentStrings -# undef SetEnvironmentStrings -# undef FreeEnvironmentStrings -# undef FormatMessage -# undef EncryptFile -# undef DecryptFile -# undef CreateMutex -# undef OpenMutex -# undef CreateEvent -# undef OpenEvent -# undef CreateSemaphore -# undef OpenSemaphore -# undef LoadLibrary -# undef GetModuleFileName -# undef CreateProcess -# undef GetCommandLine -# undef GetEnvironmentVariable -# undef SetEnvironmentVariable -# undef ExpandEnvironmentStrings -# undef OutputDebugString -# undef FindResource -# undef UpdateResource -# undef FindAtom -# undef AddAtom -# undef GetSystemDirectory -# undef GetTempPath -# undef GetTempFileName -# undef SetCurrentDirectory -# undef GetCurrentDirectory -# undef CreateDirectory -# undef RemoveDirectory -# undef CreateFile -# undef DeleteFile -# undef SearchPath -# undef CopyFile -# undef MoveFile -# undef ReplaceFile -# undef GetComputerName -# undef SetComputerName -# undef GetUserName -# undef LogonUser -# undef GetVersion -# undef GetObject -#endif // POCO_NO_UNWINDOWS - -#endif // Foundation_UnWindows_INCLUDED diff --git a/base/poco/Foundation/include/Poco/Windows1250Encoding.h b/base/poco/Foundation/include/Poco/Windows1250Encoding.h deleted file mode 100644 index 139c173949f..00000000000 --- a/base/poco/Foundation/include/Poco/Windows1250Encoding.h +++ /dev/null @@ -1,53 +0,0 @@ -// -// Windows1250Encoding.h -// -// Library: Foundation -// Package: Text -// Module: Windows1250Encoding -// -// Definition of the Windows1250Encoding class. -// -// Copyright (c) 2005-2007, Applied Informatics Software Engineering GmbH. -// and Contributors. 
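The #undef list above exists because <windows.h> defines plain function-name macros that the preprocessor substitutes into unrelated C++ code. A Windows-only sketch of the conflict being guarded against; Session is a hypothetical class:

    #include <string>
    #include <windows.h> // defines GetUserName as a macro (GetUserNameA/GetUserNameW)

    class Session
    {
    public:
        // Without the #undef, the preprocessor rewrites this declaration to
        // GetUserNameA() or GetUserNameW(), depending on whether UNICODE is
        // defined in this translation unit, silently breaking callers that
        // were compiled with the other spelling.
        std::string GetUserName() const { return _user; }

    private:
        std::string _user;
    };

    // Including "Poco/UnWindows.h" instead of <windows.h> #undef's the macro
    // (unless POCO_NO_UNWINDOWS is defined), so the method keeps its name.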
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_Windows1250Encoding_INCLUDED -#define Foundation_Windows1250Encoding_INCLUDED - - -#include "Poco/Foundation.h" -#include "Poco/TextEncoding.h" - - -namespace Poco -{ - - -class Foundation_API Windows1250Encoding : public TextEncoding -/// Windows Codepage 1250 text encoding. -/// Based on: http://msdn.microsoft.com/en-us/goglobal/cc305143 -{ -public: - Windows1250Encoding(); - ~Windows1250Encoding(); - const char * canonicalName() const; - bool isA(const std::string & encodingName) const; - const CharacterMap & characterMap() const; - int convert(const unsigned char * bytes) const; - int convert(int ch, unsigned char * bytes, int length) const; - int queryConvert(const unsigned char * bytes, int length) const; - int sequenceLength(const unsigned char * bytes, int length) const; - -private: - static const char * _names[]; - static const CharacterMap _charMap; -}; - - -} // namespace Poco - - -#endif // Foundation_Windows1250Encoding_INCLUDED diff --git a/base/poco/Foundation/include/Poco/Windows1251Encoding.h b/base/poco/Foundation/include/Poco/Windows1251Encoding.h deleted file mode 100644 index f2fe483a134..00000000000 --- a/base/poco/Foundation/include/Poco/Windows1251Encoding.h +++ /dev/null @@ -1,53 +0,0 @@ -// -// Windows1251Encoding.h -// -// Library: Foundation -// Package: Text -// Module: Windows1251Encoding -// -// Definition of the Windows1251Encoding class. -// -// Copyright (c) 2005-2007, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_Windows1251Encoding_INCLUDED -#define Foundation_Windows1251Encoding_INCLUDED - - -#include "Poco/Foundation.h" -#include "Poco/TextEncoding.h" - - -namespace Poco -{ - - -class Foundation_API Windows1251Encoding : public TextEncoding -/// Windows Codepage 1251 text encoding. -/// Based on: http://msdn.microsoft.com/en-us/goglobal/cc305144 -{ -public: - Windows1251Encoding(); - ~Windows1251Encoding(); - const char * canonicalName() const; - bool isA(const std::string & encodingName) const; - const CharacterMap & characterMap() const; - int convert(const unsigned char * bytes) const; - int convert(int ch, unsigned char * bytes, int length) const; - int queryConvert(const unsigned char * bytes, int length) const; - int sequenceLength(const unsigned char * bytes, int length) const; - -private: - static const char * _names[]; - static const CharacterMap _charMap; -}; - - -} // namespace Poco - - -#endif // Foundation_Windows1251Encoding_INCLUDED diff --git a/base/poco/Foundation/include/Poco/Windows1252Encoding.h b/base/poco/Foundation/include/Poco/Windows1252Encoding.h deleted file mode 100644 index c1af357650b..00000000000 --- a/base/poco/Foundation/include/Poco/Windows1252Encoding.h +++ /dev/null @@ -1,52 +0,0 @@ -// -// Windows1252Encoding.h -// -// Library: Foundation -// Package: Text -// Module: Windows1252Encoding -// -// Definition of the Windows1252Encoding class. -// -// Copyright (c) 2005-2007, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_Windows1252Encoding_INCLUDED -#define Foundation_Windows1252Encoding_INCLUDED - - -#include "Poco/Foundation.h" -#include "Poco/TextEncoding.h" - - -namespace Poco -{ - - -class Foundation_API Windows1252Encoding : public TextEncoding -/// Windows Codepage 1252 text encoding. 
-{ -public: - Windows1252Encoding(); - ~Windows1252Encoding(); - const char * canonicalName() const; - bool isA(const std::string & encodingName) const; - const CharacterMap & characterMap() const; - int convert(const unsigned char * bytes) const; - int convert(int ch, unsigned char * bytes, int length) const; - int queryConvert(const unsigned char * bytes, int length) const; - int sequenceLength(const unsigned char * bytes, int length) const; - -private: - static const char * _names[]; - static const CharacterMap _charMap; -}; - - -} // namespace Poco - - -#endif // Foundation_Windows1252Encoding_INCLUDED diff --git a/base/poco/Foundation/include/Poco/WindowsConsoleChannel.h b/base/poco/Foundation/include/Poco/WindowsConsoleChannel.h deleted file mode 100644 index 0473d030930..00000000000 --- a/base/poco/Foundation/include/Poco/WindowsConsoleChannel.h +++ /dev/null @@ -1,184 +0,0 @@ -// -// WindowsConsoleChannel.h -// -// Library: Foundation -// Package: Logging -// Module: WindowsConsoleChannel -// -// Definition of the WindowsConsoleChannel class. -// -// Copyright (c) 2007, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_WindowsConsoleChannel_INCLUDED -#define Foundation_WindowsConsoleChannel_INCLUDED - - -#include "Poco/Channel.h" -#include "Poco/Foundation.h" -#include "Poco/Mutex.h" -#include "Poco/UnWindows.h" - - -namespace Poco -{ - - -class Foundation_API WindowsConsoleChannel : public Channel -/// A channel that writes to the Windows console. -/// -/// Only the message's text is written, followed -/// by a newline. -/// -/// If POCO has been compiled with POCO_WIN32_UTF8, -/// log messages are assumed to be UTF-8 encoded, and -/// are converted to UTF-16 prior to writing them to the -/// console. This is the main difference to the ConsoleChannel -/// class, which cannot handle UTF-8 encoded messages on Windows. -/// -/// Chain this channel to a FormattingChannel with an -/// appropriate Formatter to control what is contained -/// in the text. -/// -/// Only available on Windows platforms. -{ -public: - WindowsConsoleChannel(); - /// Creates the WindowsConsoleChannel. - - void log(const Message & msg); - /// Logs the given message to the channel's stream. - -protected: - ~WindowsConsoleChannel(); - -private: - HANDLE _hConsole; - bool _isFile; -}; - - -class Foundation_API WindowsColorConsoleChannel : public Channel -/// A channel that writes to the Windows console. -/// -/// Only the message's text is written, followed -/// by a newline. -/// -/// If POCO has been compiled with POCO_WIN32_UTF8, -/// log messages are assumed to be UTF-8 encoded, and -/// are converted to UTF-16 prior to writing them to the -/// console. This is the main difference to the ConsoleChannel -/// class, which cannot handle UTF-8 encoded messages on Windows. -/// -/// Messages can be colored depending on priority. -/// -/// To enable message coloring, set the "enableColors" -/// property to true (default). 
Furthermore, colors can be
-/// configured by setting the following properties
-/// (default values are given in parentheses):
-///
-///   * traceColor (gray)
-///   * debugColor (gray)
-///   * informationColor (default)
-///   * noticeColor (default)
-///   * warningColor (yellow)
-///   * errorColor (lightRed)
-///   * criticalColor (lightRed)
-///   * fatalColor (lightRed)
-///
-/// The following color values are supported:
-///
-///   * default
-///   * black
-///   * red
-///   * green
-///   * brown
-///   * blue
-///   * magenta
-///   * cyan
-///   * gray
-///   * darkgray
-///   * lightRed
-///   * lightGreen
-///   * yellow
-///   * lightBlue
-///   * lightMagenta
-///   * lightCyan
-///   * white
-///
-/// Chain this channel to a FormattingChannel with an
-/// appropriate Formatter to control what is contained
-/// in the text.
-///
-/// Only available on Windows platforms.
-{
-public:
-    WindowsColorConsoleChannel();
-    /// Creates the WindowsColorConsoleChannel.
-
-    void log(const Message & msg);
-    /// Logs the given message to the channel's stream.
-
-    void setProperty(const std::string & name, const std::string & value);
-    /// Sets the property with the given name.
-    ///
-    /// The following properties are supported:
-    ///   * enableColors: Enable or disable colors.
-    ///   * traceColor: Specify color for trace messages.
-    ///   * debugColor: Specify color for debug messages.
-    ///   * informationColor: Specify color for information messages.
-    ///   * noticeColor: Specify color for notice messages.
-    ///   * warningColor: Specify color for warning messages.
-    ///   * errorColor: Specify color for error messages.
-    ///   * criticalColor: Specify color for critical messages.
-    ///   * fatalColor: Specify color for fatal messages.
-    ///
-    /// See the class documentation for a list of supported color values.
-
-    std::string getProperty(const std::string & name) const;
-    /// Returns the value of the property with the given name.
-    /// See setProperty() for a description of the supported
-    /// properties.
-
-protected:
-    enum Color
-    {
-        CC_BLACK = 0x0000,
-        CC_RED = 0x0004,
-        CC_GREEN = 0x0002,
-        CC_BROWN = 0x0006,
-        CC_BLUE = 0x0001,
-        CC_MAGENTA = 0x0005,
-        CC_CYAN = 0x0003,
-        CC_GRAY = 0x0007,
-        CC_DARKGRAY = 0x0008,
-        CC_LIGHTRED = 0x000C,
-        CC_LIGHTGREEN = 0x000A,
-        CC_YELLOW = 0x000E,
-        CC_LIGHTBLUE = 0x0009,
-        CC_LIGHTMAGENTA = 0x000D,
-        CC_LIGHTCYAN = 0x000B,
-        CC_WHITE = 0x000F
-    };
-
-    ~WindowsColorConsoleChannel();
-    WORD parseColor(const std::string & color) const;
-    std::string formatColor(WORD color) const;
-    void initColors();
-
-private:
-    bool _enableColors;
-    HANDLE _hConsole;
-    bool _isFile;
-    WORD _colors[9];
-};
-
-
-} // namespace Poco
-
-
-#endif // Foundation_WindowsConsoleChannel_INCLUDED
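For the record, since this diff deletes the channel: configuring the colors listed above follows POCO's usual property mechanism. A hypothetical sketch (Windows-only, assuming only the API shown in this header plus the standard POCO logging classes):

    // Hypothetical usage sketch; not code from this repository.
    #include "Poco/AutoPtr.h"
    #include "Poco/Logger.h"
    #include "Poco/WindowsConsoleChannel.h"

    Poco::AutoPtr<Poco::WindowsColorConsoleChannel> pChannel(new Poco::WindowsColorConsoleChannel);
    pChannel->setProperty("enableColors", "true");     // the default
    pChannel->setProperty("warningColor", "yellow");   // values from the list above
    pChannel->setProperty("errorColor", "lightRed");
    Poco::Logger::root().setChannel(pChannel);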
diff --git a/base/poco/Foundation/src/Base32Decoder.cpp b/base/poco/Foundation/src/Base32Decoder.cpp
deleted file mode 100644
index ba9f9db5958..00000000000
--- a/base/poco/Foundation/src/Base32Decoder.cpp
+++ /dev/null
@@ -1,160 +0,0 @@
-//
-// Base32Decoder.cpp
-//
-// Library: Foundation
-// Package: Streams
-// Module: Base32
-//
-// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#include "Poco/Base32Decoder.h"
-#include "Poco/Base32Encoder.h"
-#include "Poco/Exception.h"
-#include "Poco/Mutex.h"
-#include <cstring>
-
-
-namespace Poco {
-
-
-unsigned char Base32DecoderBuf::IN_ENCODING[256];
-bool Base32DecoderBuf::IN_ENCODING_INIT = false;
-
-
-namespace
-{
-    static FastMutex mutex;
-}
-
-
-Base32DecoderBuf::Base32DecoderBuf(std::istream& istr):
-    _groupLength(0),
-    _groupIndex(0),
-    _buf(*istr.rdbuf())
-{
-    FastMutex::ScopedLock lock(mutex);
-    if (!IN_ENCODING_INIT)
-    {
-        for (unsigned i = 0; i < sizeof(IN_ENCODING); i++)
-        {
-            IN_ENCODING[i] = 0xFF;
-        }
-        for (unsigned i = 0; i < sizeof(Base32EncoderBuf::OUT_ENCODING); i++)
-        {
-            IN_ENCODING[Base32EncoderBuf::OUT_ENCODING[i]] = i;
-        }
-        IN_ENCODING[static_cast<unsigned char>('=')] = '\0';
-        IN_ENCODING_INIT = true;
-    }
-}
-
-
-Base32DecoderBuf::~Base32DecoderBuf()
-{
-}
-
-
-int Base32DecoderBuf::readFromDevice()
-{
-    if (_groupIndex < _groupLength)
-    {
-        return _group[_groupIndex++];
-    }
-    else
-    {
-        unsigned char buffer[8];
-        std::memset(buffer, '=', sizeof(buffer));
-        int c;
-
-        // per RFC-4648, Section 6, permissible block lengths are:
-        // 2, 4, 5, 7, and 8 bytes. Any other length is malformed.
-        //
-        do {
-            if ((c = readOne()) == -1) return -1;
-            buffer[0] = (unsigned char) c;
-            if (IN_ENCODING[buffer[0]] == 0xFF) throw DataFormatException();
-            if ((c = readOne()) == -1) throw DataFormatException();
-            buffer[1] = (unsigned char) c;
-            if (IN_ENCODING[buffer[1]] == 0xFF) throw DataFormatException();
-            if ((c = readOne()) == -1) break;
-            buffer[2] = (unsigned char) c;
-            if (IN_ENCODING[buffer[2]] == 0xFF) throw DataFormatException();
-            if ((c = readOne()) == -1) throw DataFormatException();
-            buffer[3] = (unsigned char) c;
-            if (IN_ENCODING[buffer[3]] == 0xFF) throw DataFormatException();
-            if ((c = readOne()) == -1) break;
-            buffer[4] = (unsigned char) c;
-            if (IN_ENCODING[buffer[4]] == 0xFF) throw DataFormatException();
-            if ((c = readOne()) == -1) break;
-            buffer[5] = (unsigned char) c;
-            if (IN_ENCODING[buffer[5]] == 0xFF) throw DataFormatException();
-            if ((c = readOne()) == -1) throw DataFormatException();
-            buffer[6] = (unsigned char) c;
-            if (IN_ENCODING[buffer[6]] == 0xFF) throw DataFormatException();
-            if ((c = readOne()) == -1) break;
-            buffer[7] = (unsigned char) c;
-            if (IN_ENCODING[buffer[7]] == 0xFF) throw DataFormatException();
-        } while (false);
-
-        _group[0] = (IN_ENCODING[buffer[0]] << 3) | (IN_ENCODING[buffer[1]] >> 2);
-        _group[1] = ((IN_ENCODING[buffer[1]] & 0x03) << 6) | (IN_ENCODING[buffer[2]] << 1) | (IN_ENCODING[buffer[3]] >> 4);
-        _group[2] = ((IN_ENCODING[buffer[3]] & 0x0F) << 4) | (IN_ENCODING[buffer[4]] >> 1);
-        _group[3] = ((IN_ENCODING[buffer[4]] & 0x01) << 7) | (IN_ENCODING[buffer[5]] << 2) | (IN_ENCODING[buffer[6]] >> 3);
-        _group[4] = ((IN_ENCODING[buffer[6]] & 0x07) << 5) | IN_ENCODING[buffer[7]];
-
-        if (buffer[2] == '=')
-            _groupLength = 1;
-        else if (buffer[4] == '=')
-            _groupLength = 2;
-        else if (buffer[5] == '=')
-            _groupLength = 3;
-        else if (buffer[7] == '=')
-            _groupLength = 4;
-        else
-            _groupLength = 5;
-        _groupIndex = 1;
-        return _group[0];
-    }
-}
-
-
-int Base32DecoderBuf::readOne()
-{
-    int ch = _buf.sbumpc();
-    return ch;
-}
-
-
-Base32DecoderIOS::Base32DecoderIOS(std::istream& istr): _buf(istr)
-{
-    poco_ios_init(&_buf);
-}
-
-
-Base32DecoderIOS::~Base32DecoderIOS()
-{
-}
-
-
-Base32DecoderBuf* Base32DecoderIOS::rdbuf()
-{
-    return &_buf;
-}
-
-
-Base32Decoder::Base32Decoder(std::istream& istr):
-    Base32DecoderIOS(istr),
-    std::istream(&_buf)
-{
-}
-
-
-Base32Decoder::~Base32Decoder()
-{
-}
-
-
-} // namespace Poco
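A hypothetical round-trip sketch (not part of this diff) connects the decoder above and the encoder deleted below to the RFC 4648 test vectors; the padded block lengths correspond to the _groupLength cases in both files:

    // Hypothetical usage sketch based on the RFC 4648 test vectors;
    // not code from this repository.
    #include <sstream>
    #include "Poco/Base32Decoder.h"
    #include "Poco/Base32Encoder.h"

    std::ostringstream out;
    Poco::Base32Encoder encoder(out, true);   // with '=' padding
    encoder << "f";                           // 1 input byte -> 2 chars + 6 pads
    encoder.close();
    // out.str() == "MY======" per RFC 4648

    std::istringstream in(out.str());
    Poco::Base32Decoder decoder(in);
    std::string decoded;
    decoder >> decoded;                       // decoded == "f"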
diff --git a/base/poco/Foundation/src/Base32Encoder.cpp b/base/poco/Foundation/src/Base32Encoder.cpp
deleted file mode 100644
index dbac2c977f8..00000000000
--- a/base/poco/Foundation/src/Base32Encoder.cpp
+++ /dev/null
@@ -1,202 +0,0 @@
-//
-// Base32Encoder.cpp
-//
-// Library: Foundation
-// Package: Streams
-// Module: Base32
-//
-// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#include "Poco/Base32Encoder.h"
-
-
-namespace Poco {
-
-
-const unsigned char Base32EncoderBuf::OUT_ENCODING[32] =
-{
-    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
-    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
-    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
-    'Y', 'Z', '2', '3', '4', '5', '6', '7',
-};
-
-
-Base32EncoderBuf::Base32EncoderBuf(std::ostream& ostr, bool padding):
-    _groupLength(0),
-    _buf(*ostr.rdbuf()),
-    _doPadding(padding)
-{
-}
-
-
-Base32EncoderBuf::~Base32EncoderBuf()
-{
-    try
-    {
-        close();
-    }
-    catch (...)
-    {
-    }
-}
-
-
-
-int Base32EncoderBuf::writeToDevice(char c)
-{
-    static const int eof = std::char_traits<char>::eof();
-
-    _group[_groupLength++] = (unsigned char) c;
-    if (_groupLength == 5)
-    {
-        unsigned char idx;
-        idx = _group[0] >> 3;
-        if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof;
-        idx = ((_group[0] & 0x07) << 2) | (_group[1] >> 6);
-        if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof;
-        idx = ((_group[1] & 0x3E) >> 1);
-        if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof;
-        idx = ((_group[1] & 0x01) << 4) | (_group[2] >> 4);
-        if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof;
-        idx = ((_group[2] & 0x0F) << 1) | (_group[3] >> 7);
-        if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof;
-        idx = ((_group[3] & 0x7C) >> 2);
-        if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof;
-        idx = ((_group[3] & 0x03) << 3) | (_group[4] >> 5);
-        if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof;
-        idx = (_group[4] & 0x1F);
-        if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof;
-        _groupLength = 0;
-    }
-    return charToInt(c);
-}
-
-
-int Base32EncoderBuf::close()
-{
-    static const int eof = std::char_traits<char>::eof();
-
-    if (sync() == eof) return eof;
-    if (_groupLength == 1)
-    {
-        _group[1] = 0;
-        unsigned char idx;
-        idx = _group[0] >> 3;
-        if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof;
-        idx = ((_group[0] & 0x07) << 2);
-        if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof;
-        if (_doPadding) {
-            if (_buf.sputc('=') == eof) return eof;
-            if (_buf.sputc('=') == eof) return eof;
-            if (_buf.sputc('=') == eof) return eof;
-            if (_buf.sputc('=') == eof) return eof;
-            if (_buf.sputc('=') == eof) return eof;
-            if (_buf.sputc('=') == eof) return eof;
-        }
-    }
-    else if (_groupLength == 2)
-    {
-        _group[2] = 0;
-        unsigned char idx;
-        idx = _group[0] >> 3;
-        if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof;
-        idx = ((_group[0] & 0x07) << 2) | (_group[1] >> 6);
-        if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof;
-        idx = ((_group[1] & 0x3E) >> 1);
-        if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof;
-        idx = ((_group[1] & 0x01) << 4);
-        if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof;
-        if (_doPadding) {
-            if (_buf.sputc('=') == eof) return eof;
-            if (_buf.sputc('=') == eof) return eof;
-            if (_buf.sputc('=') == eof) return eof;
-            if (_buf.sputc('=') == eof) return eof;
-        }
-    }
-    else if (_groupLength == 3)
-    {
-        _group[3] = 0;
-        unsigned char idx;
-        idx = _group[0] >> 3;
-        if
(_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[0] & 0x07) << 2) | (_group[1] >> 6); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[1] & 0x3E) >> 1); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[1] & 0x01) << 4) | (_group[2] >> 4); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[2] & 0x0F) << 1); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - if (_doPadding) { - if (_buf.sputc('=') == eof) return eof; - if (_buf.sputc('=') == eof) return eof; - if (_buf.sputc('=') == eof) return eof; - } - } - else if (_groupLength == 4) - { - _group[4] = 0; - unsigned char idx; - idx = _group[0] >> 3; - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[0] & 0x07) << 2) | (_group[1] >> 6); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[1] & 0x3E) >> 1); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[1] & 0x01) << 4) | (_group[2] >> 4); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[2] & 0x0F) << 1) | (_group[3] >> 7); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[3] & 0x7C) >> 2); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[3] & 0x03) << 3); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - if (_doPadding && _buf.sputc('=') == eof) return eof; - } - _groupLength = 0; - return _buf.pubsync(); -} - - -Base32EncoderIOS::Base32EncoderIOS(std::ostream& ostr, bool padding): - _buf(ostr, padding) -{ - poco_ios_init(&_buf); -} - - -Base32EncoderIOS::~Base32EncoderIOS() -{ -} - - -int Base32EncoderIOS::close() -{ - return _buf.close(); -} - - -Base32EncoderBuf* Base32EncoderIOS::rdbuf() -{ - return &_buf; -} - - -Base32Encoder::Base32Encoder(std::ostream& ostr, bool padding): - Base32EncoderIOS(ostr, padding), std::ostream(&_buf) -{ -} - - -Base32Encoder::~Base32Encoder() -{ -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/EventLogChannel.cpp b/base/poco/Foundation/src/EventLogChannel.cpp deleted file mode 100644 index ec35a9b5d28..00000000000 --- a/base/poco/Foundation/src/EventLogChannel.cpp +++ /dev/null @@ -1,221 +0,0 @@ -// -// EventLogChannel.cpp -// -// Library: Foundation -// Package: Logging -// Module: EventLogChannel -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/EventLogChannel.h" -#include "Poco/Message.h" -#include "Poco/String.h" -#include "pocomsg.h" - - -namespace Poco { - - -const std::string EventLogChannel::PROP_NAME = "name"; -const std::string EventLogChannel::PROP_HOST = "host"; -const std::string EventLogChannel::PROP_LOGHOST = "loghost"; -const std::string EventLogChannel::PROP_LOGFILE = "logfile"; - - -EventLogChannel::EventLogChannel(): - _logFile("Application"), - _h(0) -{ - const DWORD maxPathLen = MAX_PATH + 1; - char name[maxPathLen]; - int n = GetModuleFileNameA(NULL, name, maxPathLen); - if (n > 0) - { - char* end = name + n - 1; - while (end > name && *end != '\\') --end; - if (*end == '\\') ++end; - _name = end; - } -} - - -EventLogChannel::EventLogChannel(const std::string& name): - _name(name), - _logFile("Application"), - _h(0) -{ -} - - -EventLogChannel::EventLogChannel(const std::string& name, const std::string& host): - _name(name), - _host(host), - _logFile("Application"), - _h(0) -{ -} - - -EventLogChannel::~EventLogChannel() -{ - try - { - close(); - } - catch (...) 
-    {
-        poco_unexpected();
-    }
-}
-
-
-void EventLogChannel::open()
-{
-    setUpRegistry();
-    _h = RegisterEventSource(_host.empty() ? NULL : _host.c_str(), _name.c_str());
-    if (!_h) throw SystemException("cannot register event source");
-}
-
-
-void EventLogChannel::close()
-{
-    if (_h) DeregisterEventSource(_h);
-    _h = 0;
-}
-
-
-void EventLogChannel::log(const Message& msg)
-{
-    if (!_h) open();
-    const char* pMsg = msg.getText().c_str();
-    ReportEvent(_h, getType(msg), getCategory(msg), POCO_MSG_LOG, NULL, 1, 0, &pMsg, NULL);
-}
-
-
-void EventLogChannel::setProperty(const std::string& name, const std::string& value)
-{
-    if (icompare(name, PROP_NAME) == 0)
-        _name = value;
-    else if (icompare(name, PROP_HOST) == 0)
-        _host = value;
-    else if (icompare(name, PROP_LOGHOST) == 0)
-        _host = value;
-    else if (icompare(name, PROP_LOGFILE) == 0)
-        _logFile = value;
-    else
-        Channel::setProperty(name, value);
-}
-
-
-std::string EventLogChannel::getProperty(const std::string& name) const
-{
-    if (icompare(name, PROP_NAME) == 0)
-        return _name;
-    else if (icompare(name, PROP_HOST) == 0)
-        return _host;
-    else if (icompare(name, PROP_LOGHOST) == 0)
-        return _host;
-    else if (icompare(name, PROP_LOGFILE) == 0)
-        return _logFile;
-    else
-        return Channel::getProperty(name);
-}
-
-
-int EventLogChannel::getType(const Message& msg)
-{
-    switch (msg.getPriority())
-    {
-    case Message::PRIO_TRACE:
-    case Message::PRIO_DEBUG:
-    case Message::PRIO_INFORMATION:
-        return EVENTLOG_INFORMATION_TYPE;
-    case Message::PRIO_NOTICE:
-    case Message::PRIO_WARNING:
-        return EVENTLOG_WARNING_TYPE;
-    default:
-        return EVENTLOG_ERROR_TYPE;
-    }
-}
-
-
-int EventLogChannel::getCategory(const Message& msg)
-{
-    switch (msg.getPriority())
-    {
-    case Message::PRIO_TRACE:
-        return POCO_CTG_TRACE;
-    case Message::PRIO_DEBUG:
-        return POCO_CTG_DEBUG;
-    case Message::PRIO_INFORMATION:
-        return POCO_CTG_INFORMATION;
-    case Message::PRIO_NOTICE:
-        return POCO_CTG_NOTICE;
-    case Message::PRIO_WARNING:
-        return POCO_CTG_WARNING;
-    case Message::PRIO_ERROR:
-        return POCO_CTG_ERROR;
-    case Message::PRIO_CRITICAL:
-        return POCO_CTG_CRITICAL;
-    case Message::PRIO_FATAL:
-        return POCO_CTG_FATAL;
-    default:
-        return 0;
-    }
-}
-
-
-void EventLogChannel::setUpRegistry() const
-{
-    std::string key = "SYSTEM\\CurrentControlSet\\Services\\EventLog\\";
-    key.append(_logFile);
-    key.append("\\");
-    key.append(_name);
-    HKEY hKey;
-    DWORD disp;
-    DWORD rc = RegCreateKeyEx(HKEY_LOCAL_MACHINE, key.c_str(), 0, NULL, REG_OPTION_NON_VOLATILE, KEY_ALL_ACCESS, NULL, &hKey, &disp);
-    if (rc != ERROR_SUCCESS) return;
-
-    if (disp == REG_CREATED_NEW_KEY)
-    {
-        std::string path;
-
-        if (path.empty())
-            path = findLibrary("PocoMsg.dll");
-
-        if (!path.empty())
-        {
-            DWORD count = 8;
-            DWORD types = 7;
-            RegSetValueEx(hKey, "CategoryMessageFile", 0, REG_SZ, (const BYTE*) path.c_str(), static_cast<DWORD>(path.size() + 1));
-            RegSetValueEx(hKey, "EventMessageFile", 0, REG_SZ, (const BYTE*) path.c_str(), static_cast<DWORD>(path.size() + 1));
-            RegSetValueEx(hKey, "CategoryCount", 0, REG_DWORD, (const BYTE*) &count, static_cast<DWORD>(sizeof(count)));
-            RegSetValueEx(hKey, "TypesSupported", 0, REG_DWORD, (const BYTE*) &types, static_cast<DWORD>(sizeof(types)));
-        }
-    }
-    RegCloseKey(hKey);
-}
-
-
-std::string EventLogChannel::findLibrary(const char* name)
-{
-    std::string path;
-    HMODULE dll = LoadLibraryA(name);
-    if (dll)
-    {
-        const DWORD maxPathLen = MAX_PATH + 1;
-        char name[maxPathLen];
-        int n = GetModuleFileNameA(dll, name, maxPathLen);
-        if (n > 0) path = name;
-        FreeLibrary(dll);
-    }
-    return path;
-}
-
-
-} // namespace Poco
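The three FPEnvironment backends deleted below all reduce copySign to the semantics of the C math library's copysign, which a couple of hedged assertions can illustrate (standard <cmath> behavior, not repository code):

    #include <cassert>
    #include <cmath>

    int main()
    {
        assert(std::copysign(3.0, -1.0) == -3.0);  // magnitude of the first argument,
        assert(std::copysign(-3.0, 1.0) == 3.0);   // sign of the second
        return 0;
    }

Note that the DUMMY fallback's sign-comparing ternary below reproduces this only for ordinary values; unlike copysign, it does not distinguish negative zero or NaN signs.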
diff --git a/base/poco/Foundation/src/FPEnvironment_DEC.cpp b/base/poco/Foundation/src/FPEnvironment_DEC.cpp
deleted file mode 100644
index b5995f83bf5..00000000000
--- a/base/poco/Foundation/src/FPEnvironment_DEC.cpp
+++ /dev/null
@@ -1,144 +0,0 @@
-//
-// FPEnvironment_DEC.cpp
-//
-// Library: Foundation
-// Package: Core
-// Module: FPEnvironment
-//
-// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-//
-// _XOPEN_SOURCE disables the ieee fp functions
-// in <math.h>, therefore we undefine it for this file.
-//
-#undef _XOPEN_SOURCE
-
-
-#include <math.h>
-#include <fp.h>
-#include <fp_class.h>
-#include "Poco/FPEnvironment_DEC.h"
-
-
-namespace Poco {
-
-
-FPEnvironmentImpl::FPEnvironmentImpl()
-{
-    _env = ieee_get_fp_control();
-}
-
-
-FPEnvironmentImpl::FPEnvironmentImpl(const FPEnvironmentImpl& env)
-{
-    _env = env._env;
-}
-
-
-FPEnvironmentImpl::~FPEnvironmentImpl()
-{
-    ieee_set_fp_control(_env);
-}
-
-
-FPEnvironmentImpl& FPEnvironmentImpl::operator = (const FPEnvironmentImpl& env)
-{
-    _env = env._env;
-    return *this;
-}
-
-
-bool FPEnvironmentImpl::isInfiniteImpl(float value)
-{
-    int cls = fp_classf(value);
-    return cls == FP_POS_INF || cls == FP_NEG_INF;
-}
-
-
-bool FPEnvironmentImpl::isInfiniteImpl(double value)
-{
-    int cls = fp_class(value);
-    return cls == FP_POS_INF || cls == FP_NEG_INF;
-}
-
-
-bool FPEnvironmentImpl::isInfiniteImpl(long double value)
-{
-    int cls = fp_classl(value);
-    return cls == FP_POS_INF || cls == FP_NEG_INF;
-}
-
-
-bool FPEnvironmentImpl::isNaNImpl(float value)
-{
-    return isnanf(value) != 0;
-}
-
-
-bool FPEnvironmentImpl::isNaNImpl(double value)
-{
-    return isnan(value) != 0;
-}
-
-
-bool FPEnvironmentImpl::isNaNImpl(long double value)
-{
-    return isnanl(value) != 0;
-}
-
-
-float FPEnvironmentImpl::copySignImpl(float target, float source)
-{
-    return copysignf(target, source);
-}
-
-
-double FPEnvironmentImpl::copySignImpl(double target, double source)
-{
-    return copysign(target, source);
-}
-
-
-long double FPEnvironmentImpl::copySignImpl(long double target, long double source)
-{
-    return copysignl(target, source);
-}
-
-
-void FPEnvironmentImpl::keepCurrentImpl()
-{
-    ieee_set_fp_control(_env);
-}
-
-
-void FPEnvironmentImpl::clearFlagsImpl()
-{
-    ieee_set_fp_control(0);
-}
-
-
-bool FPEnvironmentImpl::isFlagImpl(FlagImpl flag)
-{
-    return (ieee_get_fp_control() & flag) != 0;
-}
-
-
-void FPEnvironmentImpl::setRoundingModeImpl(RoundingModeImpl mode)
-{
-    // not supported
-}
-
-
-FPEnvironmentImpl::RoundingModeImpl FPEnvironmentImpl::getRoundingModeImpl()
-{
-    // not supported
-    return FPEnvironmentImpl::RoundingModeImpl(0);
-}
-
-
-} // namespace Poco
diff --git a/base/poco/Foundation/src/FPEnvironment_DUMMY.cpp b/base/poco/Foundation/src/FPEnvironment_DUMMY.cpp
deleted file mode 100644
index b473d0dfb93..00000000000
--- a/base/poco/Foundation/src/FPEnvironment_DUMMY.cpp
+++ /dev/null
@@ -1,79 +0,0 @@
-//
-// FPEnvironment_DUMMY.cpp
-//
-// Library: Foundation
-// Package: Core
-// Module: FPEnvironment
-//
-// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/FPEnvironment_DUMMY.h" - - -namespace Poco { - - -FPEnvironmentImpl::RoundingModeImpl FPEnvironmentImpl::_roundingMode; - - -FPEnvironmentImpl::FPEnvironmentImpl() -{ -} - - -FPEnvironmentImpl::FPEnvironmentImpl(const FPEnvironmentImpl& env) -{ -} - - -FPEnvironmentImpl::~FPEnvironmentImpl() -{ -} - - -FPEnvironmentImpl& FPEnvironmentImpl::operator = (const FPEnvironmentImpl& env) -{ - return *this; -} - - -void FPEnvironmentImpl::keepCurrentImpl() -{ -} - - -void FPEnvironmentImpl::clearFlagsImpl() -{ -} - - -bool FPEnvironmentImpl::isFlagImpl(FlagImpl flag) -{ - return false; -} - - -void FPEnvironmentImpl::setRoundingModeImpl(RoundingModeImpl mode) -{ - _roundingMode = mode; -} - - -FPEnvironmentImpl::RoundingModeImpl FPEnvironmentImpl::getRoundingModeImpl() -{ - return _roundingMode; -} - - -long double FPEnvironmentImpl::copySignImpl(long double target, long double source) -{ - return (source >= 0 && target >= 0) || (source < 0 && target < 0) ? target : -target; -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/FPEnvironment_QNX.cpp b/base/poco/Foundation/src/FPEnvironment_QNX.cpp deleted file mode 100644 index 057eb8eb7bc..00000000000 --- a/base/poco/Foundation/src/FPEnvironment_QNX.cpp +++ /dev/null @@ -1,82 +0,0 @@ -// -// FPEnvironment_QNX.cpp -// -// Library: Foundation -// Package: Core -// Module: FPEnvironment -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/FPEnvironment_QNX.h" - - -namespace Poco { - - -FPEnvironmentImpl::FPEnvironmentImpl() -{ - fegetenv(&_env); -} - - -FPEnvironmentImpl::FPEnvironmentImpl(const FPEnvironmentImpl& env) -{ - _env = env._env; -} - - -FPEnvironmentImpl::~FPEnvironmentImpl() -{ - fesetenv(&_env); -} - - -FPEnvironmentImpl& FPEnvironmentImpl::operator = (const FPEnvironmentImpl& env) -{ - _env = env._env; - return *this; -} - - -void FPEnvironmentImpl::keepCurrentImpl() -{ - fegetenv(&_env); -} - - -void FPEnvironmentImpl::clearFlagsImpl() -{ - feclearexcept(FE_ALL_EXCEPT); -} - - -bool FPEnvironmentImpl::isFlagImpl(FlagImpl flag) -{ - return fetestexcept(flag) != 0; -} - - -void FPEnvironmentImpl::setRoundingModeImpl(RoundingModeImpl mode) -{ - fesetround(mode); -} - - -FPEnvironmentImpl::RoundingModeImpl FPEnvironmentImpl::getRoundingModeImpl() -{ - return (RoundingModeImpl) fegetround(); -} - - -long double FPEnvironmentImpl::copySignImpl(long double target, long double source) -{ - return (source >= 0 && target >= 0) || (source < 0 && target < 0) ? 
target : -target; -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/Format.cpp b/base/poco/Foundation/src/Format.cpp index ed5db5c21b2..9872ddff042 100644 --- a/base/poco/Foundation/src/Format.cpp +++ b/base/poco/Foundation/src/Format.cpp @@ -16,9 +16,6 @@ #include "Poco/Exception.h" #include "Poco/Ascii.h" #include -#if !defined(POCO_NO_LOCALE) -#include -#endif #include @@ -147,9 +144,6 @@ namespace void formatOne(std::string& result, std::string::const_iterator& itFmt, const std::string::const_iterator& endFmt, std::vector::const_iterator& itVal) { std::ostringstream str; -#if !defined(POCO_NO_LOCALE) - str.imbue(std::locale::classic()); -#endif try { parseFlags(str, itFmt, endFmt); diff --git a/base/poco/Foundation/src/Latin1Encoding.cpp b/base/poco/Foundation/src/Latin1Encoding.cpp deleted file mode 100644 index c5aa00d745a..00000000000 --- a/base/poco/Foundation/src/Latin1Encoding.cpp +++ /dev/null @@ -1,119 +0,0 @@ -// -// Latin1Encoding.cpp -// -// Library: Foundation -// Package: Text -// Module: Latin1Encoding -// -// Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Latin1Encoding.h" -#include "Poco/String.h" - - -namespace Poco { - - -const char* Latin1Encoding::_names[] = -{ - "ISO-8859-1", - "Latin1", - "Latin-1", - NULL -}; - - -const TextEncoding::CharacterMap Latin1Encoding::_charMap = -{ - /* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */ - /* 00 */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - /* 10 */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, - /* 20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, - /* 30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, - /* 40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, - /* 50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, - /* 60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, - /* 70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, - /* 80 */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, - /* 90 */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, - /* a0 */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, - /* b0 */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, - /* c0 */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, - /* d0 */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, - /* e0 */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, - /* f0 */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, -}; - - -Latin1Encoding::Latin1Encoding() -{ -} - - -Latin1Encoding::~Latin1Encoding() -{ -} - - -const char* Latin1Encoding::canonicalName() const -{ - return _names[0]; -} - - -bool Latin1Encoding::isA(const std::string& encodingName) const -{ - for (const char** name = _names; *name; ++name) - { - if (Poco::icompare(encodingName, 
*name) == 0) - return true; - } - return false; -} - - -const TextEncoding::CharacterMap& Latin1Encoding::characterMap() const -{ - return _charMap; -} - - -int Latin1Encoding::convert(const unsigned char* bytes) const -{ - return *bytes; -} - - -int Latin1Encoding::convert(int ch, unsigned char* bytes, int length) const -{ - if (ch >= 0 && ch <= 255) - { - if (bytes && length >= 1) - *bytes = (unsigned char) ch; - return 1; - } - else return 0; -} - - -int Latin1Encoding::queryConvert(const unsigned char* bytes, int length) const -{ - if (1 <= length) - return *bytes; - else - return -1; -} - - -int Latin1Encoding::sequenceLength(const unsigned char* bytes, int length) const -{ - return 1; -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/Latin2Encoding.cpp b/base/poco/Foundation/src/Latin2Encoding.cpp deleted file mode 100644 index a0c77150099..00000000000 --- a/base/poco/Foundation/src/Latin2Encoding.cpp +++ /dev/null @@ -1,179 +0,0 @@ -// -// Latin2Encoding.cpp -// -// Library: Foundation -// Package: Text -// Module: Latin2Encoding -// -// Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Latin2Encoding.h" -#include "Poco/String.h" - - -namespace Poco { - - -const char* Latin2Encoding::_names[] = -{ - "ISO-8859-2", - "Latin2", - "Latin-2", - NULL -}; - - -const TextEncoding::CharacterMap Latin2Encoding::_charMap = -{ - /* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */ - /* 00 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, - /* 10 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, - /* 20 */ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, - /* 30 */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, - /* 40 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, - /* 50 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, - /* 60 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, - /* 70 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, - /* 80 */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, - /* 90 */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, - /* a0 */ 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7, 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b, - /* b0 */ 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7, 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c, - /* c0 */ 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, - /* d0 */ 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, - /* e0 */ 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, 0x010d, 
0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, - /* f0 */ 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9, -}; - - -Latin2Encoding::Latin2Encoding() -{ -} - - -Latin2Encoding::~Latin2Encoding() -{ -} - - -const char* Latin2Encoding::canonicalName() const -{ - return _names[0]; -} - - -bool Latin2Encoding::isA(const std::string& encodingName) const -{ - for (const char** name = _names; *name; ++name) - { - if (Poco::icompare(encodingName, *name) == 0) - return true; - } - return false; -} - - -const TextEncoding::CharacterMap& Latin2Encoding::characterMap() const -{ - return _charMap; -} - - -int Latin2Encoding::convert(const unsigned char* bytes) const -{ - return _charMap[*bytes]; -} - - -int Latin2Encoding::convert(int ch, unsigned char* bytes, int length) const -{ - if (ch >= 0 && ch <= 255 && _charMap[ch] == ch) - { - if (bytes && length >= 1) - *bytes = (unsigned char) ch; - return 1; - } - switch(ch) - { - case 0x0104: if (bytes && length >= 1) *bytes = 0xa1; return 1; - case 0x02d8: if (bytes && length >= 1) *bytes = 0xa2; return 1; - case 0x0141: if (bytes && length >= 1) *bytes = 0xa3; return 1; - case 0x013d: if (bytes && length >= 1) *bytes = 0xa5; return 1; - case 0x015a: if (bytes && length >= 1) *bytes = 0xa6; return 1; - case 0x0160: if (bytes && length >= 1) *bytes = 0xa9; return 1; - case 0x015e: if (bytes && length >= 1) *bytes = 0xaa; return 1; - case 0x0164: if (bytes && length >= 1) *bytes = 0xab; return 1; - case 0x0179: if (bytes && length >= 1) *bytes = 0xac; return 1; - case 0x017d: if (bytes && length >= 1) *bytes = 0xae; return 1; - case 0x017b: if (bytes && length >= 1) *bytes = 0xaf; return 1; - case 0x0105: if (bytes && length >= 1) *bytes = 0xb1; return 1; - case 0x02db: if (bytes && length >= 1) *bytes = 0xb2; return 1; - case 0x0142: if (bytes && length >= 1) *bytes = 0xb3; return 1; - case 0x013e: if (bytes && length >= 1) *bytes = 0xb5; return 1; - case 0x015b: if (bytes && length >= 1) *bytes = 0xb6; return 1; - case 0x02c7: if (bytes && length >= 1) *bytes = 0xb7; return 1; - case 0x0161: if (bytes && length >= 1) *bytes = 0xb9; return 1; - case 0x015f: if (bytes && length >= 1) *bytes = 0xba; return 1; - case 0x0165: if (bytes && length >= 1) *bytes = 0xbb; return 1; - case 0x017a: if (bytes && length >= 1) *bytes = 0xbc; return 1; - case 0x02dd: if (bytes && length >= 1) *bytes = 0xbd; return 1; - case 0x017e: if (bytes && length >= 1) *bytes = 0xbe; return 1; - case 0x017c: if (bytes && length >= 1) *bytes = 0xbf; return 1; - case 0x0154: if (bytes && length >= 1) *bytes = 0xc0; return 1; - case 0x0102: if (bytes && length >= 1) *bytes = 0xc3; return 1; - case 0x0139: if (bytes && length >= 1) *bytes = 0xc5; return 1; - case 0x0106: if (bytes && length >= 1) *bytes = 0xc6; return 1; - case 0x010c: if (bytes && length >= 1) *bytes = 0xc8; return 1; - case 0x0118: if (bytes && length >= 1) *bytes = 0xca; return 1; - case 0x011a: if (bytes && length >= 1) *bytes = 0xcc; return 1; - case 0x010e: if (bytes && length >= 1) *bytes = 0xcf; return 1; - case 0x0110: if (bytes && length >= 1) *bytes = 0xd0; return 1; - case 0x0143: if (bytes && length >= 1) *bytes = 0xd1; return 1; - case 0x0147: if (bytes && length >= 1) *bytes = 0xd2; return 1; - case 0x0150: if (bytes && length >= 1) *bytes = 0xd5; return 1; - case 0x0158: if (bytes && length >= 1) *bytes = 0xd8; return 1; - case 0x016e: if (bytes && length >= 1) *bytes = 0xd9; return 1; - case 0x0170: if (bytes && length 
>= 1) *bytes = 0xdb; return 1; - case 0x0162: if (bytes && length >= 1) *bytes = 0xde; return 1; - case 0x0155: if (bytes && length >= 1) *bytes = 0xe0; return 1; - case 0x0103: if (bytes && length >= 1) *bytes = 0xe3; return 1; - case 0x013a: if (bytes && length >= 1) *bytes = 0xe5; return 1; - case 0x0107: if (bytes && length >= 1) *bytes = 0xe6; return 1; - case 0x010d: if (bytes && length >= 1) *bytes = 0xe8; return 1; - case 0x0119: if (bytes && length >= 1) *bytes = 0xea; return 1; - case 0x011b: if (bytes && length >= 1) *bytes = 0xec; return 1; - case 0x010f: if (bytes && length >= 1) *bytes = 0xef; return 1; - case 0x0111: if (bytes && length >= 1) *bytes = 0xf0; return 1; - case 0x0144: if (bytes && length >= 1) *bytes = 0xf1; return 1; - case 0x0148: if (bytes && length >= 1) *bytes = 0xf2; return 1; - case 0x0151: if (bytes && length >= 1) *bytes = 0xf5; return 1; - case 0x0159: if (bytes && length >= 1) *bytes = 0xf8; return 1; - case 0x016f: if (bytes && length >= 1) *bytes = 0xf9; return 1; - case 0x0171: if (bytes && length >= 1) *bytes = 0xfb; return 1; - case 0x0163: if (bytes && length >= 1) *bytes = 0xfe; return 1; - case 0x02d9: if (bytes && length >= 1) *bytes = 0xff; return 1; - default: return 0; - } -} - - -int Latin2Encoding::queryConvert(const unsigned char* bytes, int length) const -{ - if (1 <= length) - return _charMap[*bytes]; - else - return -1; -} - - -int Latin2Encoding::sequenceLength(const unsigned char* bytes, int length) const -{ - return 1; -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/Latin9Encoding.cpp b/base/poco/Foundation/src/Latin9Encoding.cpp deleted file mode 100644 index eadc71f30e0..00000000000 --- a/base/poco/Foundation/src/Latin9Encoding.cpp +++ /dev/null @@ -1,130 +0,0 @@ -// -// Latin9Encoding.cpp -// -// Library: Foundation -// Package: Text -// Module: Latin9Encoding -// -// Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH. -// and Contributors. 
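An aside on the convert() contract shared by the Latin* encodings removed in this diff; a hypothetical sketch using the Latin2 mapping shown above:

    // Hypothetical sketch; the 0x0104 -> 0xA1 mapping is taken from the
    // Latin2Encoding::convert() switch above. Not code from this repository.
    #include "Poco/Latin2Encoding.h"

    Poco::Latin2Encoding enc;
    unsigned char b = 0;
    int n = enc.convert(0x0104, &b, 1);  // U+0104 LATIN CAPITAL LETTER A WITH OGONEK
    // n == 1 and b == 0xA1 per the ISO-8859-2 table; code points with no
    // single-byte representation make convert() return 0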
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Latin9Encoding.h" -#include "Poco/String.h" - - -namespace Poco { - - -const char* Latin9Encoding::_names[] = -{ - "ISO-8859-15", - "Latin9", - "Latin-9", - NULL -}; - - -const TextEncoding::CharacterMap Latin9Encoding::_charMap = -{ - /* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */ - /* 00 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, - /* 10 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, - /* 20 */ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, - /* 30 */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, - /* 40 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, - /* 50 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, - /* 60 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, - /* 70 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, - /* 80 */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, - /* 90 */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, - /* a0 */ 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7, 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, - /* b0 */ 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7, 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf, - /* c0 */ 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, - /* d0 */ 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, - /* e0 */ 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, - /* f0 */ 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, -}; - - -Latin9Encoding::Latin9Encoding() -{ -} - - -Latin9Encoding::~Latin9Encoding() -{ -} - - -const char* Latin9Encoding::canonicalName() const -{ - return _names[0]; -} - - -bool Latin9Encoding::isA(const std::string& encodingName) const -{ - for (const char** name = _names; *name; ++name) - { - if (Poco::icompare(encodingName, *name) == 0) - return true; - } - return false; -} - - -const TextEncoding::CharacterMap& Latin9Encoding::characterMap() const -{ - return _charMap; -} - - -int Latin9Encoding::convert(const unsigned char* bytes) const -{ - return _charMap[*bytes]; -} - - -int Latin9Encoding::convert(int ch, unsigned char* bytes, int length) const -{ - if (ch >= 0 && ch <= 255 && _charMap[ch] == ch) - { - if (bytes && length >= 1) - *bytes = ch; - return 1; - } - else switch (ch) - { - case 0x0152: if (bytes && length >= 1) *bytes = 0xbc; return 1; - case 0x0153: if (bytes && length >= 1) *bytes = 0xbd; return 1; - case 0x0160: if (bytes && length 
>= 1) *bytes = 0xa6; return 1; - case 0x0161: if (bytes && length >= 1) *bytes = 0xa8; return 1; - case 0x017d: if (bytes && length >= 1) *bytes = 0xb4; return 1; - case 0x017e: if (bytes && length >= 1) *bytes = 0xb8; return 1; - case 0x0178: if (bytes && length >= 1) *bytes = 0xbe; return 1; - case 0x20ac: if (bytes && length >= 1) *bytes = 0xa4; return 1; - default: return 0; - } -} - - -int Latin9Encoding::queryConvert(const unsigned char* bytes, int length) const -{ - if (1 <= length) - return _charMap[*bytes]; - else - return -1; -} - - -int Latin9Encoding::sequenceLength(const unsigned char* bytes, int length) const -{ - return 1; -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/MD4Engine.cpp b/base/poco/Foundation/src/MD4Engine.cpp deleted file mode 100644 index cab90623185..00000000000 --- a/base/poco/Foundation/src/MD4Engine.cpp +++ /dev/null @@ -1,278 +0,0 @@ -// -// MD4Engine.cpp -// -// Library: Foundation -// Package: Crypt -// Module: MD4Engine -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// -// -// MD4 (RFC 1320) algorithm: -// Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All -// rights reserved. -// -// License to copy and use this software is granted provided that it -// is identified as the "RSA Data Security, Inc. MD4 Message-Digest -// Algorithm" in all material mentioning or referencing this software -// or this function. -// -// License is also granted to make and use derivative works provided -// that such works are identified as "derived from the RSA Data -// Security, Inc. MD4 Message-Digest Algorithm" in all material -// mentioning or referencing the derived work. -// -// RSA Data Security, Inc. makes no representations concerning either -// the merchantability of this software or the suitability of this -// software for any particular purpose. It is provided "as is" -// without express or implied warranty of any kind. -// -// These notices must be retained in any copies of any part of this -// documentation and/or software. -// - - -#include "Poco/MD4Engine.h" -#include - - -namespace Poco { - - -MD4Engine::MD4Engine() -{ - _digest.reserve(16); - reset(); -} - - -MD4Engine::~MD4Engine() -{ - reset(); -} - - -void MD4Engine::updateImpl(const void* input_, std::size_t inputLen) -{ - const unsigned char* input = (const unsigned char*) input_; - unsigned int i, index, partLen; - - /* Compute number of bytes mod 64 */ - index = (unsigned int)((_context.count[0] >> 3) & 0x3F); - - /* Update number of bits */ - if ((_context.count[0] += ((UInt32) inputLen << 3)) < ((UInt32) inputLen << 3)) - _context.count[1]++; - _context.count[1] += ((UInt32) inputLen >> 29); - - partLen = 64 - index; - - /* Transform as many times as possible. 
*/ - if (inputLen >= partLen) - { - std::memcpy(&_context.buffer[index], input, partLen); - transform(_context.state, _context.buffer); - - for (i = partLen; i + 63 < inputLen; i += 64) - transform(_context.state, &input[i]); - - index = 0; - } - else i = 0; - - /* Buffer remaining input */ - std::memcpy(&_context.buffer[index], &input[i], inputLen-i); -} - - -std::size_t MD4Engine::digestLength() const -{ - return DIGEST_SIZE; -} - - -void MD4Engine::reset() -{ - std::memset(&_context, 0, sizeof(_context)); - _context.count[0] = _context.count[1] = 0; - _context.state[0] = 0x67452301; - _context.state[1] = 0xefcdab89; - _context.state[2] = 0x98badcfe; - _context.state[3] = 0x10325476; -} - - -const DigestEngine::Digest& MD4Engine::digest() -{ - static const unsigned char PADDING[64] = - { - 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }; - unsigned char bits[8]; - unsigned int index, padLen; - - /* Save number of bits */ - encode(bits, _context.count, 8); - - /* Pad out to 56 mod 64. */ - index = (unsigned int)((_context.count[0] >> 3) & 0x3f); - padLen = (index < 56) ? (56 - index) : (120 - index); - update(PADDING, padLen); - - /* Append length (before padding) */ - update(bits, 8); - - /* Store state in digest */ - unsigned char digest[16]; - encode(digest, _context.state, 16); - _digest.clear(); - _digest.insert(_digest.begin(), digest, digest + sizeof(digest)); - - /* Zeroize sensitive information. */ - std::memset(&_context, 0, sizeof (_context)); - reset(); - return _digest; -} - - -/* Constants for MD4Transform routine. */ -#define S11 3 -#define S12 7 -#define S13 11 -#define S14 19 -#define S21 3 -#define S22 5 -#define S23 9 -#define S24 13 -#define S31 3 -#define S32 9 -#define S33 11 -#define S34 15 - - -/* F, G and H are basic MD4 functions. */ -#define F(x, y, z) (((x) & (y)) | ((~x) & (z))) -#define G(x, y, z) (((x) & (y)) | ((x) & (z)) | ((y) & (z))) -#define H(x, y, z) ((x) ^ (y) ^ (z)) - - -/* ROTATE_LEFT rotates x left n bits. 
*/ -#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n)))) - - -/* FF, GG and HH are transformations for rounds 1, 2 and 3 */ -/* Rotation is separate from addition to prevent recomputation */ -#define FF(a, b, c, d, x, s) { \ - (a) += F ((b), (c), (d)) + (x); \ - (a) = ROTATE_LEFT ((a), (s)); \ - } -#define GG(a, b, c, d, x, s) { \ - (a) += G ((b), (c), (d)) + (x) + (UInt32)0x5a827999; \ - (a) = ROTATE_LEFT ((a), (s)); \ - } -#define HH(a, b, c, d, x, s) { \ - (a) += H ((b), (c), (d)) + (x) + (UInt32)0x6ed9eba1; \ - (a) = ROTATE_LEFT ((a), (s)); \ - } - - -void MD4Engine::transform (UInt32 state[4], const unsigned char block[64]) -{ - UInt32 a = state[0], b = state[1], c = state[2], d = state[3], x[16]; - - decode(x, block, 64); - - /* Round 1 */ - FF (a, b, c, d, x[ 0], S11); /* 1 */ - FF (d, a, b, c, x[ 1], S12); /* 2 */ - FF (c, d, a, b, x[ 2], S13); /* 3 */ - FF (b, c, d, a, x[ 3], S14); /* 4 */ - FF (a, b, c, d, x[ 4], S11); /* 5 */ - FF (d, a, b, c, x[ 5], S12); /* 6 */ - FF (c, d, a, b, x[ 6], S13); /* 7 */ - FF (b, c, d, a, x[ 7], S14); /* 8 */ - FF (a, b, c, d, x[ 8], S11); /* 9 */ - FF (d, a, b, c, x[ 9], S12); /* 10 */ - FF (c, d, a, b, x[10], S13); /* 11 */ - FF (b, c, d, a, x[11], S14); /* 12 */ - FF (a, b, c, d, x[12], S11); /* 13 */ - FF (d, a, b, c, x[13], S12); /* 14 */ - FF (c, d, a, b, x[14], S13); /* 15 */ - FF (b, c, d, a, x[15], S14); /* 16 */ - - /* Round 2 */ - GG (a, b, c, d, x[ 0], S21); /* 17 */ - GG (d, a, b, c, x[ 4], S22); /* 18 */ - GG (c, d, a, b, x[ 8], S23); /* 19 */ - GG (b, c, d, a, x[12], S24); /* 20 */ - GG (a, b, c, d, x[ 1], S21); /* 21 */ - GG (d, a, b, c, x[ 5], S22); /* 22 */ - GG (c, d, a, b, x[ 9], S23); /* 23 */ - GG (b, c, d, a, x[13], S24); /* 24 */ - GG (a, b, c, d, x[ 2], S21); /* 25 */ - GG (d, a, b, c, x[ 6], S22); /* 26 */ - GG (c, d, a, b, x[10], S23); /* 27 */ - GG (b, c, d, a, x[14], S24); /* 28 */ - GG (a, b, c, d, x[ 3], S21); /* 29 */ - GG (d, a, b, c, x[ 7], S22); /* 30 */ - GG (c, d, a, b, x[11], S23); /* 31 */ - GG (b, c, d, a, x[15], S24); /* 32 */ - - /* Round 3 */ - HH (a, b, c, d, x[ 0], S31); /* 33 */ - HH (d, a, b, c, x[ 8], S32); /* 34 */ - HH (c, d, a, b, x[ 4], S33); /* 35 */ - HH (b, c, d, a, x[12], S34); /* 36 */ - HH (a, b, c, d, x[ 2], S31); /* 37 */ - HH (d, a, b, c, x[10], S32); /* 38 */ - HH (c, d, a, b, x[ 6], S33); /* 39 */ - HH (b, c, d, a, x[14], S34); /* 40 */ - HH (a, b, c, d, x[ 1], S31); /* 41 */ - HH (d, a, b, c, x[ 9], S32); /* 42 */ - HH (c, d, a, b, x[ 5], S33); /* 43 */ - HH (b, c, d, a, x[13], S34); /* 44 */ - HH (a, b, c, d, x[ 3], S31); /* 45 */ - HH (d, a, b, c, x[11], S32); /* 46 */ - HH (c, d, a, b, x[ 7], S33); /* 47 */ - HH (b, c, d, a, x[15], S34); /* 48 */ - - state[0] += a; - state[1] += b; - state[2] += c; - state[3] += d; - - /* Zeroize sensitive information. 
*/
-    std::memset(x, 0, sizeof(x));
-}
-
-
-void MD4Engine::encode(unsigned char* output, const UInt32* input, std::size_t len)
-{
-    unsigned int i, j;
-
-    for (i = 0, j = 0; j < len; i++, j += 4)
-    {
-        output[j]   = (unsigned char)(input[i] & 0xff);
-        output[j+1] = (unsigned char)((input[i] >> 8) & 0xff);
-        output[j+2] = (unsigned char)((input[i] >> 16) & 0xff);
-        output[j+3] = (unsigned char)((input[i] >> 24) & 0xff);
-    }
-}
-
-
-void MD4Engine::decode(UInt32* output, const unsigned char* input, std::size_t len)
-{
-    unsigned int i, j;
-
-    for (i = 0, j = 0; j < len; i++, j += 4)
-        output[i] = ((UInt32)input[j]) | (((UInt32)input[j+1]) << 8) |
-                    (((UInt32)input[j+2]) << 16) | (((UInt32)input[j+3]) << 24);
-}
-
-
-} // namespace Poco
diff --git a/base/poco/Foundation/src/Manifest.cpp b/base/poco/Foundation/src/Manifest.cpp
deleted file mode 100644
index c4e828c5e97..00000000000
--- a/base/poco/Foundation/src/Manifest.cpp
+++ /dev/null
@@ -1,31 +0,0 @@
-//
-// Manifest.cpp
-//
-// Library: Foundation
-// Package: SharedLibrary
-// Module: ClassLoader
-//
-// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#include "Poco/Manifest.h"
-
-
-namespace Poco {
-
-
-ManifestBase::ManifestBase()
-{
-}
-
-
-ManifestBase::~ManifestBase()
-{
-}
-
-
-} // namespace Poco
diff --git a/base/poco/Foundation/src/Message.cpp b/base/poco/Foundation/src/Message.cpp
index 0dfe4323134..663c96e47a2 100644
--- a/base/poco/Foundation/src/Message.cpp
+++ b/base/poco/Foundation/src/Message.cpp
@@ -27,8 +27,7 @@ Message::Message():
     _tid(0),
     _file(0),
     _line(0),
-    _pMap(0),
-    _fmt_str(0)
+    _pMap(0)
 {
     init();
 }
@@ -157,6 +156,12 @@ void Message::setText(const std::string& text)
 }
 
 
+void Message::appendText(const std::string & text)
+{
+    _text.append(text);
+}
+
+
 void Message::setPriority(Priority prio)
 {
     _prio = prio;
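The new Message::appendText() is a one-liner over std::string::append; a hypothetical usage sketch (not part of this diff) shows the intended pattern:

    // Hypothetical usage; assumes only the standard Poco::Message API.
    #include "Poco/Message.h"

    Poco::Message msg("source", "connection failed", Poco::Message::PRIO_ERROR);
    msg.appendText(": will retry");
    // msg.getText() == "connection failed: will retry"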
"Poco/String.h" #include #include @@ -263,7 +249,7 @@ float strToFloat(const char* str) int processed; int flags = StringToDoubleConverter::ALLOW_LEADING_SPACES | StringToDoubleConverter::ALLOW_TRAILING_SPACES; - StringToDoubleConverter converter(flags, 0.0, Single::NaN(), POCO_FLT_INF, POCO_FLT_NAN); + StringToDoubleConverter converter(flags, 0.0, std::numeric_limits::quiet_NaN(), POCO_FLT_INF, POCO_FLT_NAN); float result = converter.StringToFloat(str, static_cast(strlen(str)), &processed); return result; } @@ -275,7 +261,7 @@ double strToDouble(const char* str) int processed; int flags = StringToDoubleConverter::ALLOW_LEADING_SPACES | StringToDoubleConverter::ALLOW_TRAILING_SPACES; - StringToDoubleConverter converter(flags, 0.0, Double::NaN(), POCO_FLT_INF, POCO_FLT_NAN); + StringToDoubleConverter converter(flags, 0.0, std::numeric_limits::quiet_NaN(), POCO_FLT_INF, POCO_FLT_NAN); double result = converter.StringToDouble(str, static_cast(strlen(str)), &processed); return result; } diff --git a/base/poco/Foundation/src/PipeImpl_DUMMY.cpp b/base/poco/Foundation/src/PipeImpl_DUMMY.cpp deleted file mode 100644 index b0faf7b68ba..00000000000 --- a/base/poco/Foundation/src/PipeImpl_DUMMY.cpp +++ /dev/null @@ -1,65 +0,0 @@ -// -// PipeImpl_DUMMY.cpp -// -// Library: Foundation -// Package: Processes -// Module: PipeImpl -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/PipeImpl_DUMMY.h" - - -namespace Poco { - - -PipeImpl::PipeImpl() -{ -} - - -PipeImpl::~PipeImpl() -{ -} - - -int PipeImpl::writeBytes(const void* buffer, int length) -{ - return 0; -} - - -int PipeImpl::readBytes(void* buffer, int length) -{ - return 0; -} - - -PipeImpl::Handle PipeImpl::readHandle() const -{ - return 0; -} - - -PipeImpl::Handle PipeImpl::writeHandle() const -{ - return 0; -} - - -void PipeImpl::closeRead() -{ -} - - -void PipeImpl::closeWrite() -{ -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/PipeStream.cpp b/base/poco/Foundation/src/PipeStream.cpp deleted file mode 100644 index 96fb323581d..00000000000 --- a/base/poco/Foundation/src/PipeStream.cpp +++ /dev/null @@ -1,127 +0,0 @@ -// -// PipeStream.cpp -// -// Library: Foundation -// Package: Processes -// Module: PipeStream -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/PipeStream.h" - - -namespace Poco { - - -// -// PipeStreamBuf -// - - -PipeStreamBuf::PipeStreamBuf(const Pipe& pipe, openmode mode): - BufferedStreamBuf(STREAM_BUFFER_SIZE, mode), - _pipe(pipe) -{ -} - - -PipeStreamBuf::~PipeStreamBuf() -{ -} - - -int PipeStreamBuf::readFromDevice(char* buffer, std::streamsize length) -{ - return _pipe.readBytes(buffer, (int) length); -} - - -int PipeStreamBuf::writeToDevice(const char* buffer, std::streamsize length) -{ - return _pipe.writeBytes(buffer, (int) length); -} - - -void PipeStreamBuf::close() -{ - _pipe.close(Pipe::CLOSE_BOTH); -} - - -// -// PipeIOS -// - - -PipeIOS::PipeIOS(const Pipe& pipe, openmode mode): - _buf(pipe, mode) -{ - poco_ios_init(&_buf); -} - - -PipeIOS::~PipeIOS() -{ - try - { - _buf.sync(); - } - catch (...) 
- { - } -} - - -PipeStreamBuf* PipeIOS::rdbuf() -{ - return &_buf; -} - - -void PipeIOS::close() -{ - _buf.sync(); - _buf.close(); -} - - -// -// PipeOutputStream -// - - -PipeOutputStream::PipeOutputStream(const Pipe& pipe): - PipeIOS(pipe, std::ios::out), - std::ostream(&_buf) -{ -} - - -PipeOutputStream::~PipeOutputStream() -{ -} - - -// -// PipeInputStream -// - - -PipeInputStream::PipeInputStream(const Pipe& pipe): - PipeIOS(pipe, std::ios::in), - std::istream(&_buf) -{ -} - - -PipeInputStream::~PipeInputStream() -{ -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/Semaphore_VX.cpp b/base/poco/Foundation/src/Semaphore_VX.cpp deleted file mode 100644 index 5bc63b530c7..00000000000 --- a/base/poco/Foundation/src/Semaphore_VX.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// -// Semaphore_VX.cpp -// -// Library: Foundation -// Package: Threading -// Module: Semaphore -// -// Copyright (c) 2004-2011, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Semaphore_VX.h" -#include - - -namespace Poco { - - -SemaphoreImpl::SemaphoreImpl(int n, int max) -{ - poco_assert (n >= 0 && max > 0 && n <= max); - - _sem = semCCreate(SEM_Q_PRIORITY, n); - if (_sem == 0) - throw Poco::SystemException("cannot create semaphore"); -} - - -SemaphoreImpl::~SemaphoreImpl() -{ - semDelete(_sem); -} - - -void SemaphoreImpl::waitImpl() -{ - if (semTake(_sem, WAIT_FOREVER) != OK) - throw SystemException("cannot wait for semaphore"); -} - - -bool SemaphoreImpl::waitImpl(long milliseconds) -{ - int ticks = milliseconds*sysClkRateGet()/1000; - return semTake(_sem, ticks) == OK; -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/Semaphore_WIN32.cpp b/base/poco/Foundation/src/Semaphore_WIN32.cpp deleted file mode 100644 index 2ec04a8c02d..00000000000 --- a/base/poco/Foundation/src/Semaphore_WIN32.cpp +++ /dev/null @@ -1,65 +0,0 @@ -// -// Semaphore_WIN32.cpp -// -// Library: Foundation -// Package: Threading -// Module: Semaphore -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Semaphore_WIN32.h" - - -namespace Poco { - - -SemaphoreImpl::SemaphoreImpl(int n, int max) -{ - poco_assert (n >= 0 && max > 0 && n <= max); - - _sema = CreateSemaphoreW(NULL, n, max, NULL); - if (!_sema) - { - throw SystemException("cannot create semaphore"); - } -} - - -SemaphoreImpl::~SemaphoreImpl() -{ - CloseHandle(_sema); -} - - -void SemaphoreImpl::waitImpl() -{ - switch (WaitForSingleObject(_sema, INFINITE)) - { - case WAIT_OBJECT_0: - return; - default: - throw SystemException("wait for semaphore failed"); - } -} - - -bool SemaphoreImpl::waitImpl(long milliseconds) -{ - switch (WaitForSingleObject(_sema, milliseconds + 1)) - { - case WAIT_TIMEOUT: - return false; - case WAIT_OBJECT_0: - return true; - default: - throw SystemException("wait for semaphore failed"); - } -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/SharedMemory_DUMMY.cpp b/base/poco/Foundation/src/SharedMemory_DUMMY.cpp deleted file mode 100644 index 38586323806..00000000000 --- a/base/poco/Foundation/src/SharedMemory_DUMMY.cpp +++ /dev/null @@ -1,36 +0,0 @@ -// -// SharedMemoryImpl.cpp -// -// Library: Foundation -// Package: Processes -// Module: SharedMemoryImpl -// -// Copyright (c) 2007, Applied Informatics Software Engineering GmbH. -// and Contributors. 
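The VxWorks wait above converts milliseconds to system clock ticks before calling semTake; a worked restatement of that arithmetic (illustrative sketch only; clockRateHz stands in for sysClkRateGet()):

// ticks = milliseconds * clock rate / 1000
// e.g. with a 60 Hz system clock, a 500 ms timeout becomes 500 * 60 / 1000 = 30 ticks;
// a timeout shorter than one tick truncates to 0, i.e. a non-blocking semTake.
long msToTicks(long milliseconds, int clockRateHz)
{
    return milliseconds * clockRateHz / 1000;
}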
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/SharedMemory_DUMMY.h" - - -namespace Poco { - - -SharedMemoryImpl::SharedMemoryImpl(const std::string&, std::size_t, SharedMemory::AccessMode, const void*, bool) -{ -} - - -SharedMemoryImpl::SharedMemoryImpl(const Poco::File&, SharedMemory::AccessMode, const void*) -{ -} - - -SharedMemoryImpl::~SharedMemoryImpl() -{ -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/StreamTokenizer.cpp b/base/poco/Foundation/src/StreamTokenizer.cpp deleted file mode 100644 index 1b6e936274e..00000000000 --- a/base/poco/Foundation/src/StreamTokenizer.cpp +++ /dev/null @@ -1,105 +0,0 @@ -// -// StreamTokenizer.cpp -// -// Library: Foundation -// Package: Streams -// Module: StreamTokenizer -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/StreamTokenizer.h" - - -namespace Poco { - - -StreamTokenizer::StreamTokenizer(): - _pIstr(0) -{ -} - - -StreamTokenizer::StreamTokenizer(std::istream& istr): - _pIstr(&istr) -{ -} - - -StreamTokenizer::~StreamTokenizer() -{ - for (TokenVec::iterator it = _tokens.begin(); it != _tokens.end(); ++it) - { - delete it->pToken; - } -} - - -void StreamTokenizer::attachToStream(std::istream& istr) -{ - _pIstr = &istr; -} - - -void StreamTokenizer::addToken(Token* pToken) -{ - poco_check_ptr (pToken); - - TokenInfo ti; - ti.pToken = pToken; - ti.ignore = (pToken->tokenClass() == Token::COMMENT_TOKEN || pToken->tokenClass() == Token::WHITESPACE_TOKEN); - _tokens.push_back(ti); -} - - -void StreamTokenizer::addToken(Token* pToken, bool ignore) -{ - poco_check_ptr (pToken); - - TokenInfo ti; - ti.pToken = pToken; - ti.ignore = ignore; - _tokens.push_back(ti); -} - - -const Token* StreamTokenizer::next() -{ - poco_check_ptr (_pIstr); - - static const int eof = std::char_traits<char>::eof(); - - int first = _pIstr->get(); - TokenVec::const_iterator it = _tokens.begin(); - while (first != eof && it != _tokens.end()) - { - const TokenInfo& ti = *it; - if (ti.pToken->start((char) first, *_pIstr)) - { - ti.pToken->finish(*_pIstr); - if (ti.ignore) - { - first = _pIstr->get(); - it = _tokens.begin(); - } - else return ti.pToken; - } - else ++it; - } - if (first == eof) - { - return &_eofToken; - } - else - { - _invalidToken.start((char) first, *_pIstr); - return &_invalidToken; - } -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/SynchronizedObject.cpp b/base/poco/Foundation/src/SynchronizedObject.cpp deleted file mode 100644 index 6a42e6594dc..00000000000 --- a/base/poco/Foundation/src/SynchronizedObject.cpp +++ /dev/null @@ -1,31 +0,0 @@ -// -// SynchronizedObject.cpp -// -// Library: Foundation -// Package: Threading -// Module: SynchronizedObject -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. 
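A usage sketch for the StreamTokenizer::next() loop deleted above (WhitespaceToken and NumberToken are hypothetical Poco::Token subclasses; next() keeps skipping tokens flagged as ignorable and returns the EOF token at end of stream):

std::istringstream input("12 34");
Poco::StreamTokenizer tokenizer(input);
tokenizer.addToken(new WhitespaceToken); // WHITESPACE_TOKEN, skipped automatically
tokenizer.addToken(new NumberToken);
for (const Poco::Token* t = tokenizer.next(); t->tokenClass() != Poco::Token::EOF_TOKEN; t = tokenizer.next())
{
    handle(*t); // hypothetical consumer
}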
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/SynchronizedObject.h" - - -namespace Poco { - - -SynchronizedObject::SynchronizedObject() -{ -} - - -SynchronizedObject::~SynchronizedObject() -{ -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/TextEncoding.cpp b/base/poco/Foundation/src/TextEncoding.cpp index cfd1c77ef95..f652d6f5d37 100644 --- a/base/poco/Foundation/src/TextEncoding.cpp +++ b/base/poco/Foundation/src/TextEncoding.cpp @@ -16,15 +16,9 @@ #include "Poco/Exception.h" #include "Poco/String.h" #include "Poco/ASCIIEncoding.h" -#include "Poco/Latin1Encoding.h" -#include "Poco/Latin2Encoding.h" -#include "Poco/Latin9Encoding.h" #include "Poco/UTF32Encoding.h" #include "Poco/UTF16Encoding.h" #include "Poco/UTF8Encoding.h" -#include "Poco/Windows1250Encoding.h" -#include "Poco/Windows1251Encoding.h" -#include "Poco/Windows1252Encoding.h" #include "Poco/RWLock.h" #include "Poco/SingletonHolder.h" #include @@ -47,15 +41,9 @@ public: add(pUtf8Encoding, TextEncoding::GLOBAL); add(new ASCIIEncoding); - add(new Latin1Encoding); - add(new Latin2Encoding); - add(new Latin9Encoding); add(pUtf8Encoding); add(new UTF16Encoding); add(new UTF32Encoding); - add(new Windows1250Encoding); - add(new Windows1251Encoding); - add(new Windows1252Encoding); } ~TextEncodingManager() diff --git a/base/poco/Foundation/src/Timezone_VX.cpp b/base/poco/Foundation/src/Timezone_VX.cpp deleted file mode 100644 index 18339bffab4..00000000000 --- a/base/poco/Foundation/src/Timezone_VX.cpp +++ /dev/null @@ -1,78 +0,0 @@ -// -// Timezone_VXX.cpp -// -// Library: Foundation -// Package: DateTime -// Module: Timezone -// -// Copyright (c) 2004-2011, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Timezone.h" -#include "Poco/Exception.h" -#include "Poco/Environment.h" -#include <ctime> - - -namespace Poco { - - -int Timezone::utcOffset() -{ - std::time_t now = std::time(NULL); - struct std::tm t; - gmtime_r(&now, &t); - std::time_t utc = std::mktime(&t); - return now - utc; -} - - -int Timezone::dst() -{ - std::time_t now = std::time(NULL); - struct std::tm t; - if (localtime_r(&now, &t) != OK) - throw Poco::SystemException("cannot get local time DST offset"); - return t.tm_isdst == 1 ? 3600 : 0; -} - - -bool Timezone::isDst(const Timestamp& timestamp) -{ - std::time_t time = timestamp.epochTime(); - struct std::tm* tms = std::localtime(&time); - if (!tms) throw Poco::SystemException("cannot get local time DST flag"); - return tms->tm_isdst > 0; -} - - -std::string Timezone::name() -{ - // format of TIMEZONE environment variable: - // name_of_zone:<(unused)>:time_in_minutes_from_UTC:daylight_start:daylight_end - std::string tz = Environment::get("TIMEZONE", "UTC"); - std::string::size_type pos = tz.find(':'); - if (pos != std::string::npos) - return tz.substr(0, pos); - else - return tz; -} - - -std::string Timezone::standardName() -{ - return name(); -} - - -std::string Timezone::dstName() -{ - return name(); -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/Windows1251Encoding.cpp b/base/poco/Foundation/src/Windows1251Encoding.cpp deleted file mode 100644 index 91f1d23a859..00000000000 --- a/base/poco/Foundation/src/Windows1251Encoding.cpp +++ /dev/null @@ -1,237 +0,0 @@ -// -// Windows1251Encoding.cpp -// -// Library: Foundation -// Package: Text -// Module: Windows1251Encoding -// -// Copyright (c) 2005-2007, Applied Informatics Software Engineering GmbH. -// and Contributors. 
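One subtlety in the deleted Timezone::utcOffset() above: gmtime_r fills the struct with UTC fields, and std::mktime then reinterprets those fields as local time, so the difference between the two time_t values is exactly the local offset from UTC. A standalone restatement (assumes POSIX gmtime_r; illustrative only):

#include <ctime>

// Local offset from UTC in seconds, via the gmtime/mktime round trip.
int utcOffsetSeconds()
{
    std::time_t now = std::time(nullptr);
    std::tm t;
    gmtime_r(&now, &t);                       // broken-down UTC time
    std::time_t utcAsLocal = std::mktime(&t); // those fields read back as local time
    return static_cast<int>(now - utcAsLocal);
}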
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Windows1251Encoding.h" -#include "Poco/String.h" - - -namespace Poco { - - -const char* Windows1251Encoding::_names[] = -{ - "windows-1251", - "Windows-1251", - "cp1251", - "CP1251", - NULL -}; - - -const TextEncoding::CharacterMap Windows1251Encoding::_charMap = -{ - /* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */ - /* 00 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, - /* 10 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, - /* 20 */ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, - /* 30 */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, - /* 40 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, - /* 50 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, - /* 60 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, - /* 70 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, - /* 80 */ 0x0402, 0x0403, 0x201a, 0x0453, 0x201e, 0x2026, 0x2020, 0x2021, 0x20ac, 0x2030, 0x0409, 0x2039, 0x040a, 0x040c, 0x040b, 0x040f, - /* 90 */ 0x0452, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0xfffe, 0x2122, 0x0459, 0x203a, 0x045a, 0x045c, 0x045b, 0x045f, - /* a0 */ 0x00a0, 0x040e, 0x045e, 0x0408, 0x00a4, 0x0490, 0x00a6, 0x00a7, 0x0401, 0x00a9, 0x0404, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0407, - /* b0 */ 0x00b0, 0x00b1, 0x0406, 0x0456, 0x0491, 0x00b5, 0x00b6, 0x00b7, 0x0451, 0x2116, 0x0454, 0x00bb, 0x0458, 0x0405, 0x0455, 0x0457, - /* c0 */ 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, - /* d0 */ 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, - /* e0 */ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, - /* f0 */ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, -}; - - -Windows1251Encoding::Windows1251Encoding() -{ -} - - -Windows1251Encoding::~Windows1251Encoding() -{ -} - - -const char* Windows1251Encoding::canonicalName() const -{ - return _names[0]; -} - - -bool Windows1251Encoding::isA(const std::string& encodingName) const -{ - for (const char** name = _names; *name; ++name) - { - if (Poco::icompare(encodingName, *name) == 0) - return true; - } - return false; -} - - -const TextEncoding::CharacterMap& Windows1251Encoding::characterMap() const -{ - return _charMap; -} - - -int Windows1251Encoding::convert(const unsigned char* bytes) const -{ - return _charMap[*bytes]; -} - - -int Windows1251Encoding::convert(int ch, unsigned char* bytes, int length) const -{ - if (ch >= 0 && ch <= 255 && _charMap[ch] == ch) - { - if (bytes && length >= 1) - *bytes = (unsigned char) ch; - return 1; - } - else switch(ch) - { - case 0x0402: if (bytes && length >= 1) *bytes = 0x80; return 1; - case 
0x0403: if (bytes && length >= 1) *bytes = 0x81; return 1; - case 0x201a: if (bytes && length >= 1) *bytes = 0x82; return 1; - case 0x0453: if (bytes && length >= 1) *bytes = 0x83; return 1; - case 0x201e: if (bytes && length >= 1) *bytes = 0x84; return 1; - case 0x2026: if (bytes && length >= 1) *bytes = 0x85; return 1; - case 0x2020: if (bytes && length >= 1) *bytes = 0x86; return 1; - case 0x2021: if (bytes && length >= 1) *bytes = 0x87; return 1; - case 0x20ac: if (bytes && length >= 1) *bytes = 0x88; return 1; - case 0x2030: if (bytes && length >= 1) *bytes = 0x89; return 1; - case 0x0409: if (bytes && length >= 1) *bytes = 0x8a; return 1; - case 0x2039: if (bytes && length >= 1) *bytes = 0x8b; return 1; - case 0x040a: if (bytes && length >= 1) *bytes = 0x8c; return 1; - case 0x040c: if (bytes && length >= 1) *bytes = 0x8d; return 1; - case 0x040b: if (bytes && length >= 1) *bytes = 0x8e; return 1; - case 0x040f: if (bytes && length >= 1) *bytes = 0x8f; return 1; - case 0x0452: if (bytes && length >= 1) *bytes = 0x90; return 1; - case 0x2018: if (bytes && length >= 1) *bytes = 0x91; return 1; - case 0x2019: if (bytes && length >= 1) *bytes = 0x92; return 1; - case 0x201c: if (bytes && length >= 1) *bytes = 0x93; return 1; - case 0x201d: if (bytes && length >= 1) *bytes = 0x94; return 1; - case 0x2022: if (bytes && length >= 1) *bytes = 0x95; return 1; - case 0x2013: if (bytes && length >= 1) *bytes = 0x96; return 1; - case 0x2014: if (bytes && length >= 1) *bytes = 0x97; return 1; - case 0xfffe: if (bytes && length >= 1) *bytes = 0x98; return 1; - case 0x2122: if (bytes && length >= 1) *bytes = 0x99; return 1; - case 0x0459: if (bytes && length >= 1) *bytes = 0x9a; return 1; - case 0x203a: if (bytes && length >= 1) *bytes = 0x9b; return 1; - case 0x045a: if (bytes && length >= 1) *bytes = 0x9c; return 1; - case 0x045c: if (bytes && length >= 1) *bytes = 0x9d; return 1; - case 0x045b: if (bytes && length >= 1) *bytes = 0x9e; return 1; - case 0x045f: if (bytes && length >= 1) *bytes = 0x9f; return 1; - case 0x040e: if (bytes && length >= 1) *bytes = 0xa1; return 1; - case 0x045e: if (bytes && length >= 1) *bytes = 0xa2; return 1; - case 0x0408: if (bytes && length >= 1) *bytes = 0xa3; return 1; - case 0x0490: if (bytes && length >= 1) *bytes = 0xa5; return 1; - case 0x0401: if (bytes && length >= 1) *bytes = 0xa8; return 1; - case 0x0404: if (bytes && length >= 1) *bytes = 0xaa; return 1; - case 0x0407: if (bytes && length >= 1) *bytes = 0xaf; return 1; - case 0x0406: if (bytes && length >= 1) *bytes = 0xb2; return 1; - case 0x0456: if (bytes && length >= 1) *bytes = 0xb3; return 1; - case 0x0491: if (bytes && length >= 1) *bytes = 0xb4; return 1; - case 0x0451: if (bytes && length >= 1) *bytes = 0xb8; return 1; - case 0x2116: if (bytes && length >= 1) *bytes = 0xb9; return 1; - case 0x0454: if (bytes && length >= 1) *bytes = 0xba; return 1; - case 0x0458: if (bytes && length >= 1) *bytes = 0xbc; return 1; - case 0x0405: if (bytes && length >= 1) *bytes = 0xbd; return 1; - case 0x0455: if (bytes && length >= 1) *bytes = 0xbe; return 1; - case 0x0457: if (bytes && length >= 1) *bytes = 0xbf; return 1; - case 0x0410: if (bytes && length >= 1) *bytes = 0xc0; return 1; - case 0x0411: if (bytes && length >= 1) *bytes = 0xc1; return 1; - case 0x0412: if (bytes && length >= 1) *bytes = 0xc2; return 1; - case 0x0413: if (bytes && length >= 1) *bytes = 0xc3; return 1; - case 0x0414: if (bytes && length >= 1) *bytes = 0xc4; return 1; - case 0x0415: if (bytes && length >= 1) *bytes = 0xc5; return 
1; - case 0x0416: if (bytes && length >= 1) *bytes = 0xc6; return 1; - case 0x0417: if (bytes && length >= 1) *bytes = 0xc7; return 1; - case 0x0418: if (bytes && length >= 1) *bytes = 0xc8; return 1; - case 0x0419: if (bytes && length >= 1) *bytes = 0xc9; return 1; - case 0x041a: if (bytes && length >= 1) *bytes = 0xca; return 1; - case 0x041b: if (bytes && length >= 1) *bytes = 0xcb; return 1; - case 0x041c: if (bytes && length >= 1) *bytes = 0xcc; return 1; - case 0x041d: if (bytes && length >= 1) *bytes = 0xcd; return 1; - case 0x041e: if (bytes && length >= 1) *bytes = 0xce; return 1; - case 0x041f: if (bytes && length >= 1) *bytes = 0xcf; return 1; - case 0x0420: if (bytes && length >= 1) *bytes = 0xd0; return 1; - case 0x0421: if (bytes && length >= 1) *bytes = 0xd1; return 1; - case 0x0422: if (bytes && length >= 1) *bytes = 0xd2; return 1; - case 0x0423: if (bytes && length >= 1) *bytes = 0xd3; return 1; - case 0x0424: if (bytes && length >= 1) *bytes = 0xd4; return 1; - case 0x0425: if (bytes && length >= 1) *bytes = 0xd5; return 1; - case 0x0426: if (bytes && length >= 1) *bytes = 0xd6; return 1; - case 0x0427: if (bytes && length >= 1) *bytes = 0xd7; return 1; - case 0x0428: if (bytes && length >= 1) *bytes = 0xd8; return 1; - case 0x0429: if (bytes && length >= 1) *bytes = 0xd9; return 1; - case 0x042a: if (bytes && length >= 1) *bytes = 0xda; return 1; - case 0x042b: if (bytes && length >= 1) *bytes = 0xdb; return 1; - case 0x042c: if (bytes && length >= 1) *bytes = 0xdc; return 1; - case 0x042d: if (bytes && length >= 1) *bytes = 0xdd; return 1; - case 0x042e: if (bytes && length >= 1) *bytes = 0xde; return 1; - case 0x042f: if (bytes && length >= 1) *bytes = 0xdf; return 1; - case 0x0430: if (bytes && length >= 1) *bytes = 0xe0; return 1; - case 0x0431: if (bytes && length >= 1) *bytes = 0xe1; return 1; - case 0x0432: if (bytes && length >= 1) *bytes = 0xe2; return 1; - case 0x0433: if (bytes && length >= 1) *bytes = 0xe3; return 1; - case 0x0434: if (bytes && length >= 1) *bytes = 0xe4; return 1; - case 0x0435: if (bytes && length >= 1) *bytes = 0xe5; return 1; - case 0x0436: if (bytes && length >= 1) *bytes = 0xe6; return 1; - case 0x0437: if (bytes && length >= 1) *bytes = 0xe7; return 1; - case 0x0438: if (bytes && length >= 1) *bytes = 0xe8; return 1; - case 0x0439: if (bytes && length >= 1) *bytes = 0xe9; return 1; - case 0x043a: if (bytes && length >= 1) *bytes = 0xea; return 1; - case 0x043b: if (bytes && length >= 1) *bytes = 0xeb; return 1; - case 0x043c: if (bytes && length >= 1) *bytes = 0xec; return 1; - case 0x043d: if (bytes && length >= 1) *bytes = 0xed; return 1; - case 0x043e: if (bytes && length >= 1) *bytes = 0xee; return 1; - case 0x043f: if (bytes && length >= 1) *bytes = 0xef; return 1; - case 0x0440: if (bytes && length >= 1) *bytes = 0xf0; return 1; - case 0x0441: if (bytes && length >= 1) *bytes = 0xf1; return 1; - case 0x0442: if (bytes && length >= 1) *bytes = 0xf2; return 1; - case 0x0443: if (bytes && length >= 1) *bytes = 0xf3; return 1; - case 0x0444: if (bytes && length >= 1) *bytes = 0xf4; return 1; - case 0x0445: if (bytes && length >= 1) *bytes = 0xf5; return 1; - case 0x0446: if (bytes && length >= 1) *bytes = 0xf6; return 1; - case 0x0447: if (bytes && length >= 1) *bytes = 0xf7; return 1; - case 0x0448: if (bytes && length >= 1) *bytes = 0xf8; return 1; - case 0x0449: if (bytes && length >= 1) *bytes = 0xf9; return 1; - case 0x044a: if (bytes && length >= 1) *bytes = 0xfa; return 1; - case 0x044b: if (bytes && length >= 1) *bytes = 
0xfb; return 1; - case 0x044c: if (bytes && length >= 1) *bytes = 0xfc; return 1; - case 0x044d: if (bytes && length >= 1) *bytes = 0xfd; return 1; - case 0x044e: if (bytes && length >= 1) *bytes = 0xfe; return 1; - case 0x044f: if (bytes && length >= 1) *bytes = 0xff; return 1; - default: return 0; - } -} - - -int Windows1251Encoding::queryConvert(const unsigned char* bytes, int length) const -{ - if (1 <= length) - return _charMap[*bytes]; - else - return -1; -} - - -int Windows1251Encoding::sequenceLength(const unsigned char* bytes, int length) const -{ - return 1; -} - - -} // namespace Poco - diff --git a/base/poco/Foundation/src/Windows1252Encoding.cpp b/base/poco/Foundation/src/Windows1252Encoding.cpp deleted file mode 100644 index b73a19dd90d..00000000000 --- a/base/poco/Foundation/src/Windows1252Encoding.cpp +++ /dev/null @@ -1,151 +0,0 @@ -// -// Windows1252Encoding.cpp -// -// Library: Foundation -// Package: Text -// Module: Windows1252Encoding -// -// Copyright (c) 2005-2007, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Windows1252Encoding.h" -#include "Poco/String.h" -#include - - -namespace Poco { - - -const char* Windows1252Encoding::_names[] = -{ - "windows-1252", - "Windows-1252", - "cp1252", - "CP1252", - NULL -}; - - -const TextEncoding::CharacterMap Windows1252Encoding::_charMap = -{ - /* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */ - /* 00 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, - /* 10 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, - /* 20 */ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, - /* 30 */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, - /* 40 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, - /* 50 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, - /* 60 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, - /* 70 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, - /* 80 */ 0x20ac, 0x0081, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021, 0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008d, 0x017d, 0x008f, - /* 90 */ 0x0090, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x009d, 0x017e, 0x0178, - /* a0 */ 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, - /* b0 */ 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, - /* c0 */ 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, - /* d0 */ 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, - /* e0 */ 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 
0x00ef, - /* f0 */ 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, -}; - - -Windows1252Encoding::Windows1252Encoding() -{ -} - - -Windows1252Encoding::~Windows1252Encoding() -{ -} - - -const char* Windows1252Encoding::canonicalName() const -{ - return _names[0]; -} - - -bool Windows1252Encoding::isA(const std::string& encodingName) const -{ - for (const char** name = _names; *name; ++name) - { - if (Poco::icompare(encodingName, *name) == 0) - return true; - } - return false; -} - - -const TextEncoding::CharacterMap& Windows1252Encoding::characterMap() const -{ - return _charMap; -} - - -int Windows1252Encoding::convert(const unsigned char* bytes) const -{ - return _charMap[*bytes]; -} - - -int Windows1252Encoding::convert(int ch, unsigned char* bytes, int length) const -{ - if (ch >= 0 && ch <= 255 && _charMap[ch] == ch) - { - if (bytes && length >= 1) - *bytes = ch; - return 1; - } - else switch (ch) - { - case 0x20ac: if (bytes && length >= 1) *bytes = 0x80; return 1; - case 0x201a: if (bytes && length >= 1) *bytes = 0x82; return 1; - case 0x0192: if (bytes && length >= 1) *bytes = 0x83; return 1; - case 0x201e: if (bytes && length >= 1) *bytes = 0x84; return 1; - case 0x2026: if (bytes && length >= 1) *bytes = 0x85; return 1; - case 0x2020: if (bytes && length >= 1) *bytes = 0x86; return 1; - case 0x2021: if (bytes && length >= 1) *bytes = 0x87; return 1; - case 0x02c6: if (bytes && length >= 1) *bytes = 0x88; return 1; - case 0x2030: if (bytes && length >= 1) *bytes = 0x89; return 1; - case 0x0160: if (bytes && length >= 1) *bytes = 0x8a; return 1; - case 0x2039: if (bytes && length >= 1) *bytes = 0x8b; return 1; - case 0x0152: if (bytes && length >= 1) *bytes = 0x8c; return 1; - case 0x017d: if (bytes && length >= 1) *bytes = 0x8e; return 1; - case 0x2018: if (bytes && length >= 1) *bytes = 0x91; return 1; - case 0x2019: if (bytes && length >= 1) *bytes = 0x92; return 1; - case 0x201c: if (bytes && length >= 1) *bytes = 0x93; return 1; - case 0x201d: if (bytes && length >= 1) *bytes = 0x94; return 1; - case 0x2022: if (bytes && length >= 1) *bytes = 0x95; return 1; - case 0x2013: if (bytes && length >= 1) *bytes = 0x96; return 1; - case 0x2014: if (bytes && length >= 1) *bytes = 0x97; return 1; - case 0x02dc: if (bytes && length >= 1) *bytes = 0x98; return 1; - case 0x2122: if (bytes && length >= 1) *bytes = 0x99; return 1; - case 0x0161: if (bytes && length >= 1) *bytes = 0x9a; return 1; - case 0x203a: if (bytes && length >= 1) *bytes = 0x9b; return 1; - case 0x0153: if (bytes && length >= 1) *bytes = 0x9c; return 1; - case 0x017e: if (bytes && length >= 1) *bytes = 0x9e; return 1; - case 0x0178: if (bytes && length >= 1) *bytes = 0x9f; return 1; - default: return 0; - } -} - - -int Windows1252Encoding::queryConvert(const unsigned char* bytes, int length) const -{ - if (1 <= length) - return _charMap[*bytes]; - else - return -1; -} - - -int Windows1252Encoding::sequenceLength(const unsigned char* bytes, int length) const -{ - return 1; -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/WindowsConsoleChannel.cpp b/base/poco/Foundation/src/WindowsConsoleChannel.cpp deleted file mode 100644 index 48665bb178a..00000000000 --- a/base/poco/Foundation/src/WindowsConsoleChannel.cpp +++ /dev/null @@ -1,269 +0,0 @@ -// -// WindowsConsoleChannel.cpp -// -// Library: Foundation -// Package: Logging -// Module: WindowsConsoleChannel -// -// Copyright (c) 2007, Applied Informatics Software Engineering 
GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/WindowsConsoleChannel.h" -#include "Poco/Message.h" -#include "Poco/String.h" -#include "Poco/Exception.h" - - -namespace Poco { - - -WindowsConsoleChannel::WindowsConsoleChannel(): - _isFile(false), - _hConsole(INVALID_HANDLE_VALUE) -{ - _hConsole = GetStdHandle(STD_OUTPUT_HANDLE); - // check whether the console has been redirected - DWORD mode; - _isFile = (GetConsoleMode(_hConsole, &mode) == 0); -} - - -WindowsConsoleChannel::~WindowsConsoleChannel() -{ -} - - -void WindowsConsoleChannel::log(const Message& msg) -{ - std::string text = msg.getText(); - text += "\r\n"; - - DWORD written; - WriteFile(_hConsole, text.data(), text.size(), &written, NULL); -} - - -WindowsColorConsoleChannel::WindowsColorConsoleChannel(): - _enableColors(true), - _isFile(false), - _hConsole(INVALID_HANDLE_VALUE) -{ - _hConsole = GetStdHandle(STD_OUTPUT_HANDLE); - // check whether the console has been redirected - DWORD mode; - _isFile = (GetConsoleMode(_hConsole, &mode) == 0); - initColors(); -} - - -WindowsColorConsoleChannel::~WindowsColorConsoleChannel() -{ -} - - -void WindowsColorConsoleChannel::log(const Message& msg) -{ - std::string text = msg.getText(); - text += "\r\n"; - - if (_enableColors && !_isFile) - { - WORD attr = _colors[0]; - attr &= 0xFFF0; - attr |= _colors[msg.getPriority()]; - SetConsoleTextAttribute(_hConsole, attr); - } - - DWORD written; - WriteFile(_hConsole, text.data(), text.size(), &written, NULL); - - if (_enableColors && !_isFile) - { - SetConsoleTextAttribute(_hConsole, _colors[0]); - } -} - - -void WindowsColorConsoleChannel::setProperty(const std::string& name, const std::string& value) -{ - if (name == "enableColors") - { - _enableColors = icompare(value, "true") == 0; - } - else if (name == "traceColor") - { - _colors[Message::PRIO_TRACE] = parseColor(value); - } - else if (name == "debugColor") - { - _colors[Message::PRIO_DEBUG] = parseColor(value); - } - else if (name == "informationColor") - { - _colors[Message::PRIO_INFORMATION] = parseColor(value); - } - else if (name == "noticeColor") - { - _colors[Message::PRIO_NOTICE] = parseColor(value); - } - else if (name == "warningColor") - { - _colors[Message::PRIO_WARNING] = parseColor(value); - } - else if (name == "errorColor") - { - _colors[Message::PRIO_ERROR] = parseColor(value); - } - else if (name == "criticalColor") - { - _colors[Message::PRIO_CRITICAL] = parseColor(value); - } - else if (name == "fatalColor") - { - _colors[Message::PRIO_FATAL] = parseColor(value); - } - else - { - Channel::setProperty(name, value); - } -} - - -std::string WindowsColorConsoleChannel::getProperty(const std::string& name) const -{ - if (name == "enableColors") - { - return _enableColors ? 
"true" : "false"; - } - else if (name == "traceColor") - { - return formatColor(_colors[Message::PRIO_TRACE]); - } - else if (name == "debugColor") - { - return formatColor(_colors[Message::PRIO_DEBUG]); - } - else if (name == "informationColor") - { - return formatColor(_colors[Message::PRIO_INFORMATION]); - } - else if (name == "noticeColor") - { - return formatColor(_colors[Message::PRIO_NOTICE]); - } - else if (name == "warningColor") - { - return formatColor(_colors[Message::PRIO_WARNING]); - } - else if (name == "errorColor") - { - return formatColor(_colors[Message::PRIO_ERROR]); - } - else if (name == "criticalColor") - { - return formatColor(_colors[Message::PRIO_CRITICAL]); - } - else if (name == "fatalColor") - { - return formatColor(_colors[Message::PRIO_FATAL]); - } - else - { - return Channel::getProperty(name); - } -} - - -WORD WindowsColorConsoleChannel::parseColor(const std::string& color) const -{ - if (icompare(color, "default") == 0) - return _colors[0]; - else if (icompare(color, "black") == 0) - return CC_BLACK; - else if (icompare(color, "red") == 0) - return CC_RED; - else if (icompare(color, "green") == 0) - return CC_GREEN; - else if (icompare(color, "brown") == 0) - return CC_BROWN; - else if (icompare(color, "blue") == 0) - return CC_BLUE; - else if (icompare(color, "magenta") == 0) - return CC_MAGENTA; - else if (icompare(color, "cyan") == 0) - return CC_CYAN; - else if (icompare(color, "gray") == 0) - return CC_GRAY; - else if (icompare(color, "darkGray") == 0) - return CC_DARKGRAY; - else if (icompare(color, "lightRed") == 0) - return CC_LIGHTRED; - else if (icompare(color, "lightGreen") == 0) - return CC_LIGHTGREEN; - else if (icompare(color, "yellow") == 0) - return CC_YELLOW; - else if (icompare(color, "lightBlue") == 0) - return CC_LIGHTBLUE; - else if (icompare(color, "lightMagenta") == 0) - return CC_LIGHTMAGENTA; - else if (icompare(color, "lightCyan") == 0) - return CC_LIGHTCYAN; - else if (icompare(color, "white") == 0) - return CC_WHITE; - else throw InvalidArgumentException("Invalid color value", color); -} - - -std::string WindowsColorConsoleChannel::formatColor(WORD color) const -{ - switch (color) - { - case CC_BLACK: return "black"; - case CC_RED: return "red"; - case CC_GREEN: return "green"; - case CC_BROWN: return "brown"; - case CC_BLUE: return "blue"; - case CC_MAGENTA: return "magenta"; - case CC_CYAN: return "cyan"; - case CC_GRAY: return "gray"; - case CC_DARKGRAY: return "darkGray"; - case CC_LIGHTRED: return "lightRed"; - case CC_LIGHTGREEN: return "lightGreen"; - case CC_YELLOW: return "yellow"; - case CC_LIGHTBLUE: return "lightBlue"; - case CC_LIGHTMAGENTA: return "lightMagenta"; - case CC_LIGHTCYAN: return "lightCyan"; - case CC_WHITE: return "white"; - default: return "invalid"; - } -} - - -void WindowsColorConsoleChannel::initColors() -{ - if (!_isFile) - { - CONSOLE_SCREEN_BUFFER_INFO csbi; - GetConsoleScreenBufferInfo(_hConsole, &csbi); - _colors[0] = csbi.wAttributes; - } - else - { - _colors[0] = CC_WHITE; - } - _colors[Message::PRIO_FATAL] = CC_LIGHTRED; - _colors[Message::PRIO_CRITICAL] = CC_LIGHTRED; - _colors[Message::PRIO_ERROR] = CC_LIGHTRED; - _colors[Message::PRIO_WARNING] = CC_YELLOW; - _colors[Message::PRIO_NOTICE] = _colors[0]; - _colors[Message::PRIO_INFORMATION] = _colors[0]; - _colors[Message::PRIO_DEBUG] = CC_GRAY; - _colors[Message::PRIO_TRACE] = CC_GRAY; -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/adler32.c b/base/poco/Foundation/src/adler32.c deleted file mode 100644 index 
8fa49192ada..00000000000 --- a/base/poco/Foundation/src/adler32.c +++ /dev/null @@ -1,188 +0,0 @@ -/* adler32.c -- compute the Adler-32 checksum of a data stream - * Copyright (C) 1995-2011, 2016 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* @(#) $Id$ */ - -#include "zutil.h" - -#define local static - -local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2)); - -#define BASE 65521U /* largest prime smaller than 65536 */ -#define NMAX 5552 -/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ - -#define DO1(buf,i) {adler += (buf)[i]; sum2 += adler;} -#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); -#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); -#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); -#define DO16(buf) DO8(buf,0); DO8(buf,8); - -/* use NO_DIVIDE if your processor does not do division in hardware -- - try it both ways to see which is faster */ -#ifdef NO_DIVIDE -/* note that this assumes BASE is 65521, where 65536 % 65521 == 15 - (thank you to John Reiser for pointing this out) */ -# define CHOP(a) \ - do { \ - unsigned long tmp = a >> 16; \ - a &= 0xffffUL; \ - a += (tmp << 4) - tmp; \ - } while (0) -# define MOD28(a) \ - do { \ - CHOP(a); \ - if (a >= BASE) a -= BASE; \ - } while (0) -# define MOD(a) \ - do { \ - CHOP(a); \ - MOD28(a); \ - } while (0) -# define MOD63(a) \ - do { /* this assumes a is not negative */ \ - z_off64_t tmp = a >> 32; \ - a &= 0xffffffffL; \ - a += (tmp << 8) - (tmp << 5) + tmp; \ - tmp = a >> 16; \ - a &= 0xffffL; \ - a += (tmp << 4) - tmp; \ - tmp = a >> 16; \ - a &= 0xffffL; \ - a += (tmp << 4) - tmp; \ - if (a >= BASE) a -= BASE; \ - } while (0) -#else -# define MOD(a) a %= BASE -# define MOD28(a) a %= BASE -# define MOD63(a) a %= BASE -#endif - -/* ========================================================================= */ -uLong ZEXPORT adler32_z(adler, buf, len) - uLong adler; - const Bytef *buf; - z_size_t len; -{ - unsigned long sum2; - unsigned n; - - /* split Adler-32 into component sums */ - sum2 = (adler >> 16) & 0xffff; - adler &= 0xffff; - - /* in case user likes doing a byte at a time, keep it fast */ - if (len == 1) { - adler += buf[0]; - if (adler >= BASE) - adler -= BASE; - sum2 += adler; - if (sum2 >= BASE) - sum2 -= BASE; - return adler | (sum2 << 16); - } - - /* initial Adler-32 value (deferred check for len == 1 speed) */ - if (buf == Z_NULL) - return 1L; - - /* in case short lengths are provided, keep it somewhat fast */ - if (len < 16) { - while (len--) { - adler += *buf++; - sum2 += adler; - } - if (adler >= BASE) - adler -= BASE; - MOD28(sum2); /* only added so many BASE's */ - return adler | (sum2 << 16); - } - - /* do length NMAX blocks -- requires just one modulo operation */ - while (len >= NMAX) { - len -= NMAX; - n = NMAX / 16; /* NMAX is divisible by 16 */ - do { - DO16(buf); /* 16 sums unrolled */ - buf += 16; - } while (--n); - MOD(adler); - MOD(sum2); - } - - /* do remaining bytes (less than NMAX, still just one modulo) */ - if (len) { /* avoid modulos if none remaining */ - while (len >= 16) { - len -= 16; - DO16(buf); - buf += 16; - } - while (len--) { - adler += *buf++; - sum2 += adler; - } - MOD(adler); - MOD(sum2); - } - - /* return recombined sums */ - return adler | (sum2 << 16); -} - -/* ========================================================================= */ -uLong ZEXPORT adler32(adler, buf, len) - uLong adler; - const Bytef *buf; - uInt len; -{ - return adler32_z(adler, buf, len); -} - -/* 
========================================================================= */ -local uLong adler32_combine_(adler1, adler2, len2) - uLong adler1; - uLong adler2; - z_off64_t len2; -{ - unsigned long sum1; - unsigned long sum2; - unsigned rem; - - /* for negative len, return invalid adler32 as a clue for debugging */ - if (len2 < 0) - return 0xffffffffUL; - - /* the derivation of this formula is left as an exercise for the reader */ - MOD63(len2); /* assumes len2 >= 0 */ - rem = (unsigned)len2; - sum1 = adler1 & 0xffff; - sum2 = rem * sum1; - MOD(sum2); - sum1 += (adler2 & 0xffff) + BASE - 1; - sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem; - if (sum1 >= BASE) sum1 -= BASE; - if (sum1 >= BASE) sum1 -= BASE; - if (sum2 >= ((unsigned long)BASE << 1)) sum2 -= ((unsigned long)BASE << 1); - if (sum2 >= BASE) sum2 -= BASE; - return sum1 | (sum2 << 16); -} - -/* ========================================================================= */ -uLong ZEXPORT adler32_combine(adler1, adler2, len2) - uLong adler1; - uLong adler2; - z_off_t len2; -{ - return adler32_combine_(adler1, adler2, len2); -} - -uLong ZEXPORT adler32_combine64(adler1, adler2, len2) - uLong adler1; - uLong adler2; - z_off64_t len2; -{ - return adler32_combine_(adler1, adler2, len2); -} diff --git a/base/poco/Foundation/src/bignum-dtoa.cc b/base/poco/Foundation/src/bignum-dtoa.cc deleted file mode 100644 index 5a44adfccf3..00000000000 --- a/base/poco/Foundation/src/bignum-dtoa.cc +++ /dev/null @@ -1,641 +0,0 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include - -#include "bignum-dtoa.h" - -#include "bignum.h" -#include "ieee.h" - -namespace double_conversion { - -static int NormalizedExponent(uint64_t significand, int exponent) { - ASSERT(significand != 0); - while ((significand & Double::kHiddenBit) == 0) { - significand = significand << 1; - exponent = exponent - 1; - } - return exponent; -} - - -// Forward declarations: -// Returns an estimation of k such that 10^(k-1) <= v < 10^k. 
-static int EstimatePower(int exponent); -// Computes v / 10^estimated_power exactly, as a ratio of two bignums, numerator -// and denominator. -static void InitialScaledStartValues(uint64_t significand, - int exponent, - bool lower_boundary_is_closer, - int estimated_power, - bool need_boundary_deltas, - Bignum* numerator, - Bignum* denominator, - Bignum* delta_minus, - Bignum* delta_plus); -// Multiplies numerator/denominator so that its values lies in the range 1-10. -// Returns decimal_point s.t. -// v = numerator'/denominator' * 10^(decimal_point-1) -// where numerator' and denominator' are the values of numerator and -// denominator after the call to this function. -static void FixupMultiply10(int estimated_power, bool is_even, - int* decimal_point, - Bignum* numerator, Bignum* denominator, - Bignum* delta_minus, Bignum* delta_plus); -// Generates digits from the left to the right and stops when the generated -// digits yield the shortest decimal representation of v. -static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator, - Bignum* delta_minus, Bignum* delta_plus, - bool is_even, - Vector<char> buffer, int* length); -// Generates 'requested_digits' after the decimal point. -static void BignumToFixed(int requested_digits, int* decimal_point, - Bignum* numerator, Bignum* denominator, - Vector<char> buffer, int* length); -// Generates 'count' digits of numerator/denominator. -// Once 'count' digits have been produced rounds the result depending on the -// remainder (remainders of exactly .5 round upwards). Might update the -// decimal_point when rounding up (for example for 0.9999). -static void GenerateCountedDigits(int count, int* decimal_point, - Bignum* numerator, Bignum* denominator, - Vector<char> buffer, int* length); - - -void BignumDtoa(double v, BignumDtoaMode mode, int requested_digits, - Vector<char> buffer, int* length, int* decimal_point) { - ASSERT(v > 0); - ASSERT(!Double(v).IsSpecial()); - uint64_t significand; - int exponent; - bool lower_boundary_is_closer; - if (mode == BIGNUM_DTOA_SHORTEST_SINGLE) { - float f = static_cast<float>(v); - ASSERT(f == v); - significand = Single(f).Significand(); - exponent = Single(f).Exponent(); - lower_boundary_is_closer = Single(f).LowerBoundaryIsCloser(); - } else { - significand = Double(v).Significand(); - exponent = Double(v).Exponent(); - lower_boundary_is_closer = Double(v).LowerBoundaryIsCloser(); - } - bool need_boundary_deltas = - (mode == BIGNUM_DTOA_SHORTEST || mode == BIGNUM_DTOA_SHORTEST_SINGLE); - - bool is_even = (significand & 1) == 0; - int normalized_exponent = NormalizedExponent(significand, exponent); - // estimated_power might be too low by 1. - int estimated_power = EstimatePower(normalized_exponent); - - // Shortcut for Fixed. - // The requested digits correspond to the digits after the point. If the - // number is much too small, then there is no need in trying to get any - // digits. - if (mode == BIGNUM_DTOA_FIXED && -estimated_power - 1 > requested_digits) { - buffer[0] = '\0'; - *length = 0; - // Set decimal-point to -requested_digits. This is what Gay does. - // Note that it should not have any effect anyways since the string is - // empty. - *decimal_point = -requested_digits; - return; - } - - Bignum numerator; - Bignum denominator; - Bignum delta_minus; - Bignum delta_plus; - // Make sure the bignum can grow large enough. The smallest double equals - // 4e-324. In this case the denominator needs fewer than 324*4 binary digits. 
- // The maximum double is 1.7976931348623157e308 which needs fewer than - // 308*4 binary digits. - ASSERT(Bignum::kMaxSignificantBits >= 324*4); - InitialScaledStartValues(significand, exponent, lower_boundary_is_closer, - estimated_power, need_boundary_deltas, - &numerator, &denominator, - &delta_minus, &delta_plus); - // We now have v = (numerator / denominator) * 10^estimated_power. - FixupMultiply10(estimated_power, is_even, decimal_point, - &numerator, &denominator, - &delta_minus, &delta_plus); - // We now have v = (numerator / denominator) * 10^(decimal_point-1), and - // 1 <= (numerator + delta_plus) / denominator < 10 - switch (mode) { - case BIGNUM_DTOA_SHORTEST: - case BIGNUM_DTOA_SHORTEST_SINGLE: - GenerateShortestDigits(&numerator, &denominator, - &delta_minus, &delta_plus, - is_even, buffer, length); - break; - case BIGNUM_DTOA_FIXED: - BignumToFixed(requested_digits, decimal_point, - &numerator, &denominator, - buffer, length); - break; - case BIGNUM_DTOA_PRECISION: - GenerateCountedDigits(requested_digits, decimal_point, - &numerator, &denominator, - buffer, length); - break; - default: - UNREACHABLE(); - } - buffer[*length] = '\0'; -} - - -// The procedure starts generating digits from the left to the right and stops -// when the generated digits yield the shortest decimal representation of v. A -// decimal representation of v is a number lying closer to v than to any other -// double, so it converts to v when read. -// -// This is true if d, the decimal representation, is between m- and m+, the -// upper and lower boundaries. d must be strictly between them if !is_even. -// m- := (numerator - delta_minus) / denominator -// m+ := (numerator + delta_plus) / denominator -// -// Precondition: 0 <= (numerator+delta_plus) / denominator < 10. -// If 1 <= (numerator+delta_plus) / denominator < 10 then no leading 0 digit -// will be produced. This should be the standard precondition. -static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator, - Bignum* delta_minus, Bignum* delta_plus, - bool is_even, - Vector<char> buffer, int* length) { - // Small optimization: if delta_minus and delta_plus are the same just reuse - // one of the two bignums. - if (Bignum::Equal(*delta_minus, *delta_plus)) { - delta_plus = delta_minus; - } - *length = 0; - for (;;) { - uint16_t digit; - digit = numerator->DivideModuloIntBignum(*denominator); - ASSERT(digit <= 9); // digit is a uint16_t and therefore always positive. - // digit = numerator / denominator (integer division). - // numerator = numerator % denominator. - buffer[(*length)++] = static_cast<char>(digit + '0'); - - // Can we stop already? - // If the remainder of the division is less than the distance to the lower - // boundary we can stop. In this case we simply round down (discarding the - // remainder). - // Similarly we test if we can round up (using the upper boundary). - bool in_delta_room_minus; - bool in_delta_room_plus; - if (is_even) { - in_delta_room_minus = Bignum::LessEqual(*numerator, *delta_minus); - } else { - in_delta_room_minus = Bignum::Less(*numerator, *delta_minus); - } - if (is_even) { - in_delta_room_plus = - Bignum::PlusCompare(*numerator, *delta_plus, *denominator) >= 0; - } else { - in_delta_room_plus = - Bignum::PlusCompare(*numerator, *delta_plus, *denominator) > 0; - } - if (!in_delta_room_minus && !in_delta_room_plus) { - // Prepare for next iteration. - numerator->Times10(); - delta_minus->Times10(); - // We optimized delta_plus to be equal to delta_minus (if they share the - // same value). 
So don't multiply delta_plus if they point to the same - // object. - if (delta_minus != delta_plus) { - delta_plus->Times10(); - } - } else if (in_delta_room_minus && in_delta_room_plus) { - // Let's see if 2*numerator < denominator. - // If yes, then the next digit would be < 5 and we can round down. - int compare = Bignum::PlusCompare(*numerator, *numerator, *denominator); - if (compare < 0) { - // Remaining digits are less than .5. -> Round down (== do nothing). - } else if (compare > 0) { - // Remaining digits are more than .5 of denominator. -> Round up. - // Note that the last digit could not be a '9' as otherwise the whole - // loop would have stopped earlier. - // We still have an assert here in case the preconditions were not - // satisfied. - ASSERT(buffer[(*length) - 1] != '9'); - buffer[(*length) - 1]++; - } else { - // Halfway case. - // TODO(floitsch): need a way to solve half-way cases. - // For now let's round towards even (since this is what Gay seems to - // do). - - if ((buffer[(*length) - 1] - '0') % 2 == 0) { - // Round down => Do nothing. - } else { - ASSERT(buffer[(*length) - 1] != '9'); - buffer[(*length) - 1]++; - } - } - return; - } else if (in_delta_room_minus) { - // Round down (== do nothing). - return; - } else { // in_delta_room_plus - // Round up. - // Note again that the last digit could not be '9' since this would have - // stopped the loop earlier. - // We still have an ASSERT here, in case the preconditions were not - // satisfied. - ASSERT(buffer[(*length) -1] != '9'); - buffer[(*length) - 1]++; - return; - } - } -} - - -// Let v = numerator / denominator < 10. -// Then we generate 'count' digits of d = x.xxxxx... (without the decimal point) -// from left to right. Once 'count' digits have been produced we decide whether -// to round up or down. Remainders of exactly .5 round upwards. Numbers such -// as 9.999999 propagate a carry all the way, and change the -// exponent (decimal_point), when rounding upwards. -static void GenerateCountedDigits(int count, int* decimal_point, - Bignum* numerator, Bignum* denominator, - Vector<char> buffer, int* length) { - ASSERT(count >= 0); - for (int i = 0; i < count - 1; ++i) { - uint16_t digit; - digit = numerator->DivideModuloIntBignum(*denominator); - ASSERT(digit <= 9); // digit is a uint16_t and therefore always positive. - // digit = numerator / denominator (integer division). - // numerator = numerator % denominator. - buffer[i] = static_cast<char>(digit + '0'); - // Prepare for next iteration. - numerator->Times10(); - } - // Generate the last digit. - uint16_t digit; - digit = numerator->DivideModuloIntBignum(*denominator); - if (Bignum::PlusCompare(*numerator, *numerator, *denominator) >= 0) { - digit++; - } - ASSERT(digit <= 10); - buffer[count - 1] = static_cast<char>(digit + '0'); - // Correct bad digits (in case we had a sequence of '9's). Propagate the - // carry until we hat a non-'9' or til we reach the first digit. - for (int i = count - 1; i > 0; --i) { - if (buffer[i] != '0' + 10) break; - buffer[i] = '0'; - buffer[i - 1]++; - } - if (buffer[0] == '0' + 10) { - // Propagate a carry past the top place. - buffer[0] = '1'; - (*decimal_point)++; - } - *length = count; -} - - -// Generates 'requested_digits' after the decimal point. It might omit -// trailing '0's. If the input number is too small then no digits at all are -// generated (ex.: 2 fixed digits for 0.00001). -// -// Input verifies: 1 <= (numerator + delta) / denominator < 10. 
-static void BignumToFixed(int requested_digits, int* decimal_point, - Bignum* numerator, Bignum* denominator, - Vector<char> buffer, int* length) { - // Note that we have to look at more than just the requested_digits, since - // a number could be rounded up. Example: v=0.5 with requested_digits=0. - // Even though the power of v equals 0 we can't just stop here. - if (-(*decimal_point) > requested_digits) { - // The number is definitively too small. - // Ex: 0.001 with requested_digits == 1. - // Set decimal-point to -requested_digits. This is what Gay does. - // Note that it should not have any effect anyways since the string is - // empty. - *decimal_point = -requested_digits; - *length = 0; - return; - } else if (-(*decimal_point) == requested_digits) { - // We only need to verify if the number rounds down or up. - // Ex: 0.04 and 0.06 with requested_digits == 1. - ASSERT(*decimal_point == -requested_digits); - // Initially the fraction lies in range (1, 10]. Multiply the denominator - // by 10 so that we can compare more easily. - denominator->Times10(); - if (Bignum::PlusCompare(*numerator, *numerator, *denominator) >= 0) { - // If the fraction is >= 0.5 then we have to include the rounded - // digit. - buffer[0] = '1'; - *length = 1; - (*decimal_point)++; - } else { - // Note that we caught most of similar cases earlier. - *length = 0; - } - return; - } else { - // The requested digits correspond to the digits after the point. - // The variable 'needed_digits' includes the digits before the point. - int needed_digits = (*decimal_point) + requested_digits; - GenerateCountedDigits(needed_digits, decimal_point, - numerator, denominator, - buffer, length); - } -} - - -// Returns an estimation of k such that 10^(k-1) <= v < 10^k where -// v = f * 2^exponent and 2^52 <= f < 2^53. -// v is hence a normalized double with the given exponent. The output is an -// approximation for the exponent of the decimal approimation .digits * 10^k. -// -// The result might undershoot by 1 in which case 10^k <= v < 10^k+1. -// Note: this property holds for v's upper boundary m+ too. -// 10^k <= m+ < 10^k+1. -// (see explanation below). -// -// Examples: -// EstimatePower(0) => 16 -// EstimatePower(-52) => 0 -// -// Note: e >= 0 => EstimatedPower(e) > 0. No similar claim can be made for e<0. -static int EstimatePower(int exponent) { - // This function estimates log10 of v where v = f*2^e (with e == exponent). - // Note that 10^floor(log10(v)) <= v, but v <= 10^ceil(log10(v)). - // Note that f is bounded by its container size. Let p = 53 (the double's - // significand size). Then 2^(p-1) <= f < 2^p. - // - // Given that log10(v) == log2(v)/log2(10) and e+(len(f)-1) is quite close - // to log2(v) the function is simplified to (e+(len(f)-1)/log2(10)). - // The computed number undershoots by less than 0.631 (when we compute log3 - // and not log10). - // - // Optimization: since we only need an approximated result this computation - // can be performed on 64 bit integers. On x86/x64 architecture the speedup is - // not really measurable, though. - // - // Since we want to avoid overshooting we decrement by 1e10 so that - // floating-point imprecisions don't affect us. - // - // Explanation for v's boundary m+: the computation takes advantage of - // the fact that 2^(p-1) <= f < 2^p. Boundaries still satisfy this requirement - // (even for denormals where the delta can be much more important). - - const double k1Log10 = 0.30102999566398114; // 1/lg(10) - - // For doubles len(f) == 53 (don't forget the hidden bit). 
-  const int kSignificandSize = Double::kSignificandSize;
-  double estimate = ceil((exponent + kSignificandSize - 1) * k1Log10 - 1e-10);
-  return static_cast<int>(estimate);
-}
-
-
-// See comments for InitialScaledStartValues.
-static void InitialScaledStartValuesPositiveExponent(
-    uint64_t significand, int exponent,
-    int estimated_power, bool need_boundary_deltas,
-    Bignum* numerator, Bignum* denominator,
-    Bignum* delta_minus, Bignum* delta_plus) {
-  // A positive exponent implies a positive power.
-  ASSERT(estimated_power >= 0);
-  // Since the estimated_power is positive we simply multiply the denominator
-  // by 10^estimated_power.
-
-  // numerator = v.
-  numerator->AssignUInt64(significand);
-  numerator->ShiftLeft(exponent);
-  // denominator = 10^estimated_power.
-  denominator->AssignPowerUInt16(10, estimated_power);
-
-  if (need_boundary_deltas) {
-    // Introduce a common denominator so that the deltas to the boundaries are
-    // integers.
-    denominator->ShiftLeft(1);
-    numerator->ShiftLeft(1);
-    // Let v = f * 2^e, then m+ - v = 1/2 * 2^e; With the common
-    // denominator (of 2) delta_plus equals 2^e.
-    delta_plus->AssignUInt16(1);
-    delta_plus->ShiftLeft(exponent);
-    // Same for delta_minus. The adjustments if f == 2^p-1 are done later.
-    delta_minus->AssignUInt16(1);
-    delta_minus->ShiftLeft(exponent);
-  }
-}
-
-
-// See comments for InitialScaledStartValues
-static void InitialScaledStartValuesNegativeExponentPositivePower(
-    uint64_t significand, int exponent,
-    int estimated_power, bool need_boundary_deltas,
-    Bignum* numerator, Bignum* denominator,
-    Bignum* delta_minus, Bignum* delta_plus) {
-  // v = f * 2^e with e < 0, and with estimated_power >= 0.
-  // This means that e is close to 0 (have a look at how estimated_power is
-  // computed).
-
-  // numerator = significand
-  //  since v = significand * 2^exponent this is equivalent to
-  //  numerator = v * 2^-exponent
-  numerator->AssignUInt64(significand);
-  // denominator = 10^estimated_power * 2^-exponent (with exponent < 0)
-  denominator->AssignPowerUInt16(10, estimated_power);
-  denominator->ShiftLeft(-exponent);
-
-  if (need_boundary_deltas) {
-    // Introduce a common denominator so that the deltas to the boundaries are
-    // integers.
-    denominator->ShiftLeft(1);
-    numerator->ShiftLeft(1);
-    // Let v = f * 2^e, then m+ - v = 1/2 * 2^e; With the common
-    // denominator (of 2) delta_plus equals 2^e.
-    // Given that the denominator already includes v's exponent the distance
-    // to the boundaries is simply 1.
-    delta_plus->AssignUInt16(1);
-    // Same for delta_minus. The adjustments if f == 2^p-1 are done later.
-    delta_minus->AssignUInt16(1);
-  }
-}
-
-
-// See comments for InitialScaledStartValues
-static void InitialScaledStartValuesNegativeExponentNegativePower(
-    uint64_t significand, int exponent,
-    int estimated_power, bool need_boundary_deltas,
-    Bignum* numerator, Bignum* denominator,
-    Bignum* delta_minus, Bignum* delta_plus) {
-  // Instead of multiplying the denominator with 10^estimated_power we
-  // multiply all values (numerator and deltas) by 10^-estimated_power.
-
-  // Use numerator as temporary container for power_ten.
-  Bignum* power_ten = numerator;
-  power_ten->AssignPowerUInt16(10, -estimated_power);
-
-  if (need_boundary_deltas) {
-    // Since power_ten == numerator we must make a copy of 10^estimated_power
-    // before we complete the computation of the numerator.
- // delta_plus = delta_minus = 10^estimated_power - delta_plus->AssignBignum(*power_ten); - delta_minus->AssignBignum(*power_ten); - } - - // numerator = significand * 2 * 10^-estimated_power - // since v = significand * 2^exponent this is equivalent to - // numerator = v * 10^-estimated_power * 2 * 2^-exponent. - // Remember: numerator has been abused as power_ten. So no need to assign it - // to itself. - ASSERT(numerator == power_ten); - numerator->MultiplyByUInt64(significand); - - // denominator = 2 * 2^-exponent with exponent < 0. - denominator->AssignUInt16(1); - denominator->ShiftLeft(-exponent); - - if (need_boundary_deltas) { - // Introduce a common denominator so that the deltas to the boundaries are - // integers. - numerator->ShiftLeft(1); - denominator->ShiftLeft(1); - // With this shift the boundaries have their correct value, since - // delta_plus = 10^-estimated_power, and - // delta_minus = 10^-estimated_power. - // These assignments have been done earlier. - // The adjustments if f == 2^p-1 (lower boundary is closer) are done later. - } -} - - -// Let v = significand * 2^exponent. -// Computes v / 10^estimated_power exactly, as a ratio of two bignums, numerator -// and denominator. The functions GenerateShortestDigits and -// GenerateCountedDigits will then convert this ratio to its decimal -// representation d, with the required accuracy. -// Then d * 10^estimated_power is the representation of v. -// (Note: the fraction and the estimated_power might get adjusted before -// generating the decimal representation.) -// -// The initial start values consist of: -// - a scaled numerator: s.t. numerator/denominator == v / 10^estimated_power. -// - a scaled (common) denominator. -// optionally (used by GenerateShortestDigits to decide if it has the shortest -// decimal converting back to v): -// - v - m-: the distance to the lower boundary. -// - m+ - v: the distance to the upper boundary. -// -// v, m+, m-, and therefore v - m- and m+ - v all share the same denominator. -// -// Let ep == estimated_power, then the returned values will satisfy: -// v / 10^ep = numerator / denominator. -// v's boundaries m- and m+: -// m- / 10^ep == v / 10^ep - delta_minus / denominator -// m+ / 10^ep == v / 10^ep + delta_plus / denominator -// Or in other words: -// m- == v - delta_minus * 10^ep / denominator; -// m+ == v + delta_plus * 10^ep / denominator; -// -// Since 10^(k-1) <= v < 10^k (with k == estimated_power) -// or 10^k <= v < 10^(k+1) -// we then have 0.1 <= numerator/denominator < 1 -// or 1 <= numerator/denominator < 10 -// -// It is then easy to kickstart the digit-generation routine. -// -// The boundary-deltas are only filled if the mode equals BIGNUM_DTOA_SHORTEST -// or BIGNUM_DTOA_SHORTEST_SINGLE. 
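As a side illustration of the relations spelled out in the comment above, plain 64-bit arithmetic is enough to trace a small value through the negative-exponent/positive-power case. This is an editorial sketch only, not part of the removed file; all names are ad hoc:

    #include <cstdint>
    #include <cstdio>

    int main() {
        // v = f * 2^e with f = 13, e = -2, i.e. v = 3.25; EstimatePower gives k = 1.
        uint64_t significand = 13;
        int exponent = -2;
        int estimated_power = 1;

        // Negative exponent, positive power: numerator = f,
        // denominator = 10^k * 2^-e, so numerator/denominator == v / 10^k.
        uint64_t numerator = significand;    // 13
        uint64_t denominator = 10;           // 10^1
        denominator <<= -exponent;           // * 2^2 -> 40

        // 13/40 == 0.325 < 1, so (FixupMultiply10's job) multiply the
        // numerator by 10 and keep decimal_point = estimated_power.
        numerator *= 10;                     // 130; 130/40 now lies in [1, 10)
        int decimal_point = estimated_power;

        // Emit digits by division with remainder, as the generation loops do.
        char buffer[4] = {};
        for (int i = 0; i < 3; ++i) {
            buffer[i] = static_cast<char>('0' + numerator / denominator);
            numerator = (numerator % denominator) * 10;
        }
        // buffer == "325", decimal_point == 1: reads as 0.325 * 10^1 = 3.25.
        std::printf("%s E%d\n", buffer, decimal_point);
        return 0;
    }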
-
-static void InitialScaledStartValues(uint64_t significand,
-                                     int exponent,
-                                     bool lower_boundary_is_closer,
-                                     int estimated_power,
-                                     bool need_boundary_deltas,
-                                     Bignum* numerator,
-                                     Bignum* denominator,
-                                     Bignum* delta_minus,
-                                     Bignum* delta_plus) {
-  if (exponent >= 0) {
-    InitialScaledStartValuesPositiveExponent(
-        significand, exponent, estimated_power, need_boundary_deltas,
-        numerator, denominator, delta_minus, delta_plus);
-  } else if (estimated_power >= 0) {
-    InitialScaledStartValuesNegativeExponentPositivePower(
-        significand, exponent, estimated_power, need_boundary_deltas,
-        numerator, denominator, delta_minus, delta_plus);
-  } else {
-    InitialScaledStartValuesNegativeExponentNegativePower(
-        significand, exponent, estimated_power, need_boundary_deltas,
-        numerator, denominator, delta_minus, delta_plus);
-  }
-
-  if (need_boundary_deltas && lower_boundary_is_closer) {
-    // The lower boundary is closer at half the distance of "normal" numbers.
-    // Increase the common denominator and adapt all but the delta_minus.
-    denominator->ShiftLeft(1);  // *2
-    numerator->ShiftLeft(1);    // *2
-    delta_plus->ShiftLeft(1);   // *2
-  }
-}
-
-
-// This routine multiplies numerator/denominator so that its value lies in the
-// range [1, 10). That is, after a call to this function we have:
-//    1 <= (numerator + delta_plus) / denominator < 10.
-// Let numerator be the input before modification and numerator' the value
-// after modification, then the output-parameter decimal_point is such that
-//  numerator / denominator * 10^estimated_power ==
-//    numerator' / denominator' * 10^(decimal_point - 1)
-// In some cases estimated_power was too low and the fraction is already in
-// range. We then simply adjust the power so that 10^(k-1) <= v < 10^k (with
-// k == estimated_power + 1) but do not touch the numerator or denominator.
-// Otherwise the routine multiplies the numerator and the deltas by 10.
-static void FixupMultiply10(int estimated_power, bool is_even,
-                            int* decimal_point,
-                            Bignum* numerator, Bignum* denominator,
-                            Bignum* delta_minus, Bignum* delta_plus) {
-  bool in_range;
-  if (is_even) {
-    // For IEEE doubles half-way cases (in decimal system numbers ending with 5)
-    // are rounded to the closest floating-point number with even significand.
-    in_range = Bignum::PlusCompare(*numerator, *delta_plus, *denominator) >= 0;
-  } else {
-    in_range = Bignum::PlusCompare(*numerator, *delta_plus, *denominator) > 0;
-  }
-  if (in_range) {
-    // Since numerator + delta_plus >= denominator we already have
-    // 1 <= numerator/denominator < 10. Simply update the estimated_power.
-    *decimal_point = estimated_power + 1;
-  } else {
-    *decimal_point = estimated_power;
-    numerator->Times10();
-    if (Bignum::Equal(*delta_minus, *delta_plus)) {
-      delta_minus->Times10();
-      delta_plus->AssignBignum(*delta_minus);
-    } else {
-      delta_minus->Times10();
-      delta_plus->Times10();
-    }
-  }
-}
-
-}  // namespace double_conversion
diff --git a/base/poco/Foundation/src/bignum-dtoa.h b/base/poco/Foundation/src/bignum-dtoa.h
deleted file mode 100644
index 6ee62aa23df..00000000000
--- a/base/poco/Foundation/src/bignum-dtoa.h
+++ /dev/null
@@ -1,85 +0,0 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-//       notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-//       copyright notice, this list of conditions and the following
-//       disclaimer in the documentation and/or other materials provided
-//       with the distribution.
-//     * Neither the name of Google Inc. nor the names of its
-//       contributors may be used to endorse or promote products derived
-//       from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef DOUBLE_CONVERSION_BIGNUM_DTOA_H_
-#define DOUBLE_CONVERSION_BIGNUM_DTOA_H_
-
-#include "utils.h"
-
-namespace double_conversion
-{
-
-enum BignumDtoaMode
-{
-    // Return the shortest correct representation.
-    // For example the output of 0.299999999999999988897 is (the less accurate
-    // but correct) 0.3.
-    BIGNUM_DTOA_SHORTEST,
-    // Same as BIGNUM_DTOA_SHORTEST but for single-precision floats.
-    BIGNUM_DTOA_SHORTEST_SINGLE,
-    // Return a fixed number of digits after the decimal point.
-    // For instance fixed(0.1, 4) becomes 0.1000
-    // If the input number is big, the output will be big.
-    BIGNUM_DTOA_FIXED,
-    // Return a fixed number of digits, no matter what the exponent is.
-    BIGNUM_DTOA_PRECISION
-};
-
-// Converts the given double 'v' to ASCII.
-// The result should be interpreted as buffer * 10^(point-length).
-// The buffer will be null-terminated.
-//
-// The input v must be > 0 and different from NaN and Infinity.
-//
-// The output depends on the given mode:
-//  - SHORTEST: produce the least amount of digits for which the internal
-//   identity requirement is still satisfied. If the digits are printed
-//   (together with the correct exponent) then reading this number will give
-//   'v' again. The buffer will choose the representation that is closest to
-//   'v'. If there are two at the same distance, then the number is rounded up.
-//   In this mode the 'requested_digits' parameter is ignored.
-//  - FIXED: produces digits necessary to print a given number with
-//   'requested_digits' digits after the decimal point. The produced digits
-//   might be too short in which case the caller has to fill the gaps with '0's.
-//   Example: toFixed(0.001, 5) is allowed to return buffer="1", point=-2.
-//   Halfway cases are rounded up. The call toFixed(0.15, 2) thus returns
-//   buffer="2", point=0.
-//   Note: the length of the returned buffer has no meaning wrt the significance
-//   of its digits. That is, just because it contains '0's does not mean that
-//   any other digit would not satisfy the internal identity requirement.
-//  - PRECISION: produces 'requested_digits' where the first digit is not '0'.
-//   Even though the length of produced digits usually equals
-//   'requested_digits', the function is allowed to return fewer digits, in
-//   which case the caller has to fill the missing digits with '0's.
-//   Halfway cases are again rounded up.
-// 'BignumDtoa' expects the given buffer to be big enough to hold all digits
-// and a terminating null-character.
-void BignumDtoa(double v, BignumDtoaMode mode, int requested_digits, Vector<char> buffer, int * length, int * point);
-
-} // namespace double_conversion
-
-#endif // DOUBLE_CONVERSION_BIGNUM_DTOA_H_
diff --git a/base/poco/Foundation/src/bignum.cc b/base/poco/Foundation/src/bignum.cc
deleted file mode 100644
index 2743d67e8d9..00000000000
--- a/base/poco/Foundation/src/bignum.cc
+++ /dev/null
@@ -1,766 +0,0 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-//       notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-//       copyright notice, this list of conditions and the following
-//       disclaimer in the documentation and/or other materials provided
-//       with the distribution.
-//     * Neither the name of Google Inc. nor the names of its
-//       contributors may be used to endorse or promote products derived
-//       from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include "bignum.h"
-#include "utils.h"
-
-namespace double_conversion {
-
-Bignum::Bignum()
-    : bigits_(bigits_buffer_, kBigitCapacity), used_digits_(0), exponent_(0) {
-  for (int i = 0; i < kBigitCapacity; ++i) {
-    bigits_[i] = 0;
-  }
-}
-
-
-template<typename S>
-static int BitSize(S value) {
-  (void) value;  // Mark variable as used.
-  return 8 * sizeof(value);
-}
-
-// Guaranteed to lie in one Bigit.
-void Bignum::AssignUInt16(uint16_t value) {
-  ASSERT(kBigitSize >= BitSize(value));
-  Zero();
-  if (value == 0) return;
-
-  EnsureCapacity(1);
-  bigits_[0] = value;
-  used_digits_ = 1;
-}
-
-
-void Bignum::AssignUInt64(uint64_t value) {
-  const int kUInt64Size = 64;
-
-  Zero();
-  if (value == 0) return;
-
-  int needed_bigits = kUInt64Size / kBigitSize + 1;
-  EnsureCapacity(needed_bigits);
-  for (int i = 0; i < needed_bigits; ++i) {
-    bigits_[i] = value & kBigitMask;
-    value = value >> kBigitSize;
-  }
-  used_digits_ = needed_bigits;
-  Clamp();
-}
-
-
-void Bignum::AssignBignum(const Bignum& other) {
-  exponent_ = other.exponent_;
-  for (int i = 0; i < other.used_digits_; ++i) {
-    bigits_[i] = other.bigits_[i];
-  }
-  // Clear the excess digits (if there were any).
-  for (int i = other.used_digits_; i < used_digits_; ++i) {
-    bigits_[i] = 0;
-  }
-  used_digits_ = other.used_digits_;
-}
-
-
-static uint64_t ReadUInt64(Vector<char> buffer,
-                           int from,
-                           int digits_to_read) {
-  uint64_t result = 0;
-  for (int i = from; i < from + digits_to_read; ++i) {
-    int digit = buffer[i] - '0';
-    ASSERT(0 <= digit && digit <= 9);
-    result = result * 10 + digit;
-  }
-  return result;
-}
-
-
-void Bignum::AssignDecimalString(Vector<char> value) {
-  // 2^64 = 18446744073709551616 > 10^19
-  const int kMaxUint64DecimalDigits = 19;
-  Zero();
-  int length = value.length();
-  int pos = 0;
-  // Let's just say that each digit needs 4 bits.
-  while (length >= kMaxUint64DecimalDigits) {
-    uint64_t digits = ReadUInt64(value, pos, kMaxUint64DecimalDigits);
-    pos += kMaxUint64DecimalDigits;
-    length -= kMaxUint64DecimalDigits;
-    MultiplyByPowerOfTen(kMaxUint64DecimalDigits);
-    AddUInt64(digits);
-  }
-  uint64_t digits = ReadUInt64(value, pos, length);
-  MultiplyByPowerOfTen(length);
-  AddUInt64(digits);
-  Clamp();
-}
-
-
-static int HexCharValue(char c) {
-  if ('0' <= c && c <= '9') return c - '0';
-  if ('a' <= c && c <= 'f') return 10 + c - 'a';
-  ASSERT('A' <= c && c <= 'F');
-  return 10 + c - 'A';
-}
-
-
-void Bignum::AssignHexString(Vector<char> value) {
-  Zero();
-  int length = value.length();
-
-  int needed_bigits = length * 4 / kBigitSize + 1;
-  EnsureCapacity(needed_bigits);
-  int string_index = length - 1;
-  for (int i = 0; i < needed_bigits - 1; ++i) {
-    // These bigits are guaranteed to be "full".
-    Chunk current_bigit = 0;
-    for (int j = 0; j < kBigitSize / 4; j++) {
-      current_bigit += HexCharValue(value[string_index--]) << (j * 4);
-    }
-    bigits_[i] = current_bigit;
-  }
-  used_digits_ = needed_bigits - 1;
-
-  Chunk most_significant_bigit = 0;  // Could be = 0;
-  for (int j = 0; j <= string_index; ++j) {
-    most_significant_bigit <<= 4;
-    most_significant_bigit += HexCharValue(value[j]);
-  }
-  if (most_significant_bigit != 0) {
-    bigits_[used_digits_] = most_significant_bigit;
-    used_digits_++;
-  }
-  Clamp();
-}
-
-
-void Bignum::AddUInt64(uint64_t operand) {
-  if (operand == 0) return;
-  Bignum other;
-  other.AssignUInt64(operand);
-  AddBignum(other);
-}
-
-
-void Bignum::AddBignum(const Bignum& other) {
-  ASSERT(IsClamped());
-  ASSERT(other.IsClamped());
-
-  // If this has a greater exponent than other append zero-bigits to this.
-  // After this call exponent_ <= other.exponent_.
-  Align(other);
-
-  // There are two possibilities:
-  //   aaaaaaaaaaa 0000  (where the 0s represent a's exponent)
-  //     bbbbb 00000000
-  //   ----------------
-  //   ccccccccccc 0000
-  // or
-  //    aaaaaaaaaa 0000
-  //  bbbbbbbbb 0000000
-  //  -----------------
-  //  cccccccccccc 0000
-  // In both cases we might need a carry bigit.
-
-  EnsureCapacity(1 + Max(BigitLength(), other.BigitLength()) - exponent_);
-  Chunk carry = 0;
-  int bigit_pos = other.exponent_ - exponent_;
-  ASSERT(bigit_pos >= 0);
-  for (int i = 0; i < other.used_digits_; ++i) {
-    Chunk sum = bigits_[bigit_pos] + other.bigits_[i] + carry;
-    bigits_[bigit_pos] = sum & kBigitMask;
-    carry = sum >> kBigitSize;
-    bigit_pos++;
-  }
-
-  while (carry != 0) {
-    Chunk sum = bigits_[bigit_pos] + carry;
-    bigits_[bigit_pos] = sum & kBigitMask;
-    carry = sum >> kBigitSize;
-    bigit_pos++;
-  }
-  used_digits_ = Max(bigit_pos, used_digits_);
-  ASSERT(IsClamped());
-}
-
-
-void Bignum::SubtractBignum(const Bignum& other) {
-  ASSERT(IsClamped());
-  ASSERT(other.IsClamped());
-  // We require this to be bigger than other.
-  ASSERT(LessEqual(other, *this));
-
-  Align(other);
-
-  int offset = other.exponent_ - exponent_;
-  Chunk borrow = 0;
-  int i;
-  for (i = 0; i < other.used_digits_; ++i) {
-    ASSERT((borrow == 0) || (borrow == 1));
-    Chunk difference = bigits_[i + offset] - other.bigits_[i] - borrow;
-    bigits_[i + offset] = difference & kBigitMask;
-    borrow = difference >> (kChunkSize - 1);
-  }
-  while (borrow != 0) {
-    Chunk difference = bigits_[i + offset] - borrow;
-    bigits_[i + offset] = difference & kBigitMask;
-    borrow = difference >> (kChunkSize - 1);
-    ++i;
-  }
-  Clamp();
-}
-
-
-void Bignum::ShiftLeft(int shift_amount) {
-  if (used_digits_ == 0) return;
-  exponent_ += shift_amount / kBigitSize;
-  int local_shift = shift_amount % kBigitSize;
-  EnsureCapacity(used_digits_ + 1);
-  BigitsShiftLeft(local_shift);
-}
-
-
-void Bignum::MultiplyByUInt32(uint32_t factor) {
-  if (factor == 1) return;
-  if (factor == 0) {
-    Zero();
-    return;
-  }
-  if (used_digits_ == 0) return;
-
-  // The product of a bigit with the factor is of size kBigitSize + 32.
-  // Assert that this number + 1 (for the carry) fits into double chunk.
-  ASSERT(kDoubleChunkSize >= kBigitSize + 32 + 1);
-  DoubleChunk carry = 0;
-  for (int i = 0; i < used_digits_; ++i) {
-    DoubleChunk product = static_cast<DoubleChunk>(factor) * bigits_[i] + carry;
-    bigits_[i] = static_cast<Chunk>(product & kBigitMask);
-    carry = (product >> kBigitSize);
-  }
-  while (carry != 0) {
-    EnsureCapacity(used_digits_ + 1);
-    bigits_[used_digits_] = carry & kBigitMask;
-    used_digits_++;
-    carry >>= kBigitSize;
-  }
-}
-
-
-void Bignum::MultiplyByUInt64(uint64_t factor) {
-  if (factor == 1) return;
-  if (factor == 0) {
-    Zero();
-    return;
-  }
-  ASSERT(kBigitSize < 32);
-  uint64_t carry = 0;
-  uint64_t low = factor & 0xFFFFFFFF;
-  uint64_t high = factor >> 32;
-  for (int i = 0; i < used_digits_; ++i) {
-    uint64_t product_low = low * bigits_[i];
-    uint64_t product_high = high * bigits_[i];
-    uint64_t tmp = (carry & kBigitMask) + product_low;
-    bigits_[i] = tmp & kBigitMask;
-    carry = (carry >> kBigitSize) + (tmp >> kBigitSize) +
-            (product_high << (32 - kBigitSize));
-  }
-  while (carry != 0) {
-    EnsureCapacity(used_digits_ + 1);
-    bigits_[used_digits_] = carry & kBigitMask;
-    used_digits_++;
-    carry >>= kBigitSize;
-  }
-}
-
-
-void Bignum::MultiplyByPowerOfTen(int exponent) {
-  const uint64_t kFive27 = UINT64_2PART_C(0x6765c793, fa10079d);
-  const uint16_t kFive1 = 5;
-  const uint16_t kFive2 = kFive1 * 5;
-  const uint16_t kFive3 = kFive2 * 5;
-  const uint16_t kFive4 = kFive3 * 5;
-  const uint16_t kFive5 = kFive4 * 5;
-  const uint16_t kFive6 = kFive5 * 5;
-  const uint32_t kFive7 = kFive6 * 5;
-  const uint32_t kFive8 = kFive7 * 5;
-  const uint32_t kFive9 = kFive8 * 5;
-  const uint32_t kFive10 = kFive9 * 5;
-  const uint32_t kFive11 = kFive10 * 5;
-  const uint32_t kFive12 = kFive11 * 5;
-  const uint32_t kFive13 = kFive12 * 5;
-  const uint32_t kFive1_to_12[] =
-      { kFive1, kFive2, kFive3, kFive4, kFive5, kFive6,
-        kFive7, kFive8, kFive9, kFive10, kFive11, kFive12 };
-
-  ASSERT(exponent >= 0);
-  if (exponent == 0) return;
-  if (used_digits_ == 0) return;
-
-  // We shift by exponent at the end just before returning.
-  int remaining_exponent = exponent;
-  while (remaining_exponent >= 27) {
-    MultiplyByUInt64(kFive27);
-    remaining_exponent -= 27;
-  }
-  while (remaining_exponent >= 13) {
-    MultiplyByUInt32(kFive13);
-    remaining_exponent -= 13;
-  }
-  if (remaining_exponent > 0) {
-    MultiplyByUInt32(kFive1_to_12[remaining_exponent - 1]);
-  }
-  ShiftLeft(exponent);
-}
-
-
-void Bignum::Square() {
-  ASSERT(IsClamped());
-  int product_length = 2 * used_digits_;
-  EnsureCapacity(product_length);
-
-  // Comba multiplication: compute each column separately.
-  // Example: r = a2a1a0 * b2b1b0.
-  //    r =  1    * a0b0 +
-  //        10    * (a1b0 + a0b1) +
-  //        100   * (a2b0 + a1b1 + a0b2) +
-  //        1000  * (a2b1 + a1b2) +
-  //        10000 * a2b2
-  //
-  // In the worst case we have to accumulate nb-digits products of digit*digit.
-  //
-  // Assert that the additional number of bits in a DoubleChunk are enough to
-  // sum up used_digits of Bigit*Bigit.
-  if ((1 << (2 * (kChunkSize - kBigitSize))) <= used_digits_) {
-    UNIMPLEMENTED();
-  }
-  DoubleChunk accumulator = 0;
-  // First shift the digits so we don't overwrite them.
-  int copy_offset = used_digits_;
-  for (int i = 0; i < used_digits_; ++i) {
-    bigits_[copy_offset + i] = bigits_[i];
-  }
-  // We have two loops to avoid some 'if's in the loop.
-  for (int i = 0; i < used_digits_; ++i) {
-    // Process temporary digit i with power i.
-    // The sum of the two indices must be equal to i.
-    int bigit_index1 = i;
-    int bigit_index2 = 0;
-    // Sum all of the sub-products.
-    while (bigit_index1 >= 0) {
-      Chunk chunk1 = bigits_[copy_offset + bigit_index1];
-      Chunk chunk2 = bigits_[copy_offset + bigit_index2];
-      accumulator += static_cast<DoubleChunk>(chunk1) * chunk2;
-      bigit_index1--;
-      bigit_index2++;
-    }
-    bigits_[i] = static_cast<Chunk>(accumulator) & kBigitMask;
-    accumulator >>= kBigitSize;
-  }
-  for (int i = used_digits_; i < product_length; ++i) {
-    int bigit_index1 = used_digits_ - 1;
-    int bigit_index2 = i - bigit_index1;
-    // Invariant: sum of both indices is again equal to i.
-    // Inner loop runs 0 times on last iteration, emptying accumulator.
-    while (bigit_index2 < used_digits_) {
-      Chunk chunk1 = bigits_[copy_offset + bigit_index1];
-      Chunk chunk2 = bigits_[copy_offset + bigit_index2];
-      accumulator += static_cast<DoubleChunk>(chunk1) * chunk2;
-      bigit_index1--;
-      bigit_index2++;
-    }
-    // The overwritten bigits_[i] will never be read in further loop iterations,
-    // because bigit_index1 and bigit_index2 are always greater
-    // than i - used_digits_.
-    bigits_[i] = static_cast<Chunk>(accumulator) & kBigitMask;
-    accumulator >>= kBigitSize;
-  }
-  // Since the result was guaranteed to lie inside the number the
-  // accumulator must be 0 now.
-  ASSERT(accumulator == 0);
-
-  // Don't forget to update the used_digits and the exponent.
-  used_digits_ = product_length;
-  exponent_ *= 2;
-  Clamp();
-}
-
-
-void Bignum::AssignPowerUInt16(uint16_t base, int power_exponent) {
-  ASSERT(base != 0);
-  ASSERT(power_exponent >= 0);
-  if (power_exponent == 0) {
-    AssignUInt16(1);
-    return;
-  }
-  Zero();
-  int shifts = 0;
-  // We expect base to be in range 2-32, and most often to be 10.
-  // It does not make much sense to implement different algorithms for counting
-  // the bits.
-  while ((base & 1) == 0) {
-    base >>= 1;
-    shifts++;
-  }
-  int bit_size = 0;
-  int tmp_base = base;
-  while (tmp_base != 0) {
-    tmp_base >>= 1;
-    bit_size++;
-  }
-  int final_size = bit_size * power_exponent;
-  // 1 extra bigit for the shifting, and one for rounded final_size.
-  EnsureCapacity(final_size / kBigitSize + 2);
-
-  // Left to Right exponentiation.
-  int mask = 1;
-  while (power_exponent >= mask) mask <<= 1;
-
-  // The mask is now pointing to the bit above the most significant 1-bit of
-  // power_exponent.
-  // Get rid of the first 1-bit.
-  mask >>= 2;
-  uint64_t this_value = base;
-
-  bool delayed_multiplication = false;
-  const uint64_t max_32bits = 0xFFFFFFFF;
-  while (mask != 0 && this_value <= max_32bits) {
-    this_value = this_value * this_value;
-    // Verify that there is enough space in this_value to perform the
-    // multiplication. The first bit_size bits must be 0.
-    if ((power_exponent & mask) != 0) {
-      uint64_t base_bits_mask =
-          ~((static_cast<uint64_t>(1) << (64 - bit_size)) - 1);
-      bool high_bits_zero = (this_value & base_bits_mask) == 0;
-      if (high_bits_zero) {
-        this_value *= base;
-      } else {
-        delayed_multiplication = true;
-      }
-    }
-    mask >>= 1;
-  }
-  AssignUInt64(this_value);
-  if (delayed_multiplication) {
-    MultiplyByUInt32(base);
-  }
-
-  // Now do the same thing as a bignum.
-  while (mask != 0) {
-    Square();
-    if ((power_exponent & mask) != 0) {
-      MultiplyByUInt32(base);
-    }
-    mask >>= 1;
-  }
-
-  // And finally add the saved shifts.
-  ShiftLeft(shifts * power_exponent);
-}
-
-
-// Precondition: this/other < 16bit.
-uint16_t Bignum::DivideModuloIntBignum(const Bignum& other) {
-  ASSERT(IsClamped());
-  ASSERT(other.IsClamped());
-  ASSERT(other.used_digits_ > 0);
-
-  // Easy case: if we have fewer digits than the divisor, then the result is 0.
-  // Note: this handles the case where this == 0, too.
-  if (BigitLength() < other.BigitLength()) {
-    return 0;
-  }
-
-  Align(other);
-
-  uint16_t result = 0;
-
-  // Start by removing multiples of 'other' until both numbers have the same
-  // number of digits.
-  while (BigitLength() > other.BigitLength()) {
-    // This naive approach is extremely inefficient if `this` divided by other
-    // is big. This function is implemented for doubleToString where
-    // the result should be small (less than 10).
-    ASSERT(other.bigits_[other.used_digits_ - 1] >= ((1 << kBigitSize) / 16));
-    ASSERT(bigits_[used_digits_ - 1] < 0x10000);
-    // Remove the multiples of the first digit.
-    // Example: this = 23 and other equals 9. -> Remove 2 multiples.
-    result += static_cast<uint16_t>(bigits_[used_digits_ - 1]);
-    SubtractTimes(other, bigits_[used_digits_ - 1]);
-  }
-
-  ASSERT(BigitLength() == other.BigitLength());
-
-  // Both bignums are at the same length now.
-  // Since other has more than 0 digits we know that the access to
-  // bigits_[used_digits_ - 1] is safe.
-  Chunk this_bigit = bigits_[used_digits_ - 1];
-  Chunk other_bigit = other.bigits_[other.used_digits_ - 1];
-
-  if (other.used_digits_ == 1) {
-    // Shortcut for easy (and common) case.
-    int quotient = this_bigit / other_bigit;
-    bigits_[used_digits_ - 1] = this_bigit - other_bigit * quotient;
-    ASSERT(quotient < 0x10000);
-    result += static_cast<uint16_t>(quotient);
-    Clamp();
-    return result;
-  }
-
-  int division_estimate = this_bigit / (other_bigit + 1);
-  ASSERT(division_estimate < 0x10000);
-  result += static_cast<uint16_t>(division_estimate);
-  SubtractTimes(other, division_estimate);
-
-  if (other_bigit * (division_estimate + 1) > this_bigit) {
-    // No need to even try to subtract. Even if other's remaining digits were 0
-    // another subtraction would be too much.
-    return result;
-  }
-
-  while (LessEqual(other, *this)) {
-    SubtractBignum(other);
-    result++;
-  }
-  return result;
-}
-
-
-template<typename S>
-static int SizeInHexChars(S number) {
-  ASSERT(number > 0);
-  int result = 0;
-  while (number != 0) {
-    number >>= 4;
-    result++;
-  }
-  return result;
-}
-
-
-static char HexCharOfValue(int value) {
-  ASSERT(0 <= value && value <= 15);
-  if (value < 10) return static_cast<char>(value + '0');
-  return static_cast<char>(value - 10 + 'A');
-}
-
-
-bool Bignum::ToHexString(char* buffer, int buffer_size) const {
-  ASSERT(IsClamped());
-  // Each bigit must be printable as a separate hex character.
-  ASSERT(kBigitSize % 4 == 0);
-  const int kHexCharsPerBigit = kBigitSize / 4;
-
-  if (used_digits_ == 0) {
-    if (buffer_size < 2) return false;
-    buffer[0] = '0';
-    buffer[1] = '\0';
-    return true;
-  }
-  // We add 1 for the terminating '\0' character.
-  int needed_chars = (BigitLength() - 1) * kHexCharsPerBigit +
-                     SizeInHexChars(bigits_[used_digits_ - 1]) + 1;
-  if (needed_chars > buffer_size) return false;
-  int string_index = needed_chars - 1;
-  buffer[string_index--] = '\0';
-  for (int i = 0; i < exponent_; ++i) {
-    for (int j = 0; j < kHexCharsPerBigit; ++j) {
-      buffer[string_index--] = '0';
-    }
-  }
-  for (int i = 0; i < used_digits_ - 1; ++i) {
-    Chunk current_bigit = bigits_[i];
-    for (int j = 0; j < kHexCharsPerBigit; ++j) {
-      buffer[string_index--] = HexCharOfValue(current_bigit & 0xF);
-      current_bigit >>= 4;
-    }
-  }
-  // And finally the last bigit.
-  Chunk most_significant_bigit = bigits_[used_digits_ - 1];
-  while (most_significant_bigit != 0) {
-    buffer[string_index--] = HexCharOfValue(most_significant_bigit & 0xF);
-    most_significant_bigit >>= 4;
-  }
-  return true;
-}
-
-
-Bignum::Chunk Bignum::BigitAt(int index) const {
-  if (index >= BigitLength()) return 0;
-  if (index < exponent_) return 0;
-  return bigits_[index - exponent_];
-}
-
-
-int Bignum::Compare(const Bignum& a, const Bignum& b) {
-  ASSERT(a.IsClamped());
-  ASSERT(b.IsClamped());
-  int bigit_length_a = a.BigitLength();
-  int bigit_length_b = b.BigitLength();
-  if (bigit_length_a < bigit_length_b) return -1;
-  if (bigit_length_a > bigit_length_b) return +1;
-  for (int i = bigit_length_a - 1; i >= Min(a.exponent_, b.exponent_); --i) {
-    Chunk bigit_a = a.BigitAt(i);
-    Chunk bigit_b = b.BigitAt(i);
-    if (bigit_a < bigit_b) return -1;
-    if (bigit_a > bigit_b) return +1;
-    // Otherwise they are equal up to this digit. Try the next digit.
-  }
-  return 0;
-}
-
-
-int Bignum::PlusCompare(const Bignum& a, const Bignum& b, const Bignum& c) {
-  ASSERT(a.IsClamped());
-  ASSERT(b.IsClamped());
-  ASSERT(c.IsClamped());
-  if (a.BigitLength() < b.BigitLength()) {
-    return PlusCompare(b, a, c);
-  }
-  if (a.BigitLength() + 1 < c.BigitLength()) return -1;
-  if (a.BigitLength() > c.BigitLength()) return +1;
-  // The exponent encodes 0-bigits. So if there are more 0-digits in 'a' than
-  // 'b' has digits, then the bigit-length of 'a'+'b' must be equal to the one
-  // of 'a'.
-  if (a.exponent_ >= b.BigitLength() && a.BigitLength() < c.BigitLength()) {
-    return -1;
-  }
-
-  Chunk borrow = 0;
-  // Starting at min_exponent all digits are == 0. So no need to compare them.
-  int min_exponent = Min(Min(a.exponent_, b.exponent_), c.exponent_);
-  for (int i = c.BigitLength() - 1; i >= min_exponent; --i) {
-    Chunk chunk_a = a.BigitAt(i);
-    Chunk chunk_b = b.BigitAt(i);
-    Chunk chunk_c = c.BigitAt(i);
-    Chunk sum = chunk_a + chunk_b;
-    if (sum > chunk_c + borrow) {
-      return +1;
-    } else {
-      borrow = chunk_c + borrow - sum;
-      if (borrow > 1) return -1;
-      borrow <<= kBigitSize;
-    }
-  }
-  if (borrow == 0) return 0;
-  return -1;
-}
-
-
-void Bignum::Clamp() {
-  while (used_digits_ > 0 && bigits_[used_digits_ - 1] == 0) {
-    used_digits_--;
-  }
-  if (used_digits_ == 0) {
-    // Zero.
-    exponent_ = 0;
-  }
-}
-
-
-bool Bignum::IsClamped() const {
-  return used_digits_ == 0 || bigits_[used_digits_ - 1] != 0;
-}
-
-
-void Bignum::Zero() {
-  for (int i = 0; i < used_digits_; ++i) {
-    bigits_[i] = 0;
-  }
-  used_digits_ = 0;
-  exponent_ = 0;
-}
-
-
-void Bignum::Align(const Bignum& other) {
-  if (exponent_ > other.exponent_) {
-    // If "X" represents a "hidden" digit (by the exponent) then we are in the
-    // following case (a == this, b == other):
-    //   a:  aaaaaaXXXX   or a:   aaaaaXXX
-    //   b:     bbbbbbX      b: bbbbbbbbXX
-    // We replace some of the hidden digits (X) of a with 0 digits.
-    //   a:  aaaaaa000X   or a: aaaaa0XX
-    int zero_digits = exponent_ - other.exponent_;
-    EnsureCapacity(used_digits_ + zero_digits);
-    for (int i = used_digits_ - 1; i >= 0; --i) {
-      bigits_[i + zero_digits] = bigits_[i];
-    }
-    for (int i = 0; i < zero_digits; ++i) {
-      bigits_[i] = 0;
-    }
-    used_digits_ += zero_digits;
-    exponent_ -= zero_digits;
-    ASSERT(used_digits_ >= 0);
-    ASSERT(exponent_ >= 0);
-  }
-}
-
-
-void Bignum::BigitsShiftLeft(int shift_amount) {
-  ASSERT(shift_amount < kBigitSize);
-  ASSERT(shift_amount >= 0);
-  Chunk carry = 0;
-  for (int i = 0; i < used_digits_; ++i) {
-    Chunk new_carry = bigits_[i] >> (kBigitSize - shift_amount);
-    bigits_[i] = ((bigits_[i] << shift_amount) + carry) & kBigitMask;
-    carry = new_carry;
-  }
-  if (carry != 0) {
-    bigits_[used_digits_] = carry;
-    used_digits_++;
-  }
-}
-
-
-void Bignum::SubtractTimes(const Bignum& other, int factor) {
-  ASSERT(exponent_ <= other.exponent_);
-  if (factor < 3) {
-    for (int i = 0; i < factor; ++i) {
-      SubtractBignum(other);
-    }
-    return;
-  }
-  Chunk borrow = 0;
-  int exponent_diff = other.exponent_ - exponent_;
-  for (int i = 0; i < other.used_digits_; ++i) {
-    DoubleChunk product = static_cast<DoubleChunk>(factor) * other.bigits_[i];
-    DoubleChunk remove = borrow + product;
-    Chunk difference = bigits_[i + exponent_diff] - (remove & kBigitMask);
-    bigits_[i + exponent_diff] = difference & kBigitMask;
-    borrow = static_cast<Chunk>((difference >> (kChunkSize - 1)) +
-                                (remove >> kBigitSize));
-  }
-  for (int i = other.used_digits_ + exponent_diff; i < used_digits_; ++i) {
-    if (borrow == 0) return;
-    Chunk difference = bigits_[i] - borrow;
-    bigits_[i] = difference & kBigitMask;
-    borrow = difference >> (kChunkSize - 1);
-  }
-  Clamp();
-}
-
-
-} // namespace double_conversion
diff --git a/base/poco/Foundation/src/bignum.h b/base/poco/Foundation/src/bignum.h
deleted file mode 100644
index ec56adac23f..00000000000
--- a/base/poco/Foundation/src/bignum.h
+++ /dev/null
@@ -1,138 +0,0 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-//       notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-//       copyright notice, this list of conditions and the following
-//       disclaimer in the documentation and/or other materials provided
-//       with the distribution.
-//     * Neither the name of Google Inc. nor the names of its
-//       contributors may be used to endorse or promote products derived
-//       from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef DOUBLE_CONVERSION_BIGNUM_H_
-#define DOUBLE_CONVERSION_BIGNUM_H_
-
-#include "utils.h"
-
-namespace double_conversion
-{
-
-class Bignum
-{
-public:
-    // 3584 = 128 * 28. We can represent 2^3584 > 10^1000 accurately.
-    // This bignum can encode much bigger numbers, since it contains an
-    // exponent.
-    static const int kMaxSignificantBits = 3584;
-
-    Bignum();
-    void AssignUInt16(uint16_t value);
-    void AssignUInt64(uint64_t value);
-    void AssignBignum(const Bignum & other);
-
-    void AssignDecimalString(Vector<char> value);
-    void AssignHexString(Vector<char> value);
-
-    void AssignPowerUInt16(uint16_t base, int exponent);
-
-    void AddUInt16(uint16_t operand);
-    void AddUInt64(uint64_t operand);
-    void AddBignum(const Bignum & other);
-    // Precondition: this >= other.
-    void SubtractBignum(const Bignum & other);
-
-    void Square();
-    void ShiftLeft(int shift_amount);
-    void MultiplyByUInt32(uint32_t factor);
-    void MultiplyByUInt64(uint64_t factor);
-    void MultiplyByPowerOfTen(int exponent);
-    void Times10() { return MultiplyByUInt32(10); }
-    // Pseudocode:
-    //  int result = this / other;
-    //  this = this % other;
-    // In the worst case this function is in O(this/other).
-    uint16_t DivideModuloIntBignum(const Bignum & other);
-
-    bool ToHexString(char * buffer, int buffer_size) const;
-
-    // Returns
-    //  -1 if a < b,
-    //   0 if a == b, and
-    //  +1 if a > b.
-    static int Compare(const Bignum & a, const Bignum & b);
-    static bool Equal(const Bignum & a, const Bignum & b) { return Compare(a, b) == 0; }
-    static bool LessEqual(const Bignum & a, const Bignum & b) { return Compare(a, b) <= 0; }
-    static bool Less(const Bignum & a, const Bignum & b) { return Compare(a, b) < 0; }
-    // Returns Compare(a + b, c);
-    static int PlusCompare(const Bignum & a, const Bignum & b, const Bignum & c);
-    // Returns a + b == c
-    static bool PlusEqual(const Bignum & a, const Bignum & b, const Bignum & c) { return PlusCompare(a, b, c) == 0; }
-    // Returns a + b <= c
-    static bool PlusLessEqual(const Bignum & a, const Bignum & b, const Bignum & c) { return PlusCompare(a, b, c) <= 0; }
-    // Returns a + b < c
-    static bool PlusLess(const Bignum & a, const Bignum & b, const Bignum & c) { return PlusCompare(a, b, c) < 0; }
-
-private:
-    typedef uint32_t Chunk;
-    typedef uint64_t DoubleChunk;
-
-    static const int kChunkSize = sizeof(Chunk) * 8;
-    static const int kDoubleChunkSize = sizeof(DoubleChunk) * 8;
-    // With a bigit size of 28 we lose some bits, but a double still fits easily
-    // into two chunks, and more importantly we can use the Comba multiplication.
-    static const int kBigitSize = 28;
-    static const Chunk kBigitMask = (1 << kBigitSize) - 1;
-    // Every instance allocates kBigitCapacity chunks on the stack. Bignums cannot
-    // grow. There are no checks if the stack-allocated space is sufficient.
-    static const int kBigitCapacity = kMaxSignificantBits / kBigitSize;
-
-    void EnsureCapacity(int size)
-    {
-        if (size > kBigitCapacity)
-        {
-            UNREACHABLE();
-        }
-    }
-    void Align(const Bignum & other);
-    void Clamp();
-    bool IsClamped() const;
-    void Zero();
-    // Requires this to have enough capacity (no tests done).
-    // Updates used_digits_ if necessary.
-    // shift_amount must be < kBigitSize.
-    void BigitsShiftLeft(int shift_amount);
-    // BigitLength includes the "hidden" digits encoded in the exponent.
-    int BigitLength() const { return used_digits_ + exponent_; }
-    Chunk BigitAt(int index) const;
-    void SubtractTimes(const Bignum & other, int factor);
-
-    Chunk bigits_buffer_[kBigitCapacity];
-    // A vector backed by bigits_buffer_. This way accesses to the array are
-    // checked for out-of-bounds errors.
-    Vector<Chunk> bigits_;
-    int used_digits_;
-    // The Bignum's value equals value(bigits_) * 2^(exponent_ * kBigitSize).
-    int exponent_;
-
-    DISALLOW_COPY_AND_ASSIGN(Bignum);
-};
-
-} // namespace double_conversion
-
-#endif // DOUBLE_CONVERSION_BIGNUM_H_
diff --git a/base/poco/Foundation/src/cached-powers.cc b/base/poco/Foundation/src/cached-powers.cc
deleted file mode 100644
index d1359ffe43e..00000000000
--- a/base/poco/Foundation/src/cached-powers.cc
+++ /dev/null
@@ -1,176 +0,0 @@
-// Copyright 2006-2008 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-//       notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-//       copyright notice, this list of conditions and the following
-//       disclaimer in the documentation and/or other materials provided
-//       with the distribution.
-//     * Neither the name of Google Inc. nor the names of its
-//       contributors may be used to endorse or promote products derived
-//       from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include <stdarg.h>
-#include <limits.h>
-#include <math.h>
-
-#include "utils.h"
-
-#include "cached-powers.h"
-
-namespace double_conversion {
-
-struct CachedPower {
-  uint64_t significand;
-  int16_t binary_exponent;
-  int16_t decimal_exponent;
-};
-
-static const CachedPower kCachedPowers[] = {
-  {UINT64_2PART_C(0xfa8fd5a0, 081c0288), -1220, -348},
-  {UINT64_2PART_C(0xbaaee17f, a23ebf76), -1193, -340},
-  {UINT64_2PART_C(0x8b16fb20, 3055ac76), -1166, -332},
-  {UINT64_2PART_C(0xcf42894a, 5dce35ea), -1140, -324},
-  {UINT64_2PART_C(0x9a6bb0aa, 55653b2d), -1113, -316},
-  {UINT64_2PART_C(0xe61acf03, 3d1a45df), -1087, -308},
-  {UINT64_2PART_C(0xab70fe17, c79ac6ca), -1060, -300},
-  {UINT64_2PART_C(0xff77b1fc, bebcdc4f), -1034, -292},
-  {UINT64_2PART_C(0xbe5691ef, 416bd60c), -1007, -284},
-  {UINT64_2PART_C(0x8dd01fad, 907ffc3c), -980, -276},
-  {UINT64_2PART_C(0xd3515c28, 31559a83), -954, -268},
-  {UINT64_2PART_C(0x9d71ac8f, ada6c9b5), -927, -260},
-  {UINT64_2PART_C(0xea9c2277, 23ee8bcb), -901, -252},
-  {UINT64_2PART_C(0xaecc4991, 4078536d), -874, -244},
-  {UINT64_2PART_C(0x823c1279, 5db6ce57), -847, -236},
-  {UINT64_2PART_C(0xc2109436, 4dfb5637), -821, -228},
-  {UINT64_2PART_C(0x9096ea6f, 3848984f), -794, -220},
-  {UINT64_2PART_C(0xd77485cb, 25823ac7), -768, -212},
-  {UINT64_2PART_C(0xa086cfcd, 97bf97f4), -741, -204},
-  {UINT64_2PART_C(0xef340a98, 172aace5), -715, -196},
-  {UINT64_2PART_C(0xb23867fb, 2a35b28e), -688, -188},
-  {UINT64_2PART_C(0x84c8d4df, d2c63f3b), -661, -180},
-  {UINT64_2PART_C(0xc5dd4427, 1ad3cdba), -635, -172},
-  {UINT64_2PART_C(0x936b9fce, bb25c996), -608, -164},
-  {UINT64_2PART_C(0xdbac6c24, 7d62a584), -582, -156},
-  {UINT64_2PART_C(0xa3ab6658, 0d5fdaf6), -555, -148},
-  {UINT64_2PART_C(0xf3e2f893, dec3f126), -529, -140},
-  {UINT64_2PART_C(0xb5b5ada8, aaff80b8), -502, -132},
-  {UINT64_2PART_C(0x87625f05, 6c7c4a8b), -475, -124},
-  {UINT64_2PART_C(0xc9bcff60, 34c13053), -449, -116},
-  {UINT64_2PART_C(0x964e858c, 91ba2655), -422, -108},
-  {UINT64_2PART_C(0xdff97724, 70297ebd), -396, -100},
-  {UINT64_2PART_C(0xa6dfbd9f, b8e5b88f), -369, -92},
-  {UINT64_2PART_C(0xf8a95fcf, 88747d94), -343, -84},
-  {UINT64_2PART_C(0xb9447093, 8fa89bcf), -316, -76},
-  {UINT64_2PART_C(0x8a08f0f8, bf0f156b), -289, -68},
-  {UINT64_2PART_C(0xcdb02555, 653131b6), -263, -60},
-  {UINT64_2PART_C(0x993fe2c6, d07b7fac), -236, -52},
-  {UINT64_2PART_C(0xe45c10c4, 2a2b3b06), -210, -44},
-  {UINT64_2PART_C(0xaa242499, 697392d3), -183, -36},
-  {UINT64_2PART_C(0xfd87b5f2, 8300ca0e), -157, -28},
-  {UINT64_2PART_C(0xbce50864, 92111aeb), -130, -20},
-  {UINT64_2PART_C(0x8cbccc09, 6f5088cc), -103, -12},
-  {UINT64_2PART_C(0xd1b71758, e219652c), -77, -4},
-  {UINT64_2PART_C(0x9c400000, 00000000), -50, 4},
-  {UINT64_2PART_C(0xe8d4a510, 00000000), -24, 12},
-  {UINT64_2PART_C(0xad78ebc5, ac620000), 3, 20},
-  {UINT64_2PART_C(0x813f3978, f8940984), 30, 28},
-  {UINT64_2PART_C(0xc097ce7b, c90715b3), 56, 36},
-  {UINT64_2PART_C(0x8f7e32ce, 7bea5c70), 83, 44},
-  {UINT64_2PART_C(0xd5d238a4, abe98068), 109, 52},
-  {UINT64_2PART_C(0x9f4f2726, 179a2245), 136, 60},
-  {UINT64_2PART_C(0xed63a231, d4c4fb27), 162, 68},
-  {UINT64_2PART_C(0xb0de6538, 8cc8ada8), 189, 76},
-  {UINT64_2PART_C(0x83c7088e, 1aab65db), 216, 84},
-  {UINT64_2PART_C(0xc45d1df9, 42711d9a), 242, 92},
-  {UINT64_2PART_C(0x924d692c, a61be758), 269, 100},
-  {UINT64_2PART_C(0xda01ee64, 1a708dea), 295, 108},
-  {UINT64_2PART_C(0xa26da399, 9aef774a), 322, 116},
-  {UINT64_2PART_C(0xf209787b, b47d6b85), 348, 124},
-  {UINT64_2PART_C(0xb454e4a1, 79dd1877), 375, 132},
-  {UINT64_2PART_C(0x865b8692, 5b9bc5c2), 402, 140},
-  {UINT64_2PART_C(0xc83553c5, c8965d3d), 428, 148},
-  {UINT64_2PART_C(0x952ab45c, fa97a0b3), 455, 156},
-  {UINT64_2PART_C(0xde469fbd, 99a05fe3), 481, 164},
-  {UINT64_2PART_C(0xa59bc234, db398c25), 508, 172},
-  {UINT64_2PART_C(0xf6c69a72, a3989f5c), 534, 180},
-  {UINT64_2PART_C(0xb7dcbf53, 54e9bece), 561, 188},
-  {UINT64_2PART_C(0x88fcf317, f22241e2), 588, 196},
-  {UINT64_2PART_C(0xcc20ce9b, d35c78a5), 614, 204},
-  {UINT64_2PART_C(0x98165af3, 7b2153df), 641, 212},
-  {UINT64_2PART_C(0xe2a0b5dc, 971f303a), 667, 220},
-  {UINT64_2PART_C(0xa8d9d153, 5ce3b396), 694, 228},
-  {UINT64_2PART_C(0xfb9b7cd9, a4a7443c), 720, 236},
-  {UINT64_2PART_C(0xbb764c4c, a7a44410), 747, 244},
-  {UINT64_2PART_C(0x8bab8eef, b6409c1a), 774, 252},
-  {UINT64_2PART_C(0xd01fef10, a657842c), 800, 260},
-  {UINT64_2PART_C(0x9b10a4e5, e9913129), 827, 268},
-  {UINT64_2PART_C(0xe7109bfb, a19c0c9d), 853, 276},
-  {UINT64_2PART_C(0xac2820d9, 623bf429), 880, 284},
-  {UINT64_2PART_C(0x80444b5e, 7aa7cf85), 907, 292},
-  {UINT64_2PART_C(0xbf21e440, 03acdd2d), 933, 300},
-  {UINT64_2PART_C(0x8e679c2f, 5e44ff8f), 960, 308},
-  {UINT64_2PART_C(0xd433179d, 9c8cb841), 986, 316},
-  {UINT64_2PART_C(0x9e19db92, b4e31ba9), 1013, 324},
-  {UINT64_2PART_C(0xeb96bf6e, badf77d9), 1039, 332},
-  {UINT64_2PART_C(0xaf87023b, 9bf0ee6b), 1066, 340},
-};
-
-static const int kCachedPowersLength = ARRAY_SIZE(kCachedPowers);
-static const int kCachedPowersOffset = 348;  // -1 * the first decimal_exponent.
-static const double kD_1_LOG2_10 = 0.30102999566398114;  //  1 / lg(10)
-// Difference between the decimal exponents in the table above.
-const int PowersOfTenCache::kDecimalExponentDistance = 8;
-const int PowersOfTenCache::kMinDecimalExponent = -348;
-const int PowersOfTenCache::kMaxDecimalExponent = 340;
-
-void PowersOfTenCache::GetCachedPowerForBinaryExponentRange(
-    int min_exponent,
-    int max_exponent,
-    DiyFp* power,
-    int* decimal_exponent) {
-  int kQ = DiyFp::kSignificandSize;
-  double k = ceil((min_exponent + kQ - 1) * kD_1_LOG2_10);
-  int foo = kCachedPowersOffset;
-  int index =
-      (foo + static_cast<int>(k) - 1) / kDecimalExponentDistance + 1;
-  ASSERT(0 <= index && index < kCachedPowersLength);
-  CachedPower cached_power = kCachedPowers[index];
-  ASSERT(min_exponent <= cached_power.binary_exponent);
-  (void) max_exponent;  // Mark variable as used.
- ASSERT(cached_power.binary_exponent <= max_exponent); - *decimal_exponent = cached_power.decimal_exponent; - *power = DiyFp(cached_power.significand, cached_power.binary_exponent); -} - - -void PowersOfTenCache::GetCachedPowerForDecimalExponent(int requested_exponent, - DiyFp* power, - int* found_exponent) { - ASSERT(kMinDecimalExponent <= requested_exponent); - ASSERT(requested_exponent < kMaxDecimalExponent + kDecimalExponentDistance); - int index = - (requested_exponent + kCachedPowersOffset) / kDecimalExponentDistance; - CachedPower cached_power = kCachedPowers[index]; - *power = DiyFp(cached_power.significand, cached_power.binary_exponent); - *found_exponent = cached_power.decimal_exponent; - ASSERT(*found_exponent <= requested_exponent); - ASSERT(requested_exponent < *found_exponent + kDecimalExponentDistance); -} - -} // namespace double_conversion diff --git a/base/poco/Foundation/src/cached-powers.h b/base/poco/Foundation/src/cached-powers.h deleted file mode 100644 index a65092d6cad..00000000000 --- a/base/poco/Foundation/src/cached-powers.h +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef DOUBLE_CONVERSION_CACHED_POWERS_H_ -#define DOUBLE_CONVERSION_CACHED_POWERS_H_ - -#include "diy-fp.h" - -namespace double_conversion -{ - -class PowersOfTenCache -{ -public: - // Not all powers of ten are cached. The decimal exponent of two neighboring - // cached numbers will differ by kDecimalExponentDistance. - static const int kDecimalExponentDistance; - - static const int kMinDecimalExponent; - static const int kMaxDecimalExponent; - - // Returns a cached power-of-ten with a binary exponent in the range - // [min_exponent; max_exponent] (boundaries included). - static void GetCachedPowerForBinaryExponentRange(int min_exponent, int max_exponent, DiyFp * power, int * decimal_exponent); - - // Returns a cached power of ten x ~= 10^k such that - // k <= decimal_exponent < k + kCachedPowersDecimalDistance. 
- // The given decimal_exponent must satisfy - // kMinDecimalExponent <= requested_exponent, and - // requested_exponent < kMaxDecimalExponent + kDecimalExponentDistance. - static void GetCachedPowerForDecimalExponent(int requested_exponent, DiyFp * power, int * found_exponent); -}; - -} // namespace double_conversion - -#endif // DOUBLE_CONVERSION_CACHED_POWERS_H_ diff --git a/base/poco/Foundation/src/compress.c b/base/poco/Foundation/src/compress.c deleted file mode 100644 index e2db404abf8..00000000000 --- a/base/poco/Foundation/src/compress.c +++ /dev/null @@ -1,86 +0,0 @@ -/* compress.c -- compress a memory buffer - * Copyright (C) 1995-2005, 2014, 2016 Jean-loup Gailly, Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* @(#) $Id$ */ - -#define ZLIB_INTERNAL -#include "zlib.h" - -/* =========================================================================== - Compresses the source buffer into the destination buffer. The level - parameter has the same meaning as in deflateInit. sourceLen is the byte - length of the source buffer. Upon entry, destLen is the total size of the - destination buffer, which must be at least 0.1% larger than sourceLen plus - 12 bytes. Upon exit, destLen is the actual size of the compressed buffer. - - compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough - memory, Z_BUF_ERROR if there was not enough room in the output buffer, - Z_STREAM_ERROR if the level parameter is invalid. -*/ -int ZEXPORT compress2 (dest, destLen, source, sourceLen, level) - Bytef *dest; - uLongf *destLen; - const Bytef *source; - uLong sourceLen; - int level; -{ - z_stream stream; - int err; - const uInt max = (uInt)-1; - uLong left; - - left = *destLen; - *destLen = 0; - - stream.zalloc = (alloc_func)0; - stream.zfree = (free_func)0; - stream.opaque = (voidpf)0; - - err = deflateInit(&stream, level); - if (err != Z_OK) return err; - - stream.next_out = dest; - stream.avail_out = 0; - stream.next_in = (z_const Bytef *)source; - stream.avail_in = 0; - - do { - if (stream.avail_out == 0) { - stream.avail_out = left > (uLong)max ? max : (uInt)left; - left -= stream.avail_out; - } - if (stream.avail_in == 0) { - stream.avail_in = sourceLen > (uLong)max ? max : (uInt)sourceLen; - sourceLen -= stream.avail_in; - } - err = deflate(&stream, sourceLen ? Z_NO_FLUSH : Z_FINISH); - } while (err == Z_OK); - - *destLen = stream.total_out; - deflateEnd(&stream); - return err == Z_STREAM_END ? Z_OK : err; -} - -/* =========================================================================== - */ -int ZEXPORT compress (dest, destLen, source, sourceLen) - Bytef *dest; - uLongf *destLen; - const Bytef *source; - uLong sourceLen; -{ - return compress2(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION); -} - -/* =========================================================================== - If the default memLevel or windowBits for deflateInit() is changed, then - this function needs to be updated. 
- */
-uLong ZEXPORT compressBound (sourceLen)
-    uLong sourceLen;
-{
-    return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) +
-           (sourceLen >> 25) + 13;
-}
diff --git a/base/poco/Foundation/src/crc32.c b/base/poco/Foundation/src/crc32.c
deleted file mode 100644
index a0fe210ab1b..00000000000
--- a/base/poco/Foundation/src/crc32.c
+++ /dev/null
@@ -1,444 +0,0 @@
-/* crc32.c -- compute the CRC-32 of a data stream
- * Copyright (C) 1995-2006, 2010, 2011, 2012 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- *
- * Thanks to Rodney Brown <rodneybrown@pmc-sierra.com> for his contribution of faster
- * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing
- * tables for updating the shift register in one step with three exclusive-ors
- * instead of four steps with four exclusive-ors. This results in about a
- * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
- */
-
-/* @(#) $Id$ */
-
-/*
-  Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
-  protection on the static variables used to control the first-use generation
-  of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
-  first call get_crc_table() to initialize the tables before allowing more than
-  one thread to use crc32().
-
-  DYNAMIC_CRC_TABLE and MAKECRCH can be #defined to write out crc32.h.
- */
-
-#ifdef MAKECRCH
-#  include <stdio.h>
-#  ifndef DYNAMIC_CRC_TABLE
-#    define DYNAMIC_CRC_TABLE
-#  endif /* !DYNAMIC_CRC_TABLE */
-#endif /* MAKECRCH */
-
-#include "zutil.h"      /* for STDC and FAR definitions */
-
-#define local static
-
-/* Definitions for doing the crc four data bytes at a time. */
-#if !defined(NOBYFOUR) && defined(Z_U4)
-#  define BYFOUR
-#endif
-#ifdef BYFOUR
-   local unsigned long crc32_little OF((unsigned long,
-                        const unsigned char FAR *, z_size_t));
-   local unsigned long crc32_big OF((unsigned long,
-                        const unsigned char FAR *, z_size_t));
-#  define TBLS 8
-#else
-#  define TBLS 1
-#endif /* BYFOUR */
-
-/* Local functions for crc concatenation */
-local unsigned long gf2_matrix_times OF((unsigned long *mat,
-                                         unsigned long vec));
-local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat));
-local uLong crc32_combine_ OF((uLong crc1, uLong crc2, z_off64_t len2));
-
-
-#ifdef DYNAMIC_CRC_TABLE
-
-local volatile int crc_table_empty = 1;
-local z_crc_t FAR crc_table[TBLS][256];
-local void make_crc_table OF((void));
-#ifdef MAKECRCH
-   local void write_table OF((FILE *, const z_crc_t FAR *));
-#endif /* MAKECRCH */
-/*
-  Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
-  x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
-
-  Polynomials over GF(2) are represented in binary, one bit per coefficient,
-  with the lowest powers in the most significant bit. Then adding polynomials
-  is just exclusive-or, and multiplying a polynomial by x is a right shift by
-  one. If we call the above polynomial p, and represent a byte as the
-  polynomial q, also with the lowest power in the most significant bit (so the
-  byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p,
-  where a mod b means the remainder after dividing a by b.
-
-  This calculation is done using the shift-register method of multiplying and
-  taking the remainder.
The register is initialized to zero, and for each - incoming bit, x^32 is added mod p to the register if the bit is a one (where - x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by - x (which is shifting right by one and adding x^32 mod p if the bit shifted - out is a one). We start with the highest power (least significant bit) of - q and repeat for all eight bits of q. - - The first table is simply the CRC of all possible eight bit values. This is - all the information needed to generate CRCs on data a byte at a time for all - combinations of CRC register values and incoming bytes. The remaining tables - allow for word-at-a-time CRC calculation for both big-endian and little- - endian machines, where a word is four bytes. -*/ -local void make_crc_table() -{ - z_crc_t c; - int n, k; - z_crc_t poly; /* polynomial exclusive-or pattern */ - /* terms of polynomial defining this crc (except x^32): */ - static volatile int first = 1; /* flag to limit concurrent making */ - static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26}; - - /* See if another task is already doing this (not thread-safe, but better - than nothing -- significantly reduces duration of vulnerability in - case the advice about DYNAMIC_CRC_TABLE is ignored) */ - if (first) { - first = 0; - - /* make exclusive-or pattern from polynomial (0xedb88320UL) */ - poly = 0; - for (n = 0; n < (int)(sizeof(p)/sizeof(unsigned char)); n++) - poly |= (z_crc_t)1 << (31 - p[n]); - - /* generate a crc for every 8-bit value */ - for (n = 0; n < 256; n++) { - c = (z_crc_t)n; - for (k = 0; k < 8; k++) - c = c & 1 ? poly ^ (c >> 1) : c >> 1; - crc_table[0][n] = c; - } - -#ifdef BYFOUR - /* generate crc for each value followed by one, two, and three zeros, - and then the byte reversal of those as well as the first table */ - for (n = 0; n < 256; n++) { - c = crc_table[0][n]; - crc_table[4][n] = ZSWAP32(c); - for (k = 1; k < 4; k++) { - c = crc_table[0][c & 0xff] ^ (c >> 8); - crc_table[k][n] = c; - crc_table[k + 4][n] = ZSWAP32(c); - } - } -#endif /* BYFOUR */ - - crc_table_empty = 0; - } - else { /* not first */ - /* wait for the other guy to finish (not efficient, but rare) */ - while (crc_table_empty) - ; - } - -#ifdef MAKECRCH - /* write out CRC tables to crc32.h */ - { - FILE *out; - - out = fopen("crc32.h", "w"); - if (out == NULL) return; - fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n"); - fprintf(out, " * Generated automatically by crc32.c\n */\n\n"); - fprintf(out, "local const z_crc_t FAR "); - fprintf(out, "crc_table[TBLS][256] =\n{\n {\n"); - write_table(out, crc_table[0]); -# ifdef BYFOUR - fprintf(out, "#ifdef BYFOUR\n"); - for (k = 1; k < 8; k++) { - fprintf(out, " },\n {\n"); - write_table(out, crc_table[k]); - } - fprintf(out, "#endif\n"); -# endif /* BYFOUR */ - fprintf(out, " }\n};\n"); - fclose(out); - } -#endif /* MAKECRCH */ -} - -#ifdef MAKECRCH -local void write_table(out, table) - FILE *out; - const z_crc_t FAR *table; -{ - int n; - - for (n = 0; n < 256; n++) - fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : " ", - (unsigned long)(table[n]), - n == 255 ? "\n" : (n % 5 == 4 ? ",\n" : ", ")); -} -#endif /* MAKECRCH */ - -#else /* !DYNAMIC_CRC_TABLE */ -/* ======================================================================== - * Tables of CRC-32s of all single-byte values, made by make_crc_table(). 
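Reviewer note: the table construction described above is easy to check in isolation. A standalone sketch of the same bit-at-a-time shift register, where 0xedb88320 is the reflected polynomial that `make_crc_table()` assembles from `p[]`; the first computed entry should match the 0x77073096 visible in the table dump below:

```c
/* Standalone sketch of the loop in make_crc_table(): for one byte value n,
 * run the shift register eight times. 0xedb88320 is the same reflected
 * CRC-32 polynomial built above from p[]. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t poly = 0xedb88320u;
    for (unsigned n = 0; n < 4; n++) {               /* first few entries of crc_table[0] */
        uint32_t c = n;
        for (int k = 0; k < 8; k++)
            c = (c & 1) ? poly ^ (c >> 1) : c >> 1;  /* multiply by x, reduce mod p */
        printf("crc_table[0][%u] = 0x%08x\n", n, c); /* n=1 prints 0x77073096 */
    }
    return 0;
}
```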
- */ -#include "crc32.h" -#endif /* DYNAMIC_CRC_TABLE */ - -/* ========================================================================= - * This function can be used by asm versions of crc32() - */ -const z_crc_t FAR * ZEXPORT get_crc_table() -{ -#ifdef DYNAMIC_CRC_TABLE - if (crc_table_empty) - make_crc_table(); -#endif /* DYNAMIC_CRC_TABLE */ - return (const z_crc_t FAR *)crc_table; -} - -/* ========================================================================= */ -#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8) -#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 - -/* ========================================================================= */ -unsigned long ZEXPORT crc32_z(crc, buf, len) - unsigned long crc; - const unsigned char FAR *buf; - z_size_t len; -{ - if (buf == Z_NULL) return 0UL; - -#ifdef DYNAMIC_CRC_TABLE - if (crc_table_empty) - make_crc_table(); -#endif /* DYNAMIC_CRC_TABLE */ - -#ifdef BYFOUR - if (sizeof(void *) == sizeof(ptrdiff_t)) { - z_crc_t endian; - - endian = 1; - if (*((unsigned char *)(&endian))) - return crc32_little(crc, buf, len); - else - return crc32_big(crc, buf, len); - } -#endif /* BYFOUR */ - crc = crc ^ 0xffffffffUL; - while (len >= 8) { - DO8; - len -= 8; - } - if (len) do { - DO1; - } while (--len); - return crc ^ 0xffffffffUL; -} - -/* ========================================================================= */ -unsigned long ZEXPORT crc32(crc, buf, len) - unsigned long crc; - const unsigned char FAR *buf; - uInt len; -{ - return crc32_z(crc, buf, len); -} - -#ifdef BYFOUR - -/* - This BYFOUR code accesses the passed unsigned char * buffer with a 32-bit - integer pointer type. This violates the strict aliasing rule, where a - compiler can assume, for optimization purposes, that two pointers to - fundamentally different types won't ever point to the same memory. This can - manifest as a problem only if one of the pointers is written to. This code - only reads from those pointers. So long as this code remains isolated in - this compilation unit, there won't be a problem. For this reason, this code - should not be copied and pasted into a compilation unit in which other code - writes to the buffer that is passed to these routines. 
- */ - -/* ========================================================================= */ -#define DOLIT4 c ^= *buf4++; \ - c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \ - crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24] -#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4 - -/* ========================================================================= */ -local unsigned long crc32_little(crc, buf, len) - unsigned long crc; - const unsigned char FAR *buf; - z_size_t len; -{ - register z_crc_t c; - register const z_crc_t FAR *buf4; - - c = (z_crc_t)crc; - c = ~c; - while (len && ((ptrdiff_t)buf & 3)) { - c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); - len--; - } - - buf4 = (const z_crc_t FAR *)(const void FAR *)buf; - while (len >= 32) { - DOLIT32; - len -= 32; - } - while (len >= 4) { - DOLIT4; - len -= 4; - } - buf = (const unsigned char FAR *)buf4; - - if (len) do { - c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); - } while (--len); - c = ~c; - return (unsigned long)c; -} - -/* ========================================================================= */ -#define DOBIG4 c ^= *buf4++; \ - c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ - crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24] -#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 - -/* ========================================================================= */ -local unsigned long crc32_big(crc, buf, len) - unsigned long crc; - const unsigned char FAR *buf; - z_size_t len; -{ - register z_crc_t c; - register const z_crc_t FAR *buf4; - - c = ZSWAP32((z_crc_t)crc); - c = ~c; - while (len && ((ptrdiff_t)buf & 3)) { - c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); - len--; - } - - buf4 = (const z_crc_t FAR *)(const void FAR *)buf; - while (len >= 32) { - DOBIG32; - len -= 32; - } - while (len >= 4) { - DOBIG4; - len -= 4; - } - buf = (const unsigned char FAR *)buf4; - - if (len) do { - c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); - } while (--len); - c = ~c; - return (unsigned long)(ZSWAP32(c)); -} - -#endif /* BYFOUR */ - -#define GF2_DIM 32 /* dimension of GF(2) vectors (length of CRC) */ - -/* ========================================================================= */ -local unsigned long gf2_matrix_times(mat, vec) - unsigned long *mat; - unsigned long vec; -{ - unsigned long sum; - - sum = 0; - while (vec) { - if (vec & 1) - sum ^= *mat; - vec >>= 1; - mat++; - } - return sum; -} - -/* ========================================================================= */ -local void gf2_matrix_square(square, mat) - unsigned long *square; - unsigned long *mat; -{ - int n; - - for (n = 0; n < GF2_DIM; n++) - square[n] = gf2_matrix_times(mat, mat[n]); -} - -/* ========================================================================= */ -local uLong crc32_combine_(crc1, crc2, len2) - uLong crc1; - uLong crc2; - z_off64_t len2; -{ - int n; - unsigned long row; - unsigned long even[GF2_DIM]; /* even-power-of-two zeros operator */ - unsigned long odd[GF2_DIM]; /* odd-power-of-two zeros operator */ - - /* degenerate case (also disallow negative lengths) */ - if (len2 <= 0) - return crc1; - - /* put operator for one zero bit in odd */ - odd[0] = 0xedb88320UL; /* CRC-32 polynomial */ - row = 1; - for (n = 1; n < GF2_DIM; n++) { - odd[n] = row; - row <<= 1; - } - - /* put operator for two zero bits in even */ - gf2_matrix_square(even, odd); - - /* put operator for four zero bits in odd */ - gf2_matrix_square(odd, even); - - /* apply len2 zeros 
to crc1 (first square will put the operator for one - zero byte, eight zero bits, in even) */ - do { - /* apply zeros operator for this bit of len2 */ - gf2_matrix_square(even, odd); - if (len2 & 1) - crc1 = gf2_matrix_times(even, crc1); - len2 >>= 1; - - /* if no more bits set, then done */ - if (len2 == 0) - break; - - /* another iteration of the loop with odd and even swapped */ - gf2_matrix_square(odd, even); - if (len2 & 1) - crc1 = gf2_matrix_times(odd, crc1); - len2 >>= 1; - - /* if no more bits set, then done */ - } while (len2 != 0); - - /* return combined crc */ - crc1 ^= crc2; - return crc1; -} - -/* ========================================================================= */ -uLong ZEXPORT crc32_combine(crc1, crc2, len2) - uLong crc1; - uLong crc2; - z_off_t len2; -{ - return crc32_combine_(crc1, crc2, len2); -} - -uLong ZEXPORT crc32_combine64(crc1, crc2, len2) - uLong crc1; - uLong crc2; - z_off64_t len2; -{ - return crc32_combine_(crc1, crc2, len2); -} diff --git a/base/poco/Foundation/src/crc32.h b/base/poco/Foundation/src/crc32.h deleted file mode 100644 index 3e00b277e77..00000000000 --- a/base/poco/Foundation/src/crc32.h +++ /dev/null @@ -1,241 +0,0 @@ -/* crc32.h -- tables for rapid CRC calculation - * Generated automatically by crc32.c - */ - -local const z_crc_t FAR crc_table[TBLS][256] - = {{0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, - 0x79dcb8a4UL, 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, - 0xf3b97148UL, 0x84be41deUL, 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, 0x646ba8c0UL, 0xfd62f97aUL, - 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, 0xa2677172UL, - 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, - 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, - 0xcfba9599UL, 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, - 0xb6662d3dUL, 0x76dc4190UL, 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, 0x9fbfe4a5UL, 0xe8b8d433UL, - 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, 0x6b6b51f4UL, - 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, - 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, - 0xd4bb30e2UL, 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, - 0x44042d73UL, 0x33031de5UL, 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, 0xc90c2086UL, 0x5768b525UL, - 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, 0x2eb40d81UL, - 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, - 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, - 0xf00f9344UL, 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, - 0x89d32be0UL, 0x10da7a5aUL, 0x67dd4accUL, 
0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, 0xd6d6a3e8UL, 0xa1d1937eUL, - 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, 0x36034af6UL, - 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, - 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, - 0xb5d0cf31UL, 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, - 0x72076785UL, 0x05005713UL, 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, 0xe5d5be0dUL, 0x7cdcefb7UL, - 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, 0x18b74777UL, - 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, - 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, - 0x40df0b66UL, 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, - 0x24b4a3a6UL, 0xbad03605UL, 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, 0x5d681b02UL, 0x2a6f2b94UL, - 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, 0x2d02ef8dUL -#ifdef BYFOUR - }, - {0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL, 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, - 0xd1c2bb49UL, 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL, 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, - 0x78f470d3UL, 0x61ef4192UL, 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL, 0x9b00a918UL, 0xb02dfadbUL, - 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL, 0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL, 0xbea97761UL, - 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL, 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL, - 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL, 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, - 0x891c9175UL, 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL, 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, - 0x58de2a3cUL, 0xf0794f05UL, 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL, 0xa623e883UL, 0xbf38d9c2UL, - 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL, 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL, 0xbabb5d54UL, - 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL, 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL, - 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL, 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, - 0x4ed03864UL, 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL, 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, - 0xc94824abUL, 0xd05315eaUL, 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL, 0x04122a35UL, 0x4b53bcf2UL, - 0x52488db3UL, 0x7965de70UL, 0x607eef31UL, 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL, 0x9a9107bbUL, - 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL, 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL, - 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL, 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, - 0x71418a1aUL, 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 
0x152d4f1eUL, 0x0c367e5fUL, 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, - 0xa0833153UL, 0x8bae6290UL, 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL, 0xae07bce9UL, 0xb71c8da8UL, - 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL, 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL, 0x54e85463UL, - 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL, 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL, - 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL, 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, - 0x516bd0f5UL, 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL, 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, - 0x9da070c8UL, 0x84bb4189UL, 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL, 0x7e54a903UL, 0x5579fac0UL, - 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL, 0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL, 0xce7953d8UL, - 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL, 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL, - 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL, 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, - 0xa4911b66UL, 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL, 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, - 0x3f91b27eUL, 0x70d024b9UL, 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL, 0xee530937UL, 0xf7483876UL, - 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL, 0x9324fd72UL}, - {0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL, 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, - 0x0fd13b8fUL, 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL, 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, - 0x1fa2771eUL, 0x1e601d29UL, 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL, 0x13f798ffUL, 0x11b126a6UL, - 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL, 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL, 0x3a0bf8b9UL, - 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL, 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL, - 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL, 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, - 0x20e69922UL, 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL, 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, - 0x2f37a2adUL, 0x709a8dc0UL, 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL, 0x7417f172UL, 0x75d59b45UL, - 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL, 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL, 0x6cbc2eb0UL, - 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL, 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL, - 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL, 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, - 0x4a917579UL, 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL, 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, - 0x41cd3244UL, 0x400f5873UL, 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL, 0x56b7d609UL, 0x53f8c08cUL, - 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL, 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL, 0x5c29fb03UL, - 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL, 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL, - 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL, 0xe82fe2e4UL, 
0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, - 0xfd13b8f0UL, 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL, 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, - 0xf2c2837fUL, 0xf0843d26UL, 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL, 0xd9785d60UL, 0xd8ba3757UL, - 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL, 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL, 0xd4efd8b6UL, - 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL, 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL, - 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL, 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, - 0xcd866d43UL, 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL, 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, - 0x9522eaf2UL, 0x94e080c5UL, 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL, 0x99770513UL, 0x9b31bb4aUL, - 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL, 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL, 0x88c623b5UL, - 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL, 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL, - 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL, 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, - 0xa4755576UL, 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL, 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, - 0xb782cd89UL, 0xb2cddb0cUL, 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL, 0xb853f606UL, 0xb9919c31UL, - 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL, 0xbe9834edUL}, - {0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL, 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, - 0x7d084f8aUL, 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL, 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, - 0xfa109f14UL, 0x42acf871UL, 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL, 0x2d111815UL, 0x3fa4b7fbUL, - 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL, 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL, 0xb28700d0UL, - 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL, 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL, - 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL, 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, - 0xd540a77dUL, 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL, 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, - 0xa848e8f7UL, 0x9b14583dUL, 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL, 0xbe7f07e1UL, 0x06c36084UL, - 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL, 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL, 0xcb0d0fa2UL, - 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL, 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL, - 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL, 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, - 0x299358edUL, 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL, 0x462eb889UL, 0x549b1767UL, 0xec277002UL, - 0x71f048bbUL, 0xc94c2fdeUL, 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL, 0x798a0f72UL, 0xe45d37cbUL, - 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL, 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL, 0x99557841UL, - 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL, 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 
0xc832e9e7UL, - 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL, 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, - 0xbd40e1a4UL, 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL, 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, - 0xc048ae2eUL, 0xd2fd01c0UL, 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL, 0x4d6b1905UL, 0xf5d77e60UL, - 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL, 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL, 0x22d6f961UL, - 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL, 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL, - 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL, 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, - 0xef189647UL, 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL, 0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, - 0x5326b1daUL, 0xeb9ad6bfUL, 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL, 0x842736dbUL, 0x96929935UL, - 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL, 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL, 0xbb838120UL, - 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL, 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL, - 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL, 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, - 0xb9c2a15cUL, 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL, 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, - 0x94d3b949UL, 0x090481f0UL, 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL, 0xe9dbf6c3UL, 0x516791a6UL, - 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL, 0xde0506f1UL}, - {0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL, 0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, - 0xa4b8dc79UL, 0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL, 0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, - 0x4871b9f3UL, 0xde41be84UL, 0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL, 0xc0a86b64UL, 0x7af962fdUL, - 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL, 0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL, 0x727167a2UL, - 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL, 0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL, - 0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL, 0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, - 0x9995bacfUL, 0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL, 0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, - 0x3d2d66b6UL, 0x9041dc76UL, 0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL, 0xa5e4bf9fUL, 0x33d4b8e8UL, - 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL, 0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL, 0xf4516b6bUL, - 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL, 0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL, - 0xeab8be8bUL, 0x7c88b9fcUL, 0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL, 0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, - 0xe230bbd4UL, 0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL, 0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, - 0x732d0444UL, 0xe51d0333UL, 0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL, 0x86200cc9UL, 0x25b56857UL, - 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL, 0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL, 0x810db42eUL, - 0x3b5cbdb7UL, 
0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL, 0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL, - 0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL, 0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, - 0x44930ff0UL, 0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL, 0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, - 0xe02bd389UL, 0x5a7ada10UL, 0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL, 0xe8a3d6d6UL, 0x7e93d1a1UL, - 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL, 0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL, 0xf64a0336UL, - 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL, 0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL, - 0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL, 0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, - 0x31cfd0b5UL, 0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL, 0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, - 0x85670772UL, 0x13570005UL, 0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL, 0x0dbed5e5UL, 0xb7efdc7cUL, - 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL, 0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL, 0x7747b718UL, - 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL, 0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL, - 0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL, 0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, - 0x660bdf40UL, 0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL, 0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, - 0xa6a3b424UL, 0x0536d0baUL, 0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL, 0x021b685dUL, 0x942b6f2aUL, - 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL, 0x8def022dUL}, - {0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL, 0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, - 0x49bbc2d1UL, 0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL, 0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, - 0xd370f478UL, 0x9241ef61UL, 0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL, 0x18a9009bUL, 0xdbfa2db0UL, - 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL, 0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL, 0x6177a9beUL, - 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL, 0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL, - 0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL, 0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, - 0x75911c89UL, 0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL, 0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, - 0x3c2ade58UL, 0x054f79f0UL, 0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL, 0x83e823a6UL, 0xc2d938bfUL, - 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL, 0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL, 0x545dbbbaUL, - 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL, 0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL, - 0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL, 0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, - 0x6438d04eUL, 0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL, 0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, - 0xab2448c9UL, 0xea1553d0UL, 0x29467efbUL, 0x687765e2UL, 0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL, 0x352a1204UL, 0xf2bc534bUL, - 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL, 
0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL, 0xbb07919aUL, - 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL, 0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL, - 0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL, 0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, - 0x1a8a4171UL, 0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL, 0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, - 0x533183a0UL, 0x9062ae8bUL, 0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL, 0xe9bc07aeUL, 0xa88d1cb7UL, - 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL, 0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL, 0x6354e854UL, - 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL, 0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL, - 0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL, 0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, - 0xf5d06b51UL, 0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL, 0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, - 0xc870a09dUL, 0x8941bb84UL, 0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL, 0x03a9547eUL, 0xc0fa7955UL, - 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL, 0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL, 0xd85379ceUL, - 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL, 0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL, - 0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL, 0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, - 0x661b91a4UL, 0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL, 0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, - 0x7eb2913fUL, 0xb924d070UL, 0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL, 0x370953eeUL, 0x763848f7UL, - 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL, 0x72fd2493UL}, - {0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL, 0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, - 0x8f3bd10fUL, 0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL, 0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, - 0x1e77a21fUL, 0x291d601eUL, 0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL, 0xff98f713UL, 0xa626b111UL, - 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL, 0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL, 0xb9f80b3aUL, - 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL, 0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL, - 0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL, 0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, - 0x2299e620UL, 0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL, 0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, - 0xada2372fUL, 0xc08d9a70UL, 0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL, 0x72f11774UL, 0x459bd575UL, - 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL, 0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL, 0xb02ebc6cUL, - 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL, 0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL, - 0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL, 0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, - 0x7975914aUL, 0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL, 0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, - 0x4432cd41UL, 0x73580f40UL, 0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 
0x67023355UL, 0x3ebc7557UL, 0x09d6b756UL, 0x8cc0f853UL, - 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL, 0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL, 0x03fb295cUL, - 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL, 0xeecfb1e2UL, 0xd9a573e3UL, 0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL, - 0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL, 0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, - 0xf0b813fdUL, 0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL, 0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, - 0x7f83c2f2UL, 0x263d84f0UL, 0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL, 0x605d78d9UL, 0x5737bad8UL, - 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL, 0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL, 0xb6d8efd4UL, - 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL, 0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL, - 0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL, 0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, - 0x436d86cdUL, 0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL, 0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, - 0xf2ea2295UL, 0xc580e094UL, 0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL, 0x13057799UL, 0x4abb319bUL, - 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL, 0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL, 0xb523c688UL, - 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL, 0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL, - 0x97ba20a8UL, 0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL, 0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, - 0x765575a4UL, 0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL, 0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, - 0x89cd82b7UL, 0x0cdbcdb2UL, 0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL, 0x06f653b8UL, 0x319c91b9UL, - 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL, 0xed3498beUL}, - {0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL, 0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, - 0x8a4f087dUL, 0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 0x3377dfe0UL, 0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, - 0x149f10faUL, 0x71f8ac42UL, 0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL, 0x1518112dUL, 0xfbb7a43fUL, - 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL, 0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL, 0xd00087b2UL, - 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL, 0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL, - 0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL, 0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, - 0x7da740d5UL, 0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL, 0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, - 0xf7e848a8UL, 0x3d58149bUL, 0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL, 0xe1077fbeUL, 0x8460c306UL, - 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL, 0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL, 0xa20f0dcbUL, - 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL, 0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL, - 0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL, 0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, - 0xed589329UL, 0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL, 0x89b82e46UL, 
0x67179b54UL, 0x027027ecUL, - 0xbb48f071UL, 0xde2f4cc9UL, 0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL, 0x720f8a79UL, 0xcb375de4UL, - 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL, 0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL, 0x41785599UL, - 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL, 0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL, - 0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL, 0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, - 0xa4e140bdUL, 0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL, 0x78be2b98UL, 0x1dd99720UL, 0x4bc9f478UL, - 0x2eae48c0UL, 0xc001fdd2UL, 0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL, 0x05196b4dUL, 0x607ed7f5UL, - 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL, 0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL, 0x61f9d622UL, - 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL, 0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL, - 0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL, 0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, - 0x479618efUL, 0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL, 0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, - 0xdab12653UL, 0xbfd69aebUL, 0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL, 0xdb362784UL, 0x35999296UL, - 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL, 0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL, 0x208183bbUL, - 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL, 0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL, - 0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL, 0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, - 0x5ca1c2b9UL, 0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL, 0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, - 0x49b9d394UL, 0xf0810409UL, 0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL, 0xc3f6dbe9UL, 0xa6916751UL, - 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL, 0xf10605deUL -#endif - }}; diff --git a/base/poco/Foundation/src/deflate.c b/base/poco/Foundation/src/deflate.c deleted file mode 100644 index 0a80edd92d8..00000000000 --- a/base/poco/Foundation/src/deflate.c +++ /dev/null @@ -1,2163 +0,0 @@ -/* deflate.c -- compress data using the deflation algorithm - * Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* - * ALGORITHM - * - * The "deflation" process depends on being able to identify portions - * of the input text which are identical to earlier input (within a - * sliding window trailing behind the input currently being processed). - * - * The most straightforward technique turns out to be the fastest for - * most input files: try all possible matches and select the longest. - * The key feature of this algorithm is that insertions into the string - * dictionary are very simple and thus fast, and deletions are avoided - * completely. Insertions are performed at each input character, whereas - * string matches are performed only when the previous match ends. So it - * is preferable to spend more time in matches to allow very fast string - * insertions and avoid deletions. The matching algorithm for small - * strings is inspired from that of Rabin & Karp. A brute force approach - * is used to find longer strings when a small match has been found. 
- * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze - * (by Leonid Broukhis). - * A previous version of this file used a more sophisticated algorithm - * (by Fiala and Greene) which is guaranteed to run in linear amortized - * time, but has a larger average cost, uses more memory and is patented. - * However the F&G algorithm may be faster for some highly redundant - * files if the parameter max_chain_length (described below) is too large. - * - * ACKNOWLEDGEMENTS - * - * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and - * I found it in 'freeze' written by Leonid Broukhis. - * Thanks to many people for bug reports and testing. - * - * REFERENCES - * - * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification". - * Available in http://tools.ietf.org/html/rfc1951 - * - * A description of the Rabin and Karp algorithm is given in the book - * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. - * - * Fiala,E.R., and Greene,D.H. - * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595 - * - */ - -/* @(#) $Id$ */ - -#include "deflate.h" - -const char deflate_copyright[] = - " deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler "; -/* - If you use the zlib library in a product, an acknowledgment is welcome - in the documentation of your product. If for some reason you cannot - include such an acknowledgment, I would appreciate that you keep this - copyright string in the executable of your product. - */ - -/* =========================================================================== - * Function prototypes. - */ -typedef enum { - need_more, /* block not completed, need more input or more output */ - block_done, /* block flush performed */ - finish_started, /* finish started, need only more output at next deflate */ - finish_done /* finish done, accept no more input or output */ -} block_state; - -typedef block_state (*compress_func) OF((deflate_state *s, int flush)); -/* Compression function. Returns the block state after the call. 
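Reviewer note: the `block_state`/`compress_func` pair above is the whole dispatch surface of this file; `deflate()` selects `configuration_table[level].func` and loops on the returned state. A toy model of that shape, with stand-in names (none of these identifiers exist in zlib):

```c
/* Toy model of the dispatch that compress_func enables: pick the body by
 * level, call it, act on the returned state. Stand-in names throughout. */
#include <stdio.h>

typedef enum { need_more, block_done, finish_done } toy_state;
typedef toy_state (*toy_func)(int flush);

static toy_state toy_stored(int flush) { return flush ? finish_done : need_more; }
static toy_state toy_fast(int flush)   { return flush ? finish_done : block_done; }

static const toy_func table[2] = { toy_stored, toy_fast };

int main(void)
{
    int level = 1;
    /* same shape as (*configuration_table[s->level].func)(s, flush) */
    toy_state st = table[level](/*flush=*/1);
    printf("state = %d\n", (int)st);
    return 0;
}
```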
*/ - -local int deflateStateCheck OF((z_streamp strm)); -local void slide_hash OF((deflate_state *s)); -local void fill_window OF((deflate_state *s)); -local block_state deflate_stored OF((deflate_state *s, int flush)); -local block_state deflate_fast OF((deflate_state *s, int flush)); -#ifndef FASTEST -local block_state deflate_slow OF((deflate_state *s, int flush)); -#endif -local block_state deflate_rle OF((deflate_state *s, int flush)); -local block_state deflate_huff OF((deflate_state *s, int flush)); -local void lm_init OF((deflate_state *s)); -local void putShortMSB OF((deflate_state *s, uInt b)); -local void flush_pending OF((z_streamp strm)); -local unsigned read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); -#ifdef ASMV -# pragma message("Assembler code may have bugs -- use at your own risk") - void match_init OF((void)); /* asm code initialization */ - uInt longest_match OF((deflate_state *s, IPos cur_match)); -#else -local uInt longest_match OF((deflate_state *s, IPos cur_match)); -#endif - -#ifdef ZLIB_DEBUG -local void check_match OF((deflate_state *s, IPos start, IPos match, - int length)); -#endif - -/* =========================================================================== - * Local data - */ - -#define NIL 0 -/* Tail of hash chains */ - -#ifndef TOO_FAR -# define TOO_FAR 4096 -#endif -/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ - -/* Values for max_lazy_match, good_match and max_chain_length, depending on - * the desired pack level (0..9). The values given below have been tuned to - * exclude worst case performance for pathological files. Better values may be - * found for specific files. - */ -typedef struct config_s { - ush good_length; /* reduce lazy search above this match length */ - ush max_lazy; /* do not perform lazy search above this match length */ - ush nice_length; /* quit search above this match length */ - ush max_chain; - compress_func func; -} config; - -#ifdef FASTEST -local const config configuration_table[2] = { -/* good lazy nice chain */ -/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ -/* 1 */ {4, 4, 8, 4, deflate_fast}}; /* max speed, no lazy matches */ -#else -local const config configuration_table[10] = { -/* good lazy nice chain */ -/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ -/* 1 */ {4, 4, 8, 4, deflate_fast}, /* max speed, no lazy matches */ -/* 2 */ {4, 5, 16, 8, deflate_fast}, -/* 3 */ {4, 6, 32, 32, deflate_fast}, - -/* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */ -/* 5 */ {8, 16, 32, 32, deflate_slow}, -/* 6 */ {8, 16, 128, 128, deflate_slow}, -/* 7 */ {8, 32, 128, 256, deflate_slow}, -/* 8 */ {32, 128, 258, 1024, deflate_slow}, -/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* max compression */ -#endif - -/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 - * For deflate_fast() (levels <= 3) good is ignored and lazy has a different - * meaning. - */ - -/* rank Z_BLOCK between Z_NO_FLUSH and Z_PARTIAL_FLUSH */ -#define RANK(f) (((f) * 2) - ((f) > 4 ? 9 : 0)) - -/* =========================================================================== - * Update a hash value with the given input byte - * IN assertion: all calls to UPDATE_HASH are made with consecutive input - * characters, so that a running hash key can be computed from the previous - * key instead of complete recalculation each time. 
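Reviewer note: the "running hash key" property stated above can be demonstrated directly. A sketch using zlib's default parameters (memLevel 8 gives hash_bits = 15, MIN_MATCH = 3, so hash_shift = 5, all assumptions restated in the comments): after three updates the key depends only on the last three bytes, because three shifts of 5 push older contributions past the 15-bit mask:

```c
/* Sketch of the rolling property stated above, with hash_bits = 15,
 * MIN_MATCH = 3, shift = (15 + 3 - 1) / 3 = 5 (zlib's defaults). */
#include <stdio.h>

#define HASH_BITS  15
#define HASH_MASK  ((1u << HASH_BITS) - 1)
#define HASH_SHIFT 5

static unsigned update_hash(unsigned h, unsigned char c)
{
    return ((h << HASH_SHIFT) ^ c) & HASH_MASK;
}

int main(void)
{
    const unsigned char buf[] = "xyzabc";
    unsigned h = 0;
    for (unsigned i = 0; i < sizeof(buf) - 1; i++)   /* one update per byte */
        h = update_hash(h, buf[i]);

    /* fresh register fed only the final three bytes: keys agree, because
     * 3 shifts * 5 bits = 15 bits, exactly the width of the mask */
    unsigned g = 0;
    g = update_hash(g, 'a');
    g = update_hash(g, 'b');
    g = update_hash(g, 'c');
    printf("%u == %u\n", h, g);                      /* prints equal values */
    return 0;
}
```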
- */ -#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask) - - -/* =========================================================================== - * Insert string str in the dictionary and set match_head to the previous head - * of the hash chain (the most recent string with same hash key). Return - * the previous length of the hash chain. - * If this file is compiled with -DFASTEST, the compression level is forced - * to 1, and no hash chains are maintained. - * IN assertion: all calls to INSERT_STRING are made with consecutive input - * characters and the first MIN_MATCH bytes of str are valid (except for - * the last MIN_MATCH-1 bytes of the input file). - */ -#ifdef FASTEST -#define INSERT_STRING(s, str, match_head) \ - (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ - match_head = s->head[s->ins_h], \ - s->head[s->ins_h] = (Pos)(str)) -#else -#define INSERT_STRING(s, str, match_head) \ - (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ - match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \ - s->head[s->ins_h] = (Pos)(str)) -#endif - -/* =========================================================================== - * Initialize the hash table (avoiding 64K overflow for 16 bit systems). - * prev[] will be initialized on the fly. - */ -#define CLEAR_HASH(s) \ - s->head[s->hash_size-1] = NIL; \ - zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head)); - -/* =========================================================================== - * Slide the hash table when sliding the window down (could be avoided with 32 - * bit values at the expense of memory usage). We slide even when level == 0 to - * keep the hash table consistent if we switch back to level > 0 later. - */ -local void slide_hash(s) - deflate_state *s; -{ - unsigned n, m; - Posf *p; - uInt wsize = s->w_size; - - n = s->hash_size; - p = &s->head[n]; - do { - m = *--p; - *p = (Pos)(m >= wsize ? m - wsize : NIL); - } while (--n); - n = wsize; -#ifndef FASTEST - p = &s->prev[n]; - do { - m = *--p; - *p = (Pos)(m >= wsize ? m - wsize : NIL); - /* If n is not on any hash chain, prev[n] is garbage but - * its value will never be used. - */ - } while (--n); -#endif -} - -/* ========================================================================= */ -int ZEXPORT deflateInit_(strm, level, version, stream_size) - z_streamp strm; - int level; - const char *version; - int stream_size; -{ - return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL, - Z_DEFAULT_STRATEGY, version, stream_size); - /* To do: ignore strm->next_in if we use it as window */ -} - -/* ========================================================================= */ -int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, - version, stream_size) - z_streamp strm; - int level; - int method; - int windowBits; - int memLevel; - int strategy; - const char *version; - int stream_size; -{ - deflate_state *s; - int wrap = 1; - static const char my_version[] = ZLIB_VERSION; - - ushf *overlay; - /* We overlay pending_buf and d_buf+l_buf. This works since the average - * output size for (length,distance) codes is <= 24 bits.
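Reviewer note: the overlay described above is concrete with zlib's default memLevel of 8 (an assumption restated below): lit_bufsize = 1 << 14, one allocation of 4 * lit_bufsize bytes, with d_buf and l_buf carved out of it. A back-of-envelope sketch of the offsets that `deflateInit2_()` computes further down:

```c
/* Back-of-envelope for the overlay comment above, assuming memLevel = 8
 * (zlib's default). Offsets are derived from the expressions in
 * deflateInit2_(), not read out of a live stream. */
#include <stdio.h>

int main(void)
{
    typedef unsigned short ush;
    unsigned long lit_bufsize = 1ul << (8 + 6);                /* 16384 */
    unsigned long total = lit_bufsize * (sizeof(ush) + 2);     /* 65536 bytes */
    /* d_buf = overlay + lit_bufsize/sizeof(ush), as a ush pointer */
    unsigned long d_buf_off = (lit_bufsize / sizeof(ush)) * sizeof(ush); /* 16384 */
    /* l_buf = pending_buf + (1+sizeof(ush))*lit_bufsize, as a byte pointer */
    unsigned long l_buf_off = (1 + sizeof(ush)) * lit_bufsize;           /* 49152 */

    printf("pending_buf: %lu bytes total\n", total);
    printf("d_buf at byte %lu, l_buf at byte %lu\n", d_buf_off, l_buf_off);
    return 0;
}
```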
- */ - - if (version == Z_NULL || version[0] != my_version[0] || - stream_size != sizeof(z_stream)) { - return Z_VERSION_ERROR; - } - if (strm == Z_NULL) return Z_STREAM_ERROR; - - strm->msg = Z_NULL; - if (strm->zalloc == (alloc_func)0) { -#ifdef Z_SOLO - return Z_STREAM_ERROR; -#else - strm->zalloc = zcalloc; - strm->opaque = (voidpf)0; -#endif - } - if (strm->zfree == (free_func)0) -#ifdef Z_SOLO - return Z_STREAM_ERROR; -#else - strm->zfree = zcfree; -#endif - -#ifdef FASTEST - if (level != 0) level = 1; -#else - if (level == Z_DEFAULT_COMPRESSION) level = 6; -#endif - - if (windowBits < 0) { /* suppress zlib wrapper */ - wrap = 0; - windowBits = -windowBits; - } -#ifdef GZIP - else if (windowBits > 15) { - wrap = 2; /* write gzip wrapper instead */ - windowBits -= 16; - } -#endif - if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED || - windowBits < 8 || windowBits > 15 || level < 0 || level > 9 || - strategy < 0 || strategy > Z_FIXED || (windowBits == 8 && wrap != 1)) { - return Z_STREAM_ERROR; - } - if (windowBits == 8) windowBits = 9; /* until 256-byte window bug fixed */ - s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state)); - if (s == Z_NULL) return Z_MEM_ERROR; - strm->state = (struct internal_state FAR *)s; - s->strm = strm; - s->status = INIT_STATE; /* to pass state test in deflateReset() */ - - s->wrap = wrap; - s->gzhead = Z_NULL; - s->w_bits = (uInt)windowBits; - s->w_size = 1 << s->w_bits; - s->w_mask = s->w_size - 1; - - s->hash_bits = (uInt)memLevel + 7; - s->hash_size = 1 << s->hash_bits; - s->hash_mask = s->hash_size - 1; - s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); - - s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); - s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); - s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); - - s->high_water = 0; /* nothing written to s->window yet */ - - s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ - - overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2); - s->pending_buf = (uchf *) overlay; - s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L); - - if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || - s->pending_buf == Z_NULL) { - s->status = FINISH_STATE; - strm->msg = ERR_MSG(Z_MEM_ERROR); - deflateEnd (strm); - return Z_MEM_ERROR; - } - s->d_buf = overlay + s->lit_bufsize/sizeof(ush); - s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; - - s->level = level; - s->strategy = strategy; - s->method = (Byte)method; - - return deflateReset(strm); -} - -/* ========================================================================= - * Check for a valid deflate stream state. Return 0 if ok, 1 if not. 
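Reviewer note: the windowBits decoding in `deflateInit2_()` above (negative values select raw deflate, values above 15 select the gzip wrapper) is a common stumbling block at call sites. A sketch of the three wrapper choices from the caller's side, using only the documented `deflateInit2()` entry point; `init_mode` is a hypothetical helper:

```c
/* The three wrapper modes decoded above, from the caller's side.
 * deflateInit2() is the documented public macro; link with -lz. */
#include <string.h>
#include <zlib.h>

static int init_mode(z_stream *zs, int mode)
{
    memset(zs, 0, sizeof(*zs));
    int windowBits = mode == 0 ? -15       /* raw deflate, no wrapper   */
                   : mode == 1 ?  15       /* zlib header + Adler-32    */
                   :             15 + 16;  /* gzip header + CRC-32      */
    return deflateInit2(zs, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
                        windowBits, 8 /* memLevel */, Z_DEFAULT_STRATEGY);
}
```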
- */ -local int deflateStateCheck (strm) - z_streamp strm; -{ - deflate_state *s; - if (strm == Z_NULL || - strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) - return 1; - s = strm->state; - if (s == Z_NULL || s->strm != strm || (s->status != INIT_STATE && -#ifdef GZIP - s->status != GZIP_STATE && -#endif - s->status != EXTRA_STATE && - s->status != NAME_STATE && - s->status != COMMENT_STATE && - s->status != HCRC_STATE && - s->status != BUSY_STATE && - s->status != FINISH_STATE)) - return 1; - return 0; -} - -/* ========================================================================= */ -int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength) - z_streamp strm; - const Bytef *dictionary; - uInt dictLength; -{ - deflate_state *s; - uInt str, n; - int wrap; - unsigned avail; - z_const unsigned char *next; - - if (deflateStateCheck(strm) || dictionary == Z_NULL) - return Z_STREAM_ERROR; - s = strm->state; - wrap = s->wrap; - if (wrap == 2 || (wrap == 1 && s->status != INIT_STATE) || s->lookahead) - return Z_STREAM_ERROR; - - /* when using zlib wrappers, compute Adler-32 for provided dictionary */ - if (wrap == 1) - strm->adler = adler32(strm->adler, dictionary, dictLength); - s->wrap = 0; /* avoid computing Adler-32 in read_buf */ - - /* if dictionary would fill window, just replace the history */ - if (dictLength >= s->w_size) { - if (wrap == 0) { /* already empty otherwise */ - CLEAR_HASH(s); - s->strstart = 0; - s->block_start = 0L; - s->insert = 0; - } - dictionary += dictLength - s->w_size; /* use the tail */ - dictLength = s->w_size; - } - - /* insert dictionary into window and hash */ - avail = strm->avail_in; - next = strm->next_in; - strm->avail_in = dictLength; - strm->next_in = (z_const Bytef *)dictionary; - fill_window(s); - while (s->lookahead >= MIN_MATCH) { - str = s->strstart; - n = s->lookahead - (MIN_MATCH-1); - do { - UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); -#ifndef FASTEST - s->prev[str & s->w_mask] = s->head[s->ins_h]; -#endif - s->head[s->ins_h] = (Pos)str; - str++; - } while (--n); - s->strstart = str; - s->lookahead = MIN_MATCH-1; - fill_window(s); - } - s->strstart += s->lookahead; - s->block_start = (long)s->strstart; - s->insert = s->lookahead; - s->lookahead = 0; - s->match_length = s->prev_length = MIN_MATCH-1; - s->match_available = 0; - strm->next_in = next; - strm->avail_in = avail; - s->wrap = wrap; - return Z_OK; -} - -/* ========================================================================= */ -int ZEXPORT deflateGetDictionary (strm, dictionary, dictLength) - z_streamp strm; - Bytef *dictionary; - uInt *dictLength; -{ - deflate_state *s; - uInt len; - - if (deflateStateCheck(strm)) - return Z_STREAM_ERROR; - s = strm->state; - len = s->strstart + s->lookahead; - if (len > s->w_size) - len = s->w_size; - if (dictionary != Z_NULL && len) - zmemcpy(dictionary, s->window + s->strstart + s->lookahead - len, len); - if (dictLength != Z_NULL) - *dictLength = len; - return Z_OK; -} - -/* ========================================================================= */ -int ZEXPORT deflateResetKeep (strm) - z_streamp strm; -{ - deflate_state *s; - - if (deflateStateCheck(strm)) { - return Z_STREAM_ERROR; - } - - strm->total_in = strm->total_out = 0; - strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */ - strm->data_type = Z_UNKNOWN; - - s = (deflate_state *)strm->state; - s->pending = 0; - s->pending_out = s->pending_buf; - - if (s->wrap < 0) { - s->wrap = -s->wrap; /* was made negative by deflate(..., Z_FINISH); 
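Reviewer note on `deflateSetDictionary()` above: the zlib-wrapper path computes the dictionary's Adler-32 so the decompressor can verify it asked for the same bytes. A usage sketch under the constraint the code enforces (call it right after init, before any `deflate()`); dictionary contents and the helper name are illustrative only:

```c
/* Sketch of priming a stream with a preset dictionary, per the function
 * above: must happen right after init, and the inflate side needs the
 * same bytes out of band. Public API only; error handling trimmed. */
#include <string.h>
#include <zlib.h>

static const Bytef dict[] = "field1=field2=field3=";  /* shared out of band */

static int start_with_dict(z_stream *zs)
{
    memset(zs, 0, sizeof(*zs));
    int rc = deflateInit(zs, Z_DEFAULT_COMPRESSION);
    if (rc != Z_OK) return rc;
    /* records the Adler-32 of dict in strm->adler and preloads the window */
    return deflateSetDictionary(zs, dict, (uInt)(sizeof(dict) - 1));
}
```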
*/ - } - s->status = -#ifdef GZIP - s->wrap == 2 ? GZIP_STATE : -#endif - s->wrap ? INIT_STATE : BUSY_STATE; - strm->adler = -#ifdef GZIP - s->wrap == 2 ? crc32(0L, Z_NULL, 0) : -#endif - adler32(0L, Z_NULL, 0); - s->last_flush = Z_NO_FLUSH; - - _tr_init(s); - - return Z_OK; -} - -/* ========================================================================= */ -int ZEXPORT deflateReset (strm) - z_streamp strm; -{ - int ret; - - ret = deflateResetKeep(strm); - if (ret == Z_OK) - lm_init(strm->state); - return ret; -} - -/* ========================================================================= */ -int ZEXPORT deflateSetHeader (strm, head) - z_streamp strm; - gz_headerp head; -{ - if (deflateStateCheck(strm) || strm->state->wrap != 2) - return Z_STREAM_ERROR; - strm->state->gzhead = head; - return Z_OK; -} - -/* ========================================================================= */ -int ZEXPORT deflatePending (strm, pending, bits) - unsigned *pending; - int *bits; - z_streamp strm; -{ - if (deflateStateCheck(strm)) return Z_STREAM_ERROR; - if (pending != Z_NULL) - *pending = strm->state->pending; - if (bits != Z_NULL) - *bits = strm->state->bi_valid; - return Z_OK; -} - -/* ========================================================================= */ -int ZEXPORT deflatePrime (strm, bits, value) - z_streamp strm; - int bits; - int value; -{ - deflate_state *s; - int put; - - if (deflateStateCheck(strm)) return Z_STREAM_ERROR; - s = strm->state; - if ((Bytef *)(s->d_buf) < s->pending_out + ((Buf_size + 7) >> 3)) - return Z_BUF_ERROR; - do { - put = Buf_size - s->bi_valid; - if (put > bits) - put = bits; - s->bi_buf |= (ush)((value & ((1 << put) - 1)) << s->bi_valid); - s->bi_valid += put; - _tr_flush_bits(s); - value >>= put; - bits -= put; - } while (bits); - return Z_OK; -} - -/* ========================================================================= */ -int ZEXPORT deflateParams(strm, level, strategy) - z_streamp strm; - int level; - int strategy; -{ - deflate_state *s; - compress_func func; - - if (deflateStateCheck(strm)) return Z_STREAM_ERROR; - s = strm->state; - -#ifdef FASTEST - if (level != 0) level = 1; -#else - if (level == Z_DEFAULT_COMPRESSION) level = 6; -#endif - if (level < 0 || level > 9 || strategy < 0 || strategy > Z_FIXED) { - return Z_STREAM_ERROR; - } - func = configuration_table[s->level].func; - - if ((strategy != s->strategy || func != configuration_table[level].func) && - s->high_water) { - /* Flush the last buffer: */ - int err = deflate(strm, Z_BLOCK); - if (err == Z_STREAM_ERROR) - return err; - if (strm->avail_out == 0) - return Z_BUF_ERROR; - } - if (s->level != level) { - if (s->level == 0 && s->matches != 0) { - if (s->matches == 1) - slide_hash(s); - else - CLEAR_HASH(s); - s->matches = 0; - } - s->level = level; - s->max_lazy_match = configuration_table[level].max_lazy; - s->good_match = configuration_table[level].good_length; - s->nice_match = configuration_table[level].nice_length; - s->max_chain_length = configuration_table[level].max_chain; - } - s->strategy = strategy; - return Z_OK; -} - -/* ========================================================================= */ -int ZEXPORT deflateTune(strm, good_length, max_lazy, nice_length, max_chain) - z_streamp strm; - int good_length; - int max_lazy; - int nice_length; - int max_chain; -{ - deflate_state *s; - - if (deflateStateCheck(strm)) return Z_STREAM_ERROR; - s = strm->state; - s->good_match = (uInt)good_length; - s->max_lazy_match = (uInt)max_lazy; - s->nice_match = nice_length; - 
s->max_chain_length = (uInt)max_chain; - return Z_OK; -} - -/* ========================================================================= - * For the default windowBits of 15 and memLevel of 8, this function returns - * a close to exact, as well as small, upper bound on the compressed size. - * They are coded as constants here for a reason--if the #define's are - * changed, then this function needs to be changed as well. The return - * value for 15 and 8 only works for those exact settings. - * - * For any setting other than those defaults for windowBits and memLevel, - * the value returned is a conservative worst case for the maximum expansion - * resulting from using fixed blocks instead of stored blocks, which deflate - * can emit on compressed data for some combinations of the parameters. - * - * This function could be more sophisticated to provide closer upper bounds for - * every combination of windowBits and memLevel. But even the conservative - * upper bound of about 14% expansion does not seem onerous for output buffer - * allocation. - */ -uLong ZEXPORT deflateBound(strm, sourceLen) - z_streamp strm; - uLong sourceLen; -{ - deflate_state *s; - uLong complen, wraplen; - - /* conservative upper bound for compressed data */ - complen = sourceLen + - ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 5; - - /* if can't get parameters, return conservative bound plus zlib wrapper */ - if (deflateStateCheck(strm)) - return complen + 6; - - /* compute wrapper length */ - s = strm->state; - switch (s->wrap) { - case 0: /* raw deflate */ - wraplen = 0; - break; - case 1: /* zlib wrapper */ - wraplen = 6 + (s->strstart ? 4 : 0); - break; -#ifdef GZIP - case 2: /* gzip wrapper */ - wraplen = 18; - if (s->gzhead != Z_NULL) { /* user-supplied gzip header */ - Bytef *str; - if (s->gzhead->extra != Z_NULL) - wraplen += 2 + s->gzhead->extra_len; - str = s->gzhead->name; - if (str != Z_NULL) - do { - wraplen++; - } while (*str++); - str = s->gzhead->comment; - if (str != Z_NULL) - do { - wraplen++; - } while (*str++); - if (s->gzhead->hcrc) - wraplen += 2; - } - break; -#endif - default: /* for compiler happiness */ - wraplen = 6; - } - - /* if not default parameters, return conservative bound */ - if (s->w_bits != 15 || s->hash_bits != 8 + 7) - return complen + wraplen; - - /* default settings: return tight bound for that case */ - return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + - (sourceLen >> 25) + 13 - 6 + wraplen; -} - -/* ========================================================================= - * Put a short in the pending buffer. The 16-bit value is put in MSB order. - * IN assertion: the stream state is correct and there is enough room in - * pending_buf. - */ -local void putShortMSB (s, b) - deflate_state *s; - uInt b; -{ - put_byte(s, (Byte)(b >> 8)); - put_byte(s, (Byte)(b & 0xff)); -} - -/* ========================================================================= - * Flush as much pending output as possible. All deflate() output, except for - * some deflate_stored() output, goes through this function so some - * applications may wish to modify it to avoid allocating a large - * strm->next_out buffer and copying into it. (See also read_buf()). 
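Editor's note: the bound above is what makes single-shot compression safe to size up front. A sketch of the usual allocate-then-finish pattern (compress_whole is a hypothetical helper; assumes the lengths fit in uInt):

    #include <stdlib.h>
    #include <zlib.h>

    static unsigned char *compress_whole(z_stream *strm, const unsigned char *src,
                                         uLong srclen, uLong *outlen)
    {
        uLong cap = deflateBound(strm, srclen);  /* worst case incl. wrapper */
        unsigned char *out = malloc(cap);
        if (out == NULL)
            return NULL;
        strm->next_in  = (Bytef *)src;  strm->avail_in  = (uInt)srclen;
        strm->next_out = out;           strm->avail_out = (uInt)cap;
        if (deflate(strm, Z_FINISH) != Z_STREAM_END) { /* cannot overflow cap */
            free(out);
            return NULL;
        }
        *outlen = strm->total_out;
        return out;
    }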
- */ -local void flush_pending(strm) - z_streamp strm; -{ - unsigned len; - deflate_state *s = strm->state; - - _tr_flush_bits(s); - len = s->pending; - if (len > strm->avail_out) len = strm->avail_out; - if (len == 0) return; - - zmemcpy(strm->next_out, s->pending_out, len); - strm->next_out += len; - s->pending_out += len; - strm->total_out += len; - strm->avail_out -= len; - s->pending -= len; - if (s->pending == 0) { - s->pending_out = s->pending_buf; - } -} - -/* =========================================================================== - * Update the header CRC with the bytes s->pending_buf[beg..s->pending - 1]. - */ -#define HCRC_UPDATE(beg) \ - do { \ - if (s->gzhead->hcrc && s->pending > (beg)) \ - strm->adler = crc32(strm->adler, s->pending_buf + (beg), \ - s->pending - (beg)); \ - } while (0) - -/* ========================================================================= */ -int ZEXPORT deflate (strm, flush) - z_streamp strm; - int flush; -{ - int old_flush; /* value of flush param for previous deflate call */ - deflate_state *s; - - if (deflateStateCheck(strm) || flush > Z_BLOCK || flush < 0) { - return Z_STREAM_ERROR; - } - s = strm->state; - - if (strm->next_out == Z_NULL || - (strm->avail_in != 0 && strm->next_in == Z_NULL) || - (s->status == FINISH_STATE && flush != Z_FINISH)) { - ERR_RETURN(strm, Z_STREAM_ERROR); - } - if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR); - - old_flush = s->last_flush; - s->last_flush = flush; - - /* Flush as much pending output as possible */ - if (s->pending != 0) { - flush_pending(strm); - if (strm->avail_out == 0) { - /* Since avail_out is 0, deflate will be called again with - * more output space, but possibly with both pending and - * avail_in equal to zero. There won't be anything to do, - * but this is not an error situation so make sure we - * return OK instead of BUF_ERROR at next call of deflate: - */ - s->last_flush = -1; - return Z_OK; - } - - /* Make sure there is something to do and avoid duplicate consecutive - * flushes. For repeated and useless calls with Z_FINISH, we keep - * returning Z_STREAM_END instead of Z_BUF_ERROR. 
- */ - } else if (strm->avail_in == 0 && RANK(flush) <= RANK(old_flush) && - flush != Z_FINISH) { - ERR_RETURN(strm, Z_BUF_ERROR); - } - - /* User must not provide more input after the first FINISH: */ - if (s->status == FINISH_STATE && strm->avail_in != 0) { - ERR_RETURN(strm, Z_BUF_ERROR); - } - - /* Write the header */ - if (s->status == INIT_STATE) { - /* zlib header */ - uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8; - uInt level_flags; - - if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2) - level_flags = 0; - else if (s->level < 6) - level_flags = 1; - else if (s->level == 6) - level_flags = 2; - else - level_flags = 3; - header |= (level_flags << 6); - if (s->strstart != 0) header |= PRESET_DICT; - header += 31 - (header % 31); - - putShortMSB(s, header); - - /* Save the adler32 of the preset dictionary: */ - if (s->strstart != 0) { - putShortMSB(s, (uInt)(strm->adler >> 16)); - putShortMSB(s, (uInt)(strm->adler & 0xffff)); - } - strm->adler = adler32(0L, Z_NULL, 0); - s->status = BUSY_STATE; - - /* Compression must start with an empty pending buffer */ - flush_pending(strm); - if (s->pending != 0) { - s->last_flush = -1; - return Z_OK; - } - } -#ifdef GZIP - if (s->status == GZIP_STATE) { - /* gzip header */ - strm->adler = crc32(0L, Z_NULL, 0); - put_byte(s, 31); - put_byte(s, 139); - put_byte(s, 8); - if (s->gzhead == Z_NULL) { - put_byte(s, 0); - put_byte(s, 0); - put_byte(s, 0); - put_byte(s, 0); - put_byte(s, 0); - put_byte(s, s->level == 9 ? 2 : - (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ? - 4 : 0)); - put_byte(s, OS_CODE); - s->status = BUSY_STATE; - - /* Compression must start with an empty pending buffer */ - flush_pending(strm); - if (s->pending != 0) { - s->last_flush = -1; - return Z_OK; - } - } - else { - put_byte(s, (s->gzhead->text ? 1 : 0) + - (s->gzhead->hcrc ? 2 : 0) + - (s->gzhead->extra == Z_NULL ? 0 : 4) + - (s->gzhead->name == Z_NULL ? 0 : 8) + - (s->gzhead->comment == Z_NULL ? 0 : 16) - ); - put_byte(s, (Byte)(s->gzhead->time & 0xff)); - put_byte(s, (Byte)((s->gzhead->time >> 8) & 0xff)); - put_byte(s, (Byte)((s->gzhead->time >> 16) & 0xff)); - put_byte(s, (Byte)((s->gzhead->time >> 24) & 0xff)); - put_byte(s, s->level == 9 ? 2 : - (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ? 
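Editor's worked example of the zlib header arithmetic above, for the default windowBits of 15 and level 6: CM = 8 and CINFO = 7 give a first byte of 0x78, the level flags contribute 2 << 6, and rounding up to a multiple of 31 yields the familiar 0x78 0x9C stream prefix.

    #include <assert.h>

    int main(void) {
        unsigned header = (8 + ((15 - 8) << 4)) << 8;  /* CM = 8, CINFO = 7 */
        header |= 2u << 6;                   /* level 6 -> level_flags = 2 */
        header += 31 - (header % 31);        /* FCHECK: multiple of 31 */
        assert(header == 0x789C);            /* the classic zlib prefix */
        return 0;
    }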
- 4 : 0)); - put_byte(s, s->gzhead->os & 0xff); - if (s->gzhead->extra != Z_NULL) { - put_byte(s, s->gzhead->extra_len & 0xff); - put_byte(s, (s->gzhead->extra_len >> 8) & 0xff); - } - if (s->gzhead->hcrc) - strm->adler = crc32(strm->adler, s->pending_buf, - s->pending); - s->gzindex = 0; - s->status = EXTRA_STATE; - } - } - if (s->status == EXTRA_STATE) { - if (s->gzhead->extra != Z_NULL) { - ulg beg = s->pending; /* start of bytes to update crc */ - uInt left = (s->gzhead->extra_len & 0xffff) - s->gzindex; - while (s->pending + left > s->pending_buf_size) { - uInt copy = s->pending_buf_size - s->pending; - zmemcpy(s->pending_buf + s->pending, - s->gzhead->extra + s->gzindex, copy); - s->pending = s->pending_buf_size; - HCRC_UPDATE(beg); - s->gzindex += copy; - flush_pending(strm); - if (s->pending != 0) { - s->last_flush = -1; - return Z_OK; - } - beg = 0; - left -= copy; - } - zmemcpy(s->pending_buf + s->pending, - s->gzhead->extra + s->gzindex, left); - s->pending += left; - HCRC_UPDATE(beg); - s->gzindex = 0; - } - s->status = NAME_STATE; - } - if (s->status == NAME_STATE) { - if (s->gzhead->name != Z_NULL) { - ulg beg = s->pending; /* start of bytes to update crc */ - int val; - do { - if (s->pending == s->pending_buf_size) { - HCRC_UPDATE(beg); - flush_pending(strm); - if (s->pending != 0) { - s->last_flush = -1; - return Z_OK; - } - beg = 0; - } - val = s->gzhead->name[s->gzindex++]; - put_byte(s, val); - } while (val != 0); - HCRC_UPDATE(beg); - s->gzindex = 0; - } - s->status = COMMENT_STATE; - } - if (s->status == COMMENT_STATE) { - if (s->gzhead->comment != Z_NULL) { - ulg beg = s->pending; /* start of bytes to update crc */ - int val; - do { - if (s->pending == s->pending_buf_size) { - HCRC_UPDATE(beg); - flush_pending(strm); - if (s->pending != 0) { - s->last_flush = -1; - return Z_OK; - } - beg = 0; - } - val = s->gzhead->comment[s->gzindex++]; - put_byte(s, val); - } while (val != 0); - HCRC_UPDATE(beg); - } - s->status = HCRC_STATE; - } - if (s->status == HCRC_STATE) { - if (s->gzhead->hcrc) { - if (s->pending + 2 > s->pending_buf_size) { - flush_pending(strm); - if (s->pending != 0) { - s->last_flush = -1; - return Z_OK; - } - } - put_byte(s, (Byte)(strm->adler & 0xff)); - put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); - strm->adler = crc32(0L, Z_NULL, 0); - } - s->status = BUSY_STATE; - - /* Compression must start with an empty pending buffer */ - flush_pending(strm); - if (s->pending != 0) { - s->last_flush = -1; - return Z_OK; - } - } -#endif - - /* Start a new block or continue the current one. - */ - if (strm->avail_in != 0 || s->lookahead != 0 || - (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) { - block_state bstate; - - bstate = s->level == 0 ? deflate_stored(s, flush) : - s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) : - s->strategy == Z_RLE ? deflate_rle(s, flush) : - (*(configuration_table[s->level].func))(s, flush); - - if (bstate == finish_started || bstate == finish_done) { - s->status = FINISH_STATE; - } - if (bstate == need_more || bstate == finish_started) { - if (strm->avail_out == 0) { - s->last_flush = -1; /* avoid BUF_ERROR next call, see above */ - } - return Z_OK; - /* If flush != Z_NO_FLUSH && avail_out == 0, the next call - * of deflate should use the same flush parameter to make sure - * that the flush is complete. So we don't have to output an - * empty block here, this will be done at next call. This also - * ensures that for a very small output buffer, we emit at most - * one empty block. 
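Editor's note: the EXTRA_STATE/NAME_STATE/COMMENT_STATE/HCRC_STATE machine above serializes fields from the application-supplied gz_header. A sketch of filling one in (start_gzip is a hypothetical helper; windowBits of 15 + 16 selects the gzip wrapper, per zlib.h):

    #include <string.h>
    #include <zlib.h>

    static int start_gzip(z_stream *strm, gz_header *head)
    {
        int ret = deflateInit2(strm, Z_BEST_COMPRESSION, Z_DEFLATED,
                               15 + 16 /* gzip */, 8, Z_DEFAULT_STRATEGY);
        if (ret != Z_OK)
            return ret;
        memset(head, 0, sizeof(*head));
        head->name    = (Bytef *)"example.txt"; /* NUL-terminated, NAME_STATE */
        head->comment = (Bytef *)"demo";        /* COMMENT_STATE */
        head->time    = 0;                      /* MTIME field */
        head->os      = 255;                    /* "unknown" OS code */
        head->hcrc    = 1;                      /* emit header CRC, HCRC_STATE */
        return deflateSetHeader(strm, head);    /* before the first deflate() */
    }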
- */ - } - if (bstate == block_done) { - if (flush == Z_PARTIAL_FLUSH) { - _tr_align(s); - } else if (flush != Z_BLOCK) { /* FULL_FLUSH or SYNC_FLUSH */ - _tr_stored_block(s, (char*)0, 0L, 0); - /* For a full flush, this empty block will be recognized - * as a special marker by inflate_sync(). - */ - if (flush == Z_FULL_FLUSH) { - CLEAR_HASH(s); /* forget history */ - if (s->lookahead == 0) { - s->strstart = 0; - s->block_start = 0L; - s->insert = 0; - } - } - } - flush_pending(strm); - if (strm->avail_out == 0) { - s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */ - return Z_OK; - } - } - } - - if (flush != Z_FINISH) return Z_OK; - if (s->wrap <= 0) return Z_STREAM_END; - - /* Write the trailer */ -#ifdef GZIP - if (s->wrap == 2) { - put_byte(s, (Byte)(strm->adler & 0xff)); - put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); - put_byte(s, (Byte)((strm->adler >> 16) & 0xff)); - put_byte(s, (Byte)((strm->adler >> 24) & 0xff)); - put_byte(s, (Byte)(strm->total_in & 0xff)); - put_byte(s, (Byte)((strm->total_in >> 8) & 0xff)); - put_byte(s, (Byte)((strm->total_in >> 16) & 0xff)); - put_byte(s, (Byte)((strm->total_in >> 24) & 0xff)); - } - else -#endif - { - putShortMSB(s, (uInt)(strm->adler >> 16)); - putShortMSB(s, (uInt)(strm->adler & 0xffff)); - } - flush_pending(strm); - /* If avail_out is zero, the application will call deflate again - * to flush the rest. - */ - if (s->wrap > 0) s->wrap = -s->wrap; /* write the trailer only once! */ - return s->pending != 0 ? Z_OK : Z_STREAM_END; -} - -/* ========================================================================= */ -int ZEXPORT deflateEnd (strm) - z_streamp strm; -{ - int status; - - if (deflateStateCheck(strm)) return Z_STREAM_ERROR; - - status = strm->state->status; - - /* Deallocate in reverse order of allocations: */ - TRY_FREE(strm, strm->state->pending_buf); - TRY_FREE(strm, strm->state->head); - TRY_FREE(strm, strm->state->prev); - TRY_FREE(strm, strm->state->window); - - ZFREE(strm, strm->state); - strm->state = Z_NULL; - - return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; -} - -/* ========================================================================= - * Copy the source state to the destination state. - * To simplify the source, this is not supported for 16-bit MS-DOS (which - * doesn't have enough memory anyway to duplicate compression states). 
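Editor's note: the last_flush bookkeeping and trailer logic above dictate the standard calling loop: feed input, drain output until avail_out stops hitting zero, and finish with Z_FINISH until Z_STREAM_END. A sketch modeled on zlib's zpipe.c example (deflate_file is hypothetical; I/O error handling is abbreviated):

    #include <stdio.h>
    #include <zlib.h>

    #define CHUNK 16384

    static int deflate_file(FILE *in, FILE *out)
    {
        unsigned char ibuf[CHUNK], obuf[CHUNK];
        z_stream strm = {0};                   /* zalloc/zfree/opaque = 0 */
        int flush;
        if (deflateInit(&strm, Z_DEFAULT_COMPRESSION) != Z_OK)
            return Z_STREAM_ERROR;
        do {
            strm.avail_in = (uInt)fread(ibuf, 1, CHUNK, in);
            strm.next_in = ibuf;
            flush = feof(in) ? Z_FINISH : Z_NO_FLUSH;
            do {                     /* run deflate() until output is drained */
                strm.next_out = obuf;
                strm.avail_out = CHUNK;
                (void)deflate(&strm, flush); /* no bad return with these args */
                fwrite(obuf, 1, CHUNK - strm.avail_out, out);
            } while (strm.avail_out == 0);
        } while (flush != Z_FINISH);
        deflateEnd(&strm);
        return Z_OK;
    }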
- */ -int ZEXPORT deflateCopy (dest, source) - z_streamp dest; - z_streamp source; -{ -#ifdef MAXSEG_64K - return Z_STREAM_ERROR; -#else - deflate_state *ds; - deflate_state *ss; - ushf *overlay; - - - if (deflateStateCheck(source) || dest == Z_NULL) { - return Z_STREAM_ERROR; - } - - ss = source->state; - - zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); - - ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state)); - if (ds == Z_NULL) return Z_MEM_ERROR; - dest->state = (struct internal_state FAR *) ds; - zmemcpy((voidpf)ds, (voidpf)ss, sizeof(deflate_state)); - ds->strm = dest; - - ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); - ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); - ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); - overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2); - ds->pending_buf = (uchf *) overlay; - - if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL || - ds->pending_buf == Z_NULL) { - deflateEnd (dest); - return Z_MEM_ERROR; - } - /* following zmemcpy do not work for 16-bit MS-DOS */ - zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte)); - zmemcpy((voidpf)ds->prev, (voidpf)ss->prev, ds->w_size * sizeof(Pos)); - zmemcpy((voidpf)ds->head, (voidpf)ss->head, ds->hash_size * sizeof(Pos)); - zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); - - ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); - ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush); - ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize; - - ds->l_desc.dyn_tree = ds->dyn_ltree; - ds->d_desc.dyn_tree = ds->dyn_dtree; - ds->bl_desc.dyn_tree = ds->bl_tree; - - return Z_OK; -#endif /* MAXSEG_64K */ -} - -/* =========================================================================== - * Read a new buffer from the current input stream, update the adler32 - * and total number of bytes read. All deflate() input goes through - * this function so some applications may wish to modify it to avoid - * allocating a large strm->next_in buffer and copying from it. - * (See also flush_pending()). 
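Editor's note: a typical use of deflateCopy() above is snapshotting a live stream so two alternative continuations of the same compressed prefix can be produced (fork_stream is a hypothetical helper):

    #include <zlib.h>

    /* dst must be a fresh z_stream; on Z_OK it owns duplicated window, hash,
     * and pending buffers and must later be released with deflateEnd(dst). */
    static int fork_stream(z_stream *dst, z_stream *src)
    {
        return deflateCopy(dst, src);
    }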
- */ -local unsigned read_buf(strm, buf, size) - z_streamp strm; - Bytef *buf; - unsigned size; -{ - unsigned len = strm->avail_in; - - if (len > size) len = size; - if (len == 0) return 0; - - strm->avail_in -= len; - - zmemcpy(buf, strm->next_in, len); - if (strm->state->wrap == 1) { - strm->adler = adler32(strm->adler, buf, len); - } -#ifdef GZIP - else if (strm->state->wrap == 2) { - strm->adler = crc32(strm->adler, buf, len); - } -#endif - strm->next_in += len; - strm->total_in += len; - - return len; -} - -/* =========================================================================== - * Initialize the "longest match" routines for a new zlib stream - */ -local void lm_init (s) - deflate_state *s; -{ - s->window_size = (ulg)2L*s->w_size; - - CLEAR_HASH(s); - - /* Set the default configuration parameters: - */ - s->max_lazy_match = configuration_table[s->level].max_lazy; - s->good_match = configuration_table[s->level].good_length; - s->nice_match = configuration_table[s->level].nice_length; - s->max_chain_length = configuration_table[s->level].max_chain; - - s->strstart = 0; - s->block_start = 0L; - s->lookahead = 0; - s->insert = 0; - s->match_length = s->prev_length = MIN_MATCH-1; - s->match_available = 0; - s->ins_h = 0; -#ifndef FASTEST -#ifdef ASMV - match_init(); /* initialize the asm code */ -#endif -#endif -} - -#ifndef FASTEST -/* =========================================================================== - * Set match_start to the longest match starting at the given string and - * return its length. Matches shorter or equal to prev_length are discarded, - * in which case the result is equal to prev_length and match_start is - * garbage. - * IN assertions: cur_match is the head of the hash chain for the current - * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1 - * OUT assertion: the match length is not greater than s->lookahead. - */ -#ifndef ASMV -/* For 80x86 and 680x0, an optimized version will be provided in match.asm or - * match.S. The code will be functionally equivalent. - */ -local uInt longest_match(s, cur_match) - deflate_state *s; - IPos cur_match; /* current match */ -{ - unsigned chain_length = s->max_chain_length;/* max hash chain length */ - register Bytef *scan = s->window + s->strstart; /* current string */ - register Bytef *match; /* matched string */ - register int len; /* length of current match */ - int best_len = (int)s->prev_length; /* best match length so far */ - int nice_match = s->nice_match; /* stop if match long enough */ - IPos limit = s->strstart > (IPos)MAX_DIST(s) ? - s->strstart - (IPos)MAX_DIST(s) : NIL; - /* Stop when cur_match becomes <= limit. To simplify the code, - * we prevent matches with the string of window index 0. - */ - Posf *prev = s->prev; - uInt wmask = s->w_mask; - -#ifdef UNALIGNED_OK - /* Compare two bytes at a time. Note: this is not always beneficial. - * Try with and without -DUNALIGNED_OK to check. - */ - register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1; - register ush scan_start = *(ushf*)scan; - register ush scan_end = *(ushf*)(scan+best_len-1); -#else - register Bytef *strend = s->window + s->strstart + MAX_MATCH; - register Byte scan_end1 = scan[best_len-1]; - register Byte scan_end = scan[best_len]; -#endif - - /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. - * It is easy to get rid of this optimization if necessary. 
- */ - Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); - - /* Do not waste too much time if we already have a good match: */ - if (s->prev_length >= s->good_match) { - chain_length >>= 2; - } - /* Do not look for matches beyond the end of the input. This is necessary - * to make deflate deterministic. - */ - if ((uInt)nice_match > s->lookahead) nice_match = (int)s->lookahead; - - Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); - - do { - Assert(cur_match < s->strstart, "no future"); - match = s->window + cur_match; - - /* Skip to next match if the match length cannot increase - * or if the match length is less than 2. Note that the checks below - * for insufficient lookahead only occur occasionally for performance - * reasons. Therefore uninitialized memory will be accessed, and - * conditional jumps will be made that depend on those values. - * However the length of the match is limited to the lookahead, so - * the output of deflate is not affected by the uninitialized values. - */ -#if (defined(UNALIGNED_OK) && MAX_MATCH == 258) - /* This code assumes sizeof(unsigned short) == 2. Do not use - * UNALIGNED_OK if your compiler uses a different size. - */ - if (*(ushf*)(match+best_len-1) != scan_end || - *(ushf*)match != scan_start) continue; - - /* It is not necessary to compare scan[2] and match[2] since they are - * always equal when the other bytes match, given that the hash keys - * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at - * strstart+3, +5, ... up to strstart+257. We check for insufficient - * lookahead only every 4th comparison; the 128th check will be made - * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is - * necessary to put more guard bytes at the end of the window, or - * to check more often for insufficient lookahead. - */ - Assert(scan[2] == match[2], "scan[2]?"); - scan++, match++; - do { - } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) && - *(ushf*)(scan+=2) == *(ushf*)(match+=2) && - *(ushf*)(scan+=2) == *(ushf*)(match+=2) && - *(ushf*)(scan+=2) == *(ushf*)(match+=2) && - scan < strend); - /* The funny "do {}" generates better code on most compilers */ - - /* Here, scan <= window+strstart+257 */ - Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); - if (*scan == *match) scan++; - - len = (MAX_MATCH - 1) - (int)(strend-scan); - scan = strend - (MAX_MATCH-1); - -#else /* UNALIGNED_OK */ - - if (match[best_len] != scan_end || - match[best_len-1] != scan_end1 || - *match != *scan || - *++match != scan[1]) continue; - - /* The check at best_len-1 can be removed because it will be made - * again later. (This heuristic is not always a win.) - * It is not necessary to compare scan[2] and match[2] since they - * are always equal when the other bytes match, given that - * the hash keys are equal and that HASH_BITS >= 8. - */ - scan += 2, match++; - Assert(*scan == *match, "match[2]?"); - - /* We check for insufficient lookahead only every 8th comparison; - * the 256th check will be made at strstart+258. 
- */ - do { - } while (*++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && - scan < strend); - - Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); - - len = MAX_MATCH - (int)(strend - scan); - scan = strend - MAX_MATCH; - -#endif /* UNALIGNED_OK */ - - if (len > best_len) { - s->match_start = cur_match; - best_len = len; - if (len >= nice_match) break; -#ifdef UNALIGNED_OK - scan_end = *(ushf*)(scan+best_len-1); -#else - scan_end1 = scan[best_len-1]; - scan_end = scan[best_len]; -#endif - } - } while ((cur_match = prev[cur_match & wmask]) > limit - && --chain_length != 0); - - if ((uInt)best_len <= s->lookahead) return (uInt)best_len; - return s->lookahead; -} -#endif /* ASMV */ - -#else /* FASTEST */ - -/* --------------------------------------------------------------------------- - * Optimized version for FASTEST only - */ -local uInt longest_match(s, cur_match) - deflate_state *s; - IPos cur_match; /* current match */ -{ - register Bytef *scan = s->window + s->strstart; /* current string */ - register Bytef *match; /* matched string */ - register int len; /* length of current match */ - register Bytef *strend = s->window + s->strstart + MAX_MATCH; - - /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. - * It is easy to get rid of this optimization if necessary. - */ - Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); - - Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); - - Assert(cur_match < s->strstart, "no future"); - - match = s->window + cur_match; - - /* Return failure if the match length is less than 2: - */ - if (match[0] != scan[0] || match[1] != scan[1]) return MIN_MATCH-1; - - /* The check at best_len-1 can be removed because it will be made - * again later. (This heuristic is not always a win.) - * It is not necessary to compare scan[2] and match[2] since they - * are always equal when the other bytes match, given that - * the hash keys are equal and that HASH_BITS >= 8. - */ - scan += 2, match += 2; - Assert(*scan == *match, "match[2]?"); - - /* We check for insufficient lookahead only every 8th comparison; - * the 256th check will be made at strstart+258. - */ - do { - } while (*++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && - scan < strend); - - Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); - - len = MAX_MATCH - (int)(strend - scan); - - if (len < MIN_MATCH) return MIN_MATCH - 1; - - s->match_start = cur_match; - return (uInt)len <= s->lookahead ? (uInt)len : s->lookahead; -} - -#endif /* FASTEST */ - -#ifdef ZLIB_DEBUG - -#define EQUAL 0 -/* result of memcmp for equal strings */ - -/* =========================================================================== - * Check that the match at match_start is indeed a match. 
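Editor's illustration of the chain walk driving longest_match() above: head[h] holds the most recent window position whose MIN_MATCH-byte prefix hashed to h, and prev[pos & w_mask] links each position to the previous one with the same hash, so candidates are visited newest-first (nearest first, which also yields the cheapest distance codes on ties). A standalone sketch, assuming zlib's 16-bit Pos type:

    /* Count how many candidate positions a hash chain would offer, capped the
     * same way max_chain_length caps the real search. */
    static unsigned chain_candidates(const unsigned short *head,
                                     const unsigned short *prev,
                                     unsigned w_mask, unsigned h,
                                     unsigned limit, unsigned max_chain)
    {
        unsigned n = 0;
        unsigned pos = head[h];
        while (pos > limit && max_chain-- != 0) {
            n++;                        /* each hop is one match candidate */
            pos = prev[pos & w_mask];
        }
        return n;
    }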
- */ -local void check_match(s, start, match, length) - deflate_state *s; - IPos start, match; - int length; -{ - /* check that the match is indeed a match */ - if (zmemcmp(s->window + match, - s->window + start, length) != EQUAL) { - fprintf(stderr, " start %u, match %u, length %d\n", - start, match, length); - do { - fprintf(stderr, "%c%c", s->window[match++], s->window[start++]); - } while (--length != 0); - z_error("invalid match"); - } - if (z_verbose > 1) { - fprintf(stderr,"\\[%d,%d]", start-match, length); - do { putc(s->window[start++], stderr); } while (--length != 0); - } -} -#else -# define check_match(s, start, match, length) -#endif /* ZLIB_DEBUG */ - -/* =========================================================================== - * Fill the window when the lookahead becomes insufficient. - * Updates strstart and lookahead. - * - * IN assertion: lookahead < MIN_LOOKAHEAD - * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD - * At least one byte has been read, or avail_in == 0; reads are - * performed for at least two bytes (required for the zip translate_eol - * option -- not supported here). - */ -local void fill_window(s) - deflate_state *s; -{ - unsigned n; - unsigned more; /* Amount of free space at the end of the window. */ - uInt wsize = s->w_size; - - Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead"); - - do { - more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart); - - /* Deal with !@#$% 64K limit: */ - if (sizeof(int) <= 2) { - if (more == 0 && s->strstart == 0 && s->lookahead == 0) { - more = wsize; - - } else if (more == (unsigned)(-1)) { - /* Very unlikely, but possible on 16 bit machine if - * strstart == 0 && lookahead == 1 (input done a byte at time) - */ - more--; - } - } - - /* If the window is almost full and there is insufficient lookahead, - * move the upper half to the lower one to make room in the upper half. - */ - if (s->strstart >= wsize+MAX_DIST(s)) { - - zmemcpy(s->window, s->window+wsize, (unsigned)wsize - more); - s->match_start -= wsize; - s->strstart -= wsize; /* we now have strstart >= MAX_DIST */ - s->block_start -= (long) wsize; - slide_hash(s); - more += wsize; - } - if (s->strm->avail_in == 0) break; - - /* If there was no sliding: - * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 && - * more == window_size - lookahead - strstart - * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1) - * => more >= window_size - 2*WSIZE + 2 - * In the BIG_MEM or MMAP case (not yet supported), - * window_size == input_size + MIN_LOOKAHEAD && - * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD. - * Otherwise, window_size == 2*WSIZE so more >= 2. - * If there was sliding, more >= WSIZE. So in all cases, more >= 2. 
- */ - Assert(more >= 2, "more < 2"); - - n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more); - s->lookahead += n; - - /* Initialize the hash value now that we have some input: */ - if (s->lookahead + s->insert >= MIN_MATCH) { - uInt str = s->strstart - s->insert; - s->ins_h = s->window[str]; - UPDATE_HASH(s, s->ins_h, s->window[str + 1]); -#if MIN_MATCH != 3 - Call UPDATE_HASH() MIN_MATCH-3 more times -#endif - while (s->insert) { - UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); -#ifndef FASTEST - s->prev[str & s->w_mask] = s->head[s->ins_h]; -#endif - s->head[s->ins_h] = (Pos)str; - str++; - s->insert--; - if (s->lookahead + s->insert < MIN_MATCH) - break; - } - } - /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage, - * but this is not important since only literal bytes will be emitted. - */ - - } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0); - - /* If the WIN_INIT bytes after the end of the current data have never been - * written, then zero those bytes in order to avoid memory check reports of - * the use of uninitialized (or uninitialised as Julian writes) bytes by - * the longest match routines. Update the high water mark for the next - * time through here. WIN_INIT is set to MAX_MATCH since the longest match - * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead. - */ - if (s->high_water < s->window_size) { - ulg curr = s->strstart + (ulg)(s->lookahead); - ulg init; - - if (s->high_water < curr) { - /* Previous high water mark below current data -- zero WIN_INIT - * bytes or up to end of window, whichever is less. - */ - init = s->window_size - curr; - if (init > WIN_INIT) - init = WIN_INIT; - zmemzero(s->window + curr, (unsigned)init); - s->high_water = curr + init; - } - else if (s->high_water < (ulg)curr + WIN_INIT) { - /* High water mark at or above current data, but below current data - * plus WIN_INIT -- zero out to current data plus WIN_INIT, or up - * to end of window, whichever is less. - */ - init = (ulg)curr + WIN_INIT - s->high_water; - if (init > s->window_size - s->high_water) - init = s->window_size - s->high_water; - zmemzero(s->window + s->high_water, (unsigned)init); - s->high_water += init; - } - } - - Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD, - "not enough room for search"); -} - -/* =========================================================================== - * Flush the current block, with given end-of-file flag. - * IN assertion: strstart is set to the end of the current match. - */ -#define FLUSH_BLOCK_ONLY(s, last) { \ - _tr_flush_block(s, (s->block_start >= 0L ? \ - (charf *)&s->window[(unsigned)s->block_start] : \ - (charf *)Z_NULL), \ - (ulg)((long)s->strstart - s->block_start), \ - (last)); \ - s->block_start = s->strstart; \ - flush_pending(s->strm); \ - Tracev((stderr,"[FLUSH]")); \ -} - -/* Same but force premature exit if necessary. */ -#define FLUSH_BLOCK(s, last) { \ - FLUSH_BLOCK_ONLY(s, last); \ - if (s->strm->avail_out == 0) return (last) ? finish_started : need_more; \ -} - -/* Maximum stored block length in deflate format (not including header). */ -#define MAX_STORED 65535 - -/* Minimum of a and b. */ -#define MIN(a, b) ((a) > (b) ? (b) : (a)) - -/* =========================================================================== - * Copy without compression as much as possible from the input stream, return - * the current block state. 
- *
- * In case deflateParams() is used to later switch to a non-zero compression
- * level, s->matches (otherwise unused when storing) keeps track of the number
- * of hash table slides to perform. If s->matches is 1, then one hash table
- * slide will be done when switching. If s->matches is 2, the maximum value
- * allowed here, then the hash table will be cleared, since two or more slides
- * is the same as a clear.
- *
- * deflate_stored() is written to minimize the number of times an input byte is
- * copied. It is most efficient with large input and output buffers, which
- * maximizes the opportunities to have a single copy from next_in to next_out.
- */
-local block_state deflate_stored(s, flush)
-    deflate_state *s;
-    int flush;
-{
-    /* Smallest worthy block size when not flushing or finishing. By default
-     * this is 32K. This can be as small as 507 bytes for memLevel == 1. For
-     * large input and output buffers, the stored block size will be larger.
-     */
-    unsigned min_block = MIN(s->pending_buf_size - 5, s->w_size);
-
-    /* Copy as many min_block or larger stored blocks directly to next_out as
-     * possible. If flushing, copy the remaining available input to next_out as
-     * stored blocks, if there is enough space.
-     */
-    unsigned len, left, have, last = 0;
-    unsigned used = s->strm->avail_in;
-    do {
-        /* Set len to the maximum size block that we can copy directly with the
-         * available input data and output space. Set left to how much of that
-         * would be copied from what's left in the window.
-         */
-        len = MAX_STORED;       /* maximum deflate stored block length */
-        have = (s->bi_valid + 42) >> 3;         /* number of header bytes */
-        if (s->strm->avail_out < have)          /* need room for header */
-            break;
-            /* maximum stored block length that will fit in avail_out: */
-        have = s->strm->avail_out - have;
-        left = s->strstart - s->block_start;    /* bytes left in window */
-        if (len > (ulg)left + s->strm->avail_in)
-            len = left + s->strm->avail_in;     /* limit len to the input */
-        if (len > have)
-            len = have;                         /* limit len to the output */
-
-        /* If the stored block would be less than min_block in length, or if
-         * unable to copy all of the available input when flushing, then try
-         * copying to the window and the pending buffer instead. Also don't
-         * write an empty block when flushing -- deflate() does that.
-         */
-        if (len < min_block && ((len == 0 && flush != Z_FINISH) ||
-                                flush == Z_NO_FLUSH ||
-                                len != left + s->strm->avail_in))
-            break;
-
-        /* Make a dummy stored block in pending to get the header bytes,
-         * including any pending bits. This also updates the debugging counts.
-         */
-        last = flush == Z_FINISH && len == left + s->strm->avail_in ? 1 : 0;
-        _tr_stored_block(s, (char *)0, 0L, last);
-
-        /* Replace the lengths in the dummy stored block with len. */
-        s->pending_buf[s->pending - 4] = len;
-        s->pending_buf[s->pending - 3] = len >> 8;
-        s->pending_buf[s->pending - 2] = ~len;
-        s->pending_buf[s->pending - 1] = ~len >> 8;
-
-        /* Write the stored block header bytes. */
-        flush_pending(s->strm);
-
-#ifdef ZLIB_DEBUG
-        /* Update debugging counts for the data about to be copied. */
-        s->compressed_len += len << 3;
-        s->bits_sent += len << 3;
-#endif
-
-        /* Copy uncompressed bytes from the window to next_out.
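Editor's worked example of the framing being patched into pending_buf above: per RFC 1951, a stored block carries LEN and NLEN = ~LEN as 16-bit little-endian values after the 3 header bits, which is exactly what the four bytes written at s->pending - 4 encode.

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
        unsigned len = 1000;                      /* example block length */
        uint8_t hdr[4] = { len & 0xff, (len >> 8) & 0xff,
                           ~len & 0xff, (~len >> 8) & 0xff };
        unsigned LEN  = hdr[0] | (hdr[1] << 8);
        unsigned NLEN = hdr[2] | (hdr[3] << 8);
        assert((LEN ^ NLEN) == 0xffff);           /* one's complement check */
        return 0;
    }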
*/ - if (left) { - if (left > len) - left = len; - zmemcpy(s->strm->next_out, s->window + s->block_start, left); - s->strm->next_out += left; - s->strm->avail_out -= left; - s->strm->total_out += left; - s->block_start += left; - len -= left; - } - - /* Copy uncompressed bytes directly from next_in to next_out, updating - * the check value. - */ - if (len) { - read_buf(s->strm, s->strm->next_out, len); - s->strm->next_out += len; - s->strm->avail_out -= len; - s->strm->total_out += len; - } - } while (last == 0); - - /* Update the sliding window with the last s->w_size bytes of the copied - * data, or append all of the copied data to the existing window if less - * than s->w_size bytes were copied. Also update the number of bytes to - * insert in the hash tables, in the event that deflateParams() switches to - * a non-zero compression level. - */ - used -= s->strm->avail_in; /* number of input bytes directly copied */ - if (used) { - /* If any input was used, then no unused input remains in the window, - * therefore s->block_start == s->strstart. - */ - if (used >= s->w_size) { /* supplant the previous history */ - s->matches = 2; /* clear hash */ - zmemcpy(s->window, s->strm->next_in - s->w_size, s->w_size); - s->strstart = s->w_size; - } - else { - if (s->window_size - s->strstart <= used) { - /* Slide the window down. */ - s->strstart -= s->w_size; - zmemcpy(s->window, s->window + s->w_size, s->strstart); - if (s->matches < 2) - s->matches++; /* add a pending slide_hash() */ - } - zmemcpy(s->window + s->strstart, s->strm->next_in - used, used); - s->strstart += used; - } - s->block_start = s->strstart; - s->insert += MIN(used, s->w_size - s->insert); - } - if (s->high_water < s->strstart) - s->high_water = s->strstart; - - /* If the last block was written to next_out, then done. */ - if (last) - return finish_done; - - /* If flushing and all input has been consumed, then done. */ - if (flush != Z_NO_FLUSH && flush != Z_FINISH && - s->strm->avail_in == 0 && (long)s->strstart == s->block_start) - return block_done; - - /* Fill the window with any remaining input. */ - have = s->window_size - s->strstart - 1; - if (s->strm->avail_in > have && s->block_start >= (long)s->w_size) { - /* Slide the window down. */ - s->block_start -= s->w_size; - s->strstart -= s->w_size; - zmemcpy(s->window, s->window + s->w_size, s->strstart); - if (s->matches < 2) - s->matches++; /* add a pending slide_hash() */ - have += s->w_size; /* more space now */ - } - if (have > s->strm->avail_in) - have = s->strm->avail_in; - if (have) { - read_buf(s->strm, s->window + s->strstart, have); - s->strstart += have; - } - if (s->high_water < s->strstart) - s->high_water = s->strstart; - - /* There was not enough avail_out to write a complete worthy or flushed - * stored block to next_out. Write a stored block to pending instead, if we - * have enough input for a worthy block, or if flushing and there is enough - * room for the remaining input as a stored block in the pending buffer. - */ - have = (s->bi_valid + 42) >> 3; /* number of header bytes */ - /* maximum stored block length that will fit in pending: */ - have = MIN(s->pending_buf_size - have, MAX_STORED); - min_block = MIN(have, s->w_size); - left = s->strstart - s->block_start; - if (left >= min_block || - ((left || flush == Z_FINISH) && flush != Z_NO_FLUSH && - s->strm->avail_in == 0 && left <= have)) { - len = MIN(left, have); - last = flush == Z_FINISH && s->strm->avail_in == 0 && - len == left ? 
1 : 0; - _tr_stored_block(s, (charf *)s->window + s->block_start, len, last); - s->block_start += len; - flush_pending(s->strm); - } - - /* We've done all we can with the available input and output. */ - return last ? finish_started : need_more; -} - -/* =========================================================================== - * Compress as much as possible from the input stream, return the current - * block state. - * This function does not perform lazy evaluation of matches and inserts - * new strings in the dictionary only for unmatched strings or for short - * matches. It is used only for the fast compression options. - */ -local block_state deflate_fast(s, flush) - deflate_state *s; - int flush; -{ - IPos hash_head; /* head of the hash chain */ - int bflush; /* set if current block must be flushed */ - - for (;;) { - /* Make sure that we always have enough lookahead, except - * at the end of the input file. We need MAX_MATCH bytes - * for the next match, plus MIN_MATCH bytes to insert the - * string following the next match. - */ - if (s->lookahead < MIN_LOOKAHEAD) { - fill_window(s); - if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { - return need_more; - } - if (s->lookahead == 0) break; /* flush the current block */ - } - - /* Insert the string window[strstart .. strstart+2] in the - * dictionary, and set hash_head to the head of the hash chain: - */ - hash_head = NIL; - if (s->lookahead >= MIN_MATCH) { - INSERT_STRING(s, s->strstart, hash_head); - } - - /* Find the longest match, discarding those <= prev_length. - * At this point we have always match_length < MIN_MATCH - */ - if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) { - /* To simplify the code, we prevent matches with the string - * of window index 0 (in particular we have to avoid a match - * of the string with itself at the start of the input file). - */ - s->match_length = longest_match (s, hash_head); - /* longest_match() sets match_start */ - } - if (s->match_length >= MIN_MATCH) { - check_match(s, s->strstart, s->match_start, s->match_length); - - _tr_tally_dist(s, s->strstart - s->match_start, - s->match_length - MIN_MATCH, bflush); - - s->lookahead -= s->match_length; - - /* Insert new strings in the hash table only if the match length - * is not too large. This saves time but degrades compression. - */ -#ifndef FASTEST - if (s->match_length <= s->max_insert_length && - s->lookahead >= MIN_MATCH) { - s->match_length--; /* string at strstart already in table */ - do { - s->strstart++; - INSERT_STRING(s, s->strstart, hash_head); - /* strstart never exceeds WSIZE-MAX_MATCH, so there are - * always MIN_MATCH bytes ahead. - */ - } while (--s->match_length != 0); - s->strstart++; - } else -#endif - { - s->strstart += s->match_length; - s->match_length = 0; - s->ins_h = s->window[s->strstart]; - UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); -#if MIN_MATCH != 3 - Call UPDATE_HASH() MIN_MATCH-3 more times -#endif - /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not - * matter since it will be recomputed at next deflate call. - */ - } - } else { - /* No match, output a literal byte */ - Tracevv((stderr,"%c", s->window[s->strstart])); - _tr_tally_lit (s, s->window[s->strstart], bflush); - s->lookahead--; - s->strstart++; - } - if (bflush) FLUSH_BLOCK(s, 0); - } - s->insert = s->strstart < MIN_MATCH-1 ? 
s->strstart : MIN_MATCH-1; - if (flush == Z_FINISH) { - FLUSH_BLOCK(s, 1); - return finish_done; - } - if (s->last_lit) - FLUSH_BLOCK(s, 0); - return block_done; -} - -#ifndef FASTEST -/* =========================================================================== - * Same as above, but achieves better compression. We use a lazy - * evaluation for matches: a match is finally adopted only if there is - * no better match at the next window position. - */ -local block_state deflate_slow(s, flush) - deflate_state *s; - int flush; -{ - IPos hash_head; /* head of hash chain */ - int bflush; /* set if current block must be flushed */ - - /* Process the input block. */ - for (;;) { - /* Make sure that we always have enough lookahead, except - * at the end of the input file. We need MAX_MATCH bytes - * for the next match, plus MIN_MATCH bytes to insert the - * string following the next match. - */ - if (s->lookahead < MIN_LOOKAHEAD) { - fill_window(s); - if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { - return need_more; - } - if (s->lookahead == 0) break; /* flush the current block */ - } - - /* Insert the string window[strstart .. strstart+2] in the - * dictionary, and set hash_head to the head of the hash chain: - */ - hash_head = NIL; - if (s->lookahead >= MIN_MATCH) { - INSERT_STRING(s, s->strstart, hash_head); - } - - /* Find the longest match, discarding those <= prev_length. - */ - s->prev_length = s->match_length, s->prev_match = s->match_start; - s->match_length = MIN_MATCH-1; - - if (hash_head != NIL && s->prev_length < s->max_lazy_match && - s->strstart - hash_head <= MAX_DIST(s)) { - /* To simplify the code, we prevent matches with the string - * of window index 0 (in particular we have to avoid a match - * of the string with itself at the start of the input file). - */ - s->match_length = longest_match (s, hash_head); - /* longest_match() sets match_start */ - - if (s->match_length <= 5 && (s->strategy == Z_FILTERED -#if TOO_FAR <= 32767 - || (s->match_length == MIN_MATCH && - s->strstart - s->match_start > TOO_FAR) -#endif - )) { - - /* If prev_match is also MIN_MATCH, match_start is garbage - * but we will ignore the current match anyway. - */ - s->match_length = MIN_MATCH-1; - } - } - /* If there was a match at the previous step and the current - * match is not better, output the previous match: - */ - if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) { - uInt max_insert = s->strstart + s->lookahead - MIN_MATCH; - /* Do not insert strings in hash table beyond this. */ - - check_match(s, s->strstart-1, s->prev_match, s->prev_length); - - _tr_tally_dist(s, s->strstart -1 - s->prev_match, - s->prev_length - MIN_MATCH, bflush); - - /* Insert in hash table all strings up to the end of the match. - * strstart-1 and strstart are already inserted. If there is not - * enough lookahead, the last two strings are not inserted in - * the hash table. - */ - s->lookahead -= s->prev_length-1; - s->prev_length -= 2; - do { - if (++s->strstart <= max_insert) { - INSERT_STRING(s, s->strstart, hash_head); - } - } while (--s->prev_length != 0); - s->match_available = 0; - s->match_length = MIN_MATCH-1; - s->strstart++; - - if (bflush) FLUSH_BLOCK(s, 0); - - } else if (s->match_available) { - /* If there was no match at the previous position, output a - * single literal. If there was a match but the current match - * is longer, truncate the previous match to a single literal. 
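Editor's toy distillation of the lazy decision in deflate_slow() above (adopt_previous_match is hypothetical, not library code): with prev_len the match length found at position p - 1 and cur_len the one found at p, the older match is adopted only when the newer one is not better; otherwise p - 1 degrades to a single literal and the decision repeats one byte later.

    /* MIN_MATCH in zlib is 3. */
    static int adopt_previous_match(unsigned prev_len, unsigned cur_len)
    {
        return prev_len >= 3 && cur_len <= prev_len;
    }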
- */ - Tracevv((stderr,"%c", s->window[s->strstart-1])); - _tr_tally_lit(s, s->window[s->strstart-1], bflush); - if (bflush) { - FLUSH_BLOCK_ONLY(s, 0); - } - s->strstart++; - s->lookahead--; - if (s->strm->avail_out == 0) return need_more; - } else { - /* There is no previous match to compare with, wait for - * the next step to decide. - */ - s->match_available = 1; - s->strstart++; - s->lookahead--; - } - } - Assert (flush != Z_NO_FLUSH, "no flush?"); - if (s->match_available) { - Tracevv((stderr,"%c", s->window[s->strstart-1])); - _tr_tally_lit(s, s->window[s->strstart-1], bflush); - s->match_available = 0; - } - s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1; - if (flush == Z_FINISH) { - FLUSH_BLOCK(s, 1); - return finish_done; - } - if (s->last_lit) - FLUSH_BLOCK(s, 0); - return block_done; -} -#endif /* FASTEST */ - -/* =========================================================================== - * For Z_RLE, simply look for runs of bytes, generate matches only of distance - * one. Do not maintain a hash table. (It will be regenerated if this run of - * deflate switches away from Z_RLE.) - */ -local block_state deflate_rle(s, flush) - deflate_state *s; - int flush; -{ - int bflush; /* set if current block must be flushed */ - uInt prev; /* byte at distance one to match */ - Bytef *scan, *strend; /* scan goes up to strend for length of run */ - - for (;;) { - /* Make sure that we always have enough lookahead, except - * at the end of the input file. We need MAX_MATCH bytes - * for the longest run, plus one for the unrolled loop. - */ - if (s->lookahead <= MAX_MATCH) { - fill_window(s); - if (s->lookahead <= MAX_MATCH && flush == Z_NO_FLUSH) { - return need_more; - } - if (s->lookahead == 0) break; /* flush the current block */ - } - - /* See how many times the previous byte repeats */ - s->match_length = 0; - if (s->lookahead >= MIN_MATCH && s->strstart > 0) { - scan = s->window + s->strstart - 1; - prev = *scan; - if (prev == *++scan && prev == *++scan && prev == *++scan) { - strend = s->window + s->strstart + MAX_MATCH; - do { - } while (prev == *++scan && prev == *++scan && - prev == *++scan && prev == *++scan && - prev == *++scan && prev == *++scan && - prev == *++scan && prev == *++scan && - scan < strend); - s->match_length = MAX_MATCH - (uInt)(strend - scan); - if (s->match_length > s->lookahead) - s->match_length = s->lookahead; - } - Assert(scan <= s->window+(uInt)(s->window_size-1), "wild scan"); - } - - /* Emit match if have run of MIN_MATCH or longer, else emit literal */ - if (s->match_length >= MIN_MATCH) { - check_match(s, s->strstart, s->strstart - 1, s->match_length); - - _tr_tally_dist(s, 1, s->match_length - MIN_MATCH, bflush); - - s->lookahead -= s->match_length; - s->strstart += s->match_length; - s->match_length = 0; - } else { - /* No match, output a literal byte */ - Tracevv((stderr,"%c", s->window[s->strstart])); - _tr_tally_lit (s, s->window[s->strstart], bflush); - s->lookahead--; - s->strstart++; - } - if (bflush) FLUSH_BLOCK(s, 0); - } - s->insert = 0; - if (flush == Z_FINISH) { - FLUSH_BLOCK(s, 1); - return finish_done; - } - if (s->last_lit) - FLUSH_BLOCK(s, 0); - return block_done; -} - -/* =========================================================================== - * For Z_HUFFMAN_ONLY, do not look for matches. Do not maintain a hash table. - * (It will be regenerated if this run of deflate switches away from Huffman.) 
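Editor's note: the specialized compressors above are reached through the public strategy parameter: Z_RLE lands in deflate_rle(), Z_HUFFMAN_ONLY in deflate_huff(), and level 0 in deflate_stored(). A sketch (init_rle is a hypothetical helper; 15 and 8 are the library's default windowBits and memLevel):

    #include <zlib.h>

    static int init_rle(z_stream *strm)
    {
        return deflateInit2(strm, Z_BEST_SPEED, Z_DEFLATED, 15, 8, Z_RLE);
    }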
- */ -local block_state deflate_huff(s, flush) - deflate_state *s; - int flush; -{ - int bflush; /* set if current block must be flushed */ - - for (;;) { - /* Make sure that we have a literal to write. */ - if (s->lookahead == 0) { - fill_window(s); - if (s->lookahead == 0) { - if (flush == Z_NO_FLUSH) - return need_more; - break; /* flush the current block */ - } - } - - /* Output a literal byte */ - s->match_length = 0; - Tracevv((stderr,"%c", s->window[s->strstart])); - _tr_tally_lit (s, s->window[s->strstart], bflush); - s->lookahead--; - s->strstart++; - if (bflush) FLUSH_BLOCK(s, 0); - } - s->insert = 0; - if (flush == Z_FINISH) { - FLUSH_BLOCK(s, 1); - return finish_done; - } - if (s->last_lit) - FLUSH_BLOCK(s, 0); - return block_done; -} diff --git a/base/poco/Foundation/src/deflate.h b/base/poco/Foundation/src/deflate.h deleted file mode 100644 index 5dd6e41bb29..00000000000 --- a/base/poco/Foundation/src/deflate.h +++ /dev/null @@ -1,355 +0,0 @@ -/* deflate.h -- internal compression state - * Copyright (C) 1995-2016 Jean-loup Gailly - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. - */ - -/* @(#) $Id$ */ - -#ifndef DEFLATE_H -#define DEFLATE_H - -#include "zutil.h" - -/* define NO_GZIP when compiling if you want to disable gzip header and - trailer creation by deflate(). NO_GZIP would be used to avoid linking in - the crc code when it is not needed. For shared libraries, gzip encoding - should be left enabled. */ -#ifndef NO_GZIP -# define GZIP -#endif - -/* =========================================================================== - * Internal compression state. - */ - -#define LENGTH_CODES 29 -/* number of length codes, not counting the special END_BLOCK code */ - -#define LITERALS 256 -/* number of literal bytes 0..255 */ - -#define L_CODES (LITERALS + 1 + LENGTH_CODES) -/* number of Literal or Length codes, including the END_BLOCK code */ - -#define D_CODES 30 -/* number of distance codes */ - -#define BL_CODES 19 -/* number of codes used to transfer the bit lengths */ - -#define HEAP_SIZE (2 * L_CODES + 1) -/* maximum heap size */ - -#define MAX_BITS 15 -/* All codes must not exceed MAX_BITS bits */ - -#define Buf_size 16 -/* size of bit buffer in bi_buf */ - -#define INIT_STATE 42 /* zlib header -> BUSY_STATE */ -#ifdef GZIP -# define GZIP_STATE 57 /* gzip header -> BUSY_STATE | EXTRA_STATE */ -#endif -#define EXTRA_STATE 69 /* gzip extra block -> NAME_STATE */ -#define NAME_STATE 73 /* gzip file name -> COMMENT_STATE */ -#define COMMENT_STATE 91 /* gzip comment -> HCRC_STATE */ -#define HCRC_STATE 103 /* gzip header CRC -> BUSY_STATE */ -#define BUSY_STATE 113 /* deflate -> FINISH_STATE */ -#define FINISH_STATE 666 /* stream complete */ -/* Stream status */ - - -/* Data structure describing a single value and its code string. 
*/ -typedef struct ct_data_s -{ - union - { - ush freq; /* frequency count */ - ush code; /* bit string */ - } fc; - union - { - ush dad; /* father node in Huffman tree */ - ush len; /* length of bit string */ - } dl; -} FAR ct_data; - -#define Freq fc.freq -#define Code fc.code -#define Dad dl.dad -#define Len dl.len - -typedef struct static_tree_desc_s static_tree_desc; - -typedef struct tree_desc_s -{ - ct_data * dyn_tree; /* the dynamic tree */ - int max_code; /* largest code with non zero frequency */ - const static_tree_desc * stat_desc; /* the corresponding static tree */ -} FAR tree_desc; - -typedef ush Pos; -typedef Pos FAR Posf; -typedef unsigned IPos; - -/* A Pos is an index in the character window. We use short instead of int to - * save space in the various tables. IPos is used only for parameter passing. - */ - -typedef struct internal_state -{ - z_streamp strm; /* pointer back to this zlib stream */ - int status; /* as the name implies */ - Bytef * pending_buf; /* output still pending */ - ulg pending_buf_size; /* size of pending_buf */ - Bytef * pending_out; /* next pending byte to output to the stream */ - ulg pending; /* nb of bytes in the pending buffer */ - int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ - gz_headerp gzhead; /* gzip header information to write */ - ulg gzindex; /* where in extra, name, or comment */ - Byte method; /* can only be DEFLATED */ - int last_flush; /* value of flush param for previous deflate call */ - - /* used by deflate.c: */ - - uInt w_size; /* LZ77 window size (32K by default) */ - uInt w_bits; /* log2(w_size) (8..16) */ - uInt w_mask; /* w_size - 1 */ - - Bytef * window; - /* Sliding window. Input bytes are read into the second half of the window, - * and move to the first half later to keep a dictionary of at least wSize - * bytes. With this organization, matches are limited to a distance of - * wSize-MAX_MATCH bytes, but this ensures that IO is always - * performed with a length multiple of the block size. Also, it limits - * the window size to 64K, which is quite useful on MS-DOS. - * To do: use the user input buffer as sliding window. - */ - - ulg window_size; - /* Actual size of window: 2*wSize, except when the user input buffer - * is directly used as sliding window. - */ - - Posf * prev; - /* Link to older string with same hash index. To limit the size of this - * array to 64K, this link is maintained only for the last 32K strings. - * An index in this array is thus a window index modulo 32K. - */ - - Posf * head; /* Heads of the hash chains or NIL. */ - - uInt ins_h; /* hash index of string to be inserted */ - uInt hash_size; /* number of elements in hash table */ - uInt hash_bits; /* log2(hash_size) */ - uInt hash_mask; /* hash_size-1 */ - - uInt hash_shift; - /* Number of bits by which ins_h must be shifted at each input - * step. It must be such that after MIN_MATCH steps, the oldest - * byte no longer takes part in the hash key, that is: - * hash_shift * MIN_MATCH >= hash_bits - */ - - long block_start; - /* Window position at the beginning of the current output block. Gets - * negative when the window is moved backwards. - */ - - uInt match_length; /* length of best match */ - IPos prev_match; /* previous match */ - int match_available; /* set if previous match exists */ - uInt strstart; /* start of string to insert */ - uInt match_start; /* start of matching string */ - uInt lookahead; /* number of valid bytes ahead in window */ - - uInt prev_length; - /* Length of the best match at previous step. 
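Editor's note as code: ct_data packs one Huffman tree node into two 16-bit unions (frequency or code, parent or bit length), which is what keeps the three per-state tree arrays compact. A quick check under typical ABIs (the mirror type below is hypothetical, not the library's):

    #include <assert.h>

    typedef struct {
        union { unsigned short freq; unsigned short code; } fc;
        union { unsigned short dad;  unsigned short len;  } dl;
    } ct_data_like;

    static_assert(sizeof(ct_data_like) == 4, "two 16-bit unions, no padding");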
Matches not greater than this - * are discarded. This is used in the lazy match evaluation. - */ - - uInt max_chain_length; - /* To speed up deflation, hash chains are never searched beyond this - * length. A higher limit improves compression ratio but degrades the - * speed. - */ - - uInt max_lazy_match; - /* Attempt to find a better match only when the current match is strictly - * smaller than this value. This mechanism is used only for compression - * levels >= 4. - */ -#define max_insert_length max_lazy_match - /* Insert new strings in the hash table only if the match length is not - * greater than this length. This saves time but degrades compression. - * max_insert_length is used only for compression levels <= 3. - */ - - int level; /* compression level (1..9) */ - int strategy; /* favor or force Huffman coding*/ - - uInt good_match; - /* Use a faster search when the previous match is longer than this */ - - int nice_match; /* Stop searching when current match exceeds this */ - - /* used by trees.c: */ - /* Didn't use ct_data typedef below to suppress compiler warning */ - struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ - struct ct_data_s dyn_dtree[2 * D_CODES + 1]; /* distance tree */ - struct ct_data_s bl_tree[2 * BL_CODES + 1]; /* Huffman tree for bit lengths */ - - struct tree_desc_s l_desc; /* desc. for literal tree */ - struct tree_desc_s d_desc; /* desc. for distance tree */ - struct tree_desc_s bl_desc; /* desc. for bit length tree */ - - ush bl_count[MAX_BITS + 1]; - /* number of codes at each bit length for an optimal tree */ - - int heap[2 * L_CODES + 1]; /* heap used to build the Huffman trees */ - int heap_len; /* number of elements in the heap */ - int heap_max; /* element of largest frequency */ - /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. - * The same heap array is used to build all trees. - */ - - uch depth[2 * L_CODES + 1]; - /* Depth of each subtree used as tie breaker for trees of equal frequency - */ - - uchf * l_buf; /* buffer for literals or lengths */ - - uInt lit_bufsize; - /* Size of match buffer for literals/lengths. There are 4 reasons for - * limiting lit_bufsize to 64K: - * - frequencies can be kept in 16 bit counters - * - if compression is not successful for the first block, all input - * data is still in the window so we can still emit a stored block even - * when input comes from standard input. (This can also be done for - * all blocks if lit_bufsize is not greater than 32K.) - * - if compression is not successful for a file smaller than 64K, we can - * even emit a stored file instead of a stored block (saving 5 bytes). - * This is applicable only for zip (not gzip or zlib). - * - creating new Huffman trees less frequently may not provide fast - * adaptation to changes in the input data statistics. (Take for - * example a binary file with poorly compressible code followed by - * a highly compressible string table.) Smaller buffer sizes give - * fast adaptation but have of course the overhead of transmitting - * trees more frequently. - * - I can't count above 4 - */ - - uInt last_lit; /* running index in l_buf */ - - ushf * d_buf; - /* Buffer for distances. To simplify the code, d_buf and l_buf have - * the same number of elements. To use different lengths, an extra flag - * array would be necessary. 
- */ - - ulg opt_len; /* bit length of current block with optimal trees */ - ulg static_len; /* bit length of current block with static trees */ - uInt matches; /* number of string matches in current block */ - uInt insert; /* bytes at end of window left to insert */ - -#ifdef ZLIB_DEBUG - ulg compressed_len; /* total bit length of compressed file mod 2^32 */ - ulg bits_sent; /* bit length of compressed data sent mod 2^32 */ -#endif - - ush bi_buf; - /* Output buffer. bits are inserted starting at the bottom (least - * significant bits). - */ - int bi_valid; - /* Number of valid bits in bi_buf. All bits above the last valid bit - * are always zero. - */ - - ulg high_water; - /* High water mark offset in window for initialized bytes -- bytes above - * this are set to zero in order to avoid memory check warnings when - * longest match routines access bytes past the input. This is then - * updated to the new high water mark. - */ - -} FAR deflate_state; - -/* Output a byte on the stream. - * IN assertion: there is enough room in pending_buf. - */ -#define put_byte(s, c) \ - { \ - s->pending_buf[s->pending++] = (Bytef)(c); \ - } - - -#define MIN_LOOKAHEAD (MAX_MATCH + MIN_MATCH + 1) -/* Minimum amount of lookahead, except at the end of the input file. - * See deflate.c for comments about the MIN_MATCH+1. - */ - -#define MAX_DIST(s) ((s)->w_size - MIN_LOOKAHEAD) -/* In order to simplify the code, particularly on 16 bit machines, match - * distances are limited to MAX_DIST instead of WSIZE. - */ - -#define WIN_INIT MAX_MATCH -/* Number of bytes after end of data in window to initialize in order to avoid - memory checker errors from longest match routines */ - -/* in trees.c */ -void ZLIB_INTERNAL _tr_init OF((deflate_state * s)); -int ZLIB_INTERNAL _tr_tally OF((deflate_state * s, unsigned dist, unsigned lc)); -void ZLIB_INTERNAL _tr_flush_block OF((deflate_state * s, charf * buf, ulg stored_len, int last)); -void ZLIB_INTERNAL _tr_flush_bits OF((deflate_state * s)); -void ZLIB_INTERNAL _tr_align OF((deflate_state * s)); -void ZLIB_INTERNAL _tr_stored_block OF((deflate_state * s, charf * buf, ulg stored_len, int last)); - -#define d_code(dist) ((dist) < 256 ? _dist_code[dist] : _dist_code[256 + ((dist) >> 7)]) -/* Mapping from a distance to a distance code. dist is the distance - 1 and - * must not have side effects. _dist_code[256] and _dist_code[257] are never - * used. 
- */ - -#ifndef ZLIB_DEBUG -/* Inline versions of _tr_tally for speed: */ - -# if defined(GEN_TREES_H) || !defined(STDC) -extern uch ZLIB_INTERNAL _length_code[]; -extern uch ZLIB_INTERNAL _dist_code[]; -# else -extern const uch ZLIB_INTERNAL _length_code[]; -extern const uch ZLIB_INTERNAL _dist_code[]; -# endif - -# define _tr_tally_lit(s, c, flush) \ - { \ - uch cc = (c); \ - s->d_buf[s->last_lit] = 0; \ - s->l_buf[s->last_lit++] = cc; \ - s->dyn_ltree[cc].Freq++; \ - flush = (s->last_lit == s->lit_bufsize - 1); \ - } -# define _tr_tally_dist(s, distance, length, flush) \ - { \ - uch len = (uch)(length); \ - ush dist = (ush)(distance); \ - s->d_buf[s->last_lit] = dist; \ - s->l_buf[s->last_lit++] = len; \ - dist--; \ - s->dyn_ltree[_length_code[len] + LITERALS + 1].Freq++; \ - s->dyn_dtree[d_code(dist)].Freq++; \ - flush = (s->last_lit == s->lit_bufsize - 1); \ - } -#else -# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) -# define _tr_tally_dist(s, distance, length, flush) flush = _tr_tally(s, distance, length) -#endif - -#endif /* DEFLATE_H */ diff --git a/base/poco/Foundation/src/diy-fp.cc b/base/poco/Foundation/src/diy-fp.cc deleted file mode 100644 index ddd1891b168..00000000000 --- a/base/poco/Foundation/src/diy-fp.cc +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -#include "diy-fp.h" -#include "utils.h" - -namespace double_conversion { - -void DiyFp::Multiply(const DiyFp& other) { - // Simply "emulates" a 128 bit multiplication. - // However: the resulting number only contains 64 bits. The least - // significant 64 bits are only used for rounding the most significant 64 - // bits. - const uint64_t kM32 = 0xFFFFFFFFU; - uint64_t a = f_ >> 32; - uint64_t b = f_ & kM32; - uint64_t c = other.f_ >> 32; - uint64_t d = other.f_ & kM32; - uint64_t ac = a * c; - uint64_t bc = b * c; - uint64_t ad = a * d; - uint64_t bd = b * d; - uint64_t tmp = (bd >> 32) + (ad & kM32) + (bc & kM32); - // By adding 1U << 31 to tmp we round the final result. 
- // Halfway cases will be round up. - tmp += 1U << 31; - uint64_t result_f = ac + (ad >> 32) + (bc >> 32) + (tmp >> 32); - e_ += other.e_ + 64; - f_ = result_f; -} - -} // namespace double_conversion diff --git a/base/poco/Foundation/src/diy-fp.h b/base/poco/Foundation/src/diy-fp.h deleted file mode 100644 index 03581bc16ae..00000000000 --- a/base/poco/Foundation/src/diy-fp.h +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef DOUBLE_CONVERSION_DIY_FP_H_ -#define DOUBLE_CONVERSION_DIY_FP_H_ - -#include "utils.h" - -namespace double_conversion -{ - -// This "Do It Yourself Floating Point" class implements a floating-point number -// with a uint64 significand and an int exponent. Normalized DiyFp numbers will -// have the most significant bit of the significand set. -// Multiplication and Subtraction do not normalize their results. -// DiyFp are not designed to contain special doubles (NaN and Infinity). -class DiyFp -{ -public: - static const int kSignificandSize = 64; - - DiyFp() : f_(0), e_(0) { } - DiyFp(uint64_t f, int e) : f_(f), e_(e) { } - - // this = this - other. - // The exponents of both numbers must be the same and the significand of this - // must be bigger than the significand of other. - // The result will not be normalized. - void Subtract(const DiyFp & other) - { - ASSERT(e_ == other.e_); - ASSERT(f_ >= other.f_); - f_ -= other.f_; - } - - // Returns a - b. - // The exponents of both numbers must be the same and this must be bigger - // than other. The result will not be normalized. - static DiyFp Minus(const DiyFp & a, const DiyFp & b) - { - DiyFp result = a; - result.Subtract(b); - return result; - } - - - // this = this * other. 
- void Multiply(const DiyFp & other); - - // returns a * b; - static DiyFp Times(const DiyFp & a, const DiyFp & b) - { - DiyFp result = a; - result.Multiply(b); - return result; - } - - void Normalize() - { - ASSERT(f_ != 0); - uint64_t f = f_; - int e = e_; - - // This method is mainly called for normalizing boundaries. In general - // boundaries need to be shifted by 10 bits. We thus optimize for this case. - const uint64_t k10MSBits = UINT64_2PART_C(0xFFC00000, 00000000); - while ((f & k10MSBits) == 0) - { - f <<= 10; - e -= 10; - } - while ((f & kUint64MSB) == 0) - { - f <<= 1; - e--; - } - f_ = f; - e_ = e; - } - - static DiyFp Normalize(const DiyFp & a) - { - DiyFp result = a; - result.Normalize(); - return result; - } - - uint64_t f() const { return f_; } - int e() const { return e_; } - - void set_f(uint64_t new_value) { f_ = new_value; } - void set_e(int new_value) { e_ = new_value; } - -private: - static const uint64_t kUint64MSB = UINT64_2PART_C(0x80000000, 00000000); - - uint64_t f_; - int e_; -}; - -} // namespace double_conversion - -#endif // DOUBLE_CONVERSION_DIY_FP_H_ diff --git a/base/poco/Foundation/src/double-conversion.cc b/base/poco/Foundation/src/double-conversion.cc deleted file mode 100644 index 39ad2461e9b..00000000000 --- a/base/poco/Foundation/src/double-conversion.cc +++ /dev/null @@ -1,911 +0,0 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
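For readers skimming the deleted diy-fp sources above: the 64x64-bit significand multiplication that DiyFp::Multiply emulates with 32-bit halves can be sketched as a standalone function. This is an illustrative reconstruction using only standard types; MultiplyHighRounded is a hypothetical name, not part of the deleted sources.

#include <cstdint>

// Returns the high 64 bits of the 128-bit product x * y, rounded to
// nearest (halfway cases rounded up), mirroring the deleted
// DiyFp::Multiply arithmetic.
static uint64_t MultiplyHighRounded(uint64_t x, uint64_t y)
{
    const uint64_t kM32 = 0xFFFFFFFFU;
    uint64_t a = x >> 32, b = x & kM32;   // split x into 32-bit halves
    uint64_t c = y >> 32, d = y & kM32;   // split y into 32-bit halves
    uint64_t ac = a * c, bc = b * c, ad = a * d, bd = b * d;
    // Sum the middle partial products together with the carry out of the
    // low word; adding 1 << 31 rounds the discarded low half.
    uint64_t tmp = (bd >> 32) + (ad & kM32) + (bc & kM32) + (1ULL << 31);
    return ac + (ad >> 32) + (bc >> 32) + (tmp >> 32);
}

In the deleted class, the exponents are then simply added plus 64 (e_ += other.e_ + 64) to account for the dropped low 64 bits of the product.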
-
-#include <limits.h>
-#include <math.h>
-
-#include "double-conversion.h"
-
-#include "bignum-dtoa.h"
-#include "fast-dtoa.h"
-#include "fixed-dtoa.h"
-#include "ieee.h"
-#include "strtod.h"
-#include "utils.h"
-
-namespace double_conversion {
-
-const DoubleToStringConverter& DoubleToStringConverter::EcmaScriptConverter() {
-  int flags = UNIQUE_ZERO | EMIT_POSITIVE_EXPONENT_SIGN;
-  static DoubleToStringConverter converter(flags,
-                                           "Infinity",
-                                           "NaN",
-                                           'e',
-                                           -6, 21,
-                                           6, 0);
-  return converter;
-}
-
-
-bool DoubleToStringConverter::HandleSpecialValues(
-    double value,
-    StringBuilder* result_builder) const {
-  Double double_inspect(value);
-  if (double_inspect.IsInfinite()) {
-    if (infinity_symbol_ == NULL) return false;
-    if (value < 0) {
-      result_builder->AddCharacter('-');
-    }
-    result_builder->AddString(infinity_symbol_);
-    return true;
-  }
-  if (double_inspect.IsNan()) {
-    if (nan_symbol_ == NULL) return false;
-    result_builder->AddString(nan_symbol_);
-    return true;
-  }
-  return false;
-}
-
-
-void DoubleToStringConverter::CreateExponentialRepresentation(
-    const char* decimal_digits,
-    int length,
-    int exponent,
-    StringBuilder* result_builder) const {
-  ASSERT(length != 0);
-  result_builder->AddCharacter(decimal_digits[0]);
-  if (length != 1) {
-    result_builder->AddCharacter('.');
-    result_builder->AddSubstring(&decimal_digits[1], length-1);
-  }
-  result_builder->AddCharacter(exponent_character_);
-  if (exponent < 0) {
-    result_builder->AddCharacter('-');
-    exponent = -exponent;
-  } else {
-    if ((flags_ & EMIT_POSITIVE_EXPONENT_SIGN) != 0) {
-      result_builder->AddCharacter('+');
-    }
-  }
-  if (exponent == 0) {
-    result_builder->AddCharacter('0');
-    return;
-  }
-  ASSERT(exponent < 1e4);
-  const int kMaxExponentLength = 5;
-  char buffer[kMaxExponentLength + 1];
-  buffer[kMaxExponentLength] = '\0';
-  int first_char_pos = kMaxExponentLength;
-  while (exponent > 0) {
-    buffer[--first_char_pos] = '0' + (exponent % 10);
-    exponent /= 10;
-  }
-  result_builder->AddSubstring(&buffer[first_char_pos],
-                               kMaxExponentLength - first_char_pos);
-}
-
-
-void DoubleToStringConverter::CreateDecimalRepresentation(
-    const char* decimal_digits,
-    int length,
-    int decimal_point,
-    int digits_after_point,
-    StringBuilder* result_builder) const {
-  // Create a representation that is padded with zeros if needed.
-  if (decimal_point <= 0) {
-    // "0.00000decimal_rep".
- result_builder->AddCharacter('0'); - if (digits_after_point > 0) { - result_builder->AddCharacter('.'); - result_builder->AddPadding('0', -decimal_point); - ASSERT(length <= digits_after_point - (-decimal_point)); - result_builder->AddSubstring(decimal_digits, length); - int remaining_digits = digits_after_point - (-decimal_point) - length; - result_builder->AddPadding('0', remaining_digits); - } - } else if (decimal_point >= length) { - // "decimal_rep0000.00000" or "decimal_rep.0000" - result_builder->AddSubstring(decimal_digits, length); - result_builder->AddPadding('0', decimal_point - length); - if (digits_after_point > 0) { - result_builder->AddCharacter('.'); - result_builder->AddPadding('0', digits_after_point); - } - } else { - // "decima.l_rep000" - ASSERT(digits_after_point > 0); - result_builder->AddSubstring(decimal_digits, decimal_point); - result_builder->AddCharacter('.'); - ASSERT(length - decimal_point <= digits_after_point); - result_builder->AddSubstring(&decimal_digits[decimal_point], - length - decimal_point); - int remaining_digits = digits_after_point - (length - decimal_point); - result_builder->AddPadding('0', remaining_digits); - } - if (digits_after_point == 0) { - if ((flags_ & EMIT_TRAILING_DECIMAL_POINT) != 0) { - result_builder->AddCharacter('.'); - } - if ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) { - result_builder->AddCharacter('0'); - } - } -} - - -bool DoubleToStringConverter::ToShortestIeeeNumber( - double value, - StringBuilder* result_builder, - DoubleToStringConverter::DtoaMode mode) const { - ASSERT(mode == SHORTEST || mode == SHORTEST_SINGLE); - if (Double(value).IsSpecial()) { - return HandleSpecialValues(value, result_builder); - } - - int decimal_point; - bool sign; - const int kDecimalRepCapacity = kBase10MaximalLength + 1; - char decimal_rep[kDecimalRepCapacity]; - int decimal_rep_length; - - DoubleToAscii(value, mode, 0, decimal_rep, kDecimalRepCapacity, - &sign, &decimal_rep_length, &decimal_point); - - bool unique_zero = (flags_ & UNIQUE_ZERO) != 0; - if (sign && (value != 0.0 || !unique_zero)) { - result_builder->AddCharacter('-'); - } - - int exponent = decimal_point - 1; - if ((decimal_in_shortest_low_ <= exponent) && - (exponent < decimal_in_shortest_high_)) { - CreateDecimalRepresentation(decimal_rep, decimal_rep_length, - decimal_point, - Max(0, decimal_rep_length - decimal_point), - result_builder); - } else { - CreateExponentialRepresentation(decimal_rep, decimal_rep_length, exponent, - result_builder); - } - return true; -} - - -bool DoubleToStringConverter::ToFixed(double value, - int requested_digits, - StringBuilder* result_builder) const { - ASSERT(kMaxFixedDigitsBeforePoint == 60); - const double kFirstNonFixed = 1e60; - - if (Double(value).IsSpecial()) { - return HandleSpecialValues(value, result_builder); - } - - if (requested_digits > kMaxFixedDigitsAfterPoint) return false; - if (value >= kFirstNonFixed || value <= -kFirstNonFixed) return false; - - // Find a sufficiently precise decimal representation of n. - int decimal_point; - bool sign; - // Add space for the '\0' byte. 
- const int kDecimalRepCapacity = - kMaxFixedDigitsBeforePoint + kMaxFixedDigitsAfterPoint + 1; - char decimal_rep[kDecimalRepCapacity]; - int decimal_rep_length; - DoubleToAscii(value, FIXED, requested_digits, - decimal_rep, kDecimalRepCapacity, - &sign, &decimal_rep_length, &decimal_point); - - bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0); - if (sign && (value != 0.0 || !unique_zero)) { - result_builder->AddCharacter('-'); - } - - CreateDecimalRepresentation(decimal_rep, decimal_rep_length, decimal_point, - requested_digits, result_builder); - return true; -} - - -bool DoubleToStringConverter::ToExponential( - double value, - int requested_digits, - StringBuilder* result_builder) const { - if (Double(value).IsSpecial()) { - return HandleSpecialValues(value, result_builder); - } - - if (requested_digits < -1) return false; - if (requested_digits > kMaxExponentialDigits) return false; - - int decimal_point; - bool sign; - // Add space for digit before the decimal point and the '\0' character. - const int kDecimalRepCapacity = kMaxExponentialDigits + 2; - ASSERT(kDecimalRepCapacity > kBase10MaximalLength); - char decimal_rep[kDecimalRepCapacity]; - int decimal_rep_length; - - if (requested_digits == -1) { - DoubleToAscii(value, SHORTEST, 0, - decimal_rep, kDecimalRepCapacity, - &sign, &decimal_rep_length, &decimal_point); - } else { - DoubleToAscii(value, PRECISION, requested_digits + 1, - decimal_rep, kDecimalRepCapacity, - &sign, &decimal_rep_length, &decimal_point); - ASSERT(decimal_rep_length <= requested_digits + 1); - - for (int i = decimal_rep_length; i < requested_digits + 1; ++i) { - decimal_rep[i] = '0'; - } - decimal_rep_length = requested_digits + 1; - } - - bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0); - if (sign && (value != 0.0 || !unique_zero)) { - result_builder->AddCharacter('-'); - } - - int exponent = decimal_point - 1; - CreateExponentialRepresentation(decimal_rep, - decimal_rep_length, - exponent, - result_builder); - return true; -} - - -bool DoubleToStringConverter::ToPrecision(double value, - int precision, - StringBuilder* result_builder) const { - if (Double(value).IsSpecial()) { - return HandleSpecialValues(value, result_builder); - } - - if (precision < kMinPrecisionDigits || precision > kMaxPrecisionDigits) { - return false; - } - - // Find a sufficiently precise decimal representation of n. - int decimal_point; - bool sign; - // Add one for the terminating null character. - const int kDecimalRepCapacity = kMaxPrecisionDigits + 1; - char decimal_rep[kDecimalRepCapacity]; - int decimal_rep_length; - - DoubleToAscii(value, PRECISION, precision, - decimal_rep, kDecimalRepCapacity, - &sign, &decimal_rep_length, &decimal_point); - ASSERT(decimal_rep_length <= precision); - - bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0); - if (sign && (value != 0.0 || !unique_zero)) { - result_builder->AddCharacter('-'); - } - - // The exponent if we print the number as x.xxeyyy. That is with the - // decimal point after the first digit. - int exponent = decimal_point - 1; - - int extra_zero = ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) ? 1 : 0; - if ((-decimal_point + 1 > max_leading_padding_zeroes_in_precision_mode_) || - (decimal_point - precision + extra_zero > - max_trailing_padding_zeroes_in_precision_mode_)) { - // Fill buffer to contain 'precision' digits. - // Usually the buffer is already at the correct length, but 'DoubleToAscii' - // is allowed to return less characters. 
-    for (int i = decimal_rep_length; i < precision; ++i) {
-      decimal_rep[i] = '0';
-    }
-
-    CreateExponentialRepresentation(decimal_rep,
-                                    precision,
-                                    exponent,
-                                    result_builder);
-  } else {
-    CreateDecimalRepresentation(decimal_rep, decimal_rep_length, decimal_point,
-                                Max(0, precision - decimal_point),
-                                result_builder);
-  }
-  return true;
-}
-
-
-static BignumDtoaMode DtoaToBignumDtoaMode(
-    DoubleToStringConverter::DtoaMode dtoa_mode) {
-  switch (dtoa_mode) {
-    case DoubleToStringConverter::SHORTEST: return BIGNUM_DTOA_SHORTEST;
-    case DoubleToStringConverter::SHORTEST_SINGLE:
-        return BIGNUM_DTOA_SHORTEST_SINGLE;
-    case DoubleToStringConverter::FIXED: return BIGNUM_DTOA_FIXED;
-    case DoubleToStringConverter::PRECISION: return BIGNUM_DTOA_PRECISION;
-    default:
-      UNREACHABLE();
-      return BIGNUM_DTOA_SHORTEST;
-  }
-}
-
-
-void DoubleToStringConverter::DoubleToAscii(double v,
-                                            DtoaMode mode,
-                                            int requested_digits,
-                                            char* buffer,
-                                            int buffer_length,
-                                            bool* sign,
-                                            int* length,
-                                            int* point) {
-  Vector<char> vector(buffer, buffer_length);
-  ASSERT(!Double(v).IsSpecial());
-  ASSERT(mode == SHORTEST || mode == SHORTEST_SINGLE || requested_digits >= 0);
-
-  if (Double(v).Sign() < 0) {
-    *sign = true;
-    v = -v;
-  } else {
-    *sign = false;
-  }
-
-  if (mode == PRECISION && requested_digits == 0) {
-    vector[0] = '\0';
-    *length = 0;
-    return;
-  }
-
-  if (v == 0) {
-    vector[0] = '0';
-    vector[1] = '\0';
-    *length = 1;
-    *point = 1;
-    return;
-  }
-
-  bool fast_worked;
-  switch (mode) {
-    case SHORTEST:
-      fast_worked = FastDtoa(v, FAST_DTOA_SHORTEST, 0, vector, length, point);
-      break;
-    case SHORTEST_SINGLE:
-      fast_worked = FastDtoa(v, FAST_DTOA_SHORTEST_SINGLE, 0,
-                             vector, length, point);
-      break;
-    case FIXED:
-      fast_worked = FastFixedDtoa(v, requested_digits, vector, length, point);
-      break;
-    case PRECISION:
-      fast_worked = FastDtoa(v, FAST_DTOA_PRECISION, requested_digits,
-                             vector, length, point);
-      break;
-    default:
-      fast_worked = false;
-      UNREACHABLE();
-  }
-  if (fast_worked) return;
-
-  // If the fast dtoa didn't succeed use the slower bignum version.
-  BignumDtoaMode bignum_mode = DtoaToBignumDtoaMode(mode);
-  BignumDtoa(v, bignum_mode, requested_digits, vector, length, point);
-  vector[*length] = '\0';
-}
-
-
-// Consumes the given substring from the iterator.
-// Returns false, if the substring does not match.
-static bool ConsumeSubString(const char** current,
-                             const char* end,
-                             const char* substring) {
-  ASSERT(**current == *substring);
-  for (substring++; *substring != '\0'; substring++) {
-    ++*current;
-    if (*current == end || **current != *substring) return false;
-  }
-  ++*current;
-  return true;
-}
-
-
-// Maximum number of significant digits in decimal representation.
-// The longest possible double in decimal representation is
-// (2^53 - 1) * 2 ^ -1074 that is (2 ^ 53 - 1) * 5 ^ 1074 / 10 ^ 1074
-// (768 digits). If we parse a number whose first digits are equal to a
-// mean of 2 adjacent doubles (that could have up to 769 digits) the result
-// must be rounded to the bigger one unless the tail consists of zeros, so
-// we don't need to preserve all the digits.
-const int kMaxSignificantDigits = 772;
-
-
-// Returns true if a nonspace found and false if the end has reached.
-static inline bool AdvanceToNonspace(const char** current, const char* end) {
-  while (*current != end) {
-    if (**current != ' ') return true;
-    ++*current;
-  }
-  return false;
-}
-
-
-static bool isDigit(int x, int radix) {
-  return (x >= '0' && x <= '9' && x < '0' + radix)
-      || (radix > 10 && x >= 'a' && x < 'a' + radix - 10)
-      || (radix > 10 && x >= 'A' && x < 'A' + radix - 10);
-}
-
-
-static double SignedZero(bool sign) {
-  return sign ? -0.0 : 0.0;
-}
-
-
-// Returns true if 'c' is a decimal digit that is valid for the given radix.
-//
-// The function is small and could be inlined, but VS2012 emitted a warning
-// because it constant-propagated the radix and concluded that the last
-// condition was always true. By moving it into a separate function the
-// compiler wouldn't warn anymore.
-static bool IsDecimalDigitForRadix(int c, int radix) {
-  return '0' <= c && c <= '9' && (c - '0') < radix;
-}
-
-// Returns true if 'c' is a character digit that is valid for the given radix.
-// The 'a_character' should be 'a' or 'A'.
-//
-// The function is small and could be inlined, but VS2012 emitted a warning
-// because it constant-propagated the radix and concluded that the first
-// condition was always false. By moving it into a separate function the
-// compiler wouldn't warn anymore.
-static bool IsCharacterDigitForRadix(int c, int radix, char a_character) {
-  return radix > 10 && c >= a_character && c < a_character + radix - 10;
-}
-
-
-// Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end.
-template <int radix_log_2>
-static double RadixStringToIeee(const char* current,
-                                const char* end,
-                                bool sign,
-                                bool allow_trailing_junk,
-                                double junk_string_value,
-                                bool read_as_double,
-                                const char** trailing_pointer) {
-  ASSERT(current != end);
-
-  const int kDoubleSize = Double::kSignificandSize;
-  const int kSingleSize = Single::kSignificandSize;
-  const int kSignificandSize = read_as_double? kDoubleSize: kSingleSize;
-
-  // Skip leading 0s.
-  while (*current == '0') {
-    ++current;
-    if (current == end) {
-      *trailing_pointer = end;
-      return SignedZero(sign);
-    }
-  }
-
-  int64_t number = 0;
-  int exponent = 0;
-  const int radix = (1 << radix_log_2);
-
-  do {
-    int digit;
-    if (IsDecimalDigitForRadix(*current, radix)) {
-      digit = static_cast<char>(*current) - '0';
-    } else if (IsCharacterDigitForRadix(*current, radix, 'a')) {
-      digit = static_cast<char>(*current) - 'a' + 10;
-    } else if (IsCharacterDigitForRadix(*current, radix, 'A')) {
-      digit = static_cast<char>(*current) - 'A' + 10;
-    } else {
-      if (allow_trailing_junk || !AdvanceToNonspace(&current, end)) {
-        break;
-      } else {
-        return junk_string_value;
-      }
-    }
-
-    number = number * radix + digit;
-    int overflow = static_cast<int>(number >> kSignificandSize);
-    if (overflow != 0) {
-      // Overflow occurred. Need to determine which direction to round the
-      // result.
-      int overflow_bits_count = 1;
-      while (overflow > 1) {
-        overflow_bits_count++;
-        overflow >>= 1;
-      }
-
-      int dropped_bits_mask = ((1 << overflow_bits_count) - 1);
-      int dropped_bits = static_cast<int>(number) & dropped_bits_mask;
-      number >>= overflow_bits_count;
-      exponent = overflow_bits_count;
-
-      bool zero_tail = true;
-      for (;;) {
-        ++current;
-        if (current == end || !isDigit(*current, radix)) break;
-        zero_tail = zero_tail && *current == '0';
-        exponent += radix_log_2;
-      }
-
-      if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
-        return junk_string_value;
-      }
-
-      int middle_value = (1 << (overflow_bits_count - 1));
-      if (dropped_bits > middle_value) {
-        number++;  // Rounding up.
-      } else if (dropped_bits == middle_value) {
-        // Rounding to even to consistency with decimals: half-way case rounds
-        // up if significant part is odd and down otherwise.
-        if ((number & 1) != 0 || !zero_tail) {
-          number++;  // Rounding up.
-        }
-      }
-
-      // Rounding up may cause overflow.
-      if ((number & ((int64_t)1 << kSignificandSize)) != 0) {
-        exponent++;
-        number >>= 1;
-      }
-      break;
-    }
-    ++current;
-  } while (current != end);
-
-  ASSERT(number < ((int64_t)1 << kSignificandSize));
-  ASSERT(static_cast<int64_t>(static_cast<double>(number)) == number);
-
-  *trailing_pointer = current;
-
-  if (exponent == 0) {
-    if (sign) {
-      if (number == 0) return -0.0;
-      number = -number;
-    }
-    return static_cast<double>(number);
-  }
-
-  ASSERT(number != 0);
-  return Double(DiyFp(number, exponent)).value();
-}
-
-
-double StringToDoubleConverter::StringToIeee(
-    const char* input,
-    int length,
-    int* processed_characters_count,
-    bool read_as_double) const {
-  const char* current = input;
-  const char* end = input + length;
-
-  *processed_characters_count = 0;
-
-  const bool allow_trailing_junk = (flags_ & ALLOW_TRAILING_JUNK) != 0;
-  const bool allow_leading_spaces = (flags_ & ALLOW_LEADING_SPACES) != 0;
-  const bool allow_trailing_spaces = (flags_ & ALLOW_TRAILING_SPACES) != 0;
-  const bool allow_spaces_after_sign = (flags_ & ALLOW_SPACES_AFTER_SIGN) != 0;
-
-  // To make sure that iterator dereferencing is valid the following
-  // convention is used:
-  // 1. Each '++current' statement is followed by check for equality to 'end'.
-  // 2. If AdvanceToNonspace returned false then current == end.
-  // 3. If 'current' becomes equal to 'end' the function returns or goes to
-  //    'parsing_done'.
-  // 4. 'current' is not dereferenced after the 'parsing_done' label.
-  // 5. Code before 'parsing_done' may rely on 'current != end'.
-  if (current == end) return empty_string_value_;
-
-  if (allow_leading_spaces || allow_trailing_spaces) {
-    if (!AdvanceToNonspace(&current, end)) {
-      *processed_characters_count = static_cast<int>(current - input);
-      return empty_string_value_;
-    }
-    if (!allow_leading_spaces && (input != current)) {
-      // No leading spaces allowed, but AdvanceToNonspace moved forward.
-      return junk_string_value_;
-    }
-  }
-
-  // The longest form of simplified number is: "-<significant digits>.1eXXX\0".
-  const int kBufferSize = kMaxSignificantDigits + 10;
-  char buffer[kBufferSize];  // NOLINT: size is known at compile time.
-  int buffer_pos = 0;
-
-  // Exponent will be adjusted if insignificant digits of the integer part
-  // or insignificant leading zeros of the fractional part are dropped.
-  int exponent = 0;
-  int significant_digits = 0;
-  int insignificant_digits = 0;
-  bool nonzero_digit_dropped = false;
-
-  bool sign = false;
-
-  if (*current == '+' || *current == '-') {
-    sign = (*current == '-');
-    ++current;
-    const char* next_non_space = current;
-    // Skip following spaces (if allowed).
-    if (!AdvanceToNonspace(&next_non_space, end)) return junk_string_value_;
-    if (!allow_spaces_after_sign && (current != next_non_space)) {
-      return junk_string_value_;
-    }
-    current = next_non_space;
-  }
-
-  if (infinity_symbol_ != NULL) {
-    if (*current == infinity_symbol_[0]) {
-      if (!ConsumeSubString(&current, end, infinity_symbol_)) {
-        return junk_string_value_;
-      }
-
-      if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) {
-        return junk_string_value_;
-      }
-      if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
-        return junk_string_value_;
-      }
-
-      ASSERT(buffer_pos == 0);
-      *processed_characters_count = static_cast<int>(current - input);
-      return sign ? -Double::Infinity() : Double::Infinity();
-    }
-  }
-
-  if (nan_symbol_ != NULL) {
-    if (*current == nan_symbol_[0]) {
-      if (!ConsumeSubString(&current, end, nan_symbol_)) {
-        return junk_string_value_;
-      }
-
-      if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) {
-        return junk_string_value_;
-      }
-      if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
-        return junk_string_value_;
-      }
-
-      ASSERT(buffer_pos == 0);
-      *processed_characters_count = static_cast<int>(current - input);
-      return sign ? -Double::NaN() : Double::NaN();
-    }
-  }
-
-  bool leading_zero = false;
-  if (*current == '0') {
-    ++current;
-    if (current == end) {
-      *processed_characters_count = static_cast<int>(current - input);
-      return SignedZero(sign);
-    }
-
-    leading_zero = true;
-
-    // It could be hexadecimal value.
-    if ((flags_ & ALLOW_HEX) && (*current == 'x' || *current == 'X')) {
-      ++current;
-      if (current == end || !isDigit(*current, 16)) {
-        return junk_string_value_;  // "0x".
-      }
-
-      const char* tail_pointer = NULL;
-      double result = RadixStringToIeee<4>(current,
-                                           end,
-                                           sign,
-                                           allow_trailing_junk,
-                                           junk_string_value_,
-                                           read_as_double,
-                                           &tail_pointer);
-      if (tail_pointer != NULL) {
-        if (allow_trailing_spaces) AdvanceToNonspace(&tail_pointer, end);
-        *processed_characters_count = static_cast<int>(tail_pointer - input);
-      }
-      return result;
-    }
-
-    // Ignore leading zeros in the integer part.
-    while (*current == '0') {
-      ++current;
-      if (current == end) {
-        *processed_characters_count = static_cast<int>(current - input);
-        return SignedZero(sign);
-      }
-    }
-  }
-
-  bool octal = leading_zero && (flags_ & ALLOW_OCTALS) != 0;
-
-  // Copy significant digits of the integer part (if any) to the buffer.
-  while (*current >= '0' && *current <= '9') {
-    if (significant_digits < kMaxSignificantDigits) {
-      ASSERT(buffer_pos < kBufferSize);
-      buffer[buffer_pos++] = static_cast<char>(*current);
-      significant_digits++;
-      // Will later check if it's an octal in the buffer.
-    } else {
-      insignificant_digits++;  // Move the digit into the exponential part.
-      nonzero_digit_dropped = nonzero_digit_dropped || *current != '0';
-    }
-    octal = octal && *current < '8';
-    ++current;
-    if (current == end) goto parsing_done;
-  }
-
-  if (significant_digits == 0) {
-    octal = false;
-  }
-
-  if (*current == '.') {
-    if (octal && !allow_trailing_junk) return junk_string_value_;
-    if (octal) goto parsing_done;
-
-    ++current;
-    if (current == end) {
-      if (significant_digits == 0 && !leading_zero) {
-        return junk_string_value_;
-      } else {
-        goto parsing_done;
-      }
-    }
-
-    if (significant_digits == 0) {
-      // octal = false;
-      // Integer part consists of 0 or is absent. Significant digits start after
-      // leading zeros (if any).
-      while (*current == '0') {
-        ++current;
-        if (current == end) {
-          *processed_characters_count = static_cast<int>(current - input);
-          return SignedZero(sign);
-        }
-        exponent--;  // Move this 0 into the exponent.
-      }
-    }
-
-    // There is a fractional part.
-    // We don't emit a '.', but adjust the exponent instead.
-    while (*current >= '0' && *current <= '9') {
-      if (significant_digits < kMaxSignificantDigits) {
-        ASSERT(buffer_pos < kBufferSize);
-        buffer[buffer_pos++] = static_cast<char>(*current);
-        significant_digits++;
-        exponent--;
-      } else {
-        // Ignore insignificant digits in the fractional part.
-        nonzero_digit_dropped = nonzero_digit_dropped || *current != '0';
-      }
-      ++current;
-      if (current == end) goto parsing_done;
-    }
-  }
-
-  if (!leading_zero && exponent == 0 && significant_digits == 0) {
-    // If leading_zeros is true then the string contains zeros.
-    // If exponent < 0 then string was [+-]\.0*...
-    // If significant_digits != 0 the string is not equal to 0.
-    // Otherwise there are no digits in the string.
-    return junk_string_value_;
-  }
-
-  // Parse exponential part.
-  if (*current == 'e' || *current == 'E') {
-    if (octal && !allow_trailing_junk) return junk_string_value_;
-    if (octal) goto parsing_done;
-    ++current;
-    if (current == end) {
-      if (allow_trailing_junk) {
-        goto parsing_done;
-      } else {
-        return junk_string_value_;
-      }
-    }
-    char sign = '+';
-    if (*current == '+' || *current == '-') {
-      sign = static_cast<char>(*current);
-      ++current;
-      if (current == end) {
-        if (allow_trailing_junk) {
-          goto parsing_done;
-        } else {
-          return junk_string_value_;
-        }
-      }
-    }
-
-    if (current == end || *current < '0' || *current > '9') {
-      if (allow_trailing_junk) {
-        goto parsing_done;
-      } else {
-        return junk_string_value_;
-      }
-    }
-
-    const int max_exponent = INT_MAX / 2;
-    ASSERT(-max_exponent / 2 <= exponent && exponent <= max_exponent / 2);
-    int num = 0;
-    do {
-      // Check overflow.
-      int digit = *current - '0';
-      if (num >= max_exponent / 10
-          && !(num == max_exponent / 10 && digit <= max_exponent % 10)) {
-        num = max_exponent;
-      } else {
-        num = num * 10 + digit;
-      }
-      ++current;
-    } while (current != end && *current >= '0' && *current <= '9');
-
-    exponent += (sign == '-' ? -num : num);
-  }
-
-  if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) {
-    return junk_string_value_;
-  }
-  if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
-    return junk_string_value_;
-  }
-  if (allow_trailing_spaces) {
-    AdvanceToNonspace(&current, end);
-  }
-
-  parsing_done:
-  exponent += insignificant_digits;
-
-  if (octal) {
-    double result;
-    const char* tail_pointer = NULL;
-    result = RadixStringToIeee<3>(buffer,
-                                  buffer + buffer_pos,
-                                  sign,
-                                  allow_trailing_junk,
-                                  junk_string_value_,
-                                  read_as_double,
-                                  &tail_pointer);
-    ASSERT(tail_pointer != NULL);
-    *processed_characters_count = static_cast<int>(current - input);
-    return result;
-  }
-
-  if (nonzero_digit_dropped) {
-    buffer[buffer_pos++] = '1';
-    exponent--;
-  }
-
-  ASSERT(buffer_pos < kBufferSize);
-  buffer[buffer_pos] = '\0';
-
-  double converted;
-  if (read_as_double) {
-    converted = Strtod(Vector<const char>(buffer, buffer_pos), exponent);
-  } else {
-    converted = Strtof(Vector<const char>(buffer, buffer_pos), exponent);
-  }
-  *processed_characters_count = static_cast<int>(current - input);
-  return sign?
-converted: converted; -} - -} // namespace double_conversion diff --git a/base/poco/Foundation/src/double-conversion.h b/base/poco/Foundation/src/double-conversion.h deleted file mode 100644 index 851049bf7f0..00000000000 --- a/base/poco/Foundation/src/double-conversion.h +++ /dev/null @@ -1,512 +0,0 @@ -// Copyright 2012 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_ -#define DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_ - -#include "utils.h" - -namespace double_conversion -{ - -class DoubleToStringConverter -{ -public: - // When calling ToFixed with a double > 10^kMaxFixedDigitsBeforePoint - // or a requested_digits parameter > kMaxFixedDigitsAfterPoint then the - // function returns false. - static const int kMaxFixedDigitsBeforePoint = 60; - static const int kMaxFixedDigitsAfterPoint = 60; - - // When calling ToExponential with a requested_digits - // parameter > kMaxExponentialDigits then the function returns false. - static const int kMaxExponentialDigits = 120; - - // When calling ToPrecision with a requested_digits - // parameter < kMinPrecisionDigits or requested_digits > kMaxPrecisionDigits - // then the function returns false. - static const int kMinPrecisionDigits = 1; - static const int kMaxPrecisionDigits = 120; - - enum Flags - { - NO_FLAGS = 0, - EMIT_POSITIVE_EXPONENT_SIGN = 1, - EMIT_TRAILING_DECIMAL_POINT = 2, - EMIT_TRAILING_ZERO_AFTER_POINT = 4, - UNIQUE_ZERO = 8 - }; - - // Flags should be a bit-or combination of the possible Flags-enum. - // - NO_FLAGS: no special flags. - // - EMIT_POSITIVE_EXPONENT_SIGN: when the number is converted into exponent - // form, emits a '+' for positive exponents. Example: 1.2e+2. - // - EMIT_TRAILING_DECIMAL_POINT: when the input number is an integer and is - // converted into decimal format then a trailing decimal point is appended. - // Example: 2345.0 is converted to "2345.". - // - EMIT_TRAILING_ZERO_AFTER_POINT: in addition to a trailing decimal point - // emits a trailing '0'-character. 
This flag requires the - // EXMIT_TRAILING_DECIMAL_POINT flag. - // Example: 2345.0 is converted to "2345.0". - // - UNIQUE_ZERO: "-0.0" is converted to "0.0". - // - // Infinity symbol and nan_symbol provide the string representation for these - // special values. If the string is NULL and the special value is encountered - // then the conversion functions return false. - // - // The exponent_character is used in exponential representations. It is - // usually 'e' or 'E'. - // - // When converting to the shortest representation the converter will - // represent input numbers in decimal format if they are in the interval - // [10^decimal_in_shortest_low; 10^decimal_in_shortest_high[ - // (lower boundary included, greater boundary excluded). - // Example: with decimal_in_shortest_low = -6 and - // decimal_in_shortest_high = 21: - // ToShortest(0.000001) -> "0.000001" - // ToShortest(0.0000001) -> "1e-7" - // ToShortest(111111111111111111111.0) -> "111111111111111110000" - // ToShortest(100000000000000000000.0) -> "100000000000000000000" - // ToShortest(1111111111111111111111.0) -> "1.1111111111111111e+21" - // - // When converting to precision mode the converter may add - // max_leading_padding_zeroes before returning the number in exponential - // format. - // Example with max_leading_padding_zeroes_in_precision_mode = 6. - // ToPrecision(0.0000012345, 2) -> "0.0000012" - // ToPrecision(0.00000012345, 2) -> "1.2e-7" - // Similarly the converter may add up to - // max_trailing_padding_zeroes_in_precision_mode in precision mode to avoid - // returning an exponential representation. A zero added by the - // EMIT_TRAILING_ZERO_AFTER_POINT flag is counted for this limit. - // Examples for max_trailing_padding_zeroes_in_precision_mode = 1: - // ToPrecision(230.0, 2) -> "230" - // ToPrecision(230.0, 2) -> "230." with EMIT_TRAILING_DECIMAL_POINT. - // ToPrecision(230.0, 2) -> "2.3e2" with EMIT_TRAILING_ZERO_AFTER_POINT. - DoubleToStringConverter( - int flags, - const char * infinity_symbol, - const char * nan_symbol, - char exponent_character, - int decimal_in_shortest_low, - int decimal_in_shortest_high, - int max_leading_padding_zeroes_in_precision_mode, - int max_trailing_padding_zeroes_in_precision_mode) - : flags_(flags) - , infinity_symbol_(infinity_symbol) - , nan_symbol_(nan_symbol) - , exponent_character_(exponent_character) - , decimal_in_shortest_low_(decimal_in_shortest_low) - , decimal_in_shortest_high_(decimal_in_shortest_high) - , max_leading_padding_zeroes_in_precision_mode_(max_leading_padding_zeroes_in_precision_mode) - , max_trailing_padding_zeroes_in_precision_mode_(max_trailing_padding_zeroes_in_precision_mode) - { - // When 'trailing zero after the point' is set, then 'trailing point' - // must be set too. - ASSERT(((flags & EMIT_TRAILING_DECIMAL_POINT) != 0) || !((flags & EMIT_TRAILING_ZERO_AFTER_POINT) != 0)); - } - - // Returns a converter following the EcmaScript specification. - static const DoubleToStringConverter & EcmaScriptConverter(); - - // Computes the shortest string of digits that correctly represent the input - // number. Depending on decimal_in_shortest_low and decimal_in_shortest_high - // (see constructor) it then either returns a decimal representation, or an - // exponential representation. 
- // Example with decimal_in_shortest_low = -6, - // decimal_in_shortest_high = 21, - // EMIT_POSITIVE_EXPONENT_SIGN activated, and - // EMIT_TRAILING_DECIMAL_POINT deactivated: - // ToShortest(0.000001) -> "0.000001" - // ToShortest(0.0000001) -> "1e-7" - // ToShortest(111111111111111111111.0) -> "111111111111111110000" - // ToShortest(100000000000000000000.0) -> "100000000000000000000" - // ToShortest(1111111111111111111111.0) -> "1.1111111111111111e+21" - // - // Note: the conversion may round the output if the returned string - // is accurate enough to uniquely identify the input-number. - // For example the most precise representation of the double 9e59 equals - // "899999999999999918767229449717619953810131273674690656206848", but - // the converter will return the shorter (but still correct) "9e59". - // - // Returns true if the conversion succeeds. The conversion always succeeds - // except when the input value is special and no infinity_symbol or - // nan_symbol has been given to the constructor. - bool ToShortest(double value, StringBuilder * result_builder) const { return ToShortestIeeeNumber(value, result_builder, SHORTEST); } - - // Same as ToShortest, but for single-precision floats. - bool ToShortestSingle(float value, StringBuilder * result_builder) const - { - return ToShortestIeeeNumber(value, result_builder, SHORTEST_SINGLE); - } - - - // Computes a decimal representation with a fixed number of digits after the - // decimal point. The last emitted digit is rounded. - // - // Examples: - // ToFixed(3.12, 1) -> "3.1" - // ToFixed(3.1415, 3) -> "3.142" - // ToFixed(1234.56789, 4) -> "1234.5679" - // ToFixed(1.23, 5) -> "1.23000" - // ToFixed(0.1, 4) -> "0.1000" - // ToFixed(1e30, 2) -> "1000000000000000019884624838656.00" - // ToFixed(0.1, 30) -> "0.100000000000000005551115123126" - // ToFixed(0.1, 17) -> "0.10000000000000001" - // - // If requested_digits equals 0, then the tail of the result depends on - // the EMIT_TRAILING_DECIMAL_POINT and EMIT_TRAILING_ZERO_AFTER_POINT. - // Examples, for requested_digits == 0, - // let EMIT_TRAILING_DECIMAL_POINT and EMIT_TRAILING_ZERO_AFTER_POINT be - // - false and false: then 123.45 -> 123 - // 0.678 -> 1 - // - true and false: then 123.45 -> 123. - // 0.678 -> 1. - // - true and true: then 123.45 -> 123.0 - // 0.678 -> 1.0 - // - // Returns true if the conversion succeeds. The conversion always succeeds - // except for the following cases: - // - the input value is special and no infinity_symbol or nan_symbol has - // been provided to the constructor, - // - 'value' > 10^kMaxFixedDigitsBeforePoint, or - // - 'requested_digits' > kMaxFixedDigitsAfterPoint. - // The last two conditions imply that the result will never contain more than - // 1 + kMaxFixedDigitsBeforePoint + 1 + kMaxFixedDigitsAfterPoint characters - // (one additional character for the sign, and one for the decimal point). - bool ToFixed(double value, int requested_digits, StringBuilder * result_builder) const; - - // Computes a representation in exponential format with requested_digits - // after the decimal point. The last emitted digit is rounded. - // If requested_digits equals -1, then the shortest exponential representation - // is computed. - // - // Examples with EMIT_POSITIVE_EXPONENT_SIGN deactivated, and - // exponent_character set to 'e'. 
- // ToExponential(3.12, 1) -> "3.1e0" - // ToExponential(5.0, 3) -> "5.000e0" - // ToExponential(0.001, 2) -> "1.00e-3" - // ToExponential(3.1415, -1) -> "3.1415e0" - // ToExponential(3.1415, 4) -> "3.1415e0" - // ToExponential(3.1415, 3) -> "3.142e0" - // ToExponential(123456789000000, 3) -> "1.235e14" - // ToExponential(1000000000000000019884624838656.0, -1) -> "1e30" - // ToExponential(1000000000000000019884624838656.0, 32) -> - // "1.00000000000000001988462483865600e30" - // ToExponential(1234, 0) -> "1e3" - // - // Returns true if the conversion succeeds. The conversion always succeeds - // except for the following cases: - // - the input value is special and no infinity_symbol or nan_symbol has - // been provided to the constructor, - // - 'requested_digits' > kMaxExponentialDigits. - // The last condition implies that the result will never contain more than - // kMaxExponentialDigits + 8 characters (the sign, the digit before the - // decimal point, the decimal point, the exponent character, the - // exponent's sign, and at most 3 exponent digits). - bool ToExponential(double value, int requested_digits, StringBuilder * result_builder) const; - - // Computes 'precision' leading digits of the given 'value' and returns them - // either in exponential or decimal format, depending on - // max_{leading|trailing}_padding_zeroes_in_precision_mode (given to the - // constructor). - // The last computed digit is rounded. - // - // Example with max_leading_padding_zeroes_in_precision_mode = 6. - // ToPrecision(0.0000012345, 2) -> "0.0000012" - // ToPrecision(0.00000012345, 2) -> "1.2e-7" - // Similarly the converter may add up to - // max_trailing_padding_zeroes_in_precision_mode in precision mode to avoid - // returning an exponential representation. A zero added by the - // EMIT_TRAILING_ZERO_AFTER_POINT flag is counted for this limit. - // Examples for max_trailing_padding_zeroes_in_precision_mode = 1: - // ToPrecision(230.0, 2) -> "230" - // ToPrecision(230.0, 2) -> "230." with EMIT_TRAILING_DECIMAL_POINT. - // ToPrecision(230.0, 2) -> "2.3e2" with EMIT_TRAILING_ZERO_AFTER_POINT. - // Examples for max_trailing_padding_zeroes_in_precision_mode = 3, and no - // EMIT_TRAILING_ZERO_AFTER_POINT: - // ToPrecision(123450.0, 6) -> "123450" - // ToPrecision(123450.0, 5) -> "123450" - // ToPrecision(123450.0, 4) -> "123500" - // ToPrecision(123450.0, 3) -> "123000" - // ToPrecision(123450.0, 2) -> "1.2e5" - // - // Returns true if the conversion succeeds. The conversion always succeeds - // except for the following cases: - // - the input value is special and no infinity_symbol or nan_symbol has - // been provided to the constructor, - // - precision < kMinPericisionDigits - // - precision > kMaxPrecisionDigits - // The last condition implies that the result will never contain more than - // kMaxPrecisionDigits + 7 characters (the sign, the decimal point, the - // exponent character, the exponent's sign, and at most 3 exponent digits). - bool ToPrecision(double value, int precision, StringBuilder * result_builder) const; - - enum DtoaMode - { - // Produce the shortest correct representation. - // For example the output of 0.299999999999999988897 is (the less accurate - // but correct) 0.3. - SHORTEST, - // Same as SHORTEST, but for single-precision floats. - SHORTEST_SINGLE, - // Produce a fixed number of digits after the decimal point. - // For instance fixed(0.1, 4) becomes 0.1000 - // If the input number is big, the output will be big. 
-        FIXED,
-        // Fixed number of digits (independent of the decimal point).
-        PRECISION
-    };
-
-    // The maximal number of digits that are needed to emit a double in base 10.
-    // A higher precision can be achieved by using more digits, but the shortest
-    // accurate representation of any double will never use more digits than
-    // kBase10MaximalLength.
-    // Note that DoubleToAscii null-terminates its input. So the given buffer
-    // should be at least kBase10MaximalLength + 1 characters long.
-    static const int kBase10MaximalLength = 17;
-
-    // Converts the given double 'v' to ascii. 'v' must not be NaN, +Infinity, or
-    // -Infinity. In SHORTEST_SINGLE-mode this restriction also applies to 'v'
-    // after it has been casted to a single-precision float. That is, in this
-    // mode static_cast<float>(v) must not be NaN, +Infinity or -Infinity.
-    //
-    // The result should be interpreted as buffer * 10^(point-length).
-    //
-    // The output depends on the given mode:
-    //  - SHORTEST: produce the least amount of digits for which the internal
-    //   identity requirement is still satisfied. If the digits are printed
-    //   (together with the correct exponent) then reading this number will give
-    //   'v' again. The buffer will choose the representation that is closest to
-    //   'v'. If there are two at the same distance, than the one farther away
-    //   from 0 is chosen (halfway cases - ending with 5 - are rounded up).
-    //   In this mode the 'requested_digits' parameter is ignored.
-    //  - SHORTEST_SINGLE: same as SHORTEST but with single-precision.
-    //  - FIXED: produces digits necessary to print a given number with
-    //   'requested_digits' digits after the decimal point. The produced digits
-    //   might be too short in which case the caller has to fill the remainder
-    //   with '0's.
-    //   Example: toFixed(0.001, 5) is allowed to return buffer="1", point=-2.
-    //   Halfway cases are rounded towards +/-Infinity (away from 0). The call
-    //   toFixed(0.15, 2) thus returns buffer="2", point=0.
-    //   The returned buffer may contain digits that would be truncated from the
-    //   shortest representation of the input.
-    //  - PRECISION: produces 'requested_digits' where the first digit is not '0'.
-    //   Even though the length of produced digits usually equals
-    //   'requested_digits', the function is allowed to return fewer digits, in
-    //   which case the caller has to fill the missing digits with '0's.
-    //   Halfway cases are again rounded away from 0.
-    // DoubleToAscii expects the given buffer to be big enough to hold all
-    // digits and a terminating null-character. In SHORTEST-mode it expects a
-    // buffer of at least kBase10MaximalLength + 1. In all other modes the
-    // requested_digits parameter and the padding-zeroes limit the size of the
-    // output. Don't forget the decimal point, the exponent character and the
-    // terminating null-character when computing the maximal output size.
-    // The given length is only used in debug mode to ensure the buffer is big
-    // enough.
-    static void
-    DoubleToAscii(double v, DtoaMode mode, int requested_digits, char * buffer, int buffer_length, bool * sign, int * length, int * point);
-
-private:
-    // Implementation for ToShortest and ToShortestSingle.
-    bool ToShortestIeeeNumber(double value, StringBuilder * result_builder, DtoaMode mode) const;
-
-    // If the value is a special value (NaN or Infinity) constructs the
-    // corresponding string using the configured infinity/nan-symbol.
-    // If either of them is NULL or the value is not special then the
-    // function returns false.
- bool HandleSpecialValues(double value, StringBuilder * result_builder) const; - // Constructs an exponential representation (i.e. 1.234e56). - // The given exponent assumes a decimal point after the first decimal digit. - void CreateExponentialRepresentation(const char * decimal_digits, int length, int exponent, StringBuilder * result_builder) const; - // Creates a decimal representation (i.e 1234.5678). - void CreateDecimalRepresentation( - const char * decimal_digits, int length, int decimal_point, int digits_after_point, StringBuilder * result_builder) const; - - const int flags_; - const char * const infinity_symbol_; - const char * const nan_symbol_; - const char exponent_character_; - const int decimal_in_shortest_low_; - const int decimal_in_shortest_high_; - const int max_leading_padding_zeroes_in_precision_mode_; - const int max_trailing_padding_zeroes_in_precision_mode_; - - DISALLOW_IMPLICIT_CONSTRUCTORS(DoubleToStringConverter); -}; - - -class StringToDoubleConverter -{ -public: - // Enumeration for allowing octals and ignoring junk when converting - // strings to numbers. - enum Flags - { - NO_FLAGS = 0, - ALLOW_HEX = 1, - ALLOW_OCTALS = 2, - ALLOW_TRAILING_JUNK = 4, - ALLOW_LEADING_SPACES = 8, - ALLOW_TRAILING_SPACES = 16, - ALLOW_SPACES_AFTER_SIGN = 32 - }; - - // Flags should be a bit-or combination of the possible Flags-enum. - // - NO_FLAGS: no special flags. - // - ALLOW_HEX: recognizes the prefix "0x". Hex numbers may only be integers. - // Ex: StringToDouble("0x1234") -> 4660.0 - // In StringToDouble("0x1234.56") the characters ".56" are trailing - // junk. The result of the call is hence dependent on - // the ALLOW_TRAILING_JUNK flag and/or the junk value. - // With this flag "0x" is a junk-string. Even with ALLOW_TRAILING_JUNK, - // the string will not be parsed as "0" followed by junk. - // - // - ALLOW_OCTALS: recognizes the prefix "0" for octals: - // If a sequence of octal digits starts with '0', then the number is - // read as octal integer. Octal numbers may only be integers. - // Ex: StringToDouble("01234") -> 668.0 - // StringToDouble("012349") -> 12349.0 // Not a sequence of octal - // // digits. - // In StringToDouble("01234.56") the characters ".56" are trailing - // junk. The result of the call is hence dependent on - // the ALLOW_TRAILING_JUNK flag and/or the junk value. - // In StringToDouble("01234e56") the characters "e56" are trailing - // junk, too. - // - ALLOW_TRAILING_JUNK: ignore trailing characters that are not part of - // a double literal. - // - ALLOW_LEADING_SPACES: skip over leading spaces. - // - ALLOW_TRAILING_SPACES: ignore trailing spaces. - // - ALLOW_SPACES_AFTER_SIGN: ignore spaces after the sign. - // Ex: StringToDouble("- 123.2") -> -123.2. - // StringToDouble("+ 123.2") -> 123.2 - // - // empty_string_value is returned when an empty string is given as input. - // If ALLOW_LEADING_SPACES or ALLOW_TRAILING_SPACES are set, then a string - // containing only spaces is converted to the 'empty_string_value', too. - // - // junk_string_value is returned when - // a) ALLOW_TRAILING_JUNK is not set, and a junk character (a character not - // part of a double-literal) is found. - // b) ALLOW_TRAILING_JUNK is set, but the string does not start with a - // double literal. - // - // infinity_symbol and nan_symbol are strings that are used to detect - // inputs that represent infinity and NaN. They can be null, in which case - // they are ignored. - // The conversion routine first reads any possible signs. 
Then it compares the - // following character of the input-string with the first character of - // the infinity and nan symbols. If either matches, the function assumes that - // a match has been found, and expects the following input characters to match - // the remaining characters of the special-value symbol. - // This means that the following restrictions apply to special-value symbols: - // - they must not start with signs ('+' or '-'), - // - they must not have the same first character, - // - they must not start with digits. - // - // Examples: - // flags = ALLOW_HEX | ALLOW_TRAILING_JUNK, - // empty_string_value = 0.0, - // junk_string_value = NaN, - // infinity_symbol = "infinity", - // nan_symbol = "nan": - // StringToDouble("0x1234") -> 4660.0. - // StringToDouble("0x1234K") -> 4660.0. - // StringToDouble("") -> 0.0 // empty_string_value. - // StringToDouble(" ") -> NaN // junk_string_value. - // StringToDouble(" 1") -> NaN // junk_string_value. - // StringToDouble("0x") -> NaN // junk_string_value. - // StringToDouble("-123.45") -> -123.45. - // StringToDouble("--123.45") -> NaN // junk_string_value. - // StringToDouble("123e45") -> 123e45. - // StringToDouble("123E45") -> 123e45. - // StringToDouble("123e+45") -> 123e45. - // StringToDouble("123E-45") -> 123e-45. - // StringToDouble("123e") -> 123.0 // trailing junk ignored. - // StringToDouble("123e-") -> 123.0 // trailing junk ignored. - // StringToDouble("+NaN") -> NaN // NaN string literal. - // StringToDouble("-infinity") -> -inf. // infinity literal. - // StringToDouble("Infinity") -> NaN // junk_string_value. - // - // flags = ALLOW_OCTALS | ALLOW_LEADING_SPACES, - // empty_string_value = 0.0, - // junk_string_value = NaN, - // infinity_symbol = NULL, - // nan_symbol = NULL: - // StringToDouble("0x1234") -> NaN // junk_string_value. - // StringToDouble("01234") -> 668.0. - // StringToDouble("") -> 0.0 // empty_string_value. - // StringToDouble(" ") -> 0.0 // empty_string_value. - // StringToDouble(" 1") -> 1.0 - // StringToDouble("0x") -> NaN // junk_string_value. - // StringToDouble("0123e45") -> NaN // junk_string_value. - // StringToDouble("01239E45") -> 1239e45. - // StringToDouble("-infinity") -> NaN // junk_string_value. - // StringToDouble("NaN") -> NaN // junk_string_value. - StringToDoubleConverter( - int flags, double empty_string_value, double junk_string_value, const char * infinity_symbol, const char * nan_symbol) - : flags_(flags) - , empty_string_value_(empty_string_value) - , junk_string_value_(junk_string_value) - , infinity_symbol_(infinity_symbol) - , nan_symbol_(nan_symbol) - { - } - - // Performs the conversion. - // The output parameter 'processed_characters_count' is set to the number - // of characters that have been processed to read the number. - // Spaces that are processed with ALLOW_{LEADING|TRAILING}_SPACES are included - // in the 'processed_characters_count'. Trailing junk is never included. - double StringToDouble(const char * buffer, int length, int * processed_characters_count) const - { - return StringToIeee(buffer, length, processed_characters_count, true); - } - - // Same as StringToDouble but reads a float. - // Note that this is not equivalent to static_cast<float>(StringToDouble(...)) - // due to potential double-rounding.
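// A concrete illustration of that double-rounding pitfall (constructed for
// this note, not taken from the original sources): consider the input
//   "1.000000059604644775390625000000000001"
// Its value lies just above the midpoint between 1.0f and the next float, so
// parsing it directly as a float must round up:
//   StringToFloat(s, n, &count)                       // 1.00000011920928955078125f
// Parsing it as a double first loses the trailing digits, lands exactly on
// the float midpoint, and the subsequent cast rounds to even, i.e. down:
//   static_cast<float>(StringToDouble(s, n, &count))  // 1.0f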
- float StringToFloat(const char * buffer, int length, int * processed_characters_count) const - { - return static_cast<float>(StringToIeee(buffer, length, processed_characters_count, false)); - } - -private: - const int flags_; - const double empty_string_value_; - const double junk_string_value_; - const char * const infinity_symbol_; - const char * const nan_symbol_; - - double StringToIeee(const char * buffer, int length, int * processed_characters_count, bool read_as_double) const; - - DISALLOW_IMPLICIT_CONSTRUCTORS(StringToDoubleConverter); -}; - -} // namespace double_conversion - -#endif // DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_ diff --git a/base/poco/Foundation/src/fast-dtoa.cc b/base/poco/Foundation/src/fast-dtoa.cc deleted file mode 100644 index a58f4d4487a..00000000000 --- a/base/poco/Foundation/src/fast-dtoa.cc +++ /dev/null @@ -1,665 +0,0 @@ -// Copyright 2012 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "fast-dtoa.h" - -#include "cached-powers.h" -#include "diy-fp.h" -#include "ieee.h" - -namespace double_conversion { - -// The minimal and maximal target exponent define the range of w's binary -// exponent, where 'w' is the result of multiplying the input by a cached power -// of ten. -// -// A different range might be chosen on a different platform, to optimize digit -// generation, but a smaller range requires more powers of ten to be cached. -static const int kMinimalTargetExponent = -60; -static const int kMaximalTargetExponent = -32; - - -// Adjusts the last digit of the generated number, and screens out generated -// solutions that may be inaccurate. A solution may be inaccurate if it is -// outside the safe interval, or if we cannot prove that it is closer to the -// input than a neighboring representation of the same length.
-// -// Input: * buffer containing the digits of too_high / 10^kappa -// * the buffer's length -// * distance_too_high_w == (too_high - w).f() * unit -// * unsafe_interval == (too_high - too_low).f() * unit -// * rest = (too_high - buffer * 10^kappa).f() * unit -// * ten_kappa = 10^kappa * unit -// * unit = the common multiplier -// Output: returns true if the buffer is guaranteed to contain the closest -// representable number to the input. -// Modifies the generated digits in the buffer to approach (round towards) w. -static bool RoundWeed(Vector<char> buffer, - int length, - uint64_t distance_too_high_w, - uint64_t unsafe_interval, - uint64_t rest, - uint64_t ten_kappa, - uint64_t unit) { - uint64_t small_distance = distance_too_high_w - unit; - uint64_t big_distance = distance_too_high_w + unit; - // Let w_low = too_high - big_distance, and - // w_high = too_high - small_distance. - // Note: w_low < w < w_high - // - // The real w (* unit) must lie somewhere inside the interval - // ]w_low; w_high[ (often written as "(w_low; w_high)") - - // Basically the buffer currently contains a number in the unsafe interval - // ]too_low; too_high[ with too_low < w < too_high - // - // too_high - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // ^v 1 unit ^ ^ ^ ^ - // boundary_high --------------------- . . . . - // ^v 1 unit . . . . - // - - - - - - - - - - - - - - - - - - - + - - + - - - - - - . . - // . . ^ . . - // . big_distance . . . - // . . . . rest - // small_distance . . . . - // v . . . . - // w_high - - - - - - - - - - - - - - - - - - . . . . - // ^v 1 unit . . . . - // w ---------------------------------------- . . . . - // ^v 1 unit v . . . - // w_low - - - - - - - - - - - - - - - - - - - - - . . . - // . . v - // buffer --------------------------------------------------+-------+-------- - // . . - // safe_interval . - // v . - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - . - // ^v 1 unit . - // boundary_low ------------------------- unsafe_interval - // ^v 1 unit v - // too_low - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - // - // Note that the value of buffer could lie anywhere inside the range too_low - // to too_high. - // - // boundary_low, boundary_high and w are approximations of the real boundaries - // and v (the input number). They are guaranteed to be precise up to one unit. - // In fact the error is guaranteed to be strictly less than one unit. - // - // Anything that lies outside the unsafe interval is guaranteed not to round - // to v when read again. - // Anything that lies inside the safe interval is guaranteed to round to v - // when read again. - // If the number inside the buffer lies inside the unsafe interval but not - // inside the safe interval then we simply do not know and bail out (returning - // false). - // - // Similarly we have to take into account the imprecision of 'w' when finding - // the closest representation of 'w'. If we have two potential - // representations, and one is closer to both w_low and w_high, then we know - // it is closer to the actual value v. - // - // By generating the digits of too_high we got the largest (closest to - // too_high) buffer that is still in the unsafe interval. In the case where - // w_high < buffer < too_high we try to decrement the buffer. - // This way the buffer approaches (rounds towards) w.
- // There are 3 conditions that stop the decrementation process: - // 1) the buffer is already below w_high - // 2) decrementing the buffer would make it leave the unsafe interval - // 3) decrementing the buffer would yield a number below w_high and farther - // away than the current number. In other words: - // (buffer{-1} < w_high) && w_high - buffer{-1} > buffer - w_high - // Instead of using the buffer directly we use its distance to too_high. - // Conceptually rest ~= too_high - buffer - // We need to do the following tests in this order to avoid over- and - // underflows. - ASSERT(rest <= unsafe_interval); - while (rest < small_distance && // Negated condition 1 - unsafe_interval - rest >= ten_kappa && // Negated condition 2 - (rest + ten_kappa < small_distance || // buffer{-1} > w_high - small_distance - rest >= rest + ten_kappa - small_distance)) { - buffer[length - 1]--; - rest += ten_kappa; - } - - // We have approached w+ as much as possible. We now test if approaching w- - // would require changing the buffer. If yes, then we have two possible - // representations close to w, but we cannot decide which one is closer. - if (rest < big_distance && - unsafe_interval - rest >= ten_kappa && - (rest + ten_kappa < big_distance || - big_distance - rest > rest + ten_kappa - big_distance)) { - return false; - } - - // Weeding test. - // The safe interval is [too_low + 2 ulp; too_high - 2 ulp] - // Since too_low = too_high - unsafe_interval this is equivalent to - // [too_high - unsafe_interval + 4 ulp; too_high - 2 ulp] - // Conceptually we have: rest ~= too_high - buffer - return (2 * unit <= rest) && (rest <= unsafe_interval - 4 * unit); -} - - -// Rounds the buffer upwards if the result is closer to v by possibly adding -// 1 to the buffer. If the precision of the calculation is not sufficient to -// round correctly, return false. -// The rounding might shift the whole buffer in which case the kappa is -// adjusted. For example "99", kappa = 3 might become "10", kappa = 4. -// -// If 2*rest > ten_kappa then the buffer needs to be rounded up. -// rest can have an error of +/- 1 unit. This function accounts for the -// imprecision and returns false, if the rounding direction cannot be -// unambiguously determined. -// -// Precondition: rest < ten_kappa. -static bool RoundWeedCounted(Vector<char> buffer, - int length, - uint64_t rest, - uint64_t ten_kappa, - uint64_t unit, - int* kappa) { - ASSERT(rest < ten_kappa); - // The following tests are done in a specific order to avoid overflows. They - // will work correctly with any uint64 values of rest < ten_kappa and unit. - // - // If the unit is too big, then we don't know which way to round. For example - // a unit of 50 means that the real number lies within rest +/- 50. If - // 10^kappa == 40 then there is no way to tell which way to round. - if (unit >= ten_kappa) return false; - // Even if unit is just half the size of 10^kappa we are already completely - // lost. (And after the previous test we know that the expression will not - // over/underflow.) - if (ten_kappa - unit <= unit) return false; - // If 2 * (rest + unit) <= 10^kappa we can safely round down. - if ((ten_kappa - rest > rest) && (ten_kappa - 2 * rest >= 2 * unit)) { - return true; - } - // If 2 * (rest - unit) >= 10^kappa, then we can safely round up. - if ((rest > unit) && (ten_kappa - (rest - unit) <= (rest - unit))) { - // Increment the last digit recursively until we find a non '9' digit.
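// (Illustrative trace, added for clarity: with buffer "199" the increment
// below turns the last digit into the pseudo-digit '0' + 10; the loop then
// zeroes it and carries leftwards, producing "200". With "99" every digit
// overflows, buffer[0] ends up as '0' + 10, and the special case after the
// loop rewrites the buffer to "10" while incrementing *kappa.)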
- buffer[length - 1]++; - for (int i = length - 1; i > 0; --i) { - if (buffer[i] != '0' + 10) break; - buffer[i] = '0'; - buffer[i - 1]++; - } - // If the first digit is now '0'+ 10 we had a buffer with all '9's. With the - // exception of the first digit all digits are now '0'. Simply switch the - // first digit to '1' and adjust the kappa. Example: "99" becomes "10" and - // the power (the kappa) is increased. - if (buffer[0] == '0' + 10) { - buffer[0] = '1'; - (*kappa) += 1; - } - return true; - } - return false; -} - -// Returns the biggest power of ten that is less than or equal to the given -// number. We furthermore receive the maximum number of bits 'number' has. -// -// Returns power == 10^(exponent_plus_one-1) such that -// power <= number < power * 10. -// If number_bits == 0 then 0^(0-1) is returned. -// The number of bits must be <= 32. -// Precondition: number < (1 << (number_bits + 1)). - -// Inspired by the method for finding an integer log base 10 from here: -// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10 -static unsigned int const kSmallPowersOfTen[] = - {0, 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, - 1000000000}; - -static void BiggestPowerTen(uint32_t number, - int number_bits, - uint32_t* power, - int* exponent_plus_one) { - ASSERT(number < (1u << (number_bits + 1))); - // 1233/4096 is approximately 1/lg(10). - int exponent_plus_one_guess = ((number_bits + 1) * 1233 >> 12); - // We increment to skip over the first entry in the kPowersOf10 table. - // Note: kPowersOf10[i] == 10^(i-1). - exponent_plus_one_guess++; - // We don't have any guarantees that 2^number_bits <= number. - if (number < kSmallPowersOfTen[exponent_plus_one_guess] && exponent_plus_one_guess > 0) { - exponent_plus_one_guess--; - } - *power = kSmallPowersOfTen[exponent_plus_one_guess]; - *exponent_plus_one = exponent_plus_one_guess; -} - -// Generates the digits of input number w. -// w is a floating-point number (DiyFp), consisting of a significand and an -// exponent. Its exponent is bounded by kMinimalTargetExponent and -// kMaximalTargetExponent. -// Hence -60 <= w.e() <= -32. -// -// Returns false if it fails, in which case the generated digits in the buffer -// should not be used. -// Preconditions: -// * low, w and high are correct up to 1 ulp (unit in the last place). That -// is, their error must be less than a unit of their last digits. -// * low.e() == w.e() == high.e() -// * low < w < high, and taking into account their error: low~ <= high~ -// * kMinimalTargetExponent <= w.e() <= kMaximalTargetExponent -// Postconditions: returns false if procedure fails. -// otherwise: -// * buffer is not null-terminated, but len contains the number of digits. -// * buffer contains the shortest possible decimal digit-sequence -// such that LOW < buffer * 10^kappa < HIGH, where LOW and HIGH are the -// correct values of low and high (without their error). -// * if more than one decimal representation gives the minimal number of -// decimal digits then the one closest to W (where W is the correct value -// of w) is chosen. -// Remark: this procedure takes into account the imprecision of its input -// numbers. If the precision is not enough to guarantee all the postconditions -// then false is returned. This usually happens rarely (~0.5%). -// -// Say, for the sake of example, that -// w.e() == -48, and w.f() == 0x1234567890abcdef -// w's value can be computed by w.f() * 2^w.e() -// We can obtain w's integral digits by simply shifting w.f() by -w.e(). 
-// -> w's integral part is 0x1234 -// w's fractional part is therefore 0x567890abcdef. -// Printing w's integral part is easy (simply print 0x1234 in decimal). -// In order to print its fraction we repeatedly multiply the fraction by 10 and -// get each digit. Example the first digit after the point would be computed by -// (0x567890abcdef * 10) >> 48. -> 3 -// The whole thing becomes slightly more complicated because we want to stop -// once we have enough digits. That is, once the digits inside the buffer -// represent 'w' we can stop. Everything inside the interval low - high -// represents w. However we have to pay attention to low, high and w's -// imprecision. -static bool DigitGen(DiyFp low, - DiyFp w, - DiyFp high, - Vector<char> buffer, - int* length, - int* kappa) { - ASSERT(low.e() == w.e() && w.e() == high.e()); - ASSERT(low.f() + 1 <= high.f() - 1); - ASSERT(kMinimalTargetExponent <= w.e() && w.e() <= kMaximalTargetExponent); - // low, w and high are imprecise, but by less than one ulp (unit in the last - // place). - // If we remove (resp. add) 1 ulp from low (resp. high) we are certain that - // the new numbers are outside of the interval we want the final - // representation to lie in. - // Inversely adding (resp. removing) 1 ulp from low (resp. high) would yield - // numbers that are certain to lie in the interval. We will use this fact - // later on. - // We will now start by generating the digits within the uncertain - // interval. Later we will weed out representations that lie outside the safe - // interval and thus _might_ lie outside the correct interval. - uint64_t unit = 1; - DiyFp too_low = DiyFp(low.f() - unit, low.e()); - DiyFp too_high = DiyFp(high.f() + unit, high.e()); - // too_low and too_high are guaranteed to lie outside the interval we want the - // generated number in. - DiyFp unsafe_interval = DiyFp::Minus(too_high, too_low); - // We now cut the input number into two parts: the integral digits and the - // fractionals. We will not write any decimal separator though, but adapt - // kappa instead. - // Reminder: we are currently computing the digits (stored inside the buffer) - // such that: too_low < buffer * 10^kappa < too_high - // We use too_high for the digit_generation and stop as soon as possible. - // If we stop early we effectively round down. - DiyFp one = DiyFp(static_cast<uint64_t>(1) << -w.e(), w.e()); - // Division by one is a shift. - uint32_t integrals = static_cast<uint32_t>(too_high.f() >> -one.e()); - // Modulo by one is an and. - uint64_t fractionals = too_high.f() & (one.f() - 1); - uint32_t divisor; - int divisor_exponent_plus_one; - BiggestPowerTen(integrals, DiyFp::kSignificandSize - (-one.e()), - &divisor, &divisor_exponent_plus_one); - *kappa = divisor_exponent_plus_one; - *length = 0; - // Loop invariant: buffer = too_high / 10^kappa (integer division) - // The invariant holds for the first iteration: kappa has been initialized - // with the divisor exponent + 1. And the divisor is the biggest power of ten - // that is smaller than integrals. - while (*kappa > 0) { - int digit = integrals / divisor; - ASSERT(digit <= 9); - buffer[*length] = static_cast<char>('0' + digit); - (*length)++; - integrals %= divisor; - (*kappa)--; - // Note that kappa now equals the exponent of the divisor and that the - // invariant thus holds again.
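// (Worked example, added for illustration: continuing the 0x1234 example
// above, integrals == 4660, so BiggestPowerTen yields divisor == 1000 and
// the loop starts with kappa == 4. Successive iterations emit
// 4660/1000 = '4' (integrals -> 660), then '6' (-> 60), '6' (-> 0) and '0',
// i.e. the buffer accumulates "4660" while kappa counts down to 0 -- unless
// the early-exit test below fires first.)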
- uint64_t rest = - (static_cast<uint64_t>(integrals) << -one.e()) + fractionals; - // Invariant: too_high = buffer * 10^kappa + DiyFp(rest, one.e()) - // Reminder: unsafe_interval.e() == one.e() - if (rest < unsafe_interval.f()) { - // Rounding down (by not emitting the remaining digits) yields a number - // that lies within the unsafe interval. - return RoundWeed(buffer, *length, DiyFp::Minus(too_high, w).f(), - unsafe_interval.f(), rest, - static_cast<uint64_t>(divisor) << -one.e(), unit); - } - divisor /= 10; - } - - // The integrals have been generated. We are at the point of the decimal - // separator. In the following loop we simply multiply the remaining digits by - // 10 and divide by one. We just need to pay attention to multiply associated - // data (like the interval or 'unit'), too. - // Note that the multiplication by 10 does not overflow, because w.e >= -60 - // and thus one.e >= -60. - ASSERT(one.e() >= -60); - ASSERT(fractionals < one.f()); - ASSERT(UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF) / 10 >= one.f()); - for (;;) { - fractionals *= 10; - unit *= 10; - unsafe_interval.set_f(unsafe_interval.f() * 10); - // Integer division by one. - int digit = static_cast<int>(fractionals >> -one.e()); - ASSERT(digit <= 9); - buffer[*length] = static_cast<char>('0' + digit); - (*length)++; - fractionals &= one.f() - 1; // Modulo by one. - (*kappa)--; - if (fractionals < unsafe_interval.f()) { - return RoundWeed(buffer, *length, DiyFp::Minus(too_high, w).f() * unit, - unsafe_interval.f(), fractionals, one.f(), unit); - } - } -} - - - -// Generates (at most) requested_digits digits of input number w. -// w is a floating-point number (DiyFp), consisting of a significand and an -// exponent. Its exponent is bounded by kMinimalTargetExponent and -// kMaximalTargetExponent. -// Hence -60 <= w.e() <= -32. -// -// Returns false if it fails, in which case the generated digits in the buffer -// should not be used. -// Preconditions: -// * w is correct up to 1 ulp (unit in the last place). That -// is, its error must be strictly less than a unit of its last digit. -// * kMinimalTargetExponent <= w.e() <= kMaximalTargetExponent -// -// Postconditions: returns false if procedure fails. -// otherwise: -// * buffer is not null-terminated, but length contains the number of -// digits. -// * the representation in buffer is the most precise representation of -// requested_digits digits. -// * buffer contains at most requested_digits digits of w. If there are less -// than requested_digits digits then some trailing '0's have been removed. -// * kappa is such that -// w = buffer * 10^kappa + eps with |eps| < 10^kappa / 2. -// -// Remark: This procedure takes into account the imprecision of its input -// numbers. If the precision is not enough to guarantee all the postconditions -// then false is returned. This usually happens rarely, but the failure-rate -// increases with higher requested_digits. -static bool DigitGenCounted(DiyFp w, - int requested_digits, - Vector<char> buffer, - int* length, - int* kappa) { - ASSERT(kMinimalTargetExponent <= w.e() && w.e() <= kMaximalTargetExponent); - ASSERT(kMinimalTargetExponent >= -60); - ASSERT(kMaximalTargetExponent <= -32); - // w is assumed to have an error less than 1 unit. Whenever w is scaled we - // also scale its error. - uint64_t w_error = 1; - // We cut the input number into two parts: the integral digits and the - // fractional digits. We don't emit any decimal separator, but adapt kappa - // instead. Example: instead of writing "1.2" we put "12" into the buffer and - // increase kappa by 1.
- DiyFp one = DiyFp(static_cast<uint64_t>(1) << -w.e(), w.e()); - // Division by one is a shift. - uint32_t integrals = static_cast<uint32_t>(w.f() >> -one.e()); - // Modulo by one is an and. - uint64_t fractionals = w.f() & (one.f() - 1); - uint32_t divisor; - int divisor_exponent_plus_one; - BiggestPowerTen(integrals, DiyFp::kSignificandSize - (-one.e()), - &divisor, &divisor_exponent_plus_one); - *kappa = divisor_exponent_plus_one; - *length = 0; - - // Loop invariant: buffer = w / 10^kappa (integer division) - // The invariant holds for the first iteration: kappa has been initialized - // with the divisor exponent + 1. And the divisor is the biggest power of ten - // that is smaller than 'integrals'. - while (*kappa > 0) { - int digit = integrals / divisor; - ASSERT(digit <= 9); - buffer[*length] = static_cast<char>('0' + digit); - (*length)++; - requested_digits--; - integrals %= divisor; - (*kappa)--; - // Note that kappa now equals the exponent of the divisor and that the - // invariant thus holds again. - if (requested_digits == 0) break; - divisor /= 10; - } - - if (requested_digits == 0) { - uint64_t rest = - (static_cast<uint64_t>(integrals) << -one.e()) + fractionals; - return RoundWeedCounted(buffer, *length, rest, - static_cast<uint64_t>(divisor) << -one.e(), w_error, - kappa); - } - - // The integrals have been generated. We are at the point of the decimal - // separator. In the following loop we simply multiply the remaining digits by - // 10 and divide by one. We just need to pay attention to multiply associated - // data (the 'unit'), too. - // Note that the multiplication by 10 does not overflow, because w.e >= -60 - // and thus one.e >= -60. - ASSERT(one.e() >= -60); - ASSERT(fractionals < one.f()); - ASSERT(UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF) / 10 >= one.f()); - while (requested_digits > 0 && fractionals > w_error) { - fractionals *= 10; - w_error *= 10; - // Integer division by one. - int digit = static_cast<int>(fractionals >> -one.e()); - ASSERT(digit <= 9); - buffer[*length] = static_cast<char>('0' + digit); - (*length)++; - requested_digits--; - fractionals &= one.f() - 1; // Modulo by one. - (*kappa)--; - } - if (requested_digits != 0) return false; - return RoundWeedCounted(buffer, *length, fractionals, one.f(), w_error, - kappa); -} - - -// Provides a decimal representation of v. -// Returns true if it succeeds, otherwise the result cannot be trusted. -// There will be *length digits inside the buffer (not null-terminated). -// If the function returns true then -// v == (double) (buffer * 10^decimal_exponent). -// The digits in the buffer are the shortest representation possible: no -// 0.09999999999999999 instead of 0.1. The shorter representation will be -// chosen even if the longer one would be closer to v. -// The last digit will be closest to the actual v. That is, even if several -// digits might correctly yield 'v' when read again, the closest will be -// computed. -static bool Grisu3(double v, - FastDtoaMode mode, - Vector<char> buffer, - int* length, - int* decimal_exponent) { - DiyFp w = Double(v).AsNormalizedDiyFp(); - // boundary_minus and boundary_plus are the boundaries between v and its - // closest floating-point neighbors. Any number strictly between - // boundary_minus and boundary_plus will round to v when converted to a double. - // Grisu3 will never output representations that lie exactly on a boundary.
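// (Illustrative note, not from the original file: for v == 1.0 the adjacent
// doubles are 1 - 2^-53 below and 1 + 2^-52 above, so boundary_minus and
// boundary_plus are 1 - 2^-54 and 1 + 2^-53; any decimal inside that open
// interval -- "1" being the shortest -- reads back as exactly 1.0.)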
- DiyFp boundary_minus, boundary_plus; - if (mode == FAST_DTOA_SHORTEST) { - Double(v).NormalizedBoundaries(&boundary_minus, &boundary_plus); - } else { - ASSERT(mode == FAST_DTOA_SHORTEST_SINGLE); - float single_v = static_cast<float>(v); - Single(single_v).NormalizedBoundaries(&boundary_minus, &boundary_plus); - } - ASSERT(boundary_plus.e() == w.e()); - DiyFp ten_mk; // Cached power of ten: 10^-k - int mk; // -k - int ten_mk_minimal_binary_exponent = - kMinimalTargetExponent - (w.e() + DiyFp::kSignificandSize); - int ten_mk_maximal_binary_exponent = - kMaximalTargetExponent - (w.e() + DiyFp::kSignificandSize); - PowersOfTenCache::GetCachedPowerForBinaryExponentRange( - ten_mk_minimal_binary_exponent, - ten_mk_maximal_binary_exponent, - &ten_mk, &mk); - ASSERT((kMinimalTargetExponent <= w.e() + ten_mk.e() + - DiyFp::kSignificandSize) && - (kMaximalTargetExponent >= w.e() + ten_mk.e() + - DiyFp::kSignificandSize)); - // Note that ten_mk is only an approximation of 10^-k. A DiyFp only contains a - // 64 bit significand and ten_mk is thus only precise up to 64 bits. - - // The DiyFp::Times procedure rounds its result, and ten_mk is approximated - // too. The variable scaled_w (as well as scaled_boundary_minus/plus) are now - // off by a small amount. - // In fact: scaled_w - w*10^k < 1ulp (unit in the last place) of scaled_w. - // In other words: let f = scaled_w.f() and e = scaled_w.e(), then - // (f-1) * 2^e < w*10^k < (f+1) * 2^e - DiyFp scaled_w = DiyFp::Times(w, ten_mk); - ASSERT(scaled_w.e() == - boundary_plus.e() + ten_mk.e() + DiyFp::kSignificandSize); - // In theory it would be possible to avoid some recomputations by computing - // the difference between w and boundary_minus/plus (a power of 2) and to - // compute scaled_boundary_minus/plus by subtracting/adding from - // scaled_w. However the code becomes much less readable and the speed - // enhancements are not terrific. - DiyFp scaled_boundary_minus = DiyFp::Times(boundary_minus, ten_mk); - DiyFp scaled_boundary_plus = DiyFp::Times(boundary_plus, ten_mk); - - // DigitGen will generate the digits of scaled_w. Therefore we have - // v == (double) (scaled_w * 10^-mk). - // Set decimal_exponent == -mk and pass it to DigitGen. If scaled_w is not an - // integer then it will be updated. For instance if scaled_w == 1.23 then - // the buffer will be filled with "123" and the decimal_exponent will be - // decreased by 2. - int kappa; - bool result = DigitGen(scaled_boundary_minus, scaled_w, scaled_boundary_plus, - buffer, length, &kappa); - *decimal_exponent = -mk + kappa; - return result; -} - - -// The "counted" version of grisu3 (see above) only generates requested_digits -// number of digits. This version does not generate the shortest representation, -// and with enough requested digits 0.1 will at some point print as 0.9999999... -// Grisu3 is too imprecise for real halfway cases (1.5 will not work) and -// therefore the rounding strategy for halfway cases is irrelevant.
-static bool Grisu3Counted(double v, - int requested_digits, - Vector<char> buffer, - int* length, - int* decimal_exponent) { - DiyFp w = Double(v).AsNormalizedDiyFp(); - DiyFp ten_mk; // Cached power of ten: 10^-k - int mk; // -k - int ten_mk_minimal_binary_exponent = - kMinimalTargetExponent - (w.e() + DiyFp::kSignificandSize); - int ten_mk_maximal_binary_exponent = - kMaximalTargetExponent - (w.e() + DiyFp::kSignificandSize); - PowersOfTenCache::GetCachedPowerForBinaryExponentRange( - ten_mk_minimal_binary_exponent, - ten_mk_maximal_binary_exponent, - &ten_mk, &mk); - ASSERT((kMinimalTargetExponent <= w.e() + ten_mk.e() + - DiyFp::kSignificandSize) && - (kMaximalTargetExponent >= w.e() + ten_mk.e() + - DiyFp::kSignificandSize)); - // Note that ten_mk is only an approximation of 10^-k. A DiyFp only contains a - // 64 bit significand and ten_mk is thus only precise up to 64 bits. - - // The DiyFp::Times procedure rounds its result, and ten_mk is approximated - // too. The variable scaled_w (as well as scaled_boundary_minus/plus) are now - // off by a small amount. - // In fact: scaled_w - w*10^k < 1ulp (unit in the last place) of scaled_w. - // In other words: let f = scaled_w.f() and e = scaled_w.e(), then - // (f-1) * 2^e < w*10^k < (f+1) * 2^e - DiyFp scaled_w = DiyFp::Times(w, ten_mk); - - // We now have (double) (scaled_w * 10^-mk). - // DigitGenCounted will generate the first requested_digits digits of scaled_w - // and return together with a kappa such that scaled_w ~= buffer * 10^kappa. (It - // will not always be exactly the same since DigitGenCounted only produces a - // limited number of digits.) - int kappa; - bool result = DigitGenCounted(scaled_w, requested_digits, - buffer, length, &kappa); - *decimal_exponent = -mk + kappa; - return result; -} - - -bool FastDtoa(double v, - FastDtoaMode mode, - int requested_digits, - Vector<char> buffer, - int* length, - int* decimal_point) { - ASSERT(v > 0); - ASSERT(!Double(v).IsSpecial()); - - bool result = false; - int decimal_exponent = 0; - switch (mode) { - case FAST_DTOA_SHORTEST: - case FAST_DTOA_SHORTEST_SINGLE: - result = Grisu3(v, mode, buffer, length, &decimal_exponent); - break; - case FAST_DTOA_PRECISION: - result = Grisu3Counted(v, requested_digits, - buffer, length, &decimal_exponent); - break; - default: - UNREACHABLE(); - } - if (result) { - *decimal_point = *length + decimal_exponent; - buffer[*length] = '\0'; - } - return result; -} - -} // namespace double_conversion diff --git a/base/poco/Foundation/src/fast-dtoa.h b/base/poco/Foundation/src/fast-dtoa.h deleted file mode 100644 index dc3be8b71b0..00000000000 --- a/base/poco/Foundation/src/fast-dtoa.h +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission.
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef DOUBLE_CONVERSION_FAST_DTOA_H_ -#define DOUBLE_CONVERSION_FAST_DTOA_H_ - -#include "utils.h" - -namespace double_conversion -{ - -enum FastDtoaMode -{ - // Computes the shortest representation of the given input. The returned - // result will be the most accurate number of this length. Longer - // representations might be more accurate. - FAST_DTOA_SHORTEST, - // Same as FAST_DTOA_SHORTEST but for single-precision floats. - FAST_DTOA_SHORTEST_SINGLE, - // Computes a representation where the precision (number of digits) is - // given as input. The precision is independent of the decimal point. - FAST_DTOA_PRECISION -}; - -// FastDtoa will produce at most kFastDtoaMaximalLength digits. This does not -// include the terminating '\0' character. -static const int kFastDtoaMaximalLength = 17; -// Same for single-precision numbers. -static const int kFastDtoaMaximalSingleLength = 9; - -// Provides a decimal representation of v. -// The result should be interpreted as buffer * 10^(point - length). -// -// Precondition: -// * v must be a strictly positive finite double. -// -// Returns true if it succeeds, otherwise the result cannot be trusted. -// There will be *length digits inside the buffer followed by a null terminator. -// If the function returns true and mode equals -// - FAST_DTOA_SHORTEST, then -// the parameter requested_digits is ignored. -// The result satisfies -// v == (double) (buffer * 10^(point - length)). -// The digits in the buffer are the shortest representation possible. E.g. -// if 0.099999999999 and 0.1 represent the same double then "1" is returned -// with point = 0. -// The last digit will be closest to the actual v. That is, even if several -// digits might correctly yield 'v' when read again, the buffer will contain -// the one closest to v. -// - FAST_DTOA_PRECISION, then -// the buffer contains requested_digits digits. -// the difference v - (buffer * 10^(point-length)) is closest to zero for -// all possible representations of requested_digits digits. -// If there are two values that are equally close, then FastDtoa returns -// false. -// For both modes the buffer must be large enough to hold the result. -bool FastDtoa(double d, FastDtoaMode mode, int requested_digits, Vector<char> buffer, int * length, int * decimal_point); - -} // namespace double_conversion - -#endif // DOUBLE_CONVERSION_FAST_DTOA_H_ diff --git a/base/poco/Foundation/src/fixed-dtoa.cc b/base/poco/Foundation/src/fixed-dtoa.cc deleted file mode 100644 index 390e823d95e..00000000000 --- a/base/poco/Foundation/src/fixed-dtoa.cc +++ /dev/null @@ -1,404 +0,0 @@ -// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include <math.h> - -#include "fixed-dtoa.h" -#include "ieee.h" - -namespace double_conversion { - -// Represents a 128bit type. This class should be replaced by a native type on -// platforms that support 128bit integers. -class UInt128 { - public: - UInt128() : high_bits_(0), low_bits_(0) { } - UInt128(uint64_t high, uint64_t low) : high_bits_(high), low_bits_(low) { } - - void Multiply(uint32_t multiplicand) { - uint64_t accumulator; - - accumulator = (low_bits_ & kMask32) * multiplicand; - uint32_t part = static_cast<uint32_t>(accumulator & kMask32); - accumulator >>= 32; - accumulator = accumulator + (low_bits_ >> 32) * multiplicand; - low_bits_ = (accumulator << 32) + part; - accumulator >>= 32; - accumulator = accumulator + (high_bits_ & kMask32) * multiplicand; - part = static_cast<uint32_t>(accumulator & kMask32); - accumulator >>= 32; - accumulator = accumulator + (high_bits_ >> 32) * multiplicand; - high_bits_ = (accumulator << 32) + part; - ASSERT((accumulator >> 32) == 0); - } - - void Shift(int shift_amount) { - ASSERT(-64 <= shift_amount && shift_amount <= 64); - if (shift_amount == 0) { - return; - } else if (shift_amount == -64) { - high_bits_ = low_bits_; - low_bits_ = 0; - } else if (shift_amount == 64) { - low_bits_ = high_bits_; - high_bits_ = 0; - } else if (shift_amount <= 0) { - high_bits_ <<= -shift_amount; - high_bits_ += low_bits_ >> (64 + shift_amount); - low_bits_ <<= -shift_amount; - } else { - low_bits_ >>= shift_amount; - low_bits_ += high_bits_ << (64 - shift_amount); - high_bits_ >>= shift_amount; - } - } - - // Modifies *this to *this MOD (2^power). - // Returns *this DIV (2^power).
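// (Example, added for illustration: for the value high_bits_ == 0,
// low_bits_ == 0b1011 (= 11), DivModPowerOf2(2) returns 11 / 4 == 2 and
// leaves the object holding 11 % 4 == 3.)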
- int DivModPowerOf2(int power) { - if (power >= 64) { - int result = static_cast<int>(high_bits_ >> (power - 64)); - high_bits_ -= static_cast<uint64_t>(result) << (power - 64); - return result; - } else { - uint64_t part_low = low_bits_ >> power; - uint64_t part_high = high_bits_ << (64 - power); - int result = static_cast<int>(part_low + part_high); - high_bits_ = 0; - low_bits_ -= part_low << power; - return result; - } - } - - bool IsZero() const { - return high_bits_ == 0 && low_bits_ == 0; - } - - int BitAt(int position) { - if (position >= 64) { - return static_cast<int>(high_bits_ >> (position - 64)) & 1; - } else { - return static_cast<int>(low_bits_ >> position) & 1; - } - } - - private: - static const uint64_t kMask32 = 0xFFFFFFFF; - // Value == (high_bits_ << 64) + low_bits_ - uint64_t high_bits_; - uint64_t low_bits_; -}; - - -static const int kDoubleSignificandSize = 53; // Includes the hidden bit. - - -static void FillDigits32FixedLength(uint32_t number, int requested_length, - Vector<char> buffer, int* length) { - for (int i = requested_length - 1; i >= 0; --i) { - buffer[(*length) + i] = '0' + number % 10; - number /= 10; - } - *length += requested_length; -} - - -static void FillDigits32(uint32_t number, Vector<char> buffer, int* length) { - int number_length = 0; - // We fill the digits in reverse order and exchange them afterwards. - while (number != 0) { - int digit = number % 10; - number /= 10; - buffer[(*length) + number_length] = static_cast<char>('0' + digit); - number_length++; - } - // Exchange the digits. - int i = *length; - int j = *length + number_length - 1; - while (i < j) { - char tmp = buffer[i]; - buffer[i] = buffer[j]; - buffer[j] = tmp; - i++; - j--; - } - *length += number_length; -} - - -static void FillDigits64FixedLength(uint64_t number, - Vector<char> buffer, int* length) { - const uint32_t kTen7 = 10000000; - // For efficiency cut the number into 3 uint32_t parts, and print those. - uint32_t part2 = static_cast<uint32_t>(number % kTen7); - number /= kTen7; - uint32_t part1 = static_cast<uint32_t>(number % kTen7); - uint32_t part0 = static_cast<uint32_t>(number / kTen7); - - FillDigits32FixedLength(part0, 3, buffer, length); - FillDigits32FixedLength(part1, 7, buffer, length); - FillDigits32FixedLength(part2, 7, buffer, length); -} - - -static void FillDigits64(uint64_t number, Vector<char> buffer, int* length) { - const uint32_t kTen7 = 10000000; - // For efficiency cut the number into 3 uint32_t parts, and print those. - uint32_t part2 = static_cast<uint32_t>(number % kTen7); - number /= kTen7; - uint32_t part1 = static_cast<uint32_t>(number % kTen7); - uint32_t part0 = static_cast<uint32_t>(number / kTen7); - - if (part0 != 0) { - FillDigits32(part0, buffer, length); - FillDigits32FixedLength(part1, 7, buffer, length); - FillDigits32FixedLength(part2, 7, buffer, length); - } else if (part1 != 0) { - FillDigits32(part1, buffer, length); - FillDigits32FixedLength(part2, 7, buffer, length); - } else { - FillDigits32(part2, buffer, length); - } -} - - -static void RoundUp(Vector<char> buffer, int* length, int* decimal_point) { - // An empty buffer represents 0. - if (*length == 0) { - buffer[0] = '1'; - *decimal_point = 1; - *length = 1; - return; - } - // Round the last digit until we either have a digit that was not '9' or until - // we reached the first digit. - buffer[(*length) - 1]++; - for (int i = (*length) - 1; i > 0; --i) { - if (buffer[i] != '0' + 10) { - return; - } - buffer[i] = '0'; - buffer[i - 1]++; - } - // If the first digit is now '0' + 10, we would need to set it to '0' and add - // a '1' in front.
However we reach the first digit only if all following - // digits had been '9' before rounding up. Now all trailing digits are '0' and - // we simply switch the first digit to '1' and update the decimal-point - // (indicating that the point is now one digit to the right). - if (buffer[0] == '0' + 10) { - buffer[0] = '1'; - (*decimal_point)++; - } -} - - -// The given fractionals number represents a fixed-point number with binary -// point at bit (-exponent). -// Preconditions: -// -128 <= exponent <= 0. -// 0 <= fractionals * 2^exponent < 1 -// The buffer holds the result. -// The function will round its result. During the rounding-process digits not -// generated by this function might be updated, and the decimal-point variable -// might be updated. If this function generates the digits 99 and the buffer -// already contained "199" (thus yielding a buffer of "19999") then a -// rounding-up will change the contents of the buffer to "20000". -static void FillFractionals(uint64_t fractionals, int exponent, - int fractional_count, Vector<char> buffer, - int* length, int* decimal_point) { - ASSERT(-128 <= exponent && exponent <= 0); - // 'fractionals' is a fixed-point number, with binary point at bit - // (-exponent). Inside the function the non-converted remainder of fractionals - // is a fixed-point number, with binary point at bit 'point'. - if (-exponent <= 64) { - // One 64 bit number is sufficient. - ASSERT(fractionals >> 56 == 0); - int point = -exponent; - for (int i = 0; i < fractional_count; ++i) { - if (fractionals == 0) break; - // Instead of multiplying by 10 we multiply by 5 and adjust the point - // location. This way the fractionals variable will not overflow. - // Invariant at the beginning of the loop: fractionals < 2^point. - // Initially we have: point <= 64 and fractionals < 2^56 - // After each iteration the point is decremented by one. - // Note that 5^3 = 125 < 128 = 2^7. - // Therefore three iterations of this loop will not overflow fractionals - // (even without the subtraction at the end of the loop body). At this - // time point will satisfy point <= 61 and therefore fractionals < 2^point - // and any further multiplication of fractionals by 5 will not overflow. - fractionals *= 5; - point--; - int digit = static_cast<int>(fractionals >> point); - ASSERT(digit <= 9); - buffer[*length] = static_cast<char>('0' + digit); - (*length)++; - fractionals -= static_cast<uint64_t>(digit) << point; - } - // If the first bit after the point is set we have to round up. - if (((fractionals >> (point - 1)) & 1) == 1) { - RoundUp(buffer, length, decimal_point); - } - } else { // We need 128 bits. - ASSERT(64 < -exponent && -exponent <= 128); - UInt128 fractionals128 = UInt128(fractionals, 0); - fractionals128.Shift(-exponent - 64); - int point = 128; - for (int i = 0; i < fractional_count; ++i) { - if (fractionals128.IsZero()) break; - // As before: instead of multiplying by 10 we multiply by 5 and adjust the - // point location. - // This multiplication will not overflow for the same reasons as before. - fractionals128.Multiply(5); - point--; - int digit = fractionals128.DivModPowerOf2(point); - ASSERT(digit <= 9); - buffer[*length] = static_cast<char>('0' + digit); - (*length)++; - } - if (fractionals128.BitAt(point - 1) == 1) { - RoundUp(buffer, length, decimal_point); - } - } -} - - -// Removes leading and trailing zeros. -// If leading zeros are removed then the decimal point position is adjusted.
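// (Worked example, added for illustration: a buffer of "02500" with length 5
// and decimal_point == 3 encodes 2500 * 10^(3-5) == 25.0. Trimming first
// drops the trailing zeros ("025"), then shifts out the leading zero,
// leaving buffer "25", length 2, decimal_point 2 -- which still encodes
// 25 * 10^(2-2) == 25.0.)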
-static void TrimZeros(Vector<char> buffer, int* length, int* decimal_point) { - while (*length > 0 && buffer[(*length) - 1] == '0') { - (*length)--; - } - int first_non_zero = 0; - while (first_non_zero < *length && buffer[first_non_zero] == '0') { - first_non_zero++; - } - if (first_non_zero != 0) { - for (int i = first_non_zero; i < *length; ++i) { - buffer[i - first_non_zero] = buffer[i]; - } - *length -= first_non_zero; - *decimal_point -= first_non_zero; - } -} - - -bool FastFixedDtoa(double v, - int fractional_count, - Vector<char> buffer, - int* length, - int* decimal_point) { - const uint32_t kMaxUInt32 = 0xFFFFFFFF; - uint64_t significand = Double(v).Significand(); - int exponent = Double(v).Exponent(); - // v = significand * 2^exponent (with significand a 53bit integer). - // If the exponent is larger than 20 (i.e. we may have a 73bit number) then we - // don't know how to compute the representation. 2^73 ~= 9.5*10^21. - // If necessary this limit could probably be increased, but we don't need - // more. - if (exponent > 20) return false; - if (fractional_count > 20) return false; - *length = 0; - // At most kDoubleSignificandSize bits of the significand are non-zero. - // Given a 64 bit integer we have 11 0s followed by 53 potentially non-zero - // bits: 0..11*..0xxx..53*..xx - if (exponent + kDoubleSignificandSize > 64) { - // The exponent must be > 11. - // - // We know that v = significand * 2^exponent. - // And the exponent > 11. - // We simplify the task by dividing v by 10^17. - // The quotient delivers the first digits, and the remainder fits into a 64 - // bit number. - // Dividing by 10^17 is equivalent to dividing by 5^17*2^17. - const uint64_t kFive17 = UINT64_2PART_C(0xB1, A2BC2EC5); // 5^17 - uint64_t divisor = kFive17; - int divisor_power = 17; - uint64_t dividend = significand; - uint32_t quotient; - uint64_t remainder; - // Let v = f * 2^e with f == significand and e == exponent. - // Then we need q (quotient) and r (remainder) as follows: - // v = q * 10^17 + r - // f * 2^e = q * 10^17 + r - // f * 2^e = q * 5^17 * 2^17 + r - // If e > 17 then - // f * 2^(e-17) = q * 5^17 + r/2^17 - // else - // f = q * 5^17 * 2^(17-e) + r/2^e - if (exponent > divisor_power) { - // We only allow exponents of up to 20 and therefore (17 - e) <= 3 - dividend <<= exponent - divisor_power; - quotient = static_cast<uint32_t>(dividend / divisor); - remainder = (dividend % divisor) << divisor_power; - } else { - divisor <<= divisor_power - exponent; - quotient = static_cast<uint32_t>(dividend / divisor); - remainder = (dividend % divisor) << exponent; - } - FillDigits32(quotient, buffer, length); - FillDigits64FixedLength(remainder, buffer, length); - *decimal_point = *length; - } else if (exponent >= 0) { - // 0 <= exponent <= 11 - significand <<= exponent; - FillDigits64(significand, buffer, length); - *decimal_point = *length; - } else if (exponent > -kDoubleSignificandSize) { - // We have to cut the number. - uint64_t integrals = significand >> -exponent; - uint64_t fractionals = significand - (integrals << -exponent); - if (integrals > kMaxUInt32) { - FillDigits64(integrals, buffer, length); - } else { - FillDigits32(static_cast<uint32_t>(integrals), buffer, length); - } - *decimal_point = *length; - FillFractionals(fractionals, exponent, fractional_count, - buffer, length, decimal_point); - } else if (exponent < -128) { - // This configuration (with at most 20 digits) means that all digits must be - // 0.
- ASSERT(fractional_count <= 20); - buffer[0] = '\0'; - *length = 0; - *decimal_point = -fractional_count; - } else { - *decimal_point = 0; - FillFractionals(significand, exponent, fractional_count, - buffer, length, decimal_point); - } - TrimZeros(buffer, length, decimal_point); - buffer[*length] = '\0'; - if ((*length) == 0) { - // The string is empty and the decimal_point thus has no importance. Mimic - // Gay's dtoa and set it to -fractional_count. - *decimal_point = -fractional_count; - } - return true; -} - -} // namespace double_conversion diff --git a/base/poco/Foundation/src/fixed-dtoa.h b/base/poco/Foundation/src/fixed-dtoa.h deleted file mode 100644 index c39eecce51f..00000000000 --- a/base/poco/Foundation/src/fixed-dtoa.h +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef DOUBLE_CONVERSION_FIXED_DTOA_H_ -#define DOUBLE_CONVERSION_FIXED_DTOA_H_ - -#include "utils.h" - -namespace double_conversion -{ - -// Produces digits necessary to print a given number with -// 'fractional_count' digits after the decimal point. -// The buffer must be big enough to hold the result plus one terminating null -// character. -// -// The produced digits might be too short in which case the caller has to fill -// the gaps with '0's. -// Example: FastFixedDtoa(0.001, 5, ...) is allowed to return buffer = "1", and -// decimal_point = -2. -// Halfway cases are rounded towards +/-Infinity (away from 0). The call -// FastFixedDtoa(0.15, 2, ...) thus returns buffer = "2", decimal_point = 0. -// The returned buffer may contain digits that would be truncated from the -// shortest representation of the input. -// -// This method only works for some parameters. If it can't handle the input it -// returns false. The output is null-terminated when the function succeeds.
-bool FastFixedDtoa(double v, int fractional_count, Vector<char> buffer, int * length, int * decimal_point); - -} // namespace double_conversion - -#endif // DOUBLE_CONVERSION_FIXED_DTOA_H_ diff --git a/base/poco/Foundation/src/gzguts.h b/base/poco/Foundation/src/gzguts.h deleted file mode 100644 index 1b964756065..00000000000 --- a/base/poco/Foundation/src/gzguts.h +++ /dev/null @@ -1,194 +0,0 @@ -/* gzguts.h -- zlib internal header definitions for gz* operations - * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -#ifdef _LARGEFILE64_SOURCE -# ifndef _LARGEFILE_SOURCE -# define _LARGEFILE_SOURCE 1 -# endif -# ifdef _FILE_OFFSET_BITS -# undef _FILE_OFFSET_BITS -# endif -#endif - -#ifdef HAVE_HIDDEN -# define ZLIB_INTERNAL __attribute__((visibility("hidden"))) -#else -# define ZLIB_INTERNAL -#endif - -#include <stdio.h> -#include "zlib.h" -#ifdef STDC -# include <string.h> -# include <stdlib.h> -# include <limits.h> -#endif - -#ifndef _POSIX_SOURCE -# define _POSIX_SOURCE -#endif -#include <fcntl.h> - - -# if defined(__TURBOC__) || defined(_MSC_VER) || defined(_WIN32) -# include <io.h> -# endif -#if defined(_WIN32) || defined(__CYGWIN__) -# define WIDECHAR -#endif - -#ifdef WINAPI_FAMILY -# define open _open -# define read _read -# define write _write -# define close _close -#endif - -#ifdef NO_DEFLATE /* for compatibility with old definition */ -# define NO_GZCOMPRESS -#endif - -#if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550) -# ifndef HAVE_VSNPRINTF -# define HAVE_VSNPRINTF -# endif -#endif - - -#ifndef HAVE_VSNPRINTF -# ifdef __TURBOC__ -# define NO_vsnprintf -# endif -# ifdef WIN32 -/* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf. */ -# if !defined(vsnprintf) && !defined(NO_vsnprintf) -# define vsnprintf _vsnprintf -# endif -# endif -# ifdef __SASC -# define NO_vsnprintf -# endif -# ifdef VMS -# define NO_vsnprintf -# endif -# ifdef __OS400__ -# define NO_vsnprintf -# endif -# ifdef __MVS__ -# define NO_vsnprintf -# endif -#endif - -/* unlike snprintf (which is required in C99), _snprintf does not guarantee - null termination of the result -- however this is only used in gzlib.c where - the result is assured to fit in the space provided */ - -#ifndef local -# define local static -#endif -/* since "static" is used to mean two completely different things in C, we - define "local" for the non-static meaning of "static", for readability - (compile with -Dlocal if your debugger can't find static symbols) */ - -/* gz* functions always use library allocation functions */ -#ifndef STDC -extern voidp malloc OF((uInt size)); -extern void free OF((voidpf ptr)); -#endif - -/* get errno and strerror definition */ -#if defined UNDER_CE -# include <windows.h> -# define zstrerror() gz_strwinerror((DWORD)GetLastError()) -#else -# ifndef NO_STRERROR -# include <errno.h> -# define zstrerror() strerror(errno) -# else -# define zstrerror() "stdio error (consult errno)" -# endif -#endif - -/* provide prototypes for these when building zlib without LFS */ -#if !defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE - 0 == 0 -ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *)); -ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int)); -ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile)); -ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile)); -#endif - -/* default memLevel */ -#if MAX_MEM_LEVEL >= 8 -# define DEF_MEM_LEVEL 8 -#else -# define DEF_MEM_LEVEL MAX_MEM_LEVEL -#endif - -/* default i/o buffer size -- double this for output when reading (this and
twice this must be able to fit in an unsigned type) */ -#define GZBUFSIZE 8192 - -/* gzip modes, also provide a little integrity check on the passed structure */ -#define GZ_NONE 0 -#define GZ_READ 7247 -#define GZ_WRITE 31153 -#define GZ_APPEND 1 /* mode set to GZ_WRITE after the file is opened */ - -/* values for gz_state how */ -#define LOOK 0 /* look for a gzip header */ -#define COPY 1 /* copy input directly */ -#define GZIP 2 /* decompress a gzip stream */ - -/* internal gzip file state data structure */ -typedef struct -{ - /* exposed contents for gzgetc() macro */ - struct gzFile_s x; /* "x" for exposed */ - /* x.have: number of bytes available at x.next */ - /* x.next: next output data to deliver or write */ - /* x.pos: current position in uncompressed data */ - /* used for both reading and writing */ - int mode; /* see gzip modes above */ - int fd; /* file descriptor */ - char * path; /* path or fd for error messages */ - unsigned size; /* buffer size, zero if not allocated yet */ - unsigned want; /* requested buffer size, default is GZBUFSIZE */ - unsigned char * in; /* input buffer (double-sized when writing) */ - unsigned char * out; /* output buffer (double-sized when reading) */ - int direct; /* 0 if processing gzip, 1 if transparent */ - /* just for reading */ - int how; /* 0: get header, 1: copy, 2: decompress */ - z_off64_t start; /* where the gzip data started, for rewinding */ - int eof; /* true if end of input file reached */ - int past; /* true if read requested past end */ - /* just for writing */ - int level; /* compression level */ - int strategy; /* compression strategy */ - /* seek request */ - z_off64_t skip; /* amount to skip (already rewound if backwards) */ - int seek; /* true if seek request pending */ - /* error information */ - int err; /* error code */ - char * msg; /* error message */ - /* zlib inflate or deflate stream */ - z_stream strm; /* stream structure in-place (not a pointer) */ -} gz_state; -typedef gz_state FAR * gz_statep; - -/* shared functions */ -void ZLIB_INTERNAL gz_error OF((gz_statep, int, const char *)); -#if defined UNDER_CE -char ZLIB_INTERNAL * gz_strwinerror OF((DWORD error)); -#endif - -/* GT_OFF(x), where x is an unsigned value, is true if x > maximum z_off64_t - value -- needed when comparing unsigned to z_off64_t, which is signed - (possible z_off64_t types off_t, off64_t, and long are all signed) */ -#ifdef INT_MAX -# define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > INT_MAX) -#else -unsigned ZLIB_INTERNAL gz_intmax OF((void)); -# define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > gz_intmax()) -#endif diff --git a/base/poco/Foundation/src/ieee.h b/base/poco/Foundation/src/ieee.h deleted file mode 100644 index 2cfd39bab66..00000000000 --- a/base/poco/Foundation/src/ieee.h +++ /dev/null @@ -1,458 +0,0 @@ -// Copyright 2012 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. 
nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef DOUBLE_CONVERSION_DOUBLE_H_ -#define DOUBLE_CONVERSION_DOUBLE_H_ - -#include "diy-fp.h" - -namespace double_conversion -{ - -// We assume that doubles and uint64_t have the same endianness. -static uint64_t double_to_uint64(double d) -{ - return BitCast<uint64_t>(d); -} -static double uint64_to_double(uint64_t d64) -{ - return BitCast<double>(d64); -} -static uint32_t float_to_uint32(float f) -{ - return BitCast<uint32_t>(f); -} -static float uint32_to_float(uint32_t d32) -{ - return BitCast<float>(d32); -} - -// Helper functions for doubles. -class Double -{ -public: - static const uint64_t kSignMask = UINT64_2PART_C(0x80000000, 00000000); - static const uint64_t kExponentMask = UINT64_2PART_C(0x7FF00000, 00000000); - static const uint64_t kSignificandMask = UINT64_2PART_C(0x000FFFFF, FFFFFFFF); - static const uint64_t kHiddenBit = UINT64_2PART_C(0x00100000, 00000000); - static const int kPhysicalSignificandSize = 52; // Excludes the hidden bit. - static const int kSignificandSize = 53; - - Double() : d64_(0) { } - explicit Double(double d) : d64_(double_to_uint64(d)) { } - explicit Double(uint64_t d64) : d64_(d64) { } - explicit Double(DiyFp diy_fp) : d64_(DiyFpToUint64(diy_fp)) { } - - // The value encoded by this Double must be greater than or equal to +0.0. - // It must not be special (infinity, or NaN). - DiyFp AsDiyFp() const - { - ASSERT(Sign() > 0); - ASSERT(!IsSpecial()); - return DiyFp(Significand(), Exponent()); - } - - // The value encoded by this Double must be strictly greater than 0. - DiyFp AsNormalizedDiyFp() const - { - ASSERT(value() > 0.0); - uint64_t f = Significand(); - int e = Exponent(); - - // The current double could be a denormal. - while ((f & kHiddenBit) == 0) - { - f <<= 1; - e--; - } - // Do the final shifts in one go. - f <<= DiyFp::kSignificandSize - kSignificandSize; - e -= DiyFp::kSignificandSize - kSignificandSize; - return DiyFp(f, e); - } - - // Returns the double's bits as uint64. - uint64_t AsUint64() const { return d64_; } - - // Returns the next greater double. Returns +infinity on input +infinity.
- double NextDouble() const - { - if (d64_ == kInfinity) - return Double(kInfinity).value(); - if (Sign() < 0 && Significand() == 0) - { - // -0.0 - return 0.0; - } - if (Sign() < 0) - { - return Double(d64_ - 1).value(); - } - else - { - return Double(d64_ + 1).value(); - } - } - - double PreviousDouble() const - { - if (d64_ == (kInfinity | kSignMask)) - return -Double::Infinity(); - if (Sign() < 0) - { - return Double(d64_ + 1).value(); - } - else - { - if (Significand() == 0) - return -0.0; - return Double(d64_ - 1).value(); - } - } - - int Exponent() const - { - if (IsDenormal()) - return kDenormalExponent; - - uint64_t d64 = AsUint64(); - int biased_e = static_cast<int>((d64 & kExponentMask) >> kPhysicalSignificandSize); - return biased_e - kExponentBias; - } - - uint64_t Significand() const - { - uint64_t d64 = AsUint64(); - uint64_t significand = d64 & kSignificandMask; - if (!IsDenormal()) - { - return significand + kHiddenBit; - } - else - { - return significand; - } - } - - // Returns true if the double is a denormal. - bool IsDenormal() const - { - uint64_t d64 = AsUint64(); - return (d64 & kExponentMask) == 0; - } - - // We consider denormals not to be special. - // Hence only Infinity and NaN are special. - bool IsSpecial() const - { - uint64_t d64 = AsUint64(); - return (d64 & kExponentMask) == kExponentMask; - } - - bool IsNan() const - { - uint64_t d64 = AsUint64(); - return ((d64 & kExponentMask) == kExponentMask) && ((d64 & kSignificandMask) != 0); - } - - bool IsInfinite() const - { - uint64_t d64 = AsUint64(); - return ((d64 & kExponentMask) == kExponentMask) && ((d64 & kSignificandMask) == 0); - } - - int Sign() const - { - uint64_t d64 = AsUint64(); - return (d64 & kSignMask) == 0 ? 1 : -1; - } - - // Precondition: the value encoded by this Double must be greater than or - // equal to +0.0. - DiyFp UpperBoundary() const - { - ASSERT(Sign() > 0); - return DiyFp(Significand() * 2 + 1, Exponent() - 1); - } - - // Computes the two boundaries of this. - // The bigger boundary (m_plus) is normalized. The lower boundary has the same - // exponent as m_plus. - // Precondition: the value encoded by this Double must be greater than 0. - void NormalizedBoundaries(DiyFp * out_m_minus, DiyFp * out_m_plus) const - { - ASSERT(value() > 0.0); - DiyFp v = this->AsDiyFp(); - DiyFp m_plus = DiyFp::Normalize(DiyFp((v.f() << 1) + 1, v.e() - 1)); - DiyFp m_minus; - if (LowerBoundaryIsCloser()) - { - m_minus = DiyFp((v.f() << 2) - 1, v.e() - 2); - } - else - { - m_minus = DiyFp((v.f() << 1) - 1, v.e() - 1); - } - m_minus.set_f(m_minus.f() << (m_minus.e() - m_plus.e())); - m_minus.set_e(m_plus.e()); - *out_m_plus = m_plus; - *out_m_minus = m_minus; - } - - bool LowerBoundaryIsCloser() const - { - // If the significand is of the form f == 2^p-1, then the lower boundary is - // closer. - // Think of v = 1000e10 and v- = 9999e9. - // Then the boundary (== (v - v-)/2) is not just at a distance of 1e9 but - // at a distance of 1e8. - // The only exception is for the smallest normal: the largest denormal is - // at the same distance as its successor. - // Note: denormals have the same exponent as the smallest normals. - bool physical_significand_is_zero = ((AsUint64() & kSignificandMask) == 0); - return physical_significand_is_zero && (Exponent() != kDenormalExponent); - } - - double value() const { return uint64_to_double(d64_); } - - // Returns the significand size for a given order of magnitude. - // If v = f*2^e with 2^p-1 <= f <= 2^p then p+e is v's order of magnitude.
- // This function returns the number of significant binary digits v will have - // once it's encoded into a double. In almost all cases this is equal to - // kSignificandSize. The only exceptions are denormals. They start with - // leading zeroes and their effective significand-size is hence smaller. - static int SignificandSizeForOrderOfMagnitude(int order) - { - if (order >= (kDenormalExponent + kSignificandSize)) - { - return kSignificandSize; - } - if (order <= kDenormalExponent) - return 0; - return order - kDenormalExponent; - } - - static double Infinity() { return Double(kInfinity).value(); } - - static double NaN() { return Double(kNaN).value(); } - -private: - static const int kExponentBias = 0x3FF + kPhysicalSignificandSize; - static const int kDenormalExponent = -kExponentBias + 1; - static const int kMaxExponent = 0x7FF - kExponentBias; - static const uint64_t kInfinity = UINT64_2PART_C(0x7FF00000, 00000000); - static const uint64_t kNaN = UINT64_2PART_C(0x7FF80000, 00000000); - - const uint64_t d64_; - - static uint64_t DiyFpToUint64(DiyFp diy_fp) - { - uint64_t significand = diy_fp.f(); - int exponent = diy_fp.e(); - while (significand > kHiddenBit + kSignificandMask) - { - significand >>= 1; - exponent++; - } - if (exponent >= kMaxExponent) - { - return kInfinity; - } - if (exponent < kDenormalExponent) - { - return 0; - } - while (exponent > kDenormalExponent && (significand & kHiddenBit) == 0) - { - significand <<= 1; - exponent--; - } - uint64_t biased_exponent; - if (exponent == kDenormalExponent && (significand & kHiddenBit) == 0) - { - biased_exponent = 0; - } - else - { - biased_exponent = static_cast<uint64_t>(exponent + kExponentBias); - } - return (significand & kSignificandMask) | (biased_exponent << kPhysicalSignificandSize); - } - - DISALLOW_COPY_AND_ASSIGN(Double); -}; - -class Single -{ -public: - static const uint32_t kSignMask = 0x80000000; - static const uint32_t kExponentMask = 0x7F800000; - static const uint32_t kSignificandMask = 0x007FFFFF; - static const uint32_t kHiddenBit = 0x00800000; - static const int kPhysicalSignificandSize = 23; // Excludes the hidden bit. - static const int kSignificandSize = 24; - - Single() : d32_(0) { } - explicit Single(float f) : d32_(float_to_uint32(f)) { } - explicit Single(uint32_t d32) : d32_(d32) { } - - // The value encoded by this Single must be greater than or equal to +0.0. - // It must not be special (infinity, or NaN). - DiyFp AsDiyFp() const - { - ASSERT(Sign() > 0); - ASSERT(!IsSpecial()); - return DiyFp(Significand(), Exponent()); - } - - // Returns the single's bits as uint32. - uint32_t AsUint32() const { return d32_; } - - int Exponent() const - { - if (IsDenormal()) - return kDenormalExponent; - - uint32_t d32 = AsUint32(); - int biased_e = static_cast<int>((d32 & kExponentMask) >> kPhysicalSignificandSize); - return biased_e - kExponentBias; - } - - uint32_t Significand() const - { - uint32_t d32 = AsUint32(); - uint32_t significand = d32 & kSignificandMask; - if (!IsDenormal()) - { - return significand + kHiddenBit; - } - else - { - return significand; - } - } - - // Returns true if the single is a denormal. - bool IsDenormal() const - { - uint32_t d32 = AsUint32(); - return (d32 & kExponentMask) == 0; - } - - // We consider denormals not to be special. - // Hence only Infinity and NaN are special.
- bool IsSpecial() const - { - uint32_t d32 = AsUint32(); - return (d32 & kExponentMask) == kExponentMask; - } - - bool IsNan() const - { - uint32_t d32 = AsUint32(); - return ((d32 & kExponentMask) == kExponentMask) && ((d32 & kSignificandMask) != 0); - } - - bool IsInfinite() const - { - uint32_t d32 = AsUint32(); - return ((d32 & kExponentMask) == kExponentMask) && ((d32 & kSignificandMask) == 0); - } - - int Sign() const - { - uint32_t d32 = AsUint32(); - return (d32 & kSignMask) == 0 ? 1 : -1; - } - - // Computes the two boundaries of this. - // The bigger boundary (m_plus) is normalized. The lower boundary has the same - // exponent as m_plus. - // Precondition: the value encoded by this Single must be greater than 0. - void NormalizedBoundaries(DiyFp * out_m_minus, DiyFp * out_m_plus) const - { - ASSERT(value() > 0.0); - DiyFp v = this->AsDiyFp(); - DiyFp m_plus = DiyFp::Normalize(DiyFp((v.f() << 1) + 1, v.e() - 1)); - DiyFp m_minus; - if (LowerBoundaryIsCloser()) - { - m_minus = DiyFp((v.f() << 2) - 1, v.e() - 2); - } - else - { - m_minus = DiyFp((v.f() << 1) - 1, v.e() - 1); - } - m_minus.set_f(m_minus.f() << (m_minus.e() - m_plus.e())); - m_minus.set_e(m_plus.e()); - *out_m_plus = m_plus; - *out_m_minus = m_minus; - } - - // Precondition: the value encoded by this Single must be greater than or - // equal to +0.0. - DiyFp UpperBoundary() const - { - ASSERT(Sign() > 0); - return DiyFp(Significand() * 2 + 1, Exponent() - 1); - } - - bool LowerBoundaryIsCloser() const - { - // If the significand is of the form f == 2^p-1, then the lower boundary is - // closer. - // Think of v = 1000e10 and v- = 9999e9. - // Then the boundary (== (v - v-)/2) is not just at a distance of 1e9 but - // at a distance of 1e8. - // The only exception is for the smallest normal: the largest denormal is - // at the same distance as its successor. - // Note: denormals have the same exponent as the smallest normals. - bool physical_significand_is_zero = ((AsUint32() & kSignificandMask) == 0); - return physical_significand_is_zero && (Exponent() != kDenormalExponent); - } - - float value() const { return uint32_to_float(d32_); } - - static float Infinity() { return Single(kInfinity).value(); } - - static float NaN() { return Single(kNaN).value(); } - -private: - static const int kExponentBias = 0x7F + kPhysicalSignificandSize; - static const int kDenormalExponent = -kExponentBias + 1; - static const int kMaxExponent = 0xFF - kExponentBias; - static const uint32_t kInfinity = 0x7F800000; - static const uint32_t kNaN = 0x7FC00000; - - const uint32_t d32_; - - DISALLOW_COPY_AND_ASSIGN(Single); -}; - -} // namespace double_conversion - -#endif // DOUBLE_CONVERSION_DOUBLE_H_ diff --git a/base/poco/Foundation/src/infback.c b/base/poco/Foundation/src/infback.c deleted file mode 100644 index 59679ecbfc5..00000000000 --- a/base/poco/Foundation/src/infback.c +++ /dev/null @@ -1,640 +0,0 @@ -/* infback.c -- inflate using a call-back interface - * Copyright (C) 1995-2016 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/- - This code is largely copied from inflate.c. Normally either infback.o or - inflate.o would be linked into an application--not both. The interface - with inffast.c is retained so that optimized assembler-coded versions of - inflate_fast() can be used with either inflate.c or infback.c.
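As that header comment says, inflateBack() is driven by caller-supplied callbacks and uses the window as its output buffer. A hedged usage sketch for a raw deflate stream (the callback bodies, descriptors, and error handling here are illustrative, not from this diff; the zlib entry points are real):

```cpp
#include <cstring>
#include <zlib.h>

// Illustrative callbacks: pull() hands zlib the next chunk of compressed
// input; push() receives each window's worth of decompressed output.
static unsigned pull(void * desc, z_const unsigned char ** buf)
{
    (void)desc; (void)buf;
    return 0; // ...return the number of bytes now at *buf; 0 signals EOF...
}

static int push(void * desc, unsigned char * buf, unsigned len)
{
    (void)desc; (void)buf; (void)len;
    return 0; // ...consume len bytes; return nonzero to abort with Z_BUF_ERROR...
}

int decompress_raw()
{
    static unsigned char window[32768]; // must be 1 << windowBits bytes
    z_stream strm;
    std::memset(&strm, 0, sizeof(strm)); // zalloc/zfree = 0 -> library defaults
    if (inflateBackInit(&strm, 15, window) != Z_OK)
        return Z_STREAM_ERROR;
    int ret = inflateBack(&strm, pull, nullptr, push, nullptr);
    inflateBackEnd(&strm);
    return ret; // Z_STREAM_END on success, per the comment below
}
```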
- */ - -#include "zutil.h" -#include "inftrees.h" -#include "inflate.h" -#include "inffast.h" - -/* function prototypes */ -local void fixedtables OF((struct inflate_state FAR *state)); - -/* - strm provides memory allocation functions in zalloc and zfree, or - Z_NULL to use the library memory allocation functions. - - windowBits is in the range 8..15, and window is a user-supplied - window and output buffer that is 2**windowBits bytes. - */ -int ZEXPORT inflateBackInit_(strm, windowBits, window, version, stream_size) -z_streamp strm; -int windowBits; -unsigned char FAR *window; -const char *version; -int stream_size; -{ - struct inflate_state FAR *state; - - if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || - stream_size != (int)(sizeof(z_stream))) - return Z_VERSION_ERROR; - if (strm == Z_NULL || window == Z_NULL || - windowBits < 8 || windowBits > 15) - return Z_STREAM_ERROR; - strm->msg = Z_NULL; /* in case we return an error */ - if (strm->zalloc == (alloc_func)0) { -#ifdef Z_SOLO - return Z_STREAM_ERROR; -#else - strm->zalloc = zcalloc; - strm->opaque = (voidpf)0; -#endif - } - if (strm->zfree == (free_func)0) -#ifdef Z_SOLO - return Z_STREAM_ERROR; -#else - strm->zfree = zcfree; -#endif - state = (struct inflate_state FAR *)ZALLOC(strm, 1, - sizeof(struct inflate_state)); - if (state == Z_NULL) return Z_MEM_ERROR; - Tracev((stderr, "inflate: allocated\n")); - strm->state = (struct internal_state FAR *)state; - state->dmax = 32768U; - state->wbits = (uInt)windowBits; - state->wsize = 1U << windowBits; - state->window = window; - state->wnext = 0; - state->whave = 0; - return Z_OK; -} - -/* - Return state with length and distance decoding tables and index sizes set to - fixed code decoding. Normally this returns fixed tables from inffixed.h. - If BUILDFIXED is defined, then instead this routine builds the tables the - first time it's called, and returns those tables the first time and - thereafter. This reduces the size of the code by about 2K bytes, in - exchange for a little execution time. However, BUILDFIXED should not be - used for threaded applications, since the rewriting of the tables and virgin - may not be thread-safe. 
- */ -local void fixedtables(state) -struct inflate_state FAR *state; -{ -#ifdef BUILDFIXED - static int virgin = 1; - static code *lenfix, *distfix; - static code fixed[544]; - - /* build fixed huffman tables if first call (may not be thread safe) */ - if (virgin) { - unsigned sym, bits; - static code *next; - - /* literal/length table */ - sym = 0; - while (sym < 144) state->lens[sym++] = 8; - while (sym < 256) state->lens[sym++] = 9; - while (sym < 280) state->lens[sym++] = 7; - while (sym < 288) state->lens[sym++] = 8; - next = fixed; - lenfix = next; - bits = 9; - inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work); - - /* distance table */ - sym = 0; - while (sym < 32) state->lens[sym++] = 5; - distfix = next; - bits = 5; - inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work); - - /* do this just once */ - virgin = 0; - } -#else /* !BUILDFIXED */ -# include "inffixed.h" -#endif /* BUILDFIXED */ - state->lencode = lenfix; - state->lenbits = 9; - state->distcode = distfix; - state->distbits = 5; -} - -/* Macros for inflateBack(): */ - -/* Load returned state from inflate_fast() */ -#define LOAD() \ - do { \ - put = strm->next_out; \ - left = strm->avail_out; \ - next = strm->next_in; \ - have = strm->avail_in; \ - hold = state->hold; \ - bits = state->bits; \ - } while (0) - -/* Set state from registers for inflate_fast() */ -#define RESTORE() \ - do { \ - strm->next_out = put; \ - strm->avail_out = left; \ - strm->next_in = next; \ - strm->avail_in = have; \ - state->hold = hold; \ - state->bits = bits; \ - } while (0) - -/* Clear the input bit accumulator */ -#define INITBITS() \ - do { \ - hold = 0; \ - bits = 0; \ - } while (0) - -/* Assure that some input is available. If input is requested, but denied, - then return a Z_BUF_ERROR from inflateBack(). */ -#define PULL() \ - do { \ - if (have == 0) { \ - have = in(in_desc, &next); \ - if (have == 0) { \ - next = Z_NULL; \ - ret = Z_BUF_ERROR; \ - goto inf_leave; \ - } \ - } \ - } while (0) - -/* Get a byte of input into the bit accumulator, or return from inflateBack() - with an error if there is no input available. */ -#define PULLBYTE() \ - do { \ - PULL(); \ - have--; \ - hold += (unsigned long)(*next++) << bits; \ - bits += 8; \ - } while (0) - -/* Assure that there are at least n bits in the bit accumulator. If there is - not enough available input to do that, then return from inflateBack() with - an error. */ -#define NEEDBITS(n) \ - do { \ - while (bits < (unsigned)(n)) \ - PULLBYTE(); \ - } while (0) - -/* Return the low n bits of the bit accumulator (n < 16) */ -#define BITS(n) \ - ((unsigned)hold & ((1U << (n)) - 1)) - -/* Remove n bits from the bit accumulator */ -#define DROPBITS(n) \ - do { \ - hold >>= (n); \ - bits -= (unsigned)(n); \ - } while (0) - -/* Remove zero to seven bits as needed to go to a byte boundary */ -#define BYTEBITS() \ - do { \ - hold >>= bits & 7; \ - bits -= bits & 7; \ - } while (0) - -/* Assure that some output space is available, by writing out the window - if it's full. If the write fails, return from inflateBack() with a - Z_BUF_ERROR. */ -#define ROOM() \ - do { \ - if (left == 0) { \ - put = state->window; \ - left = state->wsize; \ - state->whave = left; \ - if (out(out_desc, put, left)) { \ - ret = Z_BUF_ERROR; \ - goto inf_leave; \ - } \ - } \ - } while (0) - -/* - strm provides the memory allocation functions and window buffer on input, - and provides information on the unused input on return. 
For Z_DATA_ERROR - returns, strm will also provide an error message. - - in() and out() are the call-back input and output functions. When - inflateBack() needs more input, it calls in(). When inflateBack() has - filled the window with output, or when it completes with data in the - window, it calls out() to write out the data. The application must not - change the provided input until in() is called again or inflateBack() - returns. The application must not change the window/output buffer until - inflateBack() returns. - - in() and out() are called with a descriptor parameter provided in the - inflateBack() call. This parameter can be a structure that provides the - information required to do the read or write, as well as accumulated - information on the input and output such as totals and check values. - - in() should return zero on failure. out() should return non-zero on - failure. If either in() or out() fails, then inflateBack() returns a - Z_BUF_ERROR. strm->next_in can be checked for Z_NULL to see whether it - was in() or out() that caused the error. Otherwise, inflateBack() - returns Z_STREAM_END on success, Z_DATA_ERROR for a deflate format - error, or Z_MEM_ERROR if it could not allocate memory for the state. - inflateBack() can also return Z_STREAM_ERROR if the input parameters - are not correct, i.e. strm is Z_NULL or the state was not initialized. - */ -int ZEXPORT inflateBack(strm, in, in_desc, out, out_desc) -z_streamp strm; -in_func in; -void FAR *in_desc; -out_func out; -void FAR *out_desc; -{ - struct inflate_state FAR *state; - z_const unsigned char FAR *next; /* next input */ - unsigned char FAR *put; /* next output */ - unsigned have, left; /* available input and output */ - unsigned long hold; /* bit buffer */ - unsigned bits; /* bits in bit buffer */ - unsigned copy; /* number of stored or match bytes to copy */ - unsigned char FAR *from; /* where to copy match bytes from */ - code here; /* current decoding table entry */ - code last; /* parent table entry */ - unsigned len; /* length to copy for repeats, bits to drop */ - int ret; /* return code */ - static const unsigned short order[19] = /* permutation of code lengths */ - {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; - - /* Check that the strm exists and that the state was initialized */ - if (strm == Z_NULL || strm->state == Z_NULL) - return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)strm->state; - - /* Reset the state */ - strm->msg = Z_NULL; - state->mode = TYPE; - state->last = 0; - state->whave = 0; - next = strm->next_in; - have = next != Z_NULL ? strm->avail_in : 0; - hold = 0; - bits = 0; - put = state->window; - left = state->wsize; - - /* Inflate until end of block marked as last */ - for (;;) - switch (state->mode) { - case TYPE: - /* determine and dispatch block type */ - if (state->last) { - BYTEBITS(); - state->mode = DONE; - break; - } - NEEDBITS(3); - state->last = BITS(1); - DROPBITS(1); - switch (BITS(2)) { - case 0: /* stored block */ - Tracev((stderr, "inflate: stored block%s\n", - state->last ? " (last)" : "")); - state->mode = STORED; - break; - case 1: /* fixed block */ - fixedtables(state); - Tracev((stderr, "inflate: fixed codes block%s\n", - state->last ? " (last)" : "")); - state->mode = LEN; /* decode codes */ - break; - case 2: /* dynamic block */ - Tracev((stderr, "inflate: dynamic codes block%s\n", - state->last ?
" (last)" : "")); - state->mode = TABLE; - break; - case 3: - strm->msg = (char *)"invalid block type"; - state->mode = BAD; - } - DROPBITS(2); - break; - - case STORED: - /* get and verify stored block length */ - BYTEBITS(); /* go to byte boundary */ - NEEDBITS(32); - if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { - strm->msg = (char *)"invalid stored block lengths"; - state->mode = BAD; - break; - } - state->length = (unsigned)hold & 0xffff; - Tracev((stderr, "inflate: stored length %u\n", - state->length)); - INITBITS(); - - /* copy stored block from input to output */ - while (state->length != 0) { - copy = state->length; - PULL(); - ROOM(); - if (copy > have) copy = have; - if (copy > left) copy = left; - zmemcpy(put, next, copy); - have -= copy; - next += copy; - left -= copy; - put += copy; - state->length -= copy; - } - Tracev((stderr, "inflate: stored end\n")); - state->mode = TYPE; - break; - - case TABLE: - /* get dynamic table entries descriptor */ - NEEDBITS(14); - state->nlen = BITS(5) + 257; - DROPBITS(5); - state->ndist = BITS(5) + 1; - DROPBITS(5); - state->ncode = BITS(4) + 4; - DROPBITS(4); -#ifndef PKZIP_BUG_WORKAROUND - if (state->nlen > 286 || state->ndist > 30) { - strm->msg = (char *)"too many length or distance symbols"; - state->mode = BAD; - break; - } -#endif - Tracev((stderr, "inflate: table sizes ok\n")); - - /* get code length code lengths (not a typo) */ - state->have = 0; - while (state->have < state->ncode) { - NEEDBITS(3); - state->lens[order[state->have++]] = (unsigned short)BITS(3); - DROPBITS(3); - } - while (state->have < 19) - state->lens[order[state->have++]] = 0; - state->next = state->codes; - state->lencode = (code const FAR *)(state->next); - state->lenbits = 7; - ret = inflate_table(CODES, state->lens, 19, &(state->next), - &(state->lenbits), state->work); - if (ret) { - strm->msg = (char *)"invalid code lengths set"; - state->mode = BAD; - break; - } - Tracev((stderr, "inflate: code lengths ok\n")); - - /* get length and distance code code lengths */ - state->have = 0; - while (state->have < state->nlen + state->ndist) { - for (;;) { - here = state->lencode[BITS(state->lenbits)]; - if ((unsigned)(here.bits) <= bits) break; - PULLBYTE(); - } - if (here.val < 16) { - DROPBITS(here.bits); - state->lens[state->have++] = here.val; - } - else { - if (here.val == 16) { - NEEDBITS(here.bits + 2); - DROPBITS(here.bits); - if (state->have == 0) { - strm->msg = (char *)"invalid bit length repeat"; - state->mode = BAD; - break; - } - len = (unsigned)(state->lens[state->have - 1]); - copy = 3 + BITS(2); - DROPBITS(2); - } - else if (here.val == 17) { - NEEDBITS(here.bits + 3); - DROPBITS(here.bits); - len = 0; - copy = 3 + BITS(3); - DROPBITS(3); - } - else { - NEEDBITS(here.bits + 7); - DROPBITS(here.bits); - len = 0; - copy = 11 + BITS(7); - DROPBITS(7); - } - if (state->have + copy > state->nlen + state->ndist) { - strm->msg = (char *)"invalid bit length repeat"; - state->mode = BAD; - break; - } - while (copy--) - state->lens[state->have++] = (unsigned short)len; - } - } - - /* handle error breaks in while */ - if (state->mode == BAD) break; - - /* check for end-of-block code (better have one) */ - if (state->lens[256] == 0) { - strm->msg = (char *)"invalid code -- missing end-of-block"; - state->mode = BAD; - break; - } - - /* build code tables -- note: do not change the lenbits or distbits - values here (9 and 6) without reading the comments in inftrees.h - concerning the ENOUGH constants, which depend on those values */ - state->next = 
state->codes; - state->lencode = (code const FAR *)(state->next); - state->lenbits = 9; - ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), - &(state->lenbits), state->work); - if (ret) { - strm->msg = (char *)"invalid literal/lengths set"; - state->mode = BAD; - break; - } - state->distcode = (code const FAR *)(state->next); - state->distbits = 6; - ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, - &(state->next), &(state->distbits), state->work); - if (ret) { - strm->msg = (char *)"invalid distances set"; - state->mode = BAD; - break; - } - Tracev((stderr, "inflate: codes ok\n")); - state->mode = LEN; - - case LEN: - /* use inflate_fast() if we have enough input and output */ - if (have >= 6 && left >= 258) { - RESTORE(); - if (state->whave < state->wsize) - state->whave = state->wsize - left; - inflate_fast(strm, state->wsize); - LOAD(); - break; - } - - /* get a literal, length, or end-of-block code */ - for (;;) { - here = state->lencode[BITS(state->lenbits)]; - if ((unsigned)(here.bits) <= bits) break; - PULLBYTE(); - } - if (here.op && (here.op & 0xf0) == 0) { - last = here; - for (;;) { - here = state->lencode[last.val + - (BITS(last.bits + last.op) >> last.bits)]; - if ((unsigned)(last.bits + here.bits) <= bits) break; - PULLBYTE(); - } - DROPBITS(last.bits); - } - DROPBITS(here.bits); - state->length = (unsigned)here.val; - - /* process literal */ - if (here.op == 0) { - Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? - "inflate: literal '%c'\n" : - "inflate: literal 0x%02x\n", here.val)); - ROOM(); - *put++ = (unsigned char)(state->length); - left--; - state->mode = LEN; - break; - } - - /* process end of block */ - if (here.op & 32) { - Tracevv((stderr, "inflate: end of block\n")); - state->mode = TYPE; - break; - } - - /* invalid code */ - if (here.op & 64) { - strm->msg = (char *)"invalid literal/length code"; - state->mode = BAD; - break; - } - - /* length code -- get extra bits, if any */ - state->extra = (unsigned)(here.op) & 15; - if (state->extra != 0) { - NEEDBITS(state->extra); - state->length += BITS(state->extra); - DROPBITS(state->extra); - } - Tracevv((stderr, "inflate: length %u\n", state->length)); - - /* get distance code */ - for (;;) { - here = state->distcode[BITS(state->distbits)]; - if ((unsigned)(here.bits) <= bits) break; - PULLBYTE(); - } - if ((here.op & 0xf0) == 0) { - last = here; - for (;;) { - here = state->distcode[last.val + - (BITS(last.bits + last.op) >> last.bits)]; - if ((unsigned)(last.bits + here.bits) <= bits) break; - PULLBYTE(); - } - DROPBITS(last.bits); - } - DROPBITS(here.bits); - if (here.op & 64) { - strm->msg = (char *)"invalid distance code"; - state->mode = BAD; - break; - } - state->offset = (unsigned)here.val; - - /* get distance extra bits, if any */ - state->extra = (unsigned)(here.op) & 15; - if (state->extra != 0) { - NEEDBITS(state->extra); - state->offset += BITS(state->extra); - DROPBITS(state->extra); - } - if (state->offset > state->wsize - (state->whave < state->wsize ? 
- left : 0)) { - strm->msg = (char *)"invalid distance too far back"; - state->mode = BAD; - break; - } - Tracevv((stderr, "inflate: distance %u\n", state->offset)); - - /* copy match from window to output */ - do { - ROOM(); - copy = state->wsize - state->offset; - if (copy < left) { - from = put + copy; - copy = left - copy; - } - else { - from = put - state->offset; - copy = left; - } - if (copy > state->length) copy = state->length; - state->length -= copy; - left -= copy; - do { - *put++ = *from++; - } while (--copy); - } while (state->length != 0); - break; - - case DONE: - /* inflate stream terminated properly -- write leftover output */ - ret = Z_STREAM_END; - if (left < state->wsize) { - if (out(out_desc, state->window, state->wsize - left)) - ret = Z_BUF_ERROR; - } - goto inf_leave; - - case BAD: - ret = Z_DATA_ERROR; - goto inf_leave; - - default: /* can't happen, but makes compilers happy */ - ret = Z_STREAM_ERROR; - goto inf_leave; - } - - /* Return unused input */ - inf_leave: - strm->next_in = next; - strm->avail_in = have; - return ret; -} - -int ZEXPORT inflateBackEnd(strm) -z_streamp strm; -{ - if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0) - return Z_STREAM_ERROR; - ZFREE(strm, strm->state); - strm->state = Z_NULL; - Tracev((stderr, "inflate: end\n")); - return Z_OK; -} diff --git a/base/poco/Foundation/src/inffast.c b/base/poco/Foundation/src/inffast.c deleted file mode 100644 index 0dbd1dbc09f..00000000000 --- a/base/poco/Foundation/src/inffast.c +++ /dev/null @@ -1,323 +0,0 @@ -/* inffast.c -- fast decoding - * Copyright (C) 1995-2017 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -#include "zutil.h" -#include "inftrees.h" -#include "inflate.h" -#include "inffast.h" - -#ifdef ASMINF -# pragma message("Assembler code may have bugs -- use at your own risk") -#else - -/* - Decode literal, length, and distance codes and write out the resulting - literal and match bytes until either not enough input or output is - available, an end-of-block is encountered, or a data error is encountered. - When large enough input and output buffers are supplied to inflate(), for - example, a 16K input buffer and a 64K output buffer, more than 95% of the - inflate execution time is spent in this routine. - - Entry assumptions: - - state->mode == LEN - strm->avail_in >= 6 - strm->avail_out >= 258 - start >= strm->avail_out - state->bits < 8 - - On return, state->mode is one of: - - LEN -- ran out of enough output space or enough available input - TYPE -- reached end of block code, inflate() to interpret next block - BAD -- error in block data - - Notes: - - - The maximum input bits used by a length/distance pair is 15 bits for the - length code, 5 bits for the length extra, 15 bits for the distance code, - and 13 bits for the distance extra. This totals 48 bits, or six bytes. - Therefore if strm->avail_in >= 6, then there is enough input to avoid - checking for available input while decoding. - - - The maximum bytes that a single length/distance pair can output is 258 - bytes, which is the maximum length that can be coded. inflate_fast() - requires strm->avail_out >= 258 for each loop to avoid checking for - output space. 
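The six-byte input bound quoted above is just the worst-case bit count per length/distance pair. A compile-time restatement of that arithmetic (illustrative, not part of zlib):

```cpp
// Worst case per length/distance pair, per the note above:
// 15 (length code) + 5 (length extra) + 15 (distance code) + 13 (distance extra).
constexpr int kMaxBitsPerPair = 15 + 5 + 15 + 13;
static_assert(kMaxBitsPerPair == 48, "48 bits total");
static_assert(kMaxBitsPerPair / 8 == 6, "six bytes, hence avail_in >= 6");
// And 258 is the longest match deflate can code, hence avail_out >= 258.
```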
- */ -void ZLIB_INTERNAL inflate_fast(strm, start) -z_streamp strm; -unsigned start; /* inflate()'s starting value for strm->avail_out */ -{ - struct inflate_state FAR *state; - z_const unsigned char FAR *in; /* local strm->next_in */ - z_const unsigned char FAR *last; /* have enough input while in < last */ - unsigned char FAR *out; /* local strm->next_out */ - unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ - unsigned char FAR *end; /* while out < end, enough space available */ -#ifdef INFLATE_STRICT - unsigned dmax; /* maximum distance from zlib header */ -#endif - unsigned wsize; /* window size or zero if not using window */ - unsigned whave; /* valid bytes in the window */ - unsigned wnext; /* window write index */ - unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ - unsigned long hold; /* local strm->hold */ - unsigned bits; /* local strm->bits */ - code const FAR *lcode; /* local strm->lencode */ - code const FAR *dcode; /* local strm->distcode */ - unsigned lmask; /* mask for first level of length codes */ - unsigned dmask; /* mask for first level of distance codes */ - code here; /* retrieved table entry */ - unsigned op; /* code bits, operation, extra bits, or */ - /* window position, window bytes to copy */ - unsigned len; /* match length, unused bytes */ - unsigned dist; /* match distance */ - unsigned char FAR *from; /* where to copy match from */ - - /* copy state to local variables */ - state = (struct inflate_state FAR *)strm->state; - in = strm->next_in; - last = in + (strm->avail_in - 5); - out = strm->next_out; - beg = out - (start - strm->avail_out); - end = out + (strm->avail_out - 257); -#ifdef INFLATE_STRICT - dmax = state->dmax; -#endif - wsize = state->wsize; - whave = state->whave; - wnext = state->wnext; - window = state->window; - hold = state->hold; - bits = state->bits; - lcode = state->lencode; - dcode = state->distcode; - lmask = (1U << state->lenbits) - 1; - dmask = (1U << state->distbits) - 1; - - /* decode literals and length/distances until end-of-block or not enough - input data or output space */ - do { - if (bits < 15) { - hold += (unsigned long)(*in++) << bits; - bits += 8; - hold += (unsigned long)(*in++) << bits; - bits += 8; - } - here = lcode[hold & lmask]; - dolen: - op = (unsigned)(here.bits); - hold >>= op; - bits -= op; - op = (unsigned)(here.op); - if (op == 0) { /* literal */ - Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? 
- "inflate: literal '%c'\n" : - "inflate: literal 0x%02x\n", here.val)); - *out++ = (unsigned char)(here.val); - } - else if (op & 16) { /* length base */ - len = (unsigned)(here.val); - op &= 15; /* number of extra bits */ - if (op) { - if (bits < op) { - hold += (unsigned long)(*in++) << bits; - bits += 8; - } - len += (unsigned)hold & ((1U << op) - 1); - hold >>= op; - bits -= op; - } - Tracevv((stderr, "inflate: length %u\n", len)); - if (bits < 15) { - hold += (unsigned long)(*in++) << bits; - bits += 8; - hold += (unsigned long)(*in++) << bits; - bits += 8; - } - here = dcode[hold & dmask]; - dodist: - op = (unsigned)(here.bits); - hold >>= op; - bits -= op; - op = (unsigned)(here.op); - if (op & 16) { /* distance base */ - dist = (unsigned)(here.val); - op &= 15; /* number of extra bits */ - if (bits < op) { - hold += (unsigned long)(*in++) << bits; - bits += 8; - if (bits < op) { - hold += (unsigned long)(*in++) << bits; - bits += 8; - } - } - dist += (unsigned)hold & ((1U << op) - 1); -#ifdef INFLATE_STRICT - if (dist > dmax) { - strm->msg = (char *)"invalid distance too far back"; - state->mode = BAD; - break; - } -#endif - hold >>= op; - bits -= op; - Tracevv((stderr, "inflate: distance %u\n", dist)); - op = (unsigned)(out - beg); /* max distance in output */ - if (dist > op) { /* see if copy from window */ - op = dist - op; /* distance back in window */ - if (op > whave) { - if (state->sane) { - strm->msg = - (char *)"invalid distance too far back"; - state->mode = BAD; - break; - } -#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR - if (len <= op - whave) { - do { - *out++ = 0; - } while (--len); - continue; - } - len -= op - whave; - do { - *out++ = 0; - } while (--op > whave); - if (op == 0) { - from = out - dist; - do { - *out++ = *from++; - } while (--len); - continue; - } -#endif - } - from = window; - if (wnext == 0) { /* very common case */ - from += wsize - op; - if (op < len) { /* some from window */ - len -= op; - do { - *out++ = *from++; - } while (--op); - from = out - dist; /* rest from output */ - } - } - else if (wnext < op) { /* wrap around window */ - from += wsize + wnext - op; - op -= wnext; - if (op < len) { /* some from end of window */ - len -= op; - do { - *out++ = *from++; - } while (--op); - from = window; - if (wnext < len) { /* some from start of window */ - op = wnext; - len -= op; - do { - *out++ = *from++; - } while (--op); - from = out - dist; /* rest from output */ - } - } - } - else { /* contiguous in window */ - from += wnext - op; - if (op < len) { /* some from window */ - len -= op; - do { - *out++ = *from++; - } while (--op); - from = out - dist; /* rest from output */ - } - } - while (len > 2) { - *out++ = *from++; - *out++ = *from++; - *out++ = *from++; - len -= 3; - } - if (len) { - *out++ = *from++; - if (len > 1) - *out++ = *from++; - } - } - else { - from = out - dist; /* copy direct from output */ - do { /* minimum length is three */ - *out++ = *from++; - *out++ = *from++; - *out++ = *from++; - len -= 3; - } while (len > 2); - if (len) { - *out++ = *from++; - if (len > 1) - *out++ = *from++; - } - } - } - else if ((op & 64) == 0) { /* 2nd level distance code */ - here = dcode[here.val + (hold & ((1U << op) - 1))]; - goto dodist; - } - else { - strm->msg = (char *)"invalid distance code"; - state->mode = BAD; - break; - } - } - else if ((op & 64) == 0) { /* 2nd level length code */ - here = lcode[here.val + (hold & ((1U << op) - 1))]; - goto dolen; - } - else if (op & 32) { /* end-of-block */ - Tracevv((stderr, "inflate: end of 
block\n")); - state->mode = TYPE; - break; - } - else { - strm->msg = (char *)"invalid literal/length code"; - state->mode = BAD; - break; - } - } while (in < last && out < end); - - /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ - len = bits >> 3; - in -= len; - bits -= len << 3; - hold &= (1U << bits) - 1; - - /* update state and return */ - strm->next_in = in; - strm->next_out = out; - strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); - strm->avail_out = (unsigned)(out < end ? - 257 + (end - out) : 257 - (out - end)); - state->hold = hold; - state->bits = bits; - return; -} - -/* - inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): - - Using bit fields for code structure - - Different op definition to avoid & for extra bits (do & for table bits) - - Three separate decoding do-loops for direct, window, and wnext == 0 - - Special case for distance > 1 copies to do overlapped load and store copy - - Explicit branch predictions (based on measured branch probabilities) - - Deferring match copy and interspersed it with decoding subsequent codes - - Swapping literal/length else - - Swapping window/direct else - - Larger unrolled copy loops (three is about right) - - Moving len -= 3 statement into middle of loop - */ - -#endif /* !ASMINF */ diff --git a/base/poco/Foundation/src/inffast.h b/base/poco/Foundation/src/inffast.h deleted file mode 100644 index e5c1aa4ca8c..00000000000 --- a/base/poco/Foundation/src/inffast.h +++ /dev/null @@ -1,11 +0,0 @@ -/* inffast.h -- header to use inffast.c - * Copyright (C) 1995-2003, 2010 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. - */ - -void ZLIB_INTERNAL inflate_fast OF((z_streamp strm, unsigned start)); diff --git a/base/poco/Foundation/src/inffixed.h b/base/poco/Foundation/src/inffixed.h deleted file mode 100644 index 4a39de52ca2..00000000000 --- a/base/poco/Foundation/src/inffixed.h +++ /dev/null @@ -1,68 +0,0 @@ -/* inffixed.h -- table for decoding fixed codes - * Generated automatically by makefixed(). - */ - -/* WARNING: this file should *not* be used by applications. - It is part of the implementation of this library and is - subject to change. Applications should only use zlib.h. 
- */ - -static const code lenfix[512] = { - {96, 7, 0}, {0, 8, 80}, {0, 8, 16}, {20, 8, 115}, {18, 7, 31}, {0, 8, 112}, {0, 8, 48}, {0, 9, 192}, {16, 7, 10}, {0, 8, 96}, - {0, 8, 32}, {0, 9, 160}, {0, 8, 0}, {0, 8, 128}, {0, 8, 64}, {0, 9, 224}, {16, 7, 6}, {0, 8, 88}, {0, 8, 24}, {0, 9, 144}, - {19, 7, 59}, {0, 8, 120}, {0, 8, 56}, {0, 9, 208}, {17, 7, 17}, {0, 8, 104}, {0, 8, 40}, {0, 9, 176}, {0, 8, 8}, {0, 8, 136}, - {0, 8, 72}, {0, 9, 240}, {16, 7, 4}, {0, 8, 84}, {0, 8, 20}, {21, 8, 227}, {19, 7, 43}, {0, 8, 116}, {0, 8, 52}, {0, 9, 200}, - {17, 7, 13}, {0, 8, 100}, {0, 8, 36}, {0, 9, 168}, {0, 8, 4}, {0, 8, 132}, {0, 8, 68}, {0, 9, 232}, {16, 7, 8}, {0, 8, 92}, - {0, 8, 28}, {0, 9, 152}, {20, 7, 83}, {0, 8, 124}, {0, 8, 60}, {0, 9, 216}, {18, 7, 23}, {0, 8, 108}, {0, 8, 44}, {0, 9, 184}, - {0, 8, 12}, {0, 8, 140}, {0, 8, 76}, {0, 9, 248}, {16, 7, 3}, {0, 8, 82}, {0, 8, 18}, {21, 8, 163}, {19, 7, 35}, {0, 8, 114}, - {0, 8, 50}, {0, 9, 196}, {17, 7, 11}, {0, 8, 98}, {0, 8, 34}, {0, 9, 164}, {0, 8, 2}, {0, 8, 130}, {0, 8, 66}, {0, 9, 228}, - {16, 7, 7}, {0, 8, 90}, {0, 8, 26}, {0, 9, 148}, {20, 7, 67}, {0, 8, 122}, {0, 8, 58}, {0, 9, 212}, {18, 7, 19}, {0, 8, 106}, - {0, 8, 42}, {0, 9, 180}, {0, 8, 10}, {0, 8, 138}, {0, 8, 74}, {0, 9, 244}, {16, 7, 5}, {0, 8, 86}, {0, 8, 22}, {64, 8, 0}, - {19, 7, 51}, {0, 8, 118}, {0, 8, 54}, {0, 9, 204}, {17, 7, 15}, {0, 8, 102}, {0, 8, 38}, {0, 9, 172}, {0, 8, 6}, {0, 8, 134}, - {0, 8, 70}, {0, 9, 236}, {16, 7, 9}, {0, 8, 94}, {0, 8, 30}, {0, 9, 156}, {20, 7, 99}, {0, 8, 126}, {0, 8, 62}, {0, 9, 220}, - {18, 7, 27}, {0, 8, 110}, {0, 8, 46}, {0, 9, 188}, {0, 8, 14}, {0, 8, 142}, {0, 8, 78}, {0, 9, 252}, {96, 7, 0}, {0, 8, 81}, - {0, 8, 17}, {21, 8, 131}, {18, 7, 31}, {0, 8, 113}, {0, 8, 49}, {0, 9, 194}, {16, 7, 10}, {0, 8, 97}, {0, 8, 33}, {0, 9, 162}, - {0, 8, 1}, {0, 8, 129}, {0, 8, 65}, {0, 9, 226}, {16, 7, 6}, {0, 8, 89}, {0, 8, 25}, {0, 9, 146}, {19, 7, 59}, {0, 8, 121}, - {0, 8, 57}, {0, 9, 210}, {17, 7, 17}, {0, 8, 105}, {0, 8, 41}, {0, 9, 178}, {0, 8, 9}, {0, 8, 137}, {0, 8, 73}, {0, 9, 242}, - {16, 7, 4}, {0, 8, 85}, {0, 8, 21}, {16, 8, 258}, {19, 7, 43}, {0, 8, 117}, {0, 8, 53}, {0, 9, 202}, {17, 7, 13}, {0, 8, 101}, - {0, 8, 37}, {0, 9, 170}, {0, 8, 5}, {0, 8, 133}, {0, 8, 69}, {0, 9, 234}, {16, 7, 8}, {0, 8, 93}, {0, 8, 29}, {0, 9, 154}, - {20, 7, 83}, {0, 8, 125}, {0, 8, 61}, {0, 9, 218}, {18, 7, 23}, {0, 8, 109}, {0, 8, 45}, {0, 9, 186}, {0, 8, 13}, {0, 8, 141}, - {0, 8, 77}, {0, 9, 250}, {16, 7, 3}, {0, 8, 83}, {0, 8, 19}, {21, 8, 195}, {19, 7, 35}, {0, 8, 115}, {0, 8, 51}, {0, 9, 198}, - {17, 7, 11}, {0, 8, 99}, {0, 8, 35}, {0, 9, 166}, {0, 8, 3}, {0, 8, 131}, {0, 8, 67}, {0, 9, 230}, {16, 7, 7}, {0, 8, 91}, - {0, 8, 27}, {0, 9, 150}, {20, 7, 67}, {0, 8, 123}, {0, 8, 59}, {0, 9, 214}, {18, 7, 19}, {0, 8, 107}, {0, 8, 43}, {0, 9, 182}, - {0, 8, 11}, {0, 8, 139}, {0, 8, 75}, {0, 9, 246}, {16, 7, 5}, {0, 8, 87}, {0, 8, 23}, {64, 8, 0}, {19, 7, 51}, {0, 8, 119}, - {0, 8, 55}, {0, 9, 206}, {17, 7, 15}, {0, 8, 103}, {0, 8, 39}, {0, 9, 174}, {0, 8, 7}, {0, 8, 135}, {0, 8, 71}, {0, 9, 238}, - {16, 7, 9}, {0, 8, 95}, {0, 8, 31}, {0, 9, 158}, {20, 7, 99}, {0, 8, 127}, {0, 8, 63}, {0, 9, 222}, {18, 7, 27}, {0, 8, 111}, - {0, 8, 47}, {0, 9, 190}, {0, 8, 15}, {0, 8, 143}, {0, 8, 79}, {0, 9, 254}, {96, 7, 0}, {0, 8, 80}, {0, 8, 16}, {20, 8, 115}, - {18, 7, 31}, {0, 8, 112}, {0, 8, 48}, {0, 9, 193}, {16, 7, 10}, {0, 8, 96}, {0, 8, 32}, {0, 9, 161}, {0, 8, 0}, {0, 8, 128}, - {0, 8, 64}, {0, 9, 225}, {16, 7, 6}, {0, 8, 88}, {0, 8, 24}, {0, 9, 145}, 
{19, 7, 59}, {0, 8, 120}, {0, 8, 56}, {0, 9, 209}, - {17, 7, 17}, {0, 8, 104}, {0, 8, 40}, {0, 9, 177}, {0, 8, 8}, {0, 8, 136}, {0, 8, 72}, {0, 9, 241}, {16, 7, 4}, {0, 8, 84}, - {0, 8, 20}, {21, 8, 227}, {19, 7, 43}, {0, 8, 116}, {0, 8, 52}, {0, 9, 201}, {17, 7, 13}, {0, 8, 100}, {0, 8, 36}, {0, 9, 169}, - {0, 8, 4}, {0, 8, 132}, {0, 8, 68}, {0, 9, 233}, {16, 7, 8}, {0, 8, 92}, {0, 8, 28}, {0, 9, 153}, {20, 7, 83}, {0, 8, 124}, - {0, 8, 60}, {0, 9, 217}, {18, 7, 23}, {0, 8, 108}, {0, 8, 44}, {0, 9, 185}, {0, 8, 12}, {0, 8, 140}, {0, 8, 76}, {0, 9, 249}, - {16, 7, 3}, {0, 8, 82}, {0, 8, 18}, {21, 8, 163}, {19, 7, 35}, {0, 8, 114}, {0, 8, 50}, {0, 9, 197}, {17, 7, 11}, {0, 8, 98}, - {0, 8, 34}, {0, 9, 165}, {0, 8, 2}, {0, 8, 130}, {0, 8, 66}, {0, 9, 229}, {16, 7, 7}, {0, 8, 90}, {0, 8, 26}, {0, 9, 149}, - {20, 7, 67}, {0, 8, 122}, {0, 8, 58}, {0, 9, 213}, {18, 7, 19}, {0, 8, 106}, {0, 8, 42}, {0, 9, 181}, {0, 8, 10}, {0, 8, 138}, - {0, 8, 74}, {0, 9, 245}, {16, 7, 5}, {0, 8, 86}, {0, 8, 22}, {64, 8, 0}, {19, 7, 51}, {0, 8, 118}, {0, 8, 54}, {0, 9, 205}, - {17, 7, 15}, {0, 8, 102}, {0, 8, 38}, {0, 9, 173}, {0, 8, 6}, {0, 8, 134}, {0, 8, 70}, {0, 9, 237}, {16, 7, 9}, {0, 8, 94}, - {0, 8, 30}, {0, 9, 157}, {20, 7, 99}, {0, 8, 126}, {0, 8, 62}, {0, 9, 221}, {18, 7, 27}, {0, 8, 110}, {0, 8, 46}, {0, 9, 189}, - {0, 8, 14}, {0, 8, 142}, {0, 8, 78}, {0, 9, 253}, {96, 7, 0}, {0, 8, 81}, {0, 8, 17}, {21, 8, 131}, {18, 7, 31}, {0, 8, 113}, - {0, 8, 49}, {0, 9, 195}, {16, 7, 10}, {0, 8, 97}, {0, 8, 33}, {0, 9, 163}, {0, 8, 1}, {0, 8, 129}, {0, 8, 65}, {0, 9, 227}, - {16, 7, 6}, {0, 8, 89}, {0, 8, 25}, {0, 9, 147}, {19, 7, 59}, {0, 8, 121}, {0, 8, 57}, {0, 9, 211}, {17, 7, 17}, {0, 8, 105}, - {0, 8, 41}, {0, 9, 179}, {0, 8, 9}, {0, 8, 137}, {0, 8, 73}, {0, 9, 243}, {16, 7, 4}, {0, 8, 85}, {0, 8, 21}, {16, 8, 258}, - {19, 7, 43}, {0, 8, 117}, {0, 8, 53}, {0, 9, 203}, {17, 7, 13}, {0, 8, 101}, {0, 8, 37}, {0, 9, 171}, {0, 8, 5}, {0, 8, 133}, - {0, 8, 69}, {0, 9, 235}, {16, 7, 8}, {0, 8, 93}, {0, 8, 29}, {0, 9, 155}, {20, 7, 83}, {0, 8, 125}, {0, 8, 61}, {0, 9, 219}, - {18, 7, 23}, {0, 8, 109}, {0, 8, 45}, {0, 9, 187}, {0, 8, 13}, {0, 8, 141}, {0, 8, 77}, {0, 9, 251}, {16, 7, 3}, {0, 8, 83}, - {0, 8, 19}, {21, 8, 195}, {19, 7, 35}, {0, 8, 115}, {0, 8, 51}, {0, 9, 199}, {17, 7, 11}, {0, 8, 99}, {0, 8, 35}, {0, 9, 167}, - {0, 8, 3}, {0, 8, 131}, {0, 8, 67}, {0, 9, 231}, {16, 7, 7}, {0, 8, 91}, {0, 8, 27}, {0, 9, 151}, {20, 7, 67}, {0, 8, 123}, - {0, 8, 59}, {0, 9, 215}, {18, 7, 19}, {0, 8, 107}, {0, 8, 43}, {0, 9, 183}, {0, 8, 11}, {0, 8, 139}, {0, 8, 75}, {0, 9, 247}, - {16, 7, 5}, {0, 8, 87}, {0, 8, 23}, {64, 8, 0}, {19, 7, 51}, {0, 8, 119}, {0, 8, 55}, {0, 9, 207}, {17, 7, 15}, {0, 8, 103}, - {0, 8, 39}, {0, 9, 175}, {0, 8, 7}, {0, 8, 135}, {0, 8, 71}, {0, 9, 239}, {16, 7, 9}, {0, 8, 95}, {0, 8, 31}, {0, 9, 159}, - {20, 7, 99}, {0, 8, 127}, {0, 8, 63}, {0, 9, 223}, {18, 7, 27}, {0, 8, 111}, {0, 8, 47}, {0, 9, 191}, {0, 8, 15}, {0, 8, 143}, - {0, 8, 79}, {0, 9, 255}}; - -static const code distfix[32] - = {{16, 5, 1}, {23, 5, 257}, {19, 5, 17}, {27, 5, 4097}, {17, 5, 5}, {25, 5, 1025}, {21, 5, 65}, {29, 5, 16385}, - {16, 5, 3}, {24, 5, 513}, {20, 5, 33}, {28, 5, 8193}, {18, 5, 9}, {26, 5, 2049}, {22, 5, 129}, {64, 5, 0}, - {16, 5, 2}, {23, 5, 385}, {19, 5, 25}, {27, 5, 6145}, {17, 5, 7}, {25, 5, 1537}, {21, 5, 97}, {29, 5, 24577}, - {16, 5, 4}, {24, 5, 769}, {20, 5, 49}, {28, 5, 12289}, {18, 5, 13}, {26, 5, 3073}, {22, 5, 193}, {64, 5, 0}}; diff --git a/base/poco/Foundation/src/inflate.c 
b/base/poco/Foundation/src/inflate.c deleted file mode 100644 index ac333e8c2ed..00000000000 --- a/base/poco/Foundation/src/inflate.c +++ /dev/null @@ -1,1561 +0,0 @@ -/* inflate.c -- zlib decompression - * Copyright (C) 1995-2016 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* - * Change history: - * - * 1.2.beta0 24 Nov 2002 - * - First version -- complete rewrite of inflate to simplify code, avoid - * creation of window when not needed, minimize use of window when it is - * needed, make inffast.c even faster, implement gzip decoding, and to - * improve code readability and style over the previous zlib inflate code - * - * 1.2.beta1 25 Nov 2002 - * - Use pointers for available input and output checking in inffast.c - * - Remove input and output counters in inffast.c - * - Change inffast.c entry and loop from avail_in >= 7 to >= 6 - * - Remove unnecessary second byte pull from length extra in inffast.c - * - Unroll direct copy to three copies per loop in inffast.c - * - * 1.2.beta2 4 Dec 2002 - * - Change external routine names to reduce potential conflicts - * - Correct filename to inffixed.h for fixed tables in inflate.c - * - Make hbuf[] unsigned char to match parameter type in inflate.c - * - Change strm->next_out[-state->offset] to *(strm->next_out - state->offset) - * to avoid negation problem on Alphas (64 bit) in inflate.c - * - * 1.2.beta3 22 Dec 2002 - * - Add comments on state->bits assertion in inffast.c - * - Add comments on op field in inftrees.h - * - Fix bug in reuse of allocated window after inflateReset() - * - Remove bit fields--back to byte structure for speed - * - Remove distance extra == 0 check in inflate_fast()--only helps for lengths - * - Change post-increments to pre-increments in inflate_fast(), PPC biased? - * - Add compile time option, POSTINC, to use post-increments instead (Intel?) - * - Make MATCH copy in inflate() much faster for when inflate_fast() not used - * - Use local copies of stream next and avail values, as well as local bit - * buffer and bit count in inflate()--for speed when inflate_fast() not used - * - * 1.2.beta4 1 Jan 2003 - * - Split ptr - 257 statements in inflate_table() to avoid compiler warnings - * - Move a comment on output buffer sizes from inffast.c to inflate.c - * - Add comments in inffast.c to introduce the inflate_fast() routine - * - Rearrange window copies in inflate_fast() for speed and simplification - * - Unroll last copy for window match in inflate_fast() - * - Use local copies of window variables in inflate_fast() for speed - * - Pull out common wnext == 0 case for speed in inflate_fast() - * - Make op and len in inflate_fast() unsigned for consistency - * - Add FAR to lcode and dcode declarations in inflate_fast() - * - Simplified bad distance check in inflate_fast() - * - Added inflateBackInit(), inflateBack(), and inflateBackEnd() in new - * source file infback.c to provide a call-back interface to inflate for - * programs like gzip and unzip -- uses window as output buffer to avoid - * window copying - * - * 1.2.beta5 1 Jan 2003 - * - Improved inflateBack() interface to allow the caller to provide initial - * input in strm. 
- * - Fixed stored blocks bug in inflateBack() - * - * 1.2.beta6 4 Jan 2003 - * - Added comments in inffast.c on effectiveness of POSTINC - * - Typecasting all around to reduce compiler warnings - * - Changed loops from while (1) or do {} while (1) to for (;;), again to - * make compilers happy - * - Changed type of window in inflateBackInit() to unsigned char * - * - * 1.2.beta7 27 Jan 2003 - * - Changed many types to unsigned or unsigned short to avoid warnings - * - Added inflateCopy() function - * - * 1.2.0 9 Mar 2003 - * - Changed inflateBack() interface to provide separate opaque descriptors - * for the in() and out() functions - * - Changed inflateBack() argument and in_func typedef to swap the length - * and buffer address return values for the input function - * - Check next_in and next_out for Z_NULL on entry to inflate() - * - * The history for versions after 1.2.0 are in ChangeLog in zlib distribution. - */ - -#include "zutil.h" -#include "inftrees.h" -#include "inflate.h" -#include "inffast.h" - -#ifdef MAKEFIXED -# ifndef BUILDFIXED -# define BUILDFIXED -# endif -#endif - -/* function prototypes */ -local int inflateStateCheck OF((z_streamp strm)); -local void fixedtables OF((struct inflate_state FAR *state)); -local int updatewindow OF((z_streamp strm, const unsigned char FAR *end, - unsigned copy)); -#ifdef BUILDFIXED - void makefixed OF((void)); -#endif -local unsigned syncsearch OF((unsigned FAR *have, const unsigned char FAR *buf, - unsigned len)); - -local int inflateStateCheck(strm) -z_streamp strm; -{ - struct inflate_state FAR *state; - if (strm == Z_NULL || - strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) - return 1; - state = (struct inflate_state FAR *)strm->state; - if (state == Z_NULL || state->strm != strm || - state->mode < HEAD || state->mode > SYNC) - return 1; - return 0; -} - -int ZEXPORT inflateResetKeep(strm) -z_streamp strm; -{ - struct inflate_state FAR *state; - - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)strm->state; - strm->total_in = strm->total_out = state->total = 0; - strm->msg = Z_NULL; - if (state->wrap) /* to support ill-conceived Java test suite */ - strm->adler = state->wrap & 1; - state->mode = HEAD; - state->last = 0; - state->havedict = 0; - state->dmax = 32768U; - state->head = Z_NULL; - state->hold = 0; - state->bits = 0; - state->lencode = state->distcode = state->next = state->codes; - state->sane = 1; - state->back = -1; - Tracev((stderr, "inflate: reset\n")); - return Z_OK; -} - -int ZEXPORT inflateReset(strm) -z_streamp strm; -{ - struct inflate_state FAR *state; - - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)strm->state; - state->wsize = 0; - state->whave = 0; - state->wnext = 0; - return inflateResetKeep(strm); -} - -int ZEXPORT inflateReset2(strm, windowBits) -z_streamp strm; -int windowBits; -{ - int wrap; - struct inflate_state FAR *state; - - /* get the state */ - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)strm->state; - - /* extract wrap request from windowBits parameter */ - if (windowBits < 0) { - wrap = 0; - windowBits = -windowBits; - } - else { - wrap = (windowBits >> 4) + 5; -#ifdef GUNZIP - if (windowBits < 48) - windowBits &= 15; -#endif - } - - /* set number of window bits, free window if different */ - if (windowBits && (windowBits < 8 || windowBits > 15)) - return Z_STREAM_ERROR; - if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) { - 
ZFREE(strm, state->window); - state->window = Z_NULL; - } - - /* update state and reset the rest of it */ - state->wrap = wrap; - state->wbits = (unsigned)windowBits; - return inflateReset(strm); -} - -int ZEXPORT inflateInit2_(strm, windowBits, version, stream_size) -z_streamp strm; -int windowBits; -const char *version; -int stream_size; -{ - int ret; - struct inflate_state FAR *state; - - if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || - stream_size != (int)(sizeof(z_stream))) - return Z_VERSION_ERROR; - if (strm == Z_NULL) return Z_STREAM_ERROR; - strm->msg = Z_NULL; /* in case we return an error */ - if (strm->zalloc == (alloc_func)0) { -#ifdef Z_SOLO - return Z_STREAM_ERROR; -#else - strm->zalloc = zcalloc; - strm->opaque = (voidpf)0; -#endif - } - if (strm->zfree == (free_func)0) -#ifdef Z_SOLO - return Z_STREAM_ERROR; -#else - strm->zfree = zcfree; -#endif - state = (struct inflate_state FAR *) - ZALLOC(strm, 1, sizeof(struct inflate_state)); - if (state == Z_NULL) return Z_MEM_ERROR; - Tracev((stderr, "inflate: allocated\n")); - strm->state = (struct internal_state FAR *)state; - state->strm = strm; - state->window = Z_NULL; - state->mode = HEAD; /* to pass state test in inflateReset2() */ - ret = inflateReset2(strm, windowBits); - if (ret != Z_OK) { - ZFREE(strm, state); - strm->state = Z_NULL; - } - return ret; -} - -int ZEXPORT inflateInit_(strm, version, stream_size) -z_streamp strm; -const char *version; -int stream_size; -{ - return inflateInit2_(strm, DEF_WBITS, version, stream_size); -} - -int ZEXPORT inflatePrime(strm, bits, value) -z_streamp strm; -int bits; -int value; -{ - struct inflate_state FAR *state; - - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)strm->state; - if (bits < 0) { - state->hold = 0; - state->bits = 0; - return Z_OK; - } - if (bits > 16 || state->bits + (uInt)bits > 32) return Z_STREAM_ERROR; - value &= (1L << bits) - 1; - state->hold += (unsigned)value << state->bits; - state->bits += (uInt)bits; - return Z_OK; -} - -/* - Return state with length and distance decoding tables and index sizes set to - fixed code decoding. Normally this returns fixed tables from inffixed.h. - If BUILDFIXED is defined, then instead this routine builds the tables the - first time it's called, and returns those tables the first time and - thereafter. This reduces the size of the code by about 2K bytes, in - exchange for a little execution time. However, BUILDFIXED should not be - used for threaded applications, since the rewriting of the tables and virgin - may not be thread-safe. 
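For reference, the fixed literal/length and distance code lengths that fixedtables() below either loads from inffixed.h or rebuilds on first call are fully determined by RFC 1951, section 3.2.6. A minimal standalone sketch of that assignment (illustrative only, with hypothetical names; not part of the deleted source):

    /* Fixed Huffman code lengths per RFC 1951; mirrors the while-loops
       in fixedtables(). */
    static void fixed_code_lengths(unsigned short lenlens[288],
                                   unsigned short distlens[32])
    {
        unsigned sym = 0;
        while (sym < 144) lenlens[sym++] = 8;  /* literals 0..143 */
        while (sym < 256) lenlens[sym++] = 9;  /* literals 144..255 */
        while (sym < 280) lenlens[sym++] = 7;  /* 256 (EOB), lengths 257..279 */
        while (sym < 288) lenlens[sym++] = 8;  /* lengths 280..287 */
        for (sym = 0; sym < 32; sym++)
            distlens[sym] = 5;                 /* all 32 distance codes */
    }

fixedtables() then feeds these lengths to inflate_table() with root table sizes 9 and 5 to build the actual decoding tables.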
- */ -local void fixedtables(state) -struct inflate_state FAR *state; -{ -#ifdef BUILDFIXED - static int virgin = 1; - static code *lenfix, *distfix; - static code fixed[544]; - - /* build fixed huffman tables if first call (may not be thread safe) */ - if (virgin) { - unsigned sym, bits; - static code *next; - - /* literal/length table */ - sym = 0; - while (sym < 144) state->lens[sym++] = 8; - while (sym < 256) state->lens[sym++] = 9; - while (sym < 280) state->lens[sym++] = 7; - while (sym < 288) state->lens[sym++] = 8; - next = fixed; - lenfix = next; - bits = 9; - inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work); - - /* distance table */ - sym = 0; - while (sym < 32) state->lens[sym++] = 5; - distfix = next; - bits = 5; - inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work); - - /* do this just once */ - virgin = 0; - } -#else /* !BUILDFIXED */ -# include "inffixed.h" -#endif /* BUILDFIXED */ - state->lencode = lenfix; - state->lenbits = 9; - state->distcode = distfix; - state->distbits = 5; -} - -#ifdef MAKEFIXED -#include <stdio.h> - -/* - Write out the inffixed.h that is #include'd above. Defining MAKEFIXED also - defines BUILDFIXED, so the tables are built on the fly. makefixed() writes - those tables to stdout, which would be piped to inffixed.h. A small program - can simply call makefixed to do this: - - void makefixed(void); - - int main(void) - { - makefixed(); - return 0; - } - - Then that can be linked with zlib built with MAKEFIXED defined and run: - - a.out > inffixed.h - */ -void makefixed() -{ - unsigned low, size; - struct inflate_state state; - - fixedtables(&state); - puts(" /* inffixed.h -- table for decoding fixed codes"); - puts(" * Generated automatically by makefixed()."); - puts(" */"); - puts(""); - puts(" /* WARNING: this file should *not* be used by applications."); - puts(" It is part of the implementation of this library and is"); - puts(" subject to change. Applications should only use zlib.h."); - puts(" */"); - puts(""); - size = 1U << 9; - printf(" static const code lenfix[%u] = {", size); - low = 0; - for (;;) { - if ((low % 7) == 0) printf("\n "); - printf("{%u,%u,%d}", (low & 127) == 99 ? 64 : state.lencode[low].op, - state.lencode[low].bits, state.lencode[low].val); - if (++low == size) break; - putchar(','); - } - puts("\n };"); - size = 1U << 5; - printf("\n static const code distfix[%u] = {", size); - low = 0; - for (;;) { - if ((low % 6) == 0) printf("\n "); - printf("{%u,%u,%d}", state.distcode[low].op, state.distcode[low].bits, - state.distcode[low].val); - if (++low == size) break; - putchar(','); - } - puts("\n };"); -} -#endif /* MAKEFIXED */ - -/* - Update the window with the last wsize (normally 32K) bytes written before - returning. If window does not exist yet, create it. This is only called - when a window is already in use, or when output has been written during this - inflate call, but the end of the deflate stream has not been reached yet. - It is also called to create a window for dictionary data when a dictionary - is loaded. - - Providing output buffers larger than 32K to inflate() should provide a speed - advantage, since only the last 32K of output is copied to the sliding window - upon return from inflate(), and since all distances after the first 32K of - output will fall in the output data, making match copies simpler and faster. - The advantage may be dependent on the size of the processor's data caches.
- */ -local int updatewindow(strm, end, copy) -z_streamp strm; -const Bytef *end; -unsigned copy; -{ - struct inflate_state FAR *state; - unsigned dist; - - state = (struct inflate_state FAR *)strm->state; - - /* if it hasn't been done already, allocate space for the window */ - if (state->window == Z_NULL) { - state->window = (unsigned char FAR *) - ZALLOC(strm, 1U << state->wbits, - sizeof(unsigned char)); - if (state->window == Z_NULL) return 1; - } - - /* if window not in use yet, initialize */ - if (state->wsize == 0) { - state->wsize = 1U << state->wbits; - state->wnext = 0; - state->whave = 0; - } - - /* copy state->wsize or less output bytes into the circular window */ - if (copy >= state->wsize) { - zmemcpy(state->window, end - state->wsize, state->wsize); - state->wnext = 0; - state->whave = state->wsize; - } - else { - dist = state->wsize - state->wnext; - if (dist > copy) dist = copy; - zmemcpy(state->window + state->wnext, end - copy, dist); - copy -= dist; - if (copy) { - zmemcpy(state->window, end - copy, copy); - state->wnext = copy; - state->whave = state->wsize; - } - else { - state->wnext += dist; - if (state->wnext == state->wsize) state->wnext = 0; - if (state->whave < state->wsize) state->whave += dist; - } - } - return 0; -} - -/* Macros for inflate(): */ - -/* check function to use adler32() for zlib or crc32() for gzip */ -#ifdef GUNZIP -# define UPDATE(check, buf, len) \ - (state->flags ? crc32(check, buf, len) : adler32(check, buf, len)) -#else -# define UPDATE(check, buf, len) adler32(check, buf, len) -#endif - -/* check macros for header crc */ -#ifdef GUNZIP -# define CRC2(check, word) \ - do { \ - hbuf[0] = (unsigned char)(word); \ - hbuf[1] = (unsigned char)((word) >> 8); \ - check = crc32(check, hbuf, 2); \ - } while (0) - -# define CRC4(check, word) \ - do { \ - hbuf[0] = (unsigned char)(word); \ - hbuf[1] = (unsigned char)((word) >> 8); \ - hbuf[2] = (unsigned char)((word) >> 16); \ - hbuf[3] = (unsigned char)((word) >> 24); \ - check = crc32(check, hbuf, 4); \ - } while (0) -#endif - -/* Load registers with state in inflate() for speed */ -#define LOAD() \ - do { \ - put = strm->next_out; \ - left = strm->avail_out; \ - next = strm->next_in; \ - have = strm->avail_in; \ - hold = state->hold; \ - bits = state->bits; \ - } while (0) - -/* Restore state from registers in inflate() */ -#define RESTORE() \ - do { \ - strm->next_out = put; \ - strm->avail_out = left; \ - strm->next_in = next; \ - strm->avail_in = have; \ - state->hold = hold; \ - state->bits = bits; \ - } while (0) - -/* Clear the input bit accumulator */ -#define INITBITS() \ - do { \ - hold = 0; \ - bits = 0; \ - } while (0) - -/* Get a byte of input into the bit accumulator, or return from inflate() - if there is no input available. */ -#define PULLBYTE() \ - do { \ - if (have == 0) goto inf_leave; \ - have--; \ - hold += (unsigned long)(*next++) << bits; \ - bits += 8; \ - } while (0) - -/* Assure that there are at least n bits in the bit accumulator. If there is - not enough available input to do that, then return from inflate(). 
*/ -#define NEEDBITS(n) \ - do { \ - while (bits < (unsigned)(n)) \ - PULLBYTE(); \ - } while (0) - -/* Return the low n bits of the bit accumulator (n < 16) */ -#define BITS(n) \ - ((unsigned)hold & ((1U << (n)) - 1)) - -/* Remove n bits from the bit accumulator */ -#define DROPBITS(n) \ - do { \ - hold >>= (n); \ - bits -= (unsigned)(n); \ - } while (0) - -/* Remove zero to seven bits as needed to go to a byte boundary */ -#define BYTEBITS() \ - do { \ - hold >>= bits & 7; \ - bits -= bits & 7; \ - } while (0) - -/* - inflate() uses a state machine to process as much input data and generate as - much output data as possible before returning. The state machine is - structured roughly as follows: - - for (;;) switch (state) { - ... - case STATEn: - if (not enough input data or output space to make progress) - return; - ... make progress ... - state = STATEm; - break; - ... - } - - so when inflate() is called again, the same case is attempted again, and - if the appropriate resources are provided, the machine proceeds to the - next state. The NEEDBITS() macro is usually the way the state evaluates - whether it can proceed or should return. NEEDBITS() does the return if - the requested bits are not available. The typical use of the BITS macros - is: - - NEEDBITS(n); - ... do something with BITS(n) ... - DROPBITS(n); - - where NEEDBITS(n) either returns from inflate() if there isn't enough - input left to load n bits into the accumulator, or it continues. BITS(n) - gives the low n bits in the accumulator. When done, DROPBITS(n) drops - the low n bits off the accumulator. INITBITS() clears the accumulator - and sets the number of available bits to zero. BYTEBITS() discards just - enough bits to put the accumulator on a byte boundary. After BYTEBITS() - and a NEEDBITS(8), then BITS(8) would return the next byte in the stream. - - NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to return - if there is no input available. The decoding of variable length codes uses - PULLBYTE() directly in order to pull just enough bytes to decode the next - code, and no more. - - Some states loop until they get enough input, making sure that enough - state information is maintained to continue the loop where it left off - if NEEDBITS() returns in the loop. For example, want, need, and keep - would all have to actually be part of the saved state in case NEEDBITS() - returns: - - case STATEw: - while (want < need) { - NEEDBITS(n); - keep[want++] = BITS(n); - DROPBITS(n); - } - state = STATEx; - case STATEx: - - As shown above, if the next state is also the next case, then the break - is omitted. - - A state may also return if there is not enough output space available to - complete that state. Those states are copying stored data, writing a - literal byte, and copying a matching string. - - When returning, a "goto inf_leave" is used to update the total counters, - update the check value, and determine whether any progress has been made - during that inflate() call in order to return the proper return code. - Progress is defined as a change in either strm->avail_in or strm->avail_out. - When there is a window, goto inf_leave will update the window with the last - output written. If a goto inf_leave occurs in the middle of decompression - and there is no window currently, goto inf_leave will create one and copy - output to the window for the next call of inflate(). - - In this implementation, the flush parameter of inflate() only affects the - return code (per zlib.h). 
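The NEEDBITS()/BITS()/DROPBITS() idiom above is easier to see without the macro plumbing and the goto-based exit. A self-contained sketch of the same LSB-first bit accumulator (hypothetical names, simplified error handling; a sketch, not the library's code):

    /* Mirrors the hold/bits pair used by the macros above. */
    struct bitreader {
        const unsigned char *next;  /* next input byte */
        unsigned have;              /* input bytes remaining */
        unsigned long hold;         /* bit accumulator */
        unsigned bits;              /* valid bits in hold */
    };

    /* NEEDBITS(n): pull input bytes until n bits are buffered; returns 0
       where the macro would "goto inf_leave" (input exhausted). */
    static int needbits(struct bitreader *br, unsigned n)
    {
        while (br->bits < n) {
            if (br->have == 0) return 0;
            br->hold += (unsigned long)(*br->next++) << br->bits;
            br->have--;
            br->bits += 8;
        }
        return 1;
    }

    /* BITS(n): low n bits of the accumulator, n < 16. */
    static unsigned peekbits(const struct bitreader *br, unsigned n)
    {
        return (unsigned)br->hold & ((1U << n) - 1);
    }

    /* DROPBITS(n): discard n consumed bits. */
    static void dropbits(struct bitreader *br, unsigned n)
    {
        br->hold >>= n;
        br->bits -= n;
    }

The typical sequence NEEDBITS(n); ... BITS(n) ...; DROPBITS(n); maps to needbits()/peekbits()/dropbits() here, with the status return standing in for the early exit from inflate().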
inflate() always writes as much as possible to - strm->next_out, given the space available and the provided input--the effect - documented in zlib.h of Z_SYNC_FLUSH. Furthermore, inflate() always defers - the allocation of and copying into a sliding window until necessary, which - provides the effect documented in zlib.h for Z_FINISH when the entire input - stream available. So the only thing the flush parameter actually does is: - when flush is set to Z_FINISH, inflate() cannot return Z_OK. Instead it - will return Z_BUF_ERROR if it has not reached the end of the stream. - */ - -int ZEXPORT inflate(strm, flush) -z_streamp strm; -int flush; -{ - struct inflate_state FAR *state; - z_const unsigned char FAR *next; /* next input */ - unsigned char FAR *put; /* next output */ - unsigned have, left; /* available input and output */ - unsigned long hold; /* bit buffer */ - unsigned bits; /* bits in bit buffer */ - unsigned in, out; /* save starting available input and output */ - unsigned copy; /* number of stored or match bytes to copy */ - unsigned char FAR *from; /* where to copy match bytes from */ - code here; /* current decoding table entry */ - code last; /* parent table entry */ - unsigned len; /* length to copy for repeats, bits to drop */ - int ret; /* return code */ -#ifdef GUNZIP - unsigned char hbuf[4]; /* buffer for gzip header crc calculation */ -#endif - static const unsigned short order[19] = /* permutation of code lengths */ - {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; - - if (inflateStateCheck(strm) || strm->next_out == Z_NULL || - (strm->next_in == Z_NULL && strm->avail_in != 0)) - return Z_STREAM_ERROR; - - state = (struct inflate_state FAR *)strm->state; - if (state->mode == TYPE) state->mode = TYPEDO; /* skip check */ - LOAD(); - in = have; - out = left; - ret = Z_OK; - for (;;) - switch (state->mode) { - case HEAD: - if (state->wrap == 0) { - state->mode = TYPEDO; - break; - } - NEEDBITS(16); -#ifdef GUNZIP - if ((state->wrap & 2) && hold == 0x8b1f) { /* gzip header */ - if (state->wbits == 0) - state->wbits = 15; - state->check = crc32(0L, Z_NULL, 0); - CRC2(state->check, hold); - INITBITS(); - state->mode = FLAGS; - break; - } - state->flags = 0; /* expect zlib header */ - if (state->head != Z_NULL) - state->head->done = -1; - if (!(state->wrap & 1) || /* check if zlib header allowed */ -#else - if ( -#endif - ((BITS(8) << 8) + (hold >> 8)) % 31) { - strm->msg = (char *)"incorrect header check"; - state->mode = BAD; - break; - } - if (BITS(4) != Z_DEFLATED) { - strm->msg = (char *)"unknown compression method"; - state->mode = BAD; - break; - } - DROPBITS(4); - len = BITS(4) + 8; - if (state->wbits == 0) - state->wbits = len; - if (len > 15 || len > state->wbits) { - strm->msg = (char *)"invalid window size"; - state->mode = BAD; - break; - } - state->dmax = 1U << len; - Tracev((stderr, "inflate: zlib header ok\n")); - strm->adler = state->check = adler32(0L, Z_NULL, 0); - state->mode = hold & 0x200 ? 
DICTID : TYPE; - INITBITS(); - break; -#ifdef GUNZIP - case FLAGS: - NEEDBITS(16); - state->flags = (int)(hold); - if ((state->flags & 0xff) != Z_DEFLATED) { - strm->msg = (char *)"unknown compression method"; - state->mode = BAD; - break; - } - if (state->flags & 0xe000) { - strm->msg = (char *)"unknown header flags set"; - state->mode = BAD; - break; - } - if (state->head != Z_NULL) - state->head->text = (int)((hold >> 8) & 1); - if ((state->flags & 0x0200) && (state->wrap & 4)) - CRC2(state->check, hold); - INITBITS(); - state->mode = TIME; - case TIME: - NEEDBITS(32); - if (state->head != Z_NULL) - state->head->time = hold; - if ((state->flags & 0x0200) && (state->wrap & 4)) - CRC4(state->check, hold); - INITBITS(); - state->mode = OS; - case OS: - NEEDBITS(16); - if (state->head != Z_NULL) { - state->head->xflags = (int)(hold & 0xff); - state->head->os = (int)(hold >> 8); - } - if ((state->flags & 0x0200) && (state->wrap & 4)) - CRC2(state->check, hold); - INITBITS(); - state->mode = EXLEN; - case EXLEN: - if (state->flags & 0x0400) { - NEEDBITS(16); - state->length = (unsigned)(hold); - if (state->head != Z_NULL) - state->head->extra_len = (unsigned)hold; - if ((state->flags & 0x0200) && (state->wrap & 4)) - CRC2(state->check, hold); - INITBITS(); - } - else if (state->head != Z_NULL) - state->head->extra = Z_NULL; - state->mode = EXTRA; - case EXTRA: - if (state->flags & 0x0400) { - copy = state->length; - if (copy > have) copy = have; - if (copy) { - if (state->head != Z_NULL && - state->head->extra != Z_NULL) { - len = state->head->extra_len - state->length; - zmemcpy(state->head->extra + len, next, - len + copy > state->head->extra_max ? - state->head->extra_max - len : copy); - } - if ((state->flags & 0x0200) && (state->wrap & 4)) - state->check = crc32(state->check, next, copy); - have -= copy; - next += copy; - state->length -= copy; - } - if (state->length) goto inf_leave; - } - state->length = 0; - state->mode = NAME; - case NAME: - if (state->flags & 0x0800) { - if (have == 0) goto inf_leave; - copy = 0; - do { - len = (unsigned)(next[copy++]); - if (state->head != Z_NULL && - state->head->name != Z_NULL && - state->length < state->head->name_max) - state->head->name[state->length++] = (Bytef)len; - } while (len && copy < have); - if ((state->flags & 0x0200) && (state->wrap & 4)) - state->check = crc32(state->check, next, copy); - have -= copy; - next += copy; - if (len) goto inf_leave; - } - else if (state->head != Z_NULL) - state->head->name = Z_NULL; - state->length = 0; - state->mode = COMMENT; - case COMMENT: - if (state->flags & 0x1000) { - if (have == 0) goto inf_leave; - copy = 0; - do { - len = (unsigned)(next[copy++]); - if (state->head != Z_NULL && - state->head->comment != Z_NULL && - state->length < state->head->comm_max) - state->head->comment[state->length++] = (Bytef)len; - } while (len && copy < have); - if ((state->flags & 0x0200) && (state->wrap & 4)) - state->check = crc32(state->check, next, copy); - have -= copy; - next += copy; - if (len) goto inf_leave; - } - else if (state->head != Z_NULL) - state->head->comment = Z_NULL; - state->mode = HCRC; - case HCRC: - if (state->flags & 0x0200) { - NEEDBITS(16); - if ((state->wrap & 4) && hold != (state->check & 0xffff)) { - strm->msg = (char *)"header crc mismatch"; - state->mode = BAD; - break; - } - INITBITS(); - } - if (state->head != Z_NULL) { - state->head->hcrc = (int)((state->flags >> 9) & 1); - state->head->done = 1; - } - strm->adler = state->check = crc32(0L, Z_NULL, 0); - state->mode = TYPE; - 
break; -#endif - case DICTID: - NEEDBITS(32); - strm->adler = state->check = ZSWAP32(hold); - INITBITS(); - state->mode = DICT; - case DICT: - if (state->havedict == 0) { - RESTORE(); - return Z_NEED_DICT; - } - strm->adler = state->check = adler32(0L, Z_NULL, 0); - state->mode = TYPE; - case TYPE: - if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave; - case TYPEDO: - if (state->last) { - BYTEBITS(); - state->mode = CHECK; - break; - } - NEEDBITS(3); - state->last = BITS(1); - DROPBITS(1); - switch (BITS(2)) { - case 0: /* stored block */ - Tracev((stderr, "inflate: stored block%s\n", - state->last ? " (last)" : "")); - state->mode = STORED; - break; - case 1: /* fixed block */ - fixedtables(state); - Tracev((stderr, "inflate: fixed codes block%s\n", - state->last ? " (last)" : "")); - state->mode = LEN_; /* decode codes */ - if (flush == Z_TREES) { - DROPBITS(2); - goto inf_leave; - } - break; - case 2: /* dynamic block */ - Tracev((stderr, "inflate: dynamic codes block%s\n", - state->last ? " (last)" : "")); - state->mode = TABLE; - break; - case 3: - strm->msg = (char *)"invalid block type"; - state->mode = BAD; - } - DROPBITS(2); - break; - case STORED: - BYTEBITS(); /* go to byte boundary */ - NEEDBITS(32); - if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { - strm->msg = (char *)"invalid stored block lengths"; - state->mode = BAD; - break; - } - state->length = (unsigned)hold & 0xffff; - Tracev((stderr, "inflate: stored length %u\n", - state->length)); - INITBITS(); - state->mode = COPY_; - if (flush == Z_TREES) goto inf_leave; - case COPY_: - state->mode = COPY; - case COPY: - copy = state->length; - if (copy) { - if (copy > have) copy = have; - if (copy > left) copy = left; - if (copy == 0) goto inf_leave; - zmemcpy(put, next, copy); - have -= copy; - next += copy; - left -= copy; - put += copy; - state->length -= copy; - break; - } - Tracev((stderr, "inflate: stored end\n")); - state->mode = TYPE; - break; - case TABLE: - NEEDBITS(14); - state->nlen = BITS(5) + 257; - DROPBITS(5); - state->ndist = BITS(5) + 1; - DROPBITS(5); - state->ncode = BITS(4) + 4; - DROPBITS(4); -#ifndef PKZIP_BUG_WORKAROUND - if (state->nlen > 286 || state->ndist > 30) { - strm->msg = (char *)"too many length or distance symbols"; - state->mode = BAD; - break; - } -#endif - Tracev((stderr, "inflate: table sizes ok\n")); - state->have = 0; - state->mode = LENLENS; - case LENLENS: - while (state->have < state->ncode) { - NEEDBITS(3); - state->lens[order[state->have++]] = (unsigned short)BITS(3); - DROPBITS(3); - } - while (state->have < 19) - state->lens[order[state->have++]] = 0; - state->next = state->codes; - state->lencode = (const code FAR *)(state->next); - state->lenbits = 7; - ret = inflate_table(CODES, state->lens, 19, &(state->next), - &(state->lenbits), state->work); - if (ret) { - strm->msg = (char *)"invalid code lengths set"; - state->mode = BAD; - break; - } - Tracev((stderr, "inflate: code lengths ok\n")); - state->have = 0; - state->mode = CODELENS; - case CODELENS: - while (state->have < state->nlen + state->ndist) { - for (;;) { - here = state->lencode[BITS(state->lenbits)]; - if ((unsigned)(here.bits) <= bits) break; - PULLBYTE(); - } - if (here.val < 16) { - DROPBITS(here.bits); - state->lens[state->have++] = here.val; - } - else { - if (here.val == 16) { - NEEDBITS(here.bits + 2); - DROPBITS(here.bits); - if (state->have == 0) { - strm->msg = (char *)"invalid bit length repeat"; - state->mode = BAD; - break; - } - len = state->lens[state->have - 1]; - copy = 3 + BITS(2); - 
DROPBITS(2); - } - else if (here.val == 17) { - NEEDBITS(here.bits + 3); - DROPBITS(here.bits); - len = 0; - copy = 3 + BITS(3); - DROPBITS(3); - } - else { - NEEDBITS(here.bits + 7); - DROPBITS(here.bits); - len = 0; - copy = 11 + BITS(7); - DROPBITS(7); - } - if (state->have + copy > state->nlen + state->ndist) { - strm->msg = (char *)"invalid bit length repeat"; - state->mode = BAD; - break; - } - while (copy--) - state->lens[state->have++] = (unsigned short)len; - } - } - - /* handle error breaks in while */ - if (state->mode == BAD) break; - - /* check for end-of-block code (better have one) */ - if (state->lens[256] == 0) { - strm->msg = (char *)"invalid code -- missing end-of-block"; - state->mode = BAD; - break; - } - - /* build code tables -- note: do not change the lenbits or distbits - values here (9 and 6) without reading the comments in inftrees.h - concerning the ENOUGH constants, which depend on those values */ - state->next = state->codes; - state->lencode = (const code FAR *)(state->next); - state->lenbits = 9; - ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), - &(state->lenbits), state->work); - if (ret) { - strm->msg = (char *)"invalid literal/lengths set"; - state->mode = BAD; - break; - } - state->distcode = (const code FAR *)(state->next); - state->distbits = 6; - ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, - &(state->next), &(state->distbits), state->work); - if (ret) { - strm->msg = (char *)"invalid distances set"; - state->mode = BAD; - break; - } - Tracev((stderr, "inflate: codes ok\n")); - state->mode = LEN_; - if (flush == Z_TREES) goto inf_leave; - case LEN_: - state->mode = LEN; - case LEN: - if (have >= 6 && left >= 258) { - RESTORE(); - inflate_fast(strm, out); - LOAD(); - if (state->mode == TYPE) - state->back = -1; - break; - } - state->back = 0; - for (;;) { - here = state->lencode[BITS(state->lenbits)]; - if ((unsigned)(here.bits) <= bits) break; - PULLBYTE(); - } - if (here.op && (here.op & 0xf0) == 0) { - last = here; - for (;;) { - here = state->lencode[last.val + - (BITS(last.bits + last.op) >> last.bits)]; - if ((unsigned)(last.bits + here.bits) <= bits) break; - PULLBYTE(); - } - DROPBITS(last.bits); - state->back += last.bits; - } - DROPBITS(here.bits); - state->back += here.bits; - state->length = (unsigned)here.val; - if ((int)(here.op) == 0) { - Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? 
- "inflate: literal '%c'\n" : - "inflate: literal 0x%02x\n", here.val)); - state->mode = LIT; - break; - } - if (here.op & 32) { - Tracevv((stderr, "inflate: end of block\n")); - state->back = -1; - state->mode = TYPE; - break; - } - if (here.op & 64) { - strm->msg = (char *)"invalid literal/length code"; - state->mode = BAD; - break; - } - state->extra = (unsigned)(here.op) & 15; - state->mode = LENEXT; - case LENEXT: - if (state->extra) { - NEEDBITS(state->extra); - state->length += BITS(state->extra); - DROPBITS(state->extra); - state->back += state->extra; - } - Tracevv((stderr, "inflate: length %u\n", state->length)); - state->was = state->length; - state->mode = DIST; - case DIST: - for (;;) { - here = state->distcode[BITS(state->distbits)]; - if ((unsigned)(here.bits) <= bits) break; - PULLBYTE(); - } - if ((here.op & 0xf0) == 0) { - last = here; - for (;;) { - here = state->distcode[last.val + - (BITS(last.bits + last.op) >> last.bits)]; - if ((unsigned)(last.bits + here.bits) <= bits) break; - PULLBYTE(); - } - DROPBITS(last.bits); - state->back += last.bits; - } - DROPBITS(here.bits); - state->back += here.bits; - if (here.op & 64) { - strm->msg = (char *)"invalid distance code"; - state->mode = BAD; - break; - } - state->offset = (unsigned)here.val; - state->extra = (unsigned)(here.op) & 15; - state->mode = DISTEXT; - case DISTEXT: - if (state->extra) { - NEEDBITS(state->extra); - state->offset += BITS(state->extra); - DROPBITS(state->extra); - state->back += state->extra; - } -#ifdef INFLATE_STRICT - if (state->offset > state->dmax) { - strm->msg = (char *)"invalid distance too far back"; - state->mode = BAD; - break; - } -#endif - Tracevv((stderr, "inflate: distance %u\n", state->offset)); - state->mode = MATCH; - case MATCH: - if (left == 0) goto inf_leave; - copy = out - left; - if (state->offset > copy) { /* copy from window */ - copy = state->offset - copy; - if (copy > state->whave) { - if (state->sane) { - strm->msg = (char *)"invalid distance too far back"; - state->mode = BAD; - break; - } -#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR - Trace((stderr, "inflate.c too far\n")); - copy -= state->whave; - if (copy > state->length) copy = state->length; - if (copy > left) copy = left; - left -= copy; - state->length -= copy; - do { - *put++ = 0; - } while (--copy); - if (state->length == 0) state->mode = LEN; - break; -#endif - } - if (copy > state->wnext) { - copy -= state->wnext; - from = state->window + (state->wsize - copy); - } - else - from = state->window + (state->wnext - copy); - if (copy > state->length) copy = state->length; - } - else { /* copy from output */ - from = put - state->offset; - copy = state->length; - } - if (copy > left) copy = left; - left -= copy; - state->length -= copy; - do { - *put++ = *from++; - } while (--copy); - if (state->length == 0) state->mode = LEN; - break; - case LIT: - if (left == 0) goto inf_leave; - *put++ = (unsigned char)(state->length); - left--; - state->mode = LEN; - break; - case CHECK: - if (state->wrap) { - NEEDBITS(32); - out -= left; - strm->total_out += out; - state->total += out; - if ((state->wrap & 4) && out) - strm->adler = state->check = - UPDATE(state->check, put - out, out); - out = left; - if ((state->wrap & 4) && ( -#ifdef GUNZIP - state->flags ? 
hold : -#endif - ZSWAP32(hold)) != state->check) { - strm->msg = (char *)"incorrect data check"; - state->mode = BAD; - break; - } - INITBITS(); - Tracev((stderr, "inflate: check matches trailer\n")); - } -#ifdef GUNZIP - state->mode = LENGTH; - case LENGTH: - if (state->wrap && state->flags) { - NEEDBITS(32); - if (hold != (state->total & 0xffffffffUL)) { - strm->msg = (char *)"incorrect length check"; - state->mode = BAD; - break; - } - INITBITS(); - Tracev((stderr, "inflate: length matches trailer\n")); - } -#endif - state->mode = DONE; - case DONE: - ret = Z_STREAM_END; - goto inf_leave; - case BAD: - ret = Z_DATA_ERROR; - goto inf_leave; - case MEM: - return Z_MEM_ERROR; - case SYNC: - default: - return Z_STREAM_ERROR; - } - - /* - Return from inflate(), updating the total counts and the check value. - If there was no progress during the inflate() call, return a buffer - error. Call updatewindow() to create and/or update the window state. - Note: a memory error from inflate() is non-recoverable. - */ - inf_leave: - RESTORE(); - if (state->wsize || (out != strm->avail_out && state->mode < BAD && - (state->mode < CHECK || flush != Z_FINISH))) - if (updatewindow(strm, strm->next_out, out - strm->avail_out)) { - state->mode = MEM; - return Z_MEM_ERROR; - } - in -= strm->avail_in; - out -= strm->avail_out; - strm->total_in += in; - strm->total_out += out; - state->total += out; - if ((state->wrap & 4) && out) - strm->adler = state->check = - UPDATE(state->check, strm->next_out - out, out); - strm->data_type = (int)state->bits + (state->last ? 64 : 0) + - (state->mode == TYPE ? 128 : 0) + - (state->mode == LEN_ || state->mode == COPY_ ? 256 : 0); - if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK) - ret = Z_BUF_ERROR; - return ret; -} - -int ZEXPORT inflateEnd(strm) -z_streamp strm; -{ - struct inflate_state FAR *state; - if (inflateStateCheck(strm)) - return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)strm->state; - if (state->window != Z_NULL) ZFREE(strm, state->window); - ZFREE(strm, strm->state); - strm->state = Z_NULL; - Tracev((stderr, "inflate: end\n")); - return Z_OK; -} - -int ZEXPORT inflateGetDictionary(strm, dictionary, dictLength) -z_streamp strm; -Bytef *dictionary; -uInt *dictLength; -{ - struct inflate_state FAR *state; - - /* check state */ - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)strm->state; - - /* copy dictionary */ - if (state->whave && dictionary != Z_NULL) { - zmemcpy(dictionary, state->window + state->wnext, - state->whave - state->wnext); - zmemcpy(dictionary + state->whave - state->wnext, - state->window, state->wnext); - } - if (dictLength != Z_NULL) - *dictLength = state->whave; - return Z_OK; -} - -int ZEXPORT inflateSetDictionary(strm, dictionary, dictLength) -z_streamp strm; -const Bytef *dictionary; -uInt dictLength; -{ - struct inflate_state FAR *state; - unsigned long dictid; - int ret; - - /* check state */ - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)strm->state; - if (state->wrap != 0 && state->mode != DICT) - return Z_STREAM_ERROR; - - /* check for correct dictionary identifier */ - if (state->mode == DICT) { - dictid = adler32(0L, Z_NULL, 0); - dictid = adler32(dictid, dictionary, dictLength); - if (dictid != state->check) - return Z_DATA_ERROR; - } - - /* copy dictionary to window using updatewindow(), which will amend the - existing dictionary if appropriate */ - ret = updatewindow(strm, dictionary + dictLength, dictLength); - if 
(ret) { - state->mode = MEM; - return Z_MEM_ERROR; - } - state->havedict = 1; - Tracev((stderr, "inflate: dictionary set\n")); - return Z_OK; -} - -int ZEXPORT inflateGetHeader(strm, head) -z_streamp strm; -gz_headerp head; -{ - struct inflate_state FAR *state; - - /* check state */ - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)strm->state; - if ((state->wrap & 2) == 0) return Z_STREAM_ERROR; - - /* save header structure */ - state->head = head; - head->done = 0; - return Z_OK; -} - -/* - Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff. Return when found - or when out of input. When called, *have is the number of pattern bytes - found in order so far, in 0..3. On return *have is updated to the new - state. If on return *have equals four, then the pattern was found and the - return value is how many bytes were read including the last byte of the - pattern. If *have is less than four, then the pattern has not been found - yet and the return value is len. In the latter case, syncsearch() can be - called again with more data and the *have state. *have is initialized to - zero for the first call. - */ -local unsigned syncsearch(have, buf, len) -unsigned FAR *have; -const unsigned char FAR *buf; -unsigned len; -{ - unsigned got; - unsigned next; - - got = *have; - next = 0; - while (next < len && got < 4) { - if ((int)(buf[next]) == (got < 2 ? 0 : 0xff)) - got++; - else if (buf[next]) - got = 0; - else - got = 4 - got; - next++; - } - *have = got; - return next; -} - -int ZEXPORT inflateSync(strm) -z_streamp strm; -{ - unsigned len; /* number of bytes to look at or looked at */ - unsigned long in, out; /* temporary to save total_in and total_out */ - unsigned char buf[4]; /* to restore bit buffer to byte string */ - struct inflate_state FAR *state; - - /* check parameters */ - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)strm->state; - if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR; - - /* if first time, start search in bit buffer */ - if (state->mode != SYNC) { - state->mode = SYNC; - state->hold <<= state->bits & 7; - state->bits -= state->bits & 7; - len = 0; - while (state->bits >= 8) { - buf[len++] = (unsigned char)(state->hold); - state->hold >>= 8; - state->bits -= 8; - } - state->have = 0; - syncsearch(&(state->have), buf, len); - } - - /* search available input */ - len = syncsearch(&(state->have), strm->next_in, strm->avail_in); - strm->avail_in -= len; - strm->next_in += len; - strm->total_in += len; - - /* return no joy or set up to restart inflate() on a new block */ - if (state->have != 4) return Z_DATA_ERROR; - in = strm->total_in; out = strm->total_out; - inflateReset(strm); - strm->total_in = in; strm->total_out = out; - state->mode = TYPE; - return Z_OK; -} - -/* - Returns true if inflate is currently at the end of a block generated by - Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP - implementation to provide an additional safety check. PPP uses - Z_SYNC_FLUSH but removes the length bytes of the resulting empty stored - block. When decompressing, PPP checks that at the end of input packet, - inflate is waiting for these length bytes. 
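The incremental 00 00 FF FF scan implemented by syncsearch() above carries its match progress in *have, so the marker can straddle input buffers. A small self-contained demonstration (local copy of the routine; the driver and its names are hypothetical):

    #include <stdio.h>

    /* Local copy of the scan above: looks for 00 00 FF FF, carrying match
       progress in *have across calls. */
    static unsigned syncsearch(unsigned *have, const unsigned char *buf,
                               unsigned len)
    {
        unsigned got = *have;
        unsigned next = 0;

        while (next < len && got < 4) {
            if ((int)(buf[next]) == (got < 2 ? 0 : 0xff))
                got++;
            else if (buf[next])
                got = 0;
            else
                got = 4 - got;
            next++;
        }
        *have = got;
        return next;
    }

    int main(void)
    {
        /* The marker split across two buffers, as it would be across two
           inflate() input chunks. */
        const unsigned char part1[] = { 0x42, 0x00, 0x00 };
        const unsigned char part2[] = { 0xff, 0xff, 0x10 };
        unsigned have = 0;
        unsigned used;

        (void)syncsearch(&have, part1, 3);   /* leaves have == 2 */
        used = syncsearch(&have, part2, 3);
        if (have == 4)
            printf("sync point reached %u byte(s) into chunk 2\n", used);
        return 0;
    }

The first call leaves *have == 2 (the two zero bytes); the second completes the match two bytes in, which is how inflateSync() resumes scanning across successive avail_in chunks.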
- */ -int ZEXPORT inflateSyncPoint(strm) -z_streamp strm; -{ - struct inflate_state FAR *state; - - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)strm->state; - return state->mode == STORED && state->bits == 0; -} - -int ZEXPORT inflateCopy(dest, source) -z_streamp dest; -z_streamp source; -{ - struct inflate_state FAR *state; - struct inflate_state FAR *copy; - unsigned char FAR *window; - unsigned wsize; - - /* check input */ - if (inflateStateCheck(source) || dest == Z_NULL) - return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)source->state; - - /* allocate space */ - copy = (struct inflate_state FAR *) - ZALLOC(source, 1, sizeof(struct inflate_state)); - if (copy == Z_NULL) return Z_MEM_ERROR; - window = Z_NULL; - if (state->window != Z_NULL) { - window = (unsigned char FAR *) - ZALLOC(source, 1U << state->wbits, sizeof(unsigned char)); - if (window == Z_NULL) { - ZFREE(source, copy); - return Z_MEM_ERROR; - } - } - - /* copy state */ - zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); - zmemcpy((voidpf)copy, (voidpf)state, sizeof(struct inflate_state)); - copy->strm = dest; - if (state->lencode >= state->codes && - state->lencode <= state->codes + ENOUGH - 1) { - copy->lencode = copy->codes + (state->lencode - state->codes); - copy->distcode = copy->codes + (state->distcode - state->codes); - } - copy->next = copy->codes + (state->next - state->codes); - if (window != Z_NULL) { - wsize = 1U << state->wbits; - zmemcpy(window, state->window, wsize); - } - copy->window = window; - dest->state = (struct internal_state FAR *)copy; - return Z_OK; -} - -int ZEXPORT inflateUndermine(strm, subvert) -z_streamp strm; -int subvert; -{ - struct inflate_state FAR *state; - - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)strm->state; -#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR - state->sane = !subvert; - return Z_OK; -#else - (void)subvert; - state->sane = 1; - return Z_DATA_ERROR; -#endif -} - -int ZEXPORT inflateValidate(strm, check) -z_streamp strm; -int check; -{ - struct inflate_state FAR *state; - - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)strm->state; - if (check) - state->wrap |= 4; - else - state->wrap &= ~4; - return Z_OK; -} - -long ZEXPORT inflateMark(strm) -z_streamp strm; -{ - struct inflate_state FAR *state; - - if (inflateStateCheck(strm)) - return -(1L << 16); - state = (struct inflate_state FAR *)strm->state; - return (long)(((unsigned long)((long)state->back)) << 16) + - (state->mode == COPY ? state->length : - (state->mode == MATCH ? state->was - state->length : 0)); -} - -unsigned long ZEXPORT inflateCodesUsed(strm) -z_streamp strm; -{ - struct inflate_state FAR *state; - if (inflateStateCheck(strm)) return (unsigned long)-1; - state = (struct inflate_state FAR *)strm->state; - return (unsigned long)(state->next - state->codes); -} diff --git a/base/poco/Foundation/src/inflate.h b/base/poco/Foundation/src/inflate.h deleted file mode 100644 index 6096e40fb35..00000000000 --- a/base/poco/Foundation/src/inflate.h +++ /dev/null @@ -1,127 +0,0 @@ -/* inflate.h -- internal inflate state definition - * Copyright (C) 1995-2016 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. 
- */ - -/* define NO_GZIP when compiling if you want to disable gzip header and - trailer decoding by inflate(). NO_GZIP would be used to avoid linking in - the crc code when it is not needed. For shared libraries, gzip decoding - should be left enabled. */ -#ifndef NO_GZIP -# define GUNZIP -#endif - -/* Possible inflate modes between inflate() calls */ -typedef enum -{ - HEAD = 16180, /* i: waiting for magic header */ - FLAGS, /* i: waiting for method and flags (gzip) */ - TIME, /* i: waiting for modification time (gzip) */ - OS, /* i: waiting for extra flags and operating system (gzip) */ - EXLEN, /* i: waiting for extra length (gzip) */ - EXTRA, /* i: waiting for extra bytes (gzip) */ - NAME, /* i: waiting for end of file name (gzip) */ - COMMENT, /* i: waiting for end of comment (gzip) */ - HCRC, /* i: waiting for header crc (gzip) */ - DICTID, /* i: waiting for dictionary check value */ - DICT, /* waiting for inflateSetDictionary() call */ - TYPE, /* i: waiting for type bits, including last-flag bit */ - TYPEDO, /* i: same, but skip check to exit inflate on new block */ - STORED, /* i: waiting for stored size (length and complement) */ - COPY_, /* i/o: same as COPY below, but only first time in */ - COPY, /* i/o: waiting for input or output to copy stored block */ - TABLE, /* i: waiting for dynamic block table lengths */ - LENLENS, /* i: waiting for code length code lengths */ - CODELENS, /* i: waiting for length/lit and distance code lengths */ - LEN_, /* i: same as LEN below, but only first time in */ - LEN, /* i: waiting for length/lit/eob code */ - LENEXT, /* i: waiting for length extra bits */ - DIST, /* i: waiting for distance code */ - DISTEXT, /* i: waiting for distance extra bits */ - MATCH, /* o: waiting for output space to copy string */ - LIT, /* o: waiting for output space to write literal */ - CHECK, /* i: waiting for 32-bit check value */ - LENGTH, /* i: waiting for 32-bit length (gzip) */ - DONE, /* finished check, done -- remain here until reset */ - BAD, /* got a data error -- remain here until reset */ - MEM, /* got an inflate() memory error -- remain here until reset */ - SYNC /* looking for synchronization bytes to restart inflate() */ -} inflate_mode; - -/* - State transitions between above modes - - - (most modes can go to BAD or MEM on error -- not shown for clarity) - - Process header: - HEAD -> (gzip) or (zlib) or (raw) - (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME -> COMMENT -> - HCRC -> TYPE - (zlib) -> DICTID or TYPE - DICTID -> DICT -> TYPE - (raw) -> TYPEDO - Read deflate blocks: - TYPE -> TYPEDO -> STORED or TABLE or LEN_ or CHECK - STORED -> COPY_ -> COPY -> TYPE - TABLE -> LENLENS -> CODELENS -> LEN_ - LEN_ -> LEN - Read deflate codes in fixed or dynamic block: - LEN -> LENEXT or LIT or TYPE - LENEXT -> DIST -> DISTEXT -> MATCH -> LEN - LIT -> LEN - Process trailer: - CHECK -> LENGTH -> DONE - */ - -/* State maintained between inflate() calls -- approximately 7K bytes, not - including the allocated sliding window, which is up to 32K bytes. 
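From the caller's side, these modes surface only as inflate() return codes. A condensed one-shot usage loop, assuming zlib's public zlib.h API and with error handling abbreviated (a sketch, not the deleted library code):

    #include <string.h>
    #include <zlib.h>

    /* src holds a complete zlib stream; dst is assumed large enough.
       Returns the decompressed size, or -1 on failure. */
    static int decompress_all(const unsigned char *src, unsigned srclen,
                              unsigned char *dst, unsigned dstlen)
    {
        z_stream strm;
        int ret;

        memset(&strm, 0, sizeof(strm));
        if (inflateInit(&strm) != Z_OK)
            return -1;

        strm.next_in = (Bytef *)src;
        strm.avail_in = srclen;
        strm.next_out = dst;
        strm.avail_out = dstlen;

        /* inflate() walks HEAD ... DONE internally; the caller only sees
           Z_OK (more to do), Z_STREAM_END (DONE), or an error code. */
        do {
            ret = inflate(&strm, Z_NO_FLUSH);
        } while (ret == Z_OK && strm.avail_out > 0);

        inflateEnd(&strm);
        return ret == Z_STREAM_END ? (int)(dstlen - strm.avail_out) : -1;
    }

Z_STREAM_END corresponds to the DONE state above; BAD and MEM surface as Z_DATA_ERROR and Z_MEM_ERROR respectively.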
*/ -struct inflate_state -{ - z_streamp strm; /* pointer back to this zlib stream */ - inflate_mode mode; /* current inflate mode */ - int last; /* true if processing last block */ - int wrap; /* bit 0 true for zlib, bit 1 true for gzip, - bit 2 true to validate check value */ - int havedict; /* true if dictionary provided */ - int flags; /* gzip header method and flags (0 if zlib) */ - unsigned dmax; /* zlib header max distance (INFLATE_STRICT) */ - unsigned long check; /* protected copy of check value */ - unsigned long total; /* protected copy of output count */ - gz_headerp head; /* where to save gzip header information */ - /* sliding window */ - unsigned wbits; /* log base 2 of requested window size */ - unsigned wsize; /* window size or zero if not using window */ - unsigned whave; /* valid bytes in the window */ - unsigned wnext; /* window write index */ - unsigned char FAR * window; /* allocated sliding window, if needed */ - /* bit accumulator */ - unsigned long hold; /* input bit accumulator */ - unsigned bits; /* number of bits in "in" */ - /* for string and stored block copying */ - unsigned length; /* literal or length of data to copy */ - unsigned offset; /* distance back to copy string from */ - /* for table and code decoding */ - unsigned extra; /* extra bits needed */ - /* fixed and dynamic code tables */ - code const FAR * lencode; /* starting table for length/literal codes */ - code const FAR * distcode; /* starting table for distance codes */ - unsigned lenbits; /* index bits for lencode */ - unsigned distbits; /* index bits for distcode */ - /* dynamic table building */ - unsigned ncode; /* number of code length code lengths */ - unsigned nlen; /* number of length code lengths */ - unsigned ndist; /* number of distance code lengths */ - unsigned have; /* number of code lengths in lens[] */ - code FAR * next; /* next available space in codes[] */ - unsigned short lens[320]; /* temporary storage for code lengths */ - unsigned short work[288]; /* work area for code table building */ - code codes[ENOUGH]; /* space for code tables */ - int sane; /* if false, allow invalid distance too far */ - int back; /* bits back of last unprocessed length/lit */ - unsigned was; /* initial length of match */ -}; diff --git a/base/poco/Foundation/src/inftrees.c b/base/poco/Foundation/src/inftrees.c deleted file mode 100644 index 2ea08fc13ea..00000000000 --- a/base/poco/Foundation/src/inftrees.c +++ /dev/null @@ -1,304 +0,0 @@ -/* inftrees.c -- generate Huffman trees for efficient decoding - * Copyright (C) 1995-2017 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -#include "zutil.h" -#include "inftrees.h" - -#define MAXBITS 15 - -const char inflate_copyright[] = - " inflate 1.2.11 Copyright 1995-2017 Mark Adler "; -/* - If you use the zlib library in a product, an acknowledgment is welcome - in the documentation of your product. If for some reason you cannot - include such an acknowledgment, I would appreciate that you keep this - copyright string in the executable of your product. - */ - -/* - Build a set of tables to decode the provided canonical Huffman code. - The code lengths are lens[0..codes-1]. The result starts at *table, - whose indices are 0..2^bits-1. work is a writable array of at least - lens shorts, which is used as a work area. type is the type of code - to be generated, CODES, LENS, or DISTS. On return, zero is success, - -1 is an invalid code, and +1 means that ENOUGH isn't enough. 
table - on return points to the next available entry's address. bits is the - requested root table index bits, and on return it is the actual root - table index bits. It will differ if the request is greater than the - longest code or if it is less than the shortest code. - */ -int ZLIB_INTERNAL inflate_table(type, lens, codes, table, bits, work) -codetype type; -unsigned short FAR *lens; -unsigned codes; -code FAR * FAR *table; -unsigned FAR *bits; -unsigned short FAR *work; -{ - unsigned len; /* a code's length in bits */ - unsigned sym; /* index of code symbols */ - unsigned min, max; /* minimum and maximum code lengths */ - unsigned root; /* number of index bits for root table */ - unsigned curr; /* number of index bits for current table */ - unsigned drop; /* code bits to drop for sub-table */ - int left; /* number of prefix codes available */ - unsigned used; /* code entries in table used */ - unsigned huff; /* Huffman code */ - unsigned incr; /* for incrementing code, index */ - unsigned fill; /* index for replicating entries */ - unsigned low; /* low bits for current root entry */ - unsigned mask; /* mask for low root bits */ - code here; /* table entry for duplication */ - code FAR *next; /* next available space in table */ - const unsigned short FAR *base; /* base value table to use */ - const unsigned short FAR *extra; /* extra bits table to use */ - unsigned match; /* use base and extra for symbol >= match */ - unsigned short count[MAXBITS+1]; /* number of codes of each length */ - unsigned short offs[MAXBITS+1]; /* offsets in table for each length */ - static const unsigned short lbase[31] = { /* Length codes 257..285 base */ - 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, - 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; - static const unsigned short lext[31] = { /* Length codes 257..285 extra */ - 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, - 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 77, 202}; - static const unsigned short dbase[32] = { /* Distance codes 0..29 base */ - 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, - 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, - 8193, 12289, 16385, 24577, 0, 0}; - static const unsigned short dext[32] = { /* Distance codes 0..29 extra */ - 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, - 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, - 28, 28, 29, 29, 64, 64}; - - /* - Process a set of code lengths to create a canonical Huffman code. The - code lengths are lens[0..codes-1]. Each length corresponds to the - symbols 0..codes-1. The Huffman code is generated by first sorting the - symbols by length from short to long, and retaining the symbol order - for codes with equal lengths. Then the code starts with all zero bits - for the first code of the shortest length, and the codes are integer - increments for the same length, and zeros are appended as the length - increases. For the deflate format, these bits are stored backwards - from their more natural integer increment ordering, and so when the - decoding tables are built in the large loop below, the integer codes - are incremented backwards. - - This routine assumes, but does not check, that all of the entries in - lens[] are in the range 0..MAXBITS. The caller must assure this. - 1..MAXBITS is interpreted as that code length. zero means that that - symbol does not occur in this code. 
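The canonical construction described here — the first code of each length is the previous first code plus the previous count, shifted left once — can be written directly (RFC 1951, section 3.2.2). A standalone sketch; note that the deleted routine below additionally stores the bits backwards, which is why its large loop increments codes in bit-reversed order:

    /* count[len] is the number of codes of each bit length (count[0] must
       be 0); next_code[len] receives the first code of that length. */
    enum { MAX_LEN = 15 };

    static void first_codes(const unsigned short count[MAX_LEN + 1],
                            unsigned next_code[MAX_LEN + 1])
    {
        unsigned code = 0;
        unsigned len;

        next_code[0] = 0;
        for (len = 1; len <= MAX_LEN; len++) {
            code = (code + count[len - 1]) << 1;
            next_code[len] = code;
        }
    }

Symbols of equal length then take consecutive codes starting from next_code[len], in symbol order, exactly as the sorted work[] array is consumed below.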
- - The codes are sorted by computing a count of codes for each length, - creating from that a table of starting indices for each length in the - sorted table, and then entering the symbols in order in the sorted - table. The sorted table is work[], with that space being provided by - the caller. - - The length counts are used for other purposes as well, i.e. finding - the minimum and maximum length codes, determining if there are any - codes at all, checking for a valid set of lengths, and looking ahead - at length counts to determine sub-table sizes when building the - decoding tables. - */ - - /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */ - for (len = 0; len <= MAXBITS; len++) - count[len] = 0; - for (sym = 0; sym < codes; sym++) - count[lens[sym]]++; - - /* bound code lengths, force root to be within code lengths */ - root = *bits; - for (max = MAXBITS; max >= 1; max--) - if (count[max] != 0) break; - if (root > max) root = max; - if (max == 0) { /* no symbols to code at all */ - here.op = (unsigned char)64; /* invalid code marker */ - here.bits = (unsigned char)1; - here.val = (unsigned short)0; - *(*table)++ = here; /* make a table to force an error */ - *(*table)++ = here; - *bits = 1; - return 0; /* no symbols, but wait for decoding to report error */ - } - for (min = 1; min < max; min++) - if (count[min] != 0) break; - if (root < min) root = min; - - /* check for an over-subscribed or incomplete set of lengths */ - left = 1; - for (len = 1; len <= MAXBITS; len++) { - left <<= 1; - left -= count[len]; - if (left < 0) return -1; /* over-subscribed */ - } - if (left > 0 && (type == CODES || max != 1)) - return -1; /* incomplete set */ - - /* generate offsets into symbol table for each length for sorting */ - offs[1] = 0; - for (len = 1; len < MAXBITS; len++) - offs[len + 1] = offs[len] + count[len]; - - /* sort symbols by length, by symbol order within each length */ - for (sym = 0; sym < codes; sym++) - if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym; - - /* - Create and fill in decoding tables. In this loop, the table being - filled is at next and has curr index bits. The code being used is huff - with length len. That code is converted to an index by dropping drop - bits off of the bottom. For codes where len is less than drop + curr, - those top drop + curr - len bits are incremented through all values to - fill the table with replicated entries. - - root is the number of index bits for the root table. When len exceeds - root, sub-tables are created pointed to by the root entry with an index - of the low root bits of huff. This is saved in low to check for when a - new sub-table should be started. drop is zero when the root table is - being filled, and drop is root when sub-tables are being filled. - - When a new sub-table is needed, it is necessary to look ahead in the - code lengths to determine what size sub-table is needed. The length - counts are used for this, and so count[] is decremented as codes are - entered in the tables. - - used keeps track of how many table entries have been allocated from the - provided *table space. It is checked for LENS and DIST tables against - the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in - the initial root table size constants. See the comments in inftrees.h - for more information. - - sym increments through all symbols, and the loop terminates when - all codes of length max, i.e. all codes, have been processed. 
This - routine permits incomplete codes, so another loop after this one fills - in the rest of the decoding tables with invalid code markers. - */ - - /* set up for code type */ - switch (type) { - case CODES: - base = extra = work; /* dummy value--not used */ - match = 20; - break; - case LENS: - base = lbase; - extra = lext; - match = 257; - break; - default: /* DISTS */ - base = dbase; - extra = dext; - match = 0; - } - - /* initialize state for loop */ - huff = 0; /* starting code */ - sym = 0; /* starting code symbol */ - len = min; /* starting code length */ - next = *table; /* current table to fill in */ - curr = root; /* current table index bits */ - drop = 0; /* current bits to drop from code for index */ - low = (unsigned)(-1); /* trigger new sub-table when len > root */ - used = 1U << root; /* use root table entries */ - mask = used - 1; /* mask for comparing low */ - - /* check available table space */ - if ((type == LENS && used > ENOUGH_LENS) || - (type == DISTS && used > ENOUGH_DISTS)) - return 1; - - /* process all codes and make table entries */ - for (;;) { - /* create table entry */ - here.bits = (unsigned char)(len - drop); - if (work[sym] + 1U < match) { - here.op = (unsigned char)0; - here.val = work[sym]; - } - else if (work[sym] >= match) { - here.op = (unsigned char)(extra[work[sym] - match]); - here.val = base[work[sym] - match]; - } - else { - here.op = (unsigned char)(32 + 64); /* end of block */ - here.val = 0; - } - - /* replicate for those indices with low len bits equal to huff */ - incr = 1U << (len - drop); - fill = 1U << curr; - min = fill; /* save offset to next table */ - do { - fill -= incr; - next[(huff >> drop) + fill] = here; - } while (fill != 0); - - /* backwards increment the len-bit code huff */ - incr = 1U << (len - 1); - while (huff & incr) - incr >>= 1; - if (incr != 0) { - huff &= incr - 1; - huff += incr; - } - else - huff = 0; - - /* go to next symbol, update count, len */ - sym++; - if (--(count[len]) == 0) { - if (len == max) break; - len = lens[work[sym]]; - } - - /* create new sub-table if needed */ - if (len > root && (huff & mask) != low) { - /* if first time, transition to sub-tables */ - if (drop == 0) - drop = root; - - /* increment past last table */ - next += min; /* here min is 1 << curr */ - - /* determine length of next table */ - curr = len - drop; - left = (int)(1 << curr); - while (curr + drop < max) { - left -= count[curr + drop]; - if (left <= 0) break; - curr++; - left <<= 1; - } - - /* check for enough space */ - used += 1U << curr; - if ((type == LENS && used > ENOUGH_LENS) || - (type == DISTS && used > ENOUGH_DISTS)) - return 1; - - /* point entry in root table to sub-table */ - low = huff & mask; - (*table)[low].op = (unsigned char)curr; - (*table)[low].bits = (unsigned char)root; - (*table)[low].val = (unsigned short)(next - *table); - } - } - - /* fill in remaining table entry if code is incomplete (guaranteed to have - at most one remaining entry, since if the code is incomplete, the - maximum code length that was allowed to get this far is one bit) */ - if (huff != 0) { - here.op = (unsigned char)64; /* invalid code marker */ - here.bits = (unsigned char)(len - drop); - here.val = (unsigned short)0; - next[huff] = here; - } - - /* set return parameters */ - *table += used; - *bits = root; - return 0; -} diff --git a/base/poco/Foundation/src/inftrees.h b/base/poco/Foundation/src/inftrees.h deleted file mode 100644 index 55407138bd6..00000000000 --- a/base/poco/Foundation/src/inftrees.h +++ /dev/null @@ -1,63 +0,0 
@@ -/* inftrees.h -- header to use inftrees.c - * Copyright (C) 1995-2005, 2010 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. - */ - -/* Structure for decoding tables. Each entry provides either the - information needed to do the operation requested by the code that - indexed that table entry, or it provides a pointer to another - table that indexes more bits of the code. op indicates whether - the entry is a pointer to another table, a literal, a length or - distance, an end-of-block, or an invalid code. For a table - pointer, the low four bits of op is the number of index bits of - that table. For a length or distance, the low four bits of op - is the number of extra bits to get after the code. bits is - the number of bits in this code or part of the code to drop off - of the bit buffer. val is the actual byte to output in the case - of a literal, the base length or distance, or the offset from - the current table to the next table. Each entry is four bytes. */ -typedef struct -{ - unsigned char op; /* operation, extra bits, table bits */ - unsigned char bits; /* bits in this part of the code */ - unsigned short val; /* offset in table or code value */ -} code; - -/* op values as set by inflate_table(): - 00000000 - literal - 0000tttt - table link, tttt != 0 is the number of table index bits - 0001eeee - length or distance, eeee is the number of extra bits - 01100000 - end of block - 01000000 - invalid code - */ - -/* Maximum size of the dynamic table. The maximum number of code structures is - 1444, which is the sum of 852 for literal/length codes and 592 for distance - codes. These values were found by exhaustive searches using the program - examples/enough.c found in the zlib distribution. The arguments to that - program are the number of symbols, the initial root table size, and the - maximum bit length of a code. "enough 286 9 15" for literal/length codes - returns returns 852, and "enough 30 6 15" for distance codes returns 592. - The initial root table size (9 or 6) is found in the fifth argument of the - inflate_table() calls in inflate.c and infback.c. If the root table size is - changed, then these maximum sizes would be need to be recalculated and - updated. */ -#define ENOUGH_LENS 852 -#define ENOUGH_DISTS 592 -#define ENOUGH (ENOUGH_LENS + ENOUGH_DISTS) - -/* Type of code to build for inflate_table() */ -typedef enum -{ - CODES, - LENS, - DISTS -} codetype; - -int ZLIB_INTERNAL inflate_table - OF((codetype type, unsigned short FAR * lens, unsigned codes, code FAR * FAR * table, unsigned FAR * bits, unsigned short FAR * work)); diff --git a/base/poco/Foundation/src/pocomsg.mc b/base/poco/Foundation/src/pocomsg.mc deleted file mode 100644 index d1f6d6e7a8e..00000000000 --- a/base/poco/Foundation/src/pocomsg.mc +++ /dev/null @@ -1,87 +0,0 @@ -;// -;// pocomsg.mc[.h] -;// -;// The Poco message source/header file. -;// -;// NOTE: pocomsg.h is automatically generated from pocomsg.mc. -;// Never edit pocomsg.h directly! -;// -;// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -;// and Contributors. 
-;// -;// Permission is hereby granted, free of charge, to any person or organization -;// obtaining a copy of the software and accompanying documentation covered by -;// this license (the "Software") to use, reproduce, display, distribute, -;// execute, and transmit the Software, and to prepare derivative works of the -;// Software, and to permit third-parties to whom the Software is furnished to -;// do so, all subject to the following: -;// -;// The copyright notices in the Software and this entire statement, including -;// the above license grant, this restriction and the following disclaimer, -;// must be included in all copies of the Software, in whole or in part, and -;// all derivative works of the Software, unless such copies or derivative -;// works are solely in the form of machine-executable object code generated by -;// a source language processor. -;// -;// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -;// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -;// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT -;// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE -;// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, -;// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -;// DEALINGS IN THE SOFTWARE. -;// - - -;// -;// Categories -;// -MessageId=0x1 -SymbolicName=POCO_CTG_FATAL -Language=English -Fatal -. -MessageId=0x2 -SymbolicName=POCO_CTG_CRITICAL -Language=English -Critical -. -MessageId=0x3 -SymbolicName=POCO_CTG_ERROR -Language=English -Error -. -MessageId=0x4 -SymbolicName=POCO_CTG_WARNING -Language=English -Warning -. -MessageId=0x5 -SymbolicName=POCO_CTG_NOTICE -Language=English -Notice -. -MessageId=0x6 -SymbolicName=POCO_CTG_INFORMATION -Language=English -Information -. -MessageId=0x7 -SymbolicName=POCO_CTG_DEBUG -Language=English -Debug -. -MessageId=0x8 -SymbolicName=POCO_CTG_TRACE -Language=English -Trace -. - -;// -;// Event Identifiers -;// -MessageId=0x1000 -SymbolicName=POCO_MSG_LOG -Language=English -%1 -. diff --git a/base/poco/Foundation/src/strtod.cc b/base/poco/Foundation/src/strtod.cc deleted file mode 100644 index 7c776943be5..00000000000 --- a/base/poco/Foundation/src/strtod.cc +++ /dev/null @@ -1,556 +0,0 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
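For context on the catalog format above: mc.exe compiles a .mc file into a header of MessageId constants plus a binary message resource, and %1 in POCO_MSG_LOG is the first insertion string. A hypothetical Win32 sketch, assuming the generated header is named pocomsg.h; this is not Poco code:

```cpp
#include <windows.h>
#include "pocomsg.h" // generated by mc.exe: defines POCO_CTG_ERROR, POCO_MSG_LOG

// Report one line of text to the Windows Event Log using the generated ids.
void logOneLine(const wchar_t * text)
{
    HANDLE src = RegisterEventSourceW(nullptr, L"MyApp");
    if (src == nullptr)
        return;
    LPCWSTR strings[1] = {text}; // substituted for %1 in the message template
    ReportEventW(src, EVENTLOG_ERROR_TYPE, POCO_CTG_ERROR, POCO_MSG_LOG,
                 nullptr, 1, 0, strings, nullptr);
    DeregisterEventSource(src);
}
```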
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include -#include - -#include "strtod.h" -#include "bignum.h" -#include "cached-powers.h" -#include "ieee.h" - -namespace double_conversion { - -// 2^53 = 9007199254740992. -// Any integer with at most 15 decimal digits will hence fit into a double -// (which has a 53bit significand) without loss of precision. -static const int kMaxExactDoubleIntegerDecimalDigits = 15; -// 2^64 = 18446744073709551616 > 10^19 -static const int kMaxUint64DecimalDigits = 19; - -// Max double: 1.7976931348623157 x 10^308 -// Min non-zero double: 4.9406564584124654 x 10^-324 -// Any x >= 10^309 is interpreted as +infinity. -// Any x <= 10^-324 is interpreted as 0. -// Note that 2.5e-324 (despite being smaller than the min double) will be read -// as non-zero (equal to the min non-zero double). -static const int kMaxDecimalPower = 309; -static const int kMinDecimalPower = -324; - -// 2^64 = 18446744073709551616 -static const uint64_t kMaxUint64 = UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF); - - -static const double exact_powers_of_ten[] = { - 1.0, // 10^0 - 10.0, - 100.0, - 1000.0, - 10000.0, - 100000.0, - 1000000.0, - 10000000.0, - 100000000.0, - 1000000000.0, - 10000000000.0, // 10^10 - 100000000000.0, - 1000000000000.0, - 10000000000000.0, - 100000000000000.0, - 1000000000000000.0, - 10000000000000000.0, - 100000000000000000.0, - 1000000000000000000.0, - 10000000000000000000.0, - 100000000000000000000.0, // 10^20 - 1000000000000000000000.0, - // 10^22 = 0x21e19e0c9bab2400000 = 0x878678326eac9 * 2^22 - 10000000000000000000000.0 -}; -static const int kExactPowersOfTenSize = ARRAY_SIZE(exact_powers_of_ten); - -// Maximum number of significant digits in the decimal representation. -// In fact the value is 772 (see conversions.cc), but to give us some margin -// we round up to 780. -static const int kMaxSignificantDecimalDigits = 780; - -static Vector TrimLeadingZeros(Vector buffer) { - for (int i = 0; i < buffer.length(); i++) { - if (buffer[i] != '0') { - return buffer.SubVector(i, buffer.length()); - } - } - return Vector(buffer.start(), 0); -} - - -static Vector TrimTrailingZeros(Vector buffer) { - for (int i = buffer.length() - 1; i >= 0; --i) { - if (buffer[i] != '0') { - return buffer.SubVector(0, i + 1); - } - } - return Vector(buffer.start(), 0); -} - - -static void CutToMaxSignificantDigits(Vector buffer, - int exponent, - char* significant_buffer, - int* significant_exponent) { - for (int i = 0; i < kMaxSignificantDecimalDigits - 1; ++i) { - significant_buffer[i] = buffer[i]; - } - // The input buffer has been trimmed. Therefore the last digit must be - // different from '0'. - ASSERT(buffer[buffer.length() - 1] != '0'); - // Set the last digit to be non-zero. This is sufficient to guarantee - // correct rounding. - significant_buffer[kMaxSignificantDecimalDigits - 1] = '1'; - *significant_exponent = - exponent + (buffer.length() - kMaxSignificantDecimalDigits); -} - - -// Trims the buffer and cuts it to at most kMaxSignificantDecimalDigits. 
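The constants above pivot on one fact: every integer below 2^53 is an exact double. A quick check of the bound, plus the exact divide it enables (a minimal sketch, not the library code):

```cpp
#include <cassert>
#include <cstdint>

int main()
{
    // 2^53 = 9007199254740992 and 10^15 < 2^53 < 10^16, which is why
    // kMaxExactDoubleIntegerDecimalDigits is 15.
    const std::uint64_t two53 = 1ull << 53;
    assert(1000000000000000ull < two53);  // any 15-digit integer is exact
    assert(10000000000000000ull > two53); // a 16-digit integer may round
    // The same exactness powers the DoubleStrtod fast path below:
    // "355e-2" is one correctly rounded IEEE divide away from the answer.
    assert(355.0 / 100.0 == 3.55);
}
```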
-// If possible the input-buffer is reused, but if the buffer needs to be -// modified (due to cutting), then the input needs to be copied into the -// buffer_copy_space. -static void TrimAndCut(Vector buffer, int exponent, - char* buffer_copy_space, int space_size, - Vector* trimmed, int* updated_exponent) { - Vector left_trimmed = TrimLeadingZeros(buffer); - Vector right_trimmed = TrimTrailingZeros(left_trimmed); - exponent += left_trimmed.length() - right_trimmed.length(); - if (right_trimmed.length() > kMaxSignificantDecimalDigits) { - (void) space_size; // Mark variable as used. - ASSERT(space_size >= kMaxSignificantDecimalDigits); - CutToMaxSignificantDigits(right_trimmed, exponent, - buffer_copy_space, updated_exponent); - *trimmed = Vector(buffer_copy_space, - kMaxSignificantDecimalDigits); - } else { - *trimmed = right_trimmed; - *updated_exponent = exponent; - } -} - - -// Reads digits from the buffer and converts them to a uint64. -// Reads in as many digits as fit into a uint64. -// When the string starts with "1844674407370955161" no further digit is read. -// Since 2^64 = 18446744073709551616 it would still be possible read another -// digit if it was less or equal than 6, but this would complicate the code. -static uint64_t ReadUint64(Vector buffer, - int* number_of_read_digits) { - uint64_t result = 0; - int i = 0; - while (i < buffer.length() && result <= (kMaxUint64 / 10 - 1)) { - int digit = buffer[i++] - '0'; - ASSERT(0 <= digit && digit <= 9); - result = 10 * result + digit; - } - *number_of_read_digits = i; - return result; -} - - -// Reads a DiyFp from the buffer. -// The returned DiyFp is not necessarily normalized. -// If remaining_decimals is zero then the returned DiyFp is accurate. -// Otherwise it has been rounded and has error of at most 1/2 ulp. -static void ReadDiyFp(Vector buffer, - DiyFp* result, - int* remaining_decimals) { - int read_digits; - uint64_t significand = ReadUint64(buffer, &read_digits); - if (buffer.length() == read_digits) { - *result = DiyFp(significand, 0); - *remaining_decimals = 0; - } else { - // Round the significand. - if (buffer[read_digits] >= '5') { - significand++; - } - // Compute the binary exponent. - int exponent = 0; - *result = DiyFp(significand, exponent); - *remaining_decimals = buffer.length() - read_digits; - } -} - - -static bool DoubleStrtod(Vector trimmed, - int exponent, - double* result) { -#if !defined(DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS) - // On x86 the floating-point stack can be 64 or 80 bits wide. If it is - // 80 bits wide (as is the case on Linux) then double-rounding occurs and the - // result is not accurate. - // We know that Windows32 uses 64 bits and is therefore accurate. - // Note that the ARM simulator is compiled for 32bits. It therefore exhibits - // the same problem. - return false; -#endif - if (trimmed.length() <= kMaxExactDoubleIntegerDecimalDigits) { - int read_digits; - // The trimmed input fits into a double. - // If the 10^exponent (resp. 10^-exponent) fits into a double too then we - // can compute the result-double simply by multiplying (resp. dividing) the - // two numbers. - // This is possible because IEEE guarantees that floating-point operations - // return the best possible approximation. - if (exponent < 0 && -exponent < kExactPowersOfTenSize) { - // 10^-exponent fits into a double. 
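The overflow guard in ReadUint64 above is worth spelling out: `result <= kMaxUint64 / 10 - 1` guarantees that `10 * result + 9` still fits in 64 bits, so no per-digit overflow check is needed. A self-contained restatement:

```cpp
#include <cstdint>
#include <string_view>

// Accumulate leading decimal digits into a uint64, stopping before overflow
// becomes possible; mirrors the ReadUint64 logic shown above.
static std::uint64_t readUint64Prefix(std::string_view digits, int * consumed)
{
    const std::uint64_t kMaxUint64 = ~0ull;
    std::uint64_t result = 0;
    int i = 0;
    // If result <= max/10 - 1, then 10*result <= max - 10, so adding any
    // digit 0..9 cannot wrap around.
    while (i < static_cast<int>(digits.size()) && result <= kMaxUint64 / 10 - 1)
        result = 10 * result + static_cast<std::uint64_t>(digits[i++] - '0');
    *consumed = i;
    return result;
}
```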
- *result = static_cast(ReadUint64(trimmed, &read_digits)); - ASSERT(read_digits == trimmed.length()); - *result /= exact_powers_of_ten[-exponent]; - return true; - } - if (0 <= exponent && exponent < kExactPowersOfTenSize) { - // 10^exponent fits into a double. - *result = static_cast(ReadUint64(trimmed, &read_digits)); - ASSERT(read_digits == trimmed.length()); - *result *= exact_powers_of_ten[exponent]; - return true; - } - int remaining_digits = - kMaxExactDoubleIntegerDecimalDigits - trimmed.length(); - if ((0 <= exponent) && - (exponent - remaining_digits < kExactPowersOfTenSize)) { - // The trimmed string was short and we can multiply it with - // 10^remaining_digits. As a result the remaining exponent now fits - // into a double too. - *result = static_cast(ReadUint64(trimmed, &read_digits)); - ASSERT(read_digits == trimmed.length()); - *result *= exact_powers_of_ten[remaining_digits]; - *result *= exact_powers_of_ten[exponent - remaining_digits]; - return true; - } - } - return false; -} - - -// Returns 10^exponent as an exact DiyFp. -// The given exponent must be in the range [1; kDecimalExponentDistance[. -static DiyFp AdjustmentPowerOfTen(int exponent) { - ASSERT(0 < exponent); - ASSERT(exponent < PowersOfTenCache::kDecimalExponentDistance); - // Simply hardcode the remaining powers for the given decimal exponent - // distance. - ASSERT(PowersOfTenCache::kDecimalExponentDistance == 8); - switch (exponent) { - case 1: return DiyFp(UINT64_2PART_C(0xa0000000, 00000000), -60); - case 2: return DiyFp(UINT64_2PART_C(0xc8000000, 00000000), -57); - case 3: return DiyFp(UINT64_2PART_C(0xfa000000, 00000000), -54); - case 4: return DiyFp(UINT64_2PART_C(0x9c400000, 00000000), -50); - case 5: return DiyFp(UINT64_2PART_C(0xc3500000, 00000000), -47); - case 6: return DiyFp(UINT64_2PART_C(0xf4240000, 00000000), -44); - case 7: return DiyFp(UINT64_2PART_C(0x98968000, 00000000), -40); - default: - UNREACHABLE(); - return DiyFp(0, 0); - } -} - - -// If the function returns true then the result is the correct double. -// Otherwise it is either the correct double or the double that is just below -// the correct double. -static bool DiyFpStrtod(Vector buffer, - int exponent, - double* result) { - DiyFp input; - int remaining_decimals; - ReadDiyFp(buffer, &input, &remaining_decimals); - // Since we may have dropped some digits the input is not accurate. - // If remaining_decimals is different than 0 than the error is at most - // .5 ulp (unit in the last place). - // We don't want to deal with fractions and therefore keep a common - // denominator. - const int kDenominatorLog = 3; - const int kDenominator = 1 << kDenominatorLog; - // Move the remaining decimals into the exponent. - exponent += remaining_decimals; - int error = (remaining_decimals == 0 ? 
0 : kDenominator / 2); - - int old_e = input.e(); - input.Normalize(); - error <<= old_e - input.e(); - - ASSERT(exponent <= PowersOfTenCache::kMaxDecimalExponent); - if (exponent < PowersOfTenCache::kMinDecimalExponent) { - *result = 0.0; - return true; - } - DiyFp cached_power; - int cached_decimal_exponent; - PowersOfTenCache::GetCachedPowerForDecimalExponent(exponent, - &cached_power, - &cached_decimal_exponent); - - if (cached_decimal_exponent != exponent) { - int adjustment_exponent = exponent - cached_decimal_exponent; - DiyFp adjustment_power = AdjustmentPowerOfTen(adjustment_exponent); - input.Multiply(adjustment_power); - if (kMaxUint64DecimalDigits - buffer.length() >= adjustment_exponent) { - // The product of input with the adjustment power fits into a 64 bit - // integer. - ASSERT(DiyFp::kSignificandSize == 64); - } else { - // The adjustment power is exact. There is hence only an error of 0.5. - error += kDenominator / 2; - } - } - - input.Multiply(cached_power); - // The error introduced by a multiplication of a*b equals - // error_a + error_b + error_a*error_b/2^64 + 0.5 - // Substituting a with 'input' and b with 'cached_power' we have - // error_b = 0.5 (all cached powers have an error of less than 0.5 ulp), - // error_ab = 0 or 1 / kDenominator > error_a*error_b/ 2^64 - int error_b = kDenominator / 2; - int error_ab = (error == 0 ? 0 : 1); // We round up to 1. - int fixed_error = kDenominator / 2; - error += error_b + error_ab + fixed_error; - - old_e = input.e(); - input.Normalize(); - error <<= old_e - input.e(); - - // See if the double's significand changes if we add/subtract the error. - int order_of_magnitude = DiyFp::kSignificandSize + input.e(); - int effective_significand_size = - Double::SignificandSizeForOrderOfMagnitude(order_of_magnitude); - int precision_digits_count = - DiyFp::kSignificandSize - effective_significand_size; - if (precision_digits_count + kDenominatorLog >= DiyFp::kSignificandSize) { - // This can only happen for very small denormals. In this case the - // half-way multiplied by the denominator exceeds the range of an uint64. - // Simply shift everything to the right. - int shift_amount = (precision_digits_count + kDenominatorLog) - - DiyFp::kSignificandSize + 1; - input.set_f(input.f() >> shift_amount); - input.set_e(input.e() + shift_amount); - // We add 1 for the lost precision of error, and kDenominator for - // the lost precision of input.f(). - error = (error >> shift_amount) + 1 + kDenominator; - precision_digits_count -= shift_amount; - } - // We use uint64_ts now. This only works if the DiyFp uses uint64_ts too. - ASSERT(DiyFp::kSignificandSize == 64); - ASSERT(precision_digits_count < 64); - uint64_t one64 = 1; - uint64_t precision_bits_mask = (one64 << precision_digits_count) - 1; - uint64_t precision_bits = input.f() & precision_bits_mask; - uint64_t half_way = one64 << (precision_digits_count - 1); - precision_bits *= kDenominator; - half_way *= kDenominator; - DiyFp rounded_input(input.f() >> precision_digits_count, - input.e() + precision_digits_count); - if (precision_bits >= half_way + error) { - rounded_input.set_f(rounded_input.f() + 1); - } - // If the last_bits are too close to the half-way case than we are too - // inaccurate and round down. In this case we return false so that we can - // fall back to a more precise algorithm. - - *result = Double(rounded_input).value(); - if (half_way - error < precision_bits && precision_bits < half_way + error) { - // Too imprecise. 
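The Normalize()/error interplay above is compact but easy to miss: the error is tracked in eighths of a ulp (kDenominator == 8), and shifting the significand left scales the absolute error by the same power of two, hence `error <<= old_e - input.e()`. A miniature sketch with names of my own:

```cpp
#include <cassert>
#include <cstdint>

struct MiniDiyFp
{
    std::uint64_t f; // significand, must be non-zero before normalizing
    int e;           // binary exponent
};

// Shift the significand until the top bit is set, scaling the error with it.
inline void normalize(MiniDiyFp & v, std::int64_t & error_eighth_ulps)
{
    assert(v.f != 0);
    while ((v.f & (1ull << 63)) == 0)
    {
        v.f <<= 1;
        v.e -= 1;
        error_eighth_ulps <<= 1; // one shift doubles the absolute error
    }
}
```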
The caller will have to fall back to a slower version. - // However the returned number is guaranteed to be either the correct - // double, or the next-lower double. - return false; - } else { - return true; - } -} - - -// Returns -// - -1 if buffer*10^exponent < diy_fp. -// - 0 if buffer*10^exponent == diy_fp. -// - +1 if buffer*10^exponent > diy_fp. -// Preconditions: -// buffer.length() + exponent <= kMaxDecimalPower + 1 -// buffer.length() + exponent > kMinDecimalPower -// buffer.length() <= kMaxDecimalSignificantDigits -static int CompareBufferWithDiyFp(Vector buffer, - int exponent, - DiyFp diy_fp) { - ASSERT(buffer.length() + exponent <= kMaxDecimalPower + 1); - ASSERT(buffer.length() + exponent > kMinDecimalPower); - ASSERT(buffer.length() <= kMaxSignificantDecimalDigits); - // Make sure that the Bignum will be able to hold all our numbers. - // Our Bignum implementation has a separate field for exponents. Shifts will - // consume at most one bigit (< 64 bits). - // ln(10) == 3.3219... - ASSERT(((kMaxDecimalPower + 1) * 333 / 100) < Bignum::kMaxSignificantBits); - Bignum buffer_bignum; - Bignum diy_fp_bignum; - buffer_bignum.AssignDecimalString(buffer); - diy_fp_bignum.AssignUInt64(diy_fp.f()); - if (exponent >= 0) { - buffer_bignum.MultiplyByPowerOfTen(exponent); - } else { - diy_fp_bignum.MultiplyByPowerOfTen(-exponent); - } - if (diy_fp.e() > 0) { - diy_fp_bignum.ShiftLeft(diy_fp.e()); - } else { - buffer_bignum.ShiftLeft(-diy_fp.e()); - } - return Bignum::Compare(buffer_bignum, diy_fp_bignum); -} - - -// Returns true if the guess is the correct double. -// Returns false, when guess is either correct or the next-lower double. -static bool ComputeGuess(Vector trimmed, int exponent, - double* guess) { - if (trimmed.length() == 0) { - *guess = 0.0; - return true; - } - if (exponent + trimmed.length() - 1 >= kMaxDecimalPower) { - *guess = Double::Infinity(); - return true; - } - if (exponent + trimmed.length() <= kMinDecimalPower) { - *guess = 0.0; - return true; - } - - if (DoubleStrtod(trimmed, exponent, guess) || - DiyFpStrtod(trimmed, exponent, guess)) { - return true; - } - if (*guess == Double::Infinity()) { - return true; - } - return false; -} - -double Strtod(Vector buffer, int exponent) { - char copy_buffer[kMaxSignificantDecimalDigits]; - Vector trimmed; - int updated_exponent; - TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits, - &trimmed, &updated_exponent); - exponent = updated_exponent; - - double guess; - bool is_correct = ComputeGuess(trimmed, exponent, &guess); - if (is_correct) return guess; - - DiyFp upper_boundary = Double(guess).UpperBoundary(); - int comparison = CompareBufferWithDiyFp(trimmed, exponent, upper_boundary); - if (comparison < 0) { - return guess; - } else if (comparison > 0) { - return Double(guess).NextDouble(); - } else if ((Double(guess).Significand() & 1) == 0) { - // Round towards even. - return guess; - } else { - return Double(guess).NextDouble(); - } -} - -float Strtof(Vector buffer, int exponent) { - char copy_buffer[kMaxSignificantDecimalDigits]; - Vector trimmed; - int updated_exponent; - TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits, - &trimmed, &updated_exponent); - exponent = updated_exponent; - - double double_guess; - bool is_correct = ComputeGuess(trimmed, exponent, &double_guess); - - float float_guess = static_cast(double_guess); - if (float_guess == double_guess) { - // This shortcut triggers for integer values. - return float_guess; - } - - // We must catch double-rounding. 
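CompareBufferWithDiyFp below needs Bignums in general, but the underlying idea is plain integer comparison of buffer*10^exponent against f*2^e after clearing the negative exponents. When everything fits in 64 bits it can be done directly; a worked example:

```cpp
#include <cassert>

int main()
{
    // Is 0.15625 (buffer "15625", exponent -5) equal to the DiyFp 5 * 2^-5?
    // Multiply both sides by 10^5 * 2^5 to clear the exponents:
    // compare 15625 * 2^5 with 5 * 10^5.
    unsigned long long lhs = 15625ull << 5; // 500000
    unsigned long long rhs = 5ull * 100000; // 500000
    assert(lhs == rhs);                     // 0.15625 == 5/32 exactly
}
```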
Say the double has been rounded up, and is - // now a boundary of a float, and rounds up again. This is why we have to - // look at previous too. - // Example (in decimal numbers): - // input: 12349 - // high-precision (4 digits): 1235 - // low-precision (3 digits): - // when read from input: 123 - // when rounded from high precision: 124. - // To do this we simply look at the neighbors of the correct result and see - // if they would round to the same float. If the guess is not correct we have - // to look at four values (since two different doubles could be the correct - // double). - - double double_next = Double(double_guess).NextDouble(); - double double_previous = Double(double_guess).PreviousDouble(); - - float f1 = static_cast(double_previous); - float f2 = float_guess; - float f3 = static_cast(double_next); - float f4; - if (is_correct) { - f4 = f3; - } else { - double double_next2 = Double(double_next).NextDouble(); - f4 = static_cast(double_next2); - } - (void) f2; // Mark variable as used. - ASSERT(f1 <= f2 && f2 <= f3 && f3 <= f4); - - // If the guess doesn't lie near a single-precision boundary we can simply - // return its float-value. - if (f1 == f4) { - return float_guess; - } - - ASSERT((f1 != f2 && f2 == f3 && f3 == f4) || - (f1 == f2 && f2 != f3 && f3 == f4) || - (f1 == f2 && f2 == f3 && f3 != f4)); - - // guess and next are the two possible candidates (in the same way that - // double_guess was the lower candidate for a double-precision guess). - float guess = f1; - float next = f4; - DiyFp upper_boundary; - if (guess == 0.0f) { - float min_float = 1e-45f; - upper_boundary = Double(static_cast(min_float) / 2).AsDiyFp(); - } else { - upper_boundary = Single(guess).UpperBoundary(); - } - int comparison = CompareBufferWithDiyFp(trimmed, exponent, upper_boundary); - if (comparison < 0) { - return guess; - } else if (comparison > 0) { - return next; - } else if ((Single(guess).Significand() & 1) == 0) { - // Round towards even. - return guess; - } else { - return next; - } -} - -} // namespace double_conversion diff --git a/base/poco/Foundation/src/strtod.h b/base/poco/Foundation/src/strtod.h deleted file mode 100644 index 66f90253e32..00000000000 --- a/base/poco/Foundation/src/strtod.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef DOUBLE_CONVERSION_STRTOD_H_ -#define DOUBLE_CONVERSION_STRTOD_H_ - -#include "utils.h" - -namespace double_conversion -{ - -// The buffer must only contain digits in the range [0-9]. It must not -// contain a dot or a sign. It must not start with '0', and must not be empty. -double Strtod(Vector buffer, int exponent); - -// The buffer must only contain digits in the range [0-9]. It must not -// contain a dot or a sign. It must not start with '0', and must not be empty. -float Strtof(Vector buffer, int exponent); - -} // namespace double_conversion - -#endif // DOUBLE_CONVERSION_STRTOD_H_ diff --git a/base/poco/Foundation/src/trees.c b/base/poco/Foundation/src/trees.c deleted file mode 100644 index b9d998f1b32..00000000000 --- a/base/poco/Foundation/src/trees.c +++ /dev/null @@ -1,1203 +0,0 @@ -/* trees.c -- output deflated data using Huffman coding - * Copyright (C) 1995-2017 Jean-loup Gailly - * detect_data_type() function provided freely by Cosmin Truta, 2006 - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* - * ALGORITHM - * - * The "deflation" process uses several Huffman trees. The more - * common source values are represented by shorter bit sequences. - * - * Each code tree is stored in a compressed form which is itself - * a Huffman encoding of the lengths of all the code strings (in - * ascending order by source values). The actual code strings are - * reconstructed from the lengths in the inflate process, as described - * in the deflate specification. - * - * REFERENCES - * - * Deutsch, L.P.,"'Deflate' Compressed Data Format Specification". - * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc - * - * Storer, James A. - * Data Compression: Methods and Theory, pp. 49-50. - * Computer Science Press, 1988. ISBN 0-7167-8156-5. - * - * Sedgewick, R. - * Algorithms, p290. - * Addison-Wesley, 1983. ISBN 0-201-06672-6. 
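A usage sketch for the contract stated above: the caller strips sign, dot and leading zeros and folds them into the exponent, so "3.55" becomes buffer "355" with exponent -2. The Vector template arguments did not survive extraction in the diff; I am assuming upstream double-conversion's `Vector<const char>` from utils.h:

```cpp
#include "strtod.h" // double_conversion, as declared above

double parse355()
{
    const char digits[] = {'3', '5', '5'}; // digits only, no dot, no sign
    return double_conversion::Strtod(
        double_conversion::Vector<const char>(digits, 3), -2); // 3.55
}
```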
- */ - -/* @(#) $Id$ */ - -/* #define GEN_TREES_H */ - -#include "deflate.h" - -#ifdef ZLIB_DEBUG -# include -#endif - -/* =========================================================================== - * Constants - */ - -#define MAX_BL_BITS 7 -/* Bit length codes must not exceed MAX_BL_BITS bits */ - -#define END_BLOCK 256 -/* end of block literal code */ - -#define REP_3_6 16 -/* repeat previous bit length 3-6 times (2 bits of repeat count) */ - -#define REPZ_3_10 17 -/* repeat a zero length 3-10 times (3 bits of repeat count) */ - -#define REPZ_11_138 18 -/* repeat a zero length 11-138 times (7 bits of repeat count) */ - -local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */ - = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0}; - -local const int extra_dbits[D_CODES] /* extra bits for each distance code */ - = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; - -local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */ - = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7}; - -local const uch bl_order[BL_CODES] - = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15}; -/* The lengths of the bit length codes are sent in order of decreasing - * probability, to avoid transmitting the lengths for unused bit length codes. - */ - -/* =========================================================================== - * Local data. These are initialized only once. - */ - -#define DIST_CODE_LEN 512 /* see definition of array dist_code below */ - -#if defined(GEN_TREES_H) || !defined(STDC) -/* non ANSI compilers may not accept trees.h */ - -local ct_data static_ltree[L_CODES+2]; -/* The static literal tree. Since the bit lengths are imposed, there is no - * need for the L_CODES extra codes used during heap construction. However - * The codes 286 and 287 are needed to build a canonical tree (see _tr_init - * below). - */ - -local ct_data static_dtree[D_CODES]; -/* The static distance tree. (Actually a trivial tree since all codes use - * 5 bits.) - */ - -uch _dist_code[DIST_CODE_LEN]; -/* Distance codes. The first 256 values correspond to the distances - * 3 .. 258, the last 256 values correspond to the top 8 bits of - * the 15 bit distances. - */ - -uch _length_code[MAX_MATCH-MIN_MATCH+1]; -/* length code for each normalized match length (0 == MIN_MATCH) */ - -local int base_length[LENGTH_CODES]; -/* First normalized length for each code (0 = MIN_MATCH) */ - -local int base_dist[D_CODES]; -/* First normalized distance for each code (0 = distance of 1) */ - -#else -# include "trees.h" -#endif /* GEN_TREES_H */ - -struct static_tree_desc_s { - const ct_data *static_tree; /* static tree or NULL */ - const intf *extra_bits; /* extra bits for each code or NULL */ - int extra_base; /* base index for extra_bits */ - int elems; /* max number of elements in the tree */ - int max_length; /* max bit length for the codes */ -}; - -local const static_tree_desc static_l_desc = -{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS}; - -local const static_tree_desc static_d_desc = -{static_dtree, extra_dbits, 0, D_CODES, MAX_BITS}; - -local const static_tree_desc static_bl_desc = -{(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS}; - -/* =========================================================================== - * Local (static) routines in this file. 
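The shape encoded by the extra_dbits table above can be restated arithmetically: distances 1..4 get their own codes, after which codes come in pairs covering ranges that double in size, with one more extra bit per pair. zlib uses the precomputed _dist_code table instead; this sketch only demonstrates the pattern:

```cpp
#include <cassert>

inline int distanceCode(unsigned dist) // dist in 1..32768
{
    unsigned x = dist - 1;
    if (x < 4)
        return static_cast<int>(x); // codes 0..3: 0 extra bits
    int n = 0;
    while ((x >> (n + 1)) != 0)
        ++n;                        // n = floor(log2(x)): x is an (n+1)-bit number
    // Codes 2n and 2n+1 split the range [2^n, 2^(n+1)) using n-1 extra bits.
    return 2 * n + static_cast<int>((x >> (n - 1)) & 1);
}

int main()
{
    assert(distanceCode(4) == 3);                           // last 0-extra-bit code
    assert(distanceCode(5) == 4 && distanceCode(7) == 5);   // 1 extra bit each
    assert(distanceCode(9) == 6 && distanceCode(13) == 7);  // 2 extra bits each
}
```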
- */ - -local void tr_static_init OF((void)); -local void init_block OF((deflate_state *s)); -local void pqdownheap OF((deflate_state *s, ct_data *tree, int k)); -local void gen_bitlen OF((deflate_state *s, tree_desc *desc)); -local void gen_codes OF((ct_data *tree, int max_code, ushf *bl_count)); -local void build_tree OF((deflate_state *s, tree_desc *desc)); -local void scan_tree OF((deflate_state *s, ct_data *tree, int max_code)); -local void send_tree OF((deflate_state *s, ct_data *tree, int max_code)); -local int build_bl_tree OF((deflate_state *s)); -local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes, - int blcodes)); -local void compress_block OF((deflate_state *s, const ct_data *ltree, - const ct_data *dtree)); -local int detect_data_type OF((deflate_state *s)); -local unsigned bi_reverse OF((unsigned value, int length)); -local void bi_windup OF((deflate_state *s)); -local void bi_flush OF((deflate_state *s)); - -#ifdef GEN_TREES_H -local void gen_trees_header OF((void)); -#endif - -#ifndef ZLIB_DEBUG -# define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len) - /* Send a code of the given tree. c and tree must not have side effects */ - -#else /* !ZLIB_DEBUG */ -# define send_code(s, c, tree) \ - { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \ - send_bits(s, tree[c].Code, tree[c].Len); } -#endif - -/* =========================================================================== - * Output a short LSB first on the stream. - * IN assertion: there is enough room in pendingBuf. - */ -#define put_short(s, w) { \ - put_byte(s, (uch)((w) & 0xff)); \ - put_byte(s, (uch)((ush)(w) >> 8)); \ -} - -/* =========================================================================== - * Send a value on a given number of bits. - * IN assertion: length <= 16 and value fits in length bits. - */ -#ifdef ZLIB_DEBUG -local void send_bits OF((deflate_state *s, int value, int length)); - -local void send_bits(s, value, length) - deflate_state *s; - int value; /* value to send */ - int length; /* number of bits */ -{ - Tracevv((stderr," l %2d v %4x ", length, value)); - Assert(length > 0 && length <= 15, "invalid length"); - s->bits_sent += (ulg)length; - - /* If not enough room in bi_buf, use (valid) bits from bi_buf and - * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid)) - * unused bits in value. - */ - if (s->bi_valid > (int)Buf_size - length) { - s->bi_buf |= (ush)value << s->bi_valid; - put_short(s, s->bi_buf); - s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); - s->bi_valid += length - Buf_size; - } else { - s->bi_buf |= (ush)value << s->bi_valid; - s->bi_valid += length; - } -} -#else /* !ZLIB_DEBUG */ - -#define send_bits(s, value, length) \ -{ int len = length;\ - if (s->bi_valid > (int)Buf_size - len) {\ - int val = (int)value;\ - s->bi_buf |= (ush)val << s->bi_valid;\ - put_short(s, s->bi_buf);\ - s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\ - s->bi_valid += len - Buf_size;\ - } else {\ - s->bi_buf |= (ush)(value) << s->bi_valid;\ - s->bi_valid += len;\ - }\ -} -#endif /* ZLIB_DEBUG */ - - -/* the arguments must not have side effects */ - -/* =========================================================================== - * Initialize the various 'constant' tables. 
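The send_bits/put_short pair above maintains a simple invariant: bi_buf holds bi_valid (< 16) pending bits, and full 16-bit chunks are flushed low byte first. A minimal LSB-first bit writer with the same logic (a sketch, not the zlib code):

```cpp
#include <cstdint>
#include <vector>

struct BitWriter
{
    std::vector<std::uint8_t> out;
    std::uint16_t bi_buf = 0; // pending bits, LSB first
    int bi_valid = 0;         // number of valid bits in bi_buf

    void sendBits(int value, int length) // length <= 16, value fits in length bits
    {
        if (bi_valid > 16 - length)
        {
            // Not enough room: fill bi_buf, flush it, keep the leftover bits.
            bi_buf |= static_cast<std::uint16_t>(value << bi_valid);
            out.push_back(bi_buf & 0xff); // put_short: low byte first
            out.push_back(bi_buf >> 8);
            bi_buf = static_cast<std::uint16_t>(value >> (16 - bi_valid));
            bi_valid += length - 16;
        }
        else
        {
            bi_buf |= static_cast<std::uint16_t>(value << bi_valid);
            bi_valid += length;
        }
    }
};
```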
- */ -local void tr_static_init() -{ -#if defined(GEN_TREES_H) || !defined(STDC) - static int static_init_done = 0; - int n; /* iterates over tree elements */ - int bits; /* bit counter */ - int length; /* length value */ - int code; /* code value */ - int dist; /* distance index */ - ush bl_count[MAX_BITS+1]; - /* number of codes at each bit length for an optimal tree */ - - if (static_init_done) return; - - /* For some embedded targets, global variables are not initialized: */ -#ifdef NO_INIT_GLOBAL_POINTERS - static_l_desc.static_tree = static_ltree; - static_l_desc.extra_bits = extra_lbits; - static_d_desc.static_tree = static_dtree; - static_d_desc.extra_bits = extra_dbits; - static_bl_desc.extra_bits = extra_blbits; -#endif - - /* Initialize the mapping length (0..255) -> length code (0..28) */ - length = 0; - for (code = 0; code < LENGTH_CODES-1; code++) { - base_length[code] = length; - for (n = 0; n < (1< dist code (0..29) */ - dist = 0; - for (code = 0 ; code < 16; code++) { - base_dist[code] = dist; - for (n = 0; n < (1<>= 7; /* from now on, all distances are divided by 128 */ - for ( ; code < D_CODES; code++) { - base_dist[code] = dist << 7; - for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) { - _dist_code[256 + dist++] = (uch)code; - } - } - Assert (dist == 256, "tr_static_init: 256+dist != 512"); - - /* Construct the codes of the static literal tree */ - for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0; - n = 0; - while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++; - while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++; - while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++; - while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++; - /* Codes 286 and 287 do not exist, but we must include them in the - * tree construction to get a canonical Huffman tree (longest code - * all ones) - */ - gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count); - - /* The static distance tree is trivial: */ - for (n = 0; n < D_CODES; n++) { - static_dtree[n].Len = 5; - static_dtree[n].Code = bi_reverse((unsigned)n, 5); - } - static_init_done = 1; - -# ifdef GEN_TREES_H - gen_trees_header(); -# endif -#endif /* defined(GEN_TREES_H) || !defined(STDC) */ -} - -/* =========================================================================== - * Generate the file trees.h describing the static trees. - */ -#ifdef GEN_TREES_H -# ifndef ZLIB_DEBUG -# include -# endif - -# define SEPARATOR(i, last, width) \ - ((i) == (last)? "\n};\n\n" : \ - ((i) % (width) == (width)-1 ? 
",\n" : ", ")) - -void gen_trees_header() -{ - FILE *header = fopen("trees.h", "w"); - int i; - - Assert (header != NULL, "Can't open trees.h"); - fprintf(header, - "/* header created automatically with -DGEN_TREES_H */\n\n"); - - fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n"); - for (i = 0; i < L_CODES+2; i++) { - fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code, - static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5)); - } - - fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n"); - for (i = 0; i < D_CODES; i++) { - fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code, - static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5)); - } - - fprintf(header, "const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {\n"); - for (i = 0; i < DIST_CODE_LEN; i++) { - fprintf(header, "%2u%s", _dist_code[i], - SEPARATOR(i, DIST_CODE_LEN-1, 20)); - } - - fprintf(header, - "const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {\n"); - for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) { - fprintf(header, "%2u%s", _length_code[i], - SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20)); - } - - fprintf(header, "local const int base_length[LENGTH_CODES] = {\n"); - for (i = 0; i < LENGTH_CODES; i++) { - fprintf(header, "%1u%s", base_length[i], - SEPARATOR(i, LENGTH_CODES-1, 20)); - } - - fprintf(header, "local const int base_dist[D_CODES] = {\n"); - for (i = 0; i < D_CODES; i++) { - fprintf(header, "%5u%s", base_dist[i], - SEPARATOR(i, D_CODES-1, 10)); - } - - fclose(header); -} -#endif /* GEN_TREES_H */ - -/* =========================================================================== - * Initialize the tree data structures for a new zlib stream. - */ -void ZLIB_INTERNAL _tr_init(s) - deflate_state *s; -{ - tr_static_init(); - - s->l_desc.dyn_tree = s->dyn_ltree; - s->l_desc.stat_desc = &static_l_desc; - - s->d_desc.dyn_tree = s->dyn_dtree; - s->d_desc.stat_desc = &static_d_desc; - - s->bl_desc.dyn_tree = s->bl_tree; - s->bl_desc.stat_desc = &static_bl_desc; - - s->bi_buf = 0; - s->bi_valid = 0; -#ifdef ZLIB_DEBUG - s->compressed_len = 0L; - s->bits_sent = 0L; -#endif - - /* Initialize the first block of the first file: */ - init_block(s); -} - -/* =========================================================================== - * Initialize a new block. - */ -local void init_block(s) - deflate_state *s; -{ - int n; /* iterates over tree elements */ - - /* Initialize the trees. */ - for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0; - for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0; - for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0; - - s->dyn_ltree[END_BLOCK].Freq = 1; - s->opt_len = s->static_len = 0L; - s->last_lit = s->matches = 0; -} - -#define SMALLEST 1 -/* Index within the heap array of least frequent node in the Huffman tree */ - - -/* =========================================================================== - * Remove the smallest element from the heap and recreate the heap with - * one less element. Updates heap and heap_len. - */ -#define pqremove(s, tree, top) \ -{\ - top = s->heap[SMALLEST]; \ - s->heap[SMALLEST] = s->heap[s->heap_len--]; \ - pqdownheap(s, tree, SMALLEST); \ -} - -/* =========================================================================== - * Compares to subtrees, using the tree depth as tie breaker when - * the subtrees have equal frequency. This minimizes the worst case length. 
- */ -#define smaller(tree, n, m, depth) \ - (tree[n].Freq < tree[m].Freq || \ - (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m])) - -/* =========================================================================== - * Restore the heap property by moving down the tree starting at node k, - * exchanging a node with the smallest of its two sons if necessary, stopping - * when the heap property is re-established (each father smaller than its - * two sons). - */ -local void pqdownheap(s, tree, k) - deflate_state *s; - ct_data *tree; /* the tree to restore */ - int k; /* node to move down */ -{ - int v = s->heap[k]; - int j = k << 1; /* left son of k */ - while (j <= s->heap_len) { - /* Set j to the smallest of the two sons: */ - if (j < s->heap_len && - smaller(tree, s->heap[j+1], s->heap[j], s->depth)) { - j++; - } - /* Exit if v is smaller than both sons */ - if (smaller(tree, v, s->heap[j], s->depth)) break; - - /* Exchange v with the smallest son */ - s->heap[k] = s->heap[j]; k = j; - - /* And continue down the tree, setting j to the left son of k */ - j <<= 1; - } - s->heap[k] = v; -} - -/* =========================================================================== - * Compute the optimal bit lengths for a tree and update the total bit length - * for the current block. - * IN assertion: the fields freq and dad are set, heap[heap_max] and - * above are the tree nodes sorted by increasing frequency. - * OUT assertions: the field len is set to the optimal bit length, the - * array bl_count contains the frequencies for each bit length. - * The length opt_len is updated; static_len is also updated if stree is - * not null. - */ -local void gen_bitlen(s, desc) - deflate_state *s; - tree_desc *desc; /* the tree descriptor */ -{ - ct_data *tree = desc->dyn_tree; - int max_code = desc->max_code; - const ct_data *stree = desc->stat_desc->static_tree; - const intf *extra = desc->stat_desc->extra_bits; - int base = desc->stat_desc->extra_base; - int max_length = desc->stat_desc->max_length; - int h; /* heap index */ - int n, m; /* iterate over the tree elements */ - int bits; /* bit length */ - int xbits; /* extra bits */ - ush f; /* frequency */ - int overflow = 0; /* number of elements with bit length too large */ - - for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0; - - /* In a first pass, compute the optimal bit lengths (which may - * overflow in the case of the bit length tree). 
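The ordering implemented by smaller()/pqdownheap above can be expressed with a standard priority queue: lower frequency wins, and ties go to the node with the smaller depth, which bounds the worst-case code length. A sketch in ordinary C++:

```cpp
#include <queue>
#include <vector>

struct Node
{
    unsigned freq;
    unsigned char depth;
    int id;
};

// Inverted comparison because std::priority_queue is a max-heap by default.
struct Greater
{
    bool operator()(const Node & a, const Node & b) const
    {
        return a.freq > b.freq || (a.freq == b.freq && a.depth > b.depth);
    }
};

using MinHeap = std::priority_queue<Node, std::vector<Node>, Greater>;
```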
- */ - tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */ - - for (h = s->heap_max+1; h < HEAP_SIZE; h++) { - n = s->heap[h]; - bits = tree[tree[n].Dad].Len + 1; - if (bits > max_length) bits = max_length, overflow++; - tree[n].Len = (ush)bits; - /* We overwrite tree[n].Dad which is no longer needed */ - - if (n > max_code) continue; /* not a leaf node */ - - s->bl_count[bits]++; - xbits = 0; - if (n >= base) xbits = extra[n-base]; - f = tree[n].Freq; - s->opt_len += (ulg)f * (unsigned)(bits + xbits); - if (stree) s->static_len += (ulg)f * (unsigned)(stree[n].Len + xbits); - } - if (overflow == 0) return; - - Tracev((stderr,"\nbit length overflow\n")); - /* This happens for example on obj2 and pic of the Calgary corpus */ - - /* Find the first bit length which could increase: */ - do { - bits = max_length-1; - while (s->bl_count[bits] == 0) bits--; - s->bl_count[bits]--; /* move one leaf down the tree */ - s->bl_count[bits+1] += 2; /* move one overflow item as its brother */ - s->bl_count[max_length]--; - /* The brother of the overflow item also moves one step up, - * but this does not affect bl_count[max_length] - */ - overflow -= 2; - } while (overflow > 0); - - /* Now recompute all bit lengths, scanning in increasing frequency. - * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all - * lengths instead of fixing only the wrong ones. This idea is taken - * from 'ar' written by Haruhiko Okumura.) - */ - for (bits = max_length; bits != 0; bits--) { - n = s->bl_count[bits]; - while (n != 0) { - m = s->heap[--h]; - if (m > max_code) continue; - if ((unsigned) tree[m].Len != (unsigned) bits) { - Tracev((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits)); - s->opt_len += ((ulg)bits - tree[m].Len) * tree[m].Freq; - tree[m].Len = (ush)bits; - } - n--; - } - } -} - -/* =========================================================================== - * Generate the codes for a given tree and bit counts (which need not be - * optimal). - * IN assertion: the array bl_count contains the bit length statistics for - * the given tree and the field len is set for all tree elements. - * OUT assertion: the field code is set for all tree elements of non - * zero code length. - */ -local void gen_codes (tree, max_code, bl_count) - ct_data *tree; /* the tree to decorate */ - int max_code; /* largest code with non zero frequency */ - ushf *bl_count; /* number of codes at each bit length */ -{ - ush next_code[MAX_BITS+1]; /* next code value for each bit length */ - unsigned code = 0; /* running code value */ - int bits; /* bit index */ - int n; /* code index */ - - /* The distribution counts are first used to generate the code values - * without bit reversal. - */ - for (bits = 1; bits <= MAX_BITS; bits++) { - code = (code + bl_count[bits-1]) << 1; - next_code[bits] = (ush)code; - } - /* Check that the bit counts in bl_count are consistent. The last code - * must be all ones. - */ - Assert (code + bl_count[MAX_BITS]-1 == (1<dyn_tree; - const ct_data *stree = desc->stat_desc->static_tree; - int elems = desc->stat_desc->elems; - int n, m; /* iterate over heap elements */ - int max_code = -1; /* largest code with non zero frequency */ - int node; /* new node being created */ - - /* Construct the initial heap, with least frequent element in - * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1]. - * heap[0] is not used. 
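gen_codes above applies the canonical-code rule from RFC 1951: codes of each bit length are consecutive, starting where the previous length left off, shifted left by one. A miniature worked example:

```cpp
#include <cassert>

int main()
{
    // Four symbols with lengths {2, 1, 3, 3}: a valid, complete code.
    int bl_count[4] = {0, 1, 1, 2}; // number of codes per bit length
    unsigned next_code[4] = {};
    unsigned code = 0;
    for (int bits = 1; bits <= 3; ++bits)
    {
        code = (code + bl_count[bits - 1]) << 1;
        next_code[bits] = code;
    }
    // Lengths 1, 2, 3 start at 0b0, 0b10 and 0b110 respectively, and the
    // last code of the longest length (0b111) is all ones, as asserted above.
    assert(next_code[1] == 0 && next_code[2] == 2 && next_code[3] == 6);
}
```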
- */ - s->heap_len = 0, s->heap_max = HEAP_SIZE; - - for (n = 0; n < elems; n++) { - if (tree[n].Freq != 0) { - s->heap[++(s->heap_len)] = max_code = n; - s->depth[n] = 0; - } else { - tree[n].Len = 0; - } - } - - /* The pkzip format requires that at least one distance code exists, - * and that at least one bit should be sent even if there is only one - * possible code. So to avoid special checks later on we force at least - * two codes of non zero frequency. - */ - while (s->heap_len < 2) { - node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0); - tree[node].Freq = 1; - s->depth[node] = 0; - s->opt_len--; if (stree) s->static_len -= stree[node].Len; - /* node is 0 or 1 so it does not have extra bits */ - } - desc->max_code = max_code; - - /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree, - * establish sub-heaps of increasing lengths: - */ - for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n); - - /* Construct the Huffman tree by repeatedly combining the least two - * frequent nodes. - */ - node = elems; /* next internal node of the tree */ - do { - pqremove(s, tree, n); /* n = node of least frequency */ - m = s->heap[SMALLEST]; /* m = node of next least frequency */ - - s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */ - s->heap[--(s->heap_max)] = m; - - /* Create a new node father of n and m */ - tree[node].Freq = tree[n].Freq + tree[m].Freq; - s->depth[node] = (uch)((s->depth[n] >= s->depth[m] ? - s->depth[n] : s->depth[m]) + 1); - tree[n].Dad = tree[m].Dad = (ush)node; -#ifdef DUMP_BL_TREE - if (tree == s->bl_tree) { - fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)", - node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq); - } -#endif - /* and insert the new node in the heap */ - s->heap[SMALLEST] = node++; - pqdownheap(s, tree, SMALLEST); - - } while (s->heap_len >= 2); - - s->heap[--(s->heap_max)] = s->heap[SMALLEST]; - - /* At this point, the fields freq and dad are set. We can now - * generate the bit lengths. - */ - gen_bitlen(s, (tree_desc *)desc); - - /* The field len is now set, we can generate the bit codes */ - gen_codes ((ct_data *)tree, max_code, s->bl_count); -} - -/* =========================================================================== - * Scan a literal or distance tree to determine the frequencies of the codes - * in the bit length tree. 
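build_tree above is the classic Huffman construction; a miniature with std::priority_queue shows the core loop. zlib additionally keeps the merged-out nodes sorted in heap[] so gen_bitlen can walk them afterwards, which this sketch omits:

```cpp
#include <queue>
#include <vector>

struct HNode
{
    unsigned freq;
    int left = -1, right = -1; // indices into the node vector; -1 for leaves
};

// Repeatedly merge the two least frequent nodes; expects at least one node.
inline int buildHuffman(std::vector<HNode> & nodes) // returns the root index
{
    auto cmp = [&nodes](int a, int b) { return nodes[a].freq > nodes[b].freq; };
    std::priority_queue<int, std::vector<int>, decltype(cmp)> heap(cmp);
    for (int i = 0; i < static_cast<int>(nodes.size()); ++i)
        heap.push(i);
    while (heap.size() >= 2)
    {
        int n = heap.top(); heap.pop(); // least frequent
        int m = heap.top(); heap.pop(); // next least frequent
        nodes.push_back({nodes[n].freq + nodes[m].freq, n, m});
        heap.push(static_cast<int>(nodes.size()) - 1);
    }
    return heap.top();
}
```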
- */ -local void scan_tree (s, tree, max_code) - deflate_state *s; - ct_data *tree; /* the tree to be scanned */ - int max_code; /* and its largest code of non zero frequency */ -{ - int n; /* iterates over all tree elements */ - int prevlen = -1; /* last emitted length */ - int curlen; /* length of current code */ - int nextlen = tree[0].Len; /* length of next code */ - int count = 0; /* repeat count of the current code */ - int max_count = 7; /* max repeat count */ - int min_count = 4; /* min repeat count */ - - if (nextlen == 0) max_count = 138, min_count = 3; - tree[max_code+1].Len = (ush)0xffff; /* guard */ - - for (n = 0; n <= max_code; n++) { - curlen = nextlen; nextlen = tree[n+1].Len; - if (++count < max_count && curlen == nextlen) { - continue; - } else if (count < min_count) { - s->bl_tree[curlen].Freq += count; - } else if (curlen != 0) { - if (curlen != prevlen) s->bl_tree[curlen].Freq++; - s->bl_tree[REP_3_6].Freq++; - } else if (count <= 10) { - s->bl_tree[REPZ_3_10].Freq++; - } else { - s->bl_tree[REPZ_11_138].Freq++; - } - count = 0; prevlen = curlen; - if (nextlen == 0) { - max_count = 138, min_count = 3; - } else if (curlen == nextlen) { - max_count = 6, min_count = 3; - } else { - max_count = 7, min_count = 4; - } - } -} - -/* =========================================================================== - * Send a literal or distance tree in compressed form, using the codes in - * bl_tree. - */ -local void send_tree (s, tree, max_code) - deflate_state *s; - ct_data *tree; /* the tree to be scanned */ - int max_code; /* and its largest code of non zero frequency */ -{ - int n; /* iterates over all tree elements */ - int prevlen = -1; /* last emitted length */ - int curlen; /* length of current code */ - int nextlen = tree[0].Len; /* length of next code */ - int count = 0; /* repeat count of the current code */ - int max_count = 7; /* max repeat count */ - int min_count = 4; /* min repeat count */ - - /* tree[max_code+1].Len = -1; */ /* guard already set */ - if (nextlen == 0) max_count = 138, min_count = 3; - - for (n = 0; n <= max_code; n++) { - curlen = nextlen; nextlen = tree[n+1].Len; - if (++count < max_count && curlen == nextlen) { - continue; - } else if (count < min_count) { - do { send_code(s, curlen, s->bl_tree); } while (--count != 0); - - } else if (curlen != 0) { - if (curlen != prevlen) { - send_code(s, curlen, s->bl_tree); count--; - } - Assert(count >= 3 && count <= 6, " 3_6?"); - send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2); - - } else if (count <= 10) { - send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3); - - } else { - send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7); - } - count = 0; prevlen = curlen; - if (nextlen == 0) { - max_count = 138, min_count = 3; - } else if (curlen == nextlen) { - max_count = 6, min_count = 3; - } else { - max_count = 7, min_count = 4; - } - } -} - -/* =========================================================================== - * Construct the Huffman tree for the bit lengths and return the index in - * bl_order of the last bit length code to send. 
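A simplified view of what scan_tree/send_tree above do with the code-length sequence: split it into runs, then describe each run with the repeat codes named earlier. The real routines additionally cap runs (6 / 10 / 138), track min counts, and emit actual bits; this sketch only labels the runs:

```cpp
#include <cstddef>
#include <cstdio>
#include <vector>

inline void describeRuns(const std::vector<int> & lens)
{
    for (std::size_t i = 0; i < lens.size();)
    {
        int cur = lens[i];
        std::size_t run = 1;
        while (i + run < lens.size() && lens[i + run] == cur)
            ++run;
        if (cur == 0 && run >= 11)
            std::printf("REPZ_11_138(%zu) ", run);      // long zero run
        else if (cur == 0 && run >= 3)
            std::printf("REPZ_3_10(%zu) ", run);        // short zero run
        else if (cur != 0 && run >= 4)
            std::printf("len %d + REP_3_6(%zu) ", cur, run - 1); // repeat previous
        else
            std::printf("len %d x%zu ", cur, run);      // sent as plain lengths
        i += run;
    }
    std::printf("\n");
}
```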
- */ -local int build_bl_tree(s) - deflate_state *s; -{ - int max_blindex; /* index of last bit length code of non zero freq */ - - /* Determine the bit length frequencies for literal and distance trees */ - scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code); - scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code); - - /* Build the bit length tree: */ - build_tree(s, (tree_desc *)(&(s->bl_desc))); - /* opt_len now includes the length of the tree representations, except - * the lengths of the bit lengths codes and the 5+5+4 bits for the counts. - */ - - /* Determine the number of bit length codes to send. The pkzip format - * requires that at least 4 bit length codes be sent. (appnote.txt says - * 3 but the actual value used is 4.) - */ - for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) { - if (s->bl_tree[bl_order[max_blindex]].Len != 0) break; - } - /* Update opt_len to include the bit length tree and counts */ - s->opt_len += 3*((ulg)max_blindex+1) + 5+5+4; - Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld", - s->opt_len, s->static_len)); - - return max_blindex; -} - -/* =========================================================================== - * Send the header for a block using dynamic Huffman trees: the counts, the - * lengths of the bit length codes, the literal tree and the distance tree. - * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. - */ -local void send_all_trees(s, lcodes, dcodes, blcodes) - deflate_state *s; - int lcodes, dcodes, blcodes; /* number of codes for each tree */ -{ - int rank; /* index in bl_order */ - - Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes"); - Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, - "too many codes"); - Tracev((stderr, "\nbl counts: ")); - send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */ - send_bits(s, dcodes-1, 5); - send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */ - for (rank = 0; rank < blcodes; rank++) { - Tracev((stderr, "\nbl code %2d ", bl_order[rank])); - send_bits(s, s->bl_tree[bl_order[rank]].Len, 3); - } - Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent)); - - send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */ - Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent)); - - send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */ - Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent)); -} - -/* =========================================================================== - * Send a stored block - */ -void ZLIB_INTERNAL _tr_stored_block(s, buf, stored_len, last) - deflate_state *s; - charf *buf; /* input block */ - ulg stored_len; /* length of input block */ - int last; /* one if this is the last block for a file */ -{ - send_bits(s, (STORED_BLOCK<<1)+last, 3); /* send block type */ - bi_windup(s); /* align on byte boundary */ - put_short(s, (ush)stored_len); - put_short(s, (ush)~stored_len); - zmemcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len); - s->pending += stored_len; -#ifdef ZLIB_DEBUG - s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; - s->compressed_len += (stored_len + 4) << 3; - s->bits_sent += 2*16; - s->bits_sent += stored_len<<3; -#endif -} - -/* =========================================================================== - * Flush the bits in the bit buffer to pending output (leaves at most 7 bits) - */ -void ZLIB_INTERNAL _tr_flush_bits(s) - deflate_state *s; -{ - bi_flush(s); -} - -/* 
=========================================================================== - * Send one empty static block to give enough lookahead for inflate. - * This takes 10 bits, of which 7 may remain in the bit buffer. - */ -void ZLIB_INTERNAL _tr_align(s) - deflate_state *s; -{ - send_bits(s, STATIC_TREES<<1, 3); - send_code(s, END_BLOCK, static_ltree); -#ifdef ZLIB_DEBUG - s->compressed_len += 10L; /* 3 for block type, 7 for EOB */ -#endif - bi_flush(s); -} - -/* =========================================================================== - * Determine the best encoding for the current block: dynamic trees, static - * trees or store, and write out the encoded block. - */ -void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last) - deflate_state *s; - charf *buf; /* input block, or NULL if too old */ - ulg stored_len; /* length of input block */ - int last; /* one if this is the last block for a file */ -{ - ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ - int max_blindex = 0; /* index of last bit length code of non zero freq */ - - /* Build the Huffman trees unless a stored block is forced */ - if (s->level > 0) { - - /* Check if the file is binary or text */ - if (s->strm->data_type == Z_UNKNOWN) - s->strm->data_type = detect_data_type(s); - - /* Construct the literal and distance trees */ - build_tree(s, (tree_desc *)(&(s->l_desc))); - Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len, - s->static_len)); - - build_tree(s, (tree_desc *)(&(s->d_desc))); - Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len, - s->static_len)); - /* At this point, opt_len and static_len are the total bit lengths of - * the compressed block data, excluding the tree representations. - */ - - /* Build the bit length tree for the above two trees, and get the index - * in bl_order of the last bit length code to send. - */ - max_blindex = build_bl_tree(s); - - /* Determine the best encoding. Compute the block lengths in bytes. */ - opt_lenb = (s->opt_len+3+7)>>3; - static_lenb = (s->static_len+3+7)>>3; - - Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", - opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, - s->last_lit)); - - if (static_lenb <= opt_lenb) opt_lenb = static_lenb; - - } else { - Assert(buf != (char*)0, "lost buf"); - opt_lenb = static_lenb = stored_len + 5; /* force a stored block */ - } - -#ifdef FORCE_STORED - if (buf != (char*)0) { /* force stored block */ -#else - if (stored_len+4 <= opt_lenb && buf != (char*)0) { - /* 4: two words for the lengths */ -#endif - /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. - * Otherwise we can't have processed more than WSIZE input bytes since - * the last block flush, because compression would have been - * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to - * transform a block into a stored block. 
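The decision _tr_flush_block makes below reduces to a cost model in bytes: a stored block costs its raw length plus 4 bytes of LEN/NLEN framing (RFC 1951, section 3.2.4), while static and dynamic trees are compared by coded size. A sketch of the post-clamp comparison (zlib also forces static trees under Z_FIXED, which is omitted here):

```cpp
enum class BlockType { Stored, StaticTrees, DynTrees };

inline BlockType chooseBlockType(unsigned long stored_len,   // raw bytes
                                 unsigned long opt_lenb,     // dynamic cost, bytes
                                 unsigned long static_lenb,  // static cost, bytes
                                 bool buf_available)         // input still in window
{
    if (static_lenb <= opt_lenb)
        opt_lenb = static_lenb;              // dynamic trees would not help
    if (stored_len + 4 <= opt_lenb && buf_available)
        return BlockType::Stored;            // raw copy is at least as small
    if (static_lenb == opt_lenb)
        return BlockType::StaticTrees;
    return BlockType::DynTrees;
}
```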
- */ - _tr_stored_block(s, buf, stored_len, last); - -#ifdef FORCE_STATIC - } else if (static_lenb >= 0) { /* force static trees */ -#else - } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) { -#endif - send_bits(s, (STATIC_TREES<<1)+last, 3); - compress_block(s, (const ct_data *)static_ltree, - (const ct_data *)static_dtree); -#ifdef ZLIB_DEBUG - s->compressed_len += 3 + s->static_len; -#endif - } else { - send_bits(s, (DYN_TREES<<1)+last, 3); - send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1, - max_blindex+1); - compress_block(s, (const ct_data *)s->dyn_ltree, - (const ct_data *)s->dyn_dtree); -#ifdef ZLIB_DEBUG - s->compressed_len += 3 + s->opt_len; -#endif - } - Assert (s->compressed_len == s->bits_sent, "bad compressed size"); - /* The above check is made mod 2^32, for files larger than 512 MB - * and uLong implemented on 32 bits. - */ - init_block(s); - - if (last) { - bi_windup(s); -#ifdef ZLIB_DEBUG - s->compressed_len += 7; /* align on byte boundary */ -#endif - } - Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3, - s->compressed_len-7*last)); -} - -/* =========================================================================== - * Save the match info and tally the frequency counts. Return true if - * the current block must be flushed. - */ -int ZLIB_INTERNAL _tr_tally (s, dist, lc) - deflate_state *s; - unsigned dist; /* distance of matched string */ - unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ -{ - s->d_buf[s->last_lit] = (ush)dist; - s->l_buf[s->last_lit++] = (uch)lc; - if (dist == 0) { - /* lc is the unmatched char */ - s->dyn_ltree[lc].Freq++; - } else { - s->matches++; - /* Here, lc is the match length - MIN_MATCH */ - dist--; /* dist = match distance - 1 */ - Assert((ush)dist < (ush)MAX_DIST(s) && - (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) && - (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match"); - - s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; - s->dyn_dtree[d_code(dist)].Freq++; - } - -#ifdef TRUNCATE_BLOCK - /* Try to guess if it is profitable to stop the current block here */ - if ((s->last_lit & 0x1fff) == 0 && s->level > 2) { - /* Compute an upper bound for the compressed length */ - ulg out_length = (ulg)s->last_lit*8L; - ulg in_length = (ulg)((long)s->strstart - s->block_start); - int dcode; - for (dcode = 0; dcode < D_CODES; dcode++) { - out_length += (ulg)s->dyn_dtree[dcode].Freq * - (5L+extra_dbits[dcode]); - } - out_length >>= 3; - Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", - s->last_lit, in_length, out_length, - 100L - out_length*100L/in_length)); - if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; - } -#endif - return (s->last_lit == s->lit_bufsize-1); - /* We avoid equality with lit_bufsize because of wraparound at 64K - * on 16 bit machines and because stored blocks are restricted to - * 64K-1 bytes. 
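_tr_tally below, in miniature: a literal bumps its own frequency; a match bumps a length code (offset past the 256 literals and the end-of-block code) and a distance code. The lengthCode/distCode parameters stand in for zlib's lookup tables:

```cpp
struct FreqCounts
{
    unsigned lit_len[286] = {}; // 0..255 literals, 256 EOB, 257.. length codes
    unsigned dist[30] = {};
};

inline void tally(FreqCounts & f, unsigned dist, unsigned lc,
                  unsigned (*lengthCode)(unsigned), unsigned (*distCode)(unsigned))
{
    if (dist == 0)
    {
        f.lit_len[lc]++; // lc is the unmatched byte itself
    }
    else
    {
        f.lit_len[lengthCode(lc) + 256 + 1]++; // lc is match length - MIN_MATCH
        f.dist[distCode(dist - 1)]++;          // dist - 1, as in the code above
    }
}
```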
- */ -} - -/* =========================================================================== - * Send the block data compressed using the given Huffman trees - */ -local void compress_block(s, ltree, dtree) - deflate_state *s; - const ct_data *ltree; /* literal tree */ - const ct_data *dtree; /* distance tree */ -{ - unsigned dist; /* distance of matched string */ - int lc; /* match length or unmatched char (if dist == 0) */ - unsigned lx = 0; /* running index in l_buf */ - unsigned code; /* the code to send */ - int extra; /* number of extra bits to send */ - - if (s->last_lit != 0) do { - dist = s->d_buf[lx]; - lc = s->l_buf[lx++]; - if (dist == 0) { - send_code(s, lc, ltree); /* send a literal byte */ - Tracecv(isgraph(lc), (stderr," '%c' ", lc)); - } else { - /* Here, lc is the match length - MIN_MATCH */ - code = _length_code[lc]; - send_code(s, code+LITERALS+1, ltree); /* send the length code */ - extra = extra_lbits[code]; - if (extra != 0) { - lc -= base_length[code]; - send_bits(s, lc, extra); /* send the extra length bits */ - } - dist--; /* dist is now the match distance - 1 */ - code = d_code(dist); - Assert (code < D_CODES, "bad d_code"); - - send_code(s, code, dtree); /* send the distance code */ - extra = extra_dbits[code]; - if (extra != 0) { - dist -= (unsigned)base_dist[code]; - send_bits(s, dist, extra); /* send the extra distance bits */ - } - } /* literal or match pair ? */ - - /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ - Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx, - "pendingBuf overflow"); - - } while (lx < s->last_lit); - - send_code(s, END_BLOCK, ltree); -} - -/* =========================================================================== - * Check if the data type is TEXT or BINARY, using the following algorithm: - * - TEXT if the two conditions below are satisfied: - * a) There are no non-portable control characters belonging to the - * "black list" (0..6, 14..25, 28..31). - * b) There is at least one printable character belonging to the - * "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255). - * - BINARY otherwise. - * - The following partially-portable control characters form a - * "gray list" that is ignored in this detection algorithm: - * (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}). - * IN assertion: the fields Freq of dyn_ltree are set. - */ -local int detect_data_type(s) - deflate_state *s; -{ - /* black_mask is the bit mask of black-listed bytes - * set bits 0..6, 14..25, and 28..31 - * 0xf3ffc07f = binary 11110011111111111100000001111111 - */ - unsigned long black_mask = 0xf3ffc07fUL; - int n; - - /* Check for non-textual ("black-listed") bytes. */ - for (n = 0; n <= 31; n++, black_mask >>= 1) - if ((black_mask & 1) && (s->dyn_ltree[n].Freq != 0)) - return Z_BINARY; - - /* Check for textual ("white-listed") bytes. */ - if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0 - || s->dyn_ltree[13].Freq != 0) - return Z_TEXT; - for (n = 32; n < LITERALS; n++) - if (s->dyn_ltree[n].Freq != 0) - return Z_TEXT; - - /* There are no "black-listed" or "white-listed" bytes: - * this stream either is empty or has tolerated ("gray-listed") bytes only. 
- */ - return Z_BINARY; -} - -/* =========================================================================== - * Reverse the first len bits of a code, using straightforward code (a faster - * method would use a table) - * IN assertion: 1 <= len <= 15 - */ -local unsigned bi_reverse(code, len) - unsigned code; /* the value to invert */ - int len; /* its bit length */ -{ - register unsigned res = 0; - do { - res |= code & 1; - code >>= 1, res <<= 1; - } while (--len > 0); - return res >> 1; -} - -/* =========================================================================== - * Flush the bit buffer, keeping at most 7 bits in it. - */ -local void bi_flush(s) - deflate_state *s; -{ - if (s->bi_valid == 16) { - put_short(s, s->bi_buf); - s->bi_buf = 0; - s->bi_valid = 0; - } else if (s->bi_valid >= 8) { - put_byte(s, (Byte)s->bi_buf); - s->bi_buf >>= 8; - s->bi_valid -= 8; - } -} - -/* =========================================================================== - * Flush the bit buffer and align the output on a byte boundary - */ -local void bi_windup(s) - deflate_state *s; -{ - if (s->bi_valid > 8) { - put_short(s, s->bi_buf); - } else if (s->bi_valid > 0) { - put_byte(s, (Byte)s->bi_buf); - } - s->bi_buf = 0; - s->bi_valid = 0; -#ifdef ZLIB_DEBUG - s->bits_sent = (s->bits_sent+7) & ~7; -#endif -} diff --git a/base/poco/Foundation/src/trees.h b/base/poco/Foundation/src/trees.h deleted file mode 100644 index 40ea61f35a6..00000000000 --- a/base/poco/Foundation/src/trees.h +++ /dev/null @@ -1,74 +0,0 @@ -/* header created automatically with -DGEN_TREES_H */ - -local const ct_data static_ltree[L_CODES + 2] - = {{{12}, {8}}, {{140}, {8}}, {{76}, {8}}, {{204}, {8}}, {{44}, {8}}, {{172}, {8}}, {{108}, {8}}, {{236}, {8}}, {{28}, {8}}, - {{156}, {8}}, {{92}, {8}}, {{220}, {8}}, {{60}, {8}}, {{188}, {8}}, {{124}, {8}}, {{252}, {8}}, {{2}, {8}}, {{130}, {8}}, - {{66}, {8}}, {{194}, {8}}, {{34}, {8}}, {{162}, {8}}, {{98}, {8}}, {{226}, {8}}, {{18}, {8}}, {{146}, {8}}, {{82}, {8}}, - {{210}, {8}}, {{50}, {8}}, {{178}, {8}}, {{114}, {8}}, {{242}, {8}}, {{10}, {8}}, {{138}, {8}}, {{74}, {8}}, {{202}, {8}}, - {{42}, {8}}, {{170}, {8}}, {{106}, {8}}, {{234}, {8}}, {{26}, {8}}, {{154}, {8}}, {{90}, {8}}, {{218}, {8}}, {{58}, {8}}, - {{186}, {8}}, {{122}, {8}}, {{250}, {8}}, {{6}, {8}}, {{134}, {8}}, {{70}, {8}}, {{198}, {8}}, {{38}, {8}}, {{166}, {8}}, - {{102}, {8}}, {{230}, {8}}, {{22}, {8}}, {{150}, {8}}, {{86}, {8}}, {{214}, {8}}, {{54}, {8}}, {{182}, {8}}, {{118}, {8}}, - {{246}, {8}}, {{14}, {8}}, {{142}, {8}}, {{78}, {8}}, {{206}, {8}}, {{46}, {8}}, {{174}, {8}}, {{110}, {8}}, {{238}, {8}}, - {{30}, {8}}, {{158}, {8}}, {{94}, {8}}, {{222}, {8}}, {{62}, {8}}, {{190}, {8}}, {{126}, {8}}, {{254}, {8}}, {{1}, {8}}, - {{129}, {8}}, {{65}, {8}}, {{193}, {8}}, {{33}, {8}}, {{161}, {8}}, {{97}, {8}}, {{225}, {8}}, {{17}, {8}}, {{145}, {8}}, - {{81}, {8}}, {{209}, {8}}, {{49}, {8}}, {{177}, {8}}, {{113}, {8}}, {{241}, {8}}, {{9}, {8}}, {{137}, {8}}, {{73}, {8}}, - {{201}, {8}}, {{41}, {8}}, {{169}, {8}}, {{105}, {8}}, {{233}, {8}}, {{25}, {8}}, {{153}, {8}}, {{89}, {8}}, {{217}, {8}}, - {{57}, {8}}, {{185}, {8}}, {{121}, {8}}, {{249}, {8}}, {{5}, {8}}, {{133}, {8}}, {{69}, {8}}, {{197}, {8}}, {{37}, {8}}, - {{165}, {8}}, {{101}, {8}}, {{229}, {8}}, {{21}, {8}}, {{149}, {8}}, {{85}, {8}}, {{213}, {8}}, {{53}, {8}}, {{181}, {8}}, - {{117}, {8}}, {{245}, {8}}, {{13}, {8}}, {{141}, {8}}, {{77}, {8}}, {{205}, {8}}, {{45}, {8}}, {{173}, {8}}, {{109}, {8}}, - {{237}, {8}}, {{29}, {8}}, {{157}, {8}}, {{93}, {8}}, {{221}, 
{8}}, {{61}, {8}}, {{189}, {8}}, {{125}, {8}}, {{253}, {8}}, - {{19}, {9}}, {{275}, {9}}, {{147}, {9}}, {{403}, {9}}, {{83}, {9}}, {{339}, {9}}, {{211}, {9}}, {{467}, {9}}, {{51}, {9}}, - {{307}, {9}}, {{179}, {9}}, {{435}, {9}}, {{115}, {9}}, {{371}, {9}}, {{243}, {9}}, {{499}, {9}}, {{11}, {9}}, {{267}, {9}}, - {{139}, {9}}, {{395}, {9}}, {{75}, {9}}, {{331}, {9}}, {{203}, {9}}, {{459}, {9}}, {{43}, {9}}, {{299}, {9}}, {{171}, {9}}, - {{427}, {9}}, {{107}, {9}}, {{363}, {9}}, {{235}, {9}}, {{491}, {9}}, {{27}, {9}}, {{283}, {9}}, {{155}, {9}}, {{411}, {9}}, - {{91}, {9}}, {{347}, {9}}, {{219}, {9}}, {{475}, {9}}, {{59}, {9}}, {{315}, {9}}, {{187}, {9}}, {{443}, {9}}, {{123}, {9}}, - {{379}, {9}}, {{251}, {9}}, {{507}, {9}}, {{7}, {9}}, {{263}, {9}}, {{135}, {9}}, {{391}, {9}}, {{71}, {9}}, {{327}, {9}}, - {{199}, {9}}, {{455}, {9}}, {{39}, {9}}, {{295}, {9}}, {{167}, {9}}, {{423}, {9}}, {{103}, {9}}, {{359}, {9}}, {{231}, {9}}, - {{487}, {9}}, {{23}, {9}}, {{279}, {9}}, {{151}, {9}}, {{407}, {9}}, {{87}, {9}}, {{343}, {9}}, {{215}, {9}}, {{471}, {9}}, - {{55}, {9}}, {{311}, {9}}, {{183}, {9}}, {{439}, {9}}, {{119}, {9}}, {{375}, {9}}, {{247}, {9}}, {{503}, {9}}, {{15}, {9}}, - {{271}, {9}}, {{143}, {9}}, {{399}, {9}}, {{79}, {9}}, {{335}, {9}}, {{207}, {9}}, {{463}, {9}}, {{47}, {9}}, {{303}, {9}}, - {{175}, {9}}, {{431}, {9}}, {{111}, {9}}, {{367}, {9}}, {{239}, {9}}, {{495}, {9}}, {{31}, {9}}, {{287}, {9}}, {{159}, {9}}, - {{415}, {9}}, {{95}, {9}}, {{351}, {9}}, {{223}, {9}}, {{479}, {9}}, {{63}, {9}}, {{319}, {9}}, {{191}, {9}}, {{447}, {9}}, - {{127}, {9}}, {{383}, {9}}, {{255}, {9}}, {{511}, {9}}, {{0}, {7}}, {{64}, {7}}, {{32}, {7}}, {{96}, {7}}, {{16}, {7}}, - {{80}, {7}}, {{48}, {7}}, {{112}, {7}}, {{8}, {7}}, {{72}, {7}}, {{40}, {7}}, {{104}, {7}}, {{24}, {7}}, {{88}, {7}}, - {{56}, {7}}, {{120}, {7}}, {{4}, {7}}, {{68}, {7}}, {{36}, {7}}, {{100}, {7}}, {{20}, {7}}, {{84}, {7}}, {{52}, {7}}, - {{116}, {7}}, {{3}, {8}}, {{131}, {8}}, {{67}, {8}}, {{195}, {8}}, {{35}, {8}}, {{163}, {8}}, {{99}, {8}}, {{227}, {8}}}; - -local const ct_data static_dtree[D_CODES] - = {{{0}, {5}}, {{16}, {5}}, {{8}, {5}}, {{24}, {5}}, {{4}, {5}}, {{20}, {5}}, {{12}, {5}}, {{28}, {5}}, {{2}, {5}}, {{18}, {5}}, - {{10}, {5}}, {{26}, {5}}, {{6}, {5}}, {{22}, {5}}, {{14}, {5}}, {{30}, {5}}, {{1}, {5}}, {{17}, {5}}, {{9}, {5}}, {{25}, {5}}, - {{5}, {5}}, {{21}, {5}}, {{13}, {5}}, {{29}, {5}}, {{3}, {5}}, {{19}, {5}}, {{11}, {5}}, {{27}, {5}}, {{7}, {5}}, {{23}, {5}}}; - -const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] - = {0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, - 0, 0, 16, 17, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, - 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, - 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, - 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, - 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29}; - -const uch ZLIB_INTERNAL _length_code[MAX_MATCH - MIN_MATCH + 1] - = {0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, - 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28}; - -local const int base_length[LENGTH_CODES] - = {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 0}; - -local const int base_dist[D_CODES] = {0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, - 192, 256, 384, 512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576}; diff --git a/base/poco/Foundation/src/utils.h b/base/poco/Foundation/src/utils.h deleted file mode 100644 index bdfdaed9833..00000000000 --- a/base/poco/Foundation/src/utils.h +++ /dev/null @@ -1,311 +0,0 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. 
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef DOUBLE_CONVERSION_UTILS_H_
-#define DOUBLE_CONVERSION_UTILS_H_
-
-#include <stdlib.h>
-#include <string.h>
-
-#include <assert.h>
-#ifndef ASSERT
-#    define ASSERT(condition) assert(condition);
-#endif
-#ifndef UNIMPLEMENTED
-#    define UNIMPLEMENTED() (abort())
-#endif
-#ifndef UNREACHABLE
-#    define UNREACHABLE() (abort())
-#endif
-
-// Double operations detection based on target architecture.
-// Linux uses a 80bit wide floating point stack on x86. This induces double
-// rounding, which in turn leads to wrong results.
-// An easy way to test if the floating-point operations are correct is to
-// evaluate: 89255.0/1e22. If the floating-point stack is 64 bits wide then
-// the result is equal to 89255e-22.
-// The best way to test this, is to create a division-function and to compare
-// the output of the division with the expected result. (Inlining must be
-// disabled.)
-// On Linux,x86 89255e-22 != Div_double(89255.0/1e22)
-#if defined(_M_X64) || defined(__x86_64__) || defined(__ARMEL__) || defined(_M_ARM) || defined(__arm__) || defined(__arm64__) \
-    || defined(__avr32__) || defined(__hppa__) || defined(__ia64__) || defined(__mips__) || defined(__powerpc__) || defined(__ppc__) \
-    || defined(__ppc64__) || defined(__sparc__) || defined(__sparc) || defined(__s390__) || defined(__SH4__) || defined(__alpha__) \
-    || defined(_MIPS_ARCH_MIPS32R2) || defined(__riscv) || defined(__AARCH64EL__) || defined(nios2) || defined(__nios2) \
-    || defined(__nios2__)
-#    define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1
-#elif defined(_M_IX86) || defined(__i386__) || defined(__i386)
-#    undef DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS
-#else
-#    error Target architecture was not detected as supported by Double-Conversion.
-#endif
-
-#if defined(__GNUC__)
-#    define DOUBLE_CONVERSION_UNUSED __attribute__((unused))
-#else
-#    define DOUBLE_CONVERSION_UNUSED
-#endif
-
-
-#    include <stdint.h>
-
-
-// The following macro works on both 32 and 64-bit platforms.
-// Usage: instead of writing 0x1234567890123456
-//        write UINT64_2PART_C(0x12345678,90123456);
-#define UINT64_2PART_C(a, b) (((static_cast<uint64_t>(a) << 32) + 0x##b##u))
-
-
-// The expression ARRAY_SIZE(a) is a compile-time constant of type
-// size_t which represents the number of elements of the given
-// array. You should only use ARRAY_SIZE on statically allocated
-// arrays.
-#ifndef ARRAY_SIZE
-#    define ARRAY_SIZE(a) ((sizeof(a) / sizeof(*(a))) / static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
-#endif
-
-// A macro to disallow the evil copy constructor and operator= functions
-// This should be used in the private: declarations for a class
-#ifndef DISALLOW_COPY_AND_ASSIGN
-#    define DISALLOW_COPY_AND_ASSIGN(TypeName) \
-        TypeName(const TypeName &); \
-        void operator=(const TypeName &)
-#endif
-
-// A macro to disallow all the implicit constructors, namely the
-// default constructor, copy constructor and operator= functions.
-//
-// This should be used in the private: declarations for a class
-// that wants to prevent anyone from instantiating it. This is
-// especially useful for classes containing only static methods.
-#ifndef DISALLOW_IMPLICIT_CONSTRUCTORS
-#    define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
-        TypeName(); \
-        DISALLOW_COPY_AND_ASSIGN(TypeName)
-#endif
-
-namespace double_conversion
-{
-
-static const int kCharSize = sizeof(char);
-
-// Returns the maximum of the two parameters.
-template <typename T>
-static T Max(T a, T b)
-{
-    return a < b ? b : a;
-}
-
-
-// Returns the minimum of the two parameters.
-template <typename T>
-static T Min(T a, T b)
-{
-    return a < b ? a : b;
-}
-
-
-inline int StrLength(const char * string)
-{
-    size_t length = strlen(string);
-    ASSERT(length == static_cast<size_t>(static_cast<int>(length)));
-    return static_cast<int>(length);
-}
-
-// This is a simplified version of V8's Vector class.
-template <typename T>
-class Vector
-{
-public:
-    Vector() : start_(NULL), length_(0) { }
-    Vector(T * data, int length) : start_(data), length_(length) { ASSERT(length == 0 || (length > 0 && data != NULL)); }
-
-    // Returns a vector using the same backing storage as this one,
-    // spanning from and including 'from', to but not including 'to'.
-    Vector<T> SubVector(int from, int to)
-    {
-        ASSERT(to <= length_);
-        ASSERT(from < to);
-        ASSERT(0 <= from);
-        return Vector<T>(start() + from, to - from);
-    }
-
-    // Returns the length of the vector.
-    int length() const { return length_; }
-
-    // Returns whether or not the vector is empty.
-    bool is_empty() const { return length_ == 0; }
-
-    // Returns the pointer to the start of the data in the vector.
-    T * start() const { return start_; }
-
-    // Access individual vector elements - checks bounds in debug mode.
-    T & operator[](int index) const
-    {
-        ASSERT(0 <= index && index < length_);
-        return start_[index];
-    }
-
-    T & first() { return start_[0]; }
-
-    T & last() { return start_[length_ - 1]; }
-
-private:
-    T * start_;
-    int length_;
-};
-
-
-// Helper class for building result strings in a character buffer. The
-// purpose of the class is to use safe operations that checks the
-// buffer bounds on all operations in debug mode.
-class StringBuilder
-{
-public:
-    StringBuilder(char * buffer, int size) : buffer_(buffer, size), position_(0) { }
-
-    ~StringBuilder()
-    {
-        if (!is_finalized())
-            Finalize();
-    }
-
-    int size() const { return buffer_.length(); }
-
-    // Get the current position in the builder.
-    int position() const
-    {
-        ASSERT(!is_finalized());
-        return position_;
-    }
-
-    // Reset the position.
-    void Reset() { position_ = 0; }
-
-    // Add a single character to the builder. It is not allowed to add
-    // 0-characters; use the Finalize() method to terminate the string
-    // instead.
-    void AddCharacter(char c)
-    {
-        ASSERT(c != '\0');
-        ASSERT(!is_finalized() && position_ < buffer_.length());
-        buffer_[position_++] = c;
-    }
-
-    // Add an entire string to the builder. Uses strlen() internally to
-    // compute the length of the input string.
-    void AddString(const char * s) { AddSubstring(s, StrLength(s)); }
-
-    // Add the first 'n' characters of the given string 's' to the
-    // builder. The input string must have enough characters.
-    void AddSubstring(const char * s, int n)
-    {
-        ASSERT(!is_finalized() && position_ + n < buffer_.length());
-        ASSERT(static_cast<size_t>(n) <= strlen(s));
-        memmove(&buffer_[position_], s, n * kCharSize);
-        position_ += n;
-    }
-
-
-    // Add character padding to the builder. If count is non-positive,
-    // nothing is added to the builder.
-    void AddPadding(char c, int count)
-    {
-        for (int i = 0; i < count; i++)
-        {
-            AddCharacter(c);
-        }
-    }
-
-    // Finalize the string by 0-terminating it and returning the buffer.
-    char * Finalize()
-    {
-        ASSERT(!is_finalized() && position_ < buffer_.length());
-        buffer_[position_] = '\0';
-        // Make sure nobody managed to add a 0-character to the
-        // buffer while building the string.
-        ASSERT(strlen(buffer_.start()) == static_cast<size_t>(position_));
-        position_ = -1;
-        ASSERT(is_finalized());
-        return buffer_.start();
-    }
-
-private:
-    Vector<char> buffer_;
-    int position_;
-
-    bool is_finalized() const { return position_ < 0; }
-
-    DISALLOW_IMPLICIT_CONSTRUCTORS(StringBuilder);
-};
-
-// The type-based aliasing rule allows the compiler to assume that pointers of
-// different types (for some definition of different) never alias each other.
-// Thus the following code does not work:
-//
-// float f = foo();
-// int fbits = *(int*)(&f);
-//
-// The compiler 'knows' that the int pointer can't refer to f since the types
-// don't match, so the compiler may cache f in a register, leaving random data
-// in fbits. Using C++ style casts makes no difference, however a pointer to
-// char data is assumed to alias any other pointer. This is the 'memcpy
-// exception'.
-//
-// Bit_cast uses the memcpy exception to move the bits from a variable of one
-// type of a variable of another type. Of course the end result is likely to
-// be implementation dependent. Most compilers (gcc-4.2 and MSVC 2005)
-// will completely optimize BitCast away.
-//
-// There is an additional use for BitCast.
-// Recent gccs will warn when they see casts that may result in breakage due to
-// the type-based aliasing rule. If you have checked that there is no breakage
-// you can use BitCast to cast one pointer type to another. This confuses gcc
-// enough that it can no longer see that you have cast one pointer type to
-// another thus avoiding the warning.
-template <class Dest, class Source>
-inline Dest BitCast(const Source & source)
-{
-    // Compile time assertion: sizeof(Dest) == sizeof(Source)
-    // A compile error here means your Dest and Source have different sizes.
-    DOUBLE_CONVERSION_UNUSED
-    typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1];
-
-    Dest dest;
-    memmove(&dest, &source, sizeof(dest));
-    return dest;
-}
-
-template <class Dest, class Source>
-inline Dest BitCast(Source * source)
-{
-    return BitCast<Dest>(reinterpret_cast<uintptr_t>(source));
-}
-
-} // namespace double_conversion
-
-#endif // DOUBLE_CONVERSION_UTILS_H_
diff --git a/base/poco/Foundation/src/zutil.c b/base/poco/Foundation/src/zutil.c
deleted file mode 100644
index 14a7bebb1b3..00000000000
--- a/base/poco/Foundation/src/zutil.c
+++ /dev/null
@@ -1,324 +0,0 @@
-/* zutil.c -- target dependent utility functions for the compression library
- * Copyright (C) 1995-2005, 2010, 2011, 2012 Jean-loup Gailly.
- * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* @(#) $Id: //poco/1.4/Foundation/src/zutil.c#3 $ */ - -#include "zutil.h" -#ifndef Z_SOLO -# include "gzguts.h" -#endif - -#ifndef NO_DUMMY_DECL -struct internal_state {int dummy;}; /* for buggy compilers */ -#endif - -z_const char * const z_errmsg[10] = { -"need dictionary", /* Z_NEED_DICT 2 */ -"stream end", /* Z_STREAM_END 1 */ -"", /* Z_OK 0 */ -"file error", /* Z_ERRNO (-1) */ -"stream error", /* Z_STREAM_ERROR (-2) */ -"data error", /* Z_DATA_ERROR (-3) */ -"insufficient memory", /* Z_MEM_ERROR (-4) */ -"buffer error", /* Z_BUF_ERROR (-5) */ -"incompatible version",/* Z_VERSION_ERROR (-6) */ -""}; - - -const char * ZEXPORT zlibVersion() -{ - return ZLIB_VERSION; -} - -uLong ZEXPORT zlibCompileFlags() -{ - uLong flags; - - flags = 0; - switch ((int)(sizeof(uInt))) { - case 2: break; - case 4: flags += 1; break; - case 8: flags += 2; break; - default: flags += 3; - } - switch ((int)(sizeof(uLong))) { - case 2: break; - case 4: flags += 1 << 2; break; - case 8: flags += 2 << 2; break; - default: flags += 3 << 2; - } - switch ((int)(sizeof(voidpf))) { - case 2: break; - case 4: flags += 1 << 4; break; - case 8: flags += 2 << 4; break; - default: flags += 3 << 4; - } - switch ((int)(sizeof(z_off_t))) { - case 2: break; - case 4: flags += 1 << 6; break; - case 8: flags += 2 << 6; break; - default: flags += 3 << 6; - } -#ifdef ZLIB_DEBUG - flags += 1 << 8; -#endif -#if defined(ASMV) || defined(ASMINF) - flags += 1 << 9; -#endif -#ifdef ZLIB_WINAPI - flags += 1 << 10; -#endif -#ifdef BUILDFIXED - flags += 1 << 12; -#endif -#ifdef DYNAMIC_CRC_TABLE - flags += 1 << 13; -#endif -#ifdef NO_GZCOMPRESS - flags += 1L << 16; -#endif -#ifdef NO_GZIP - flags += 1L << 17; -#endif -#ifdef PKZIP_BUG_WORKAROUND - flags += 1L << 20; -#endif -#ifdef FASTEST - flags += 1L << 21; -#endif -#if defined(STDC) || defined(Z_HAVE_STDARG_H) -# ifdef NO_vsnprintf - flags += 1L << 25; -# ifdef HAS_vsprintf_void - flags += 1L << 26; -# endif -# else -# ifdef HAS_vsnprintf_void - flags += 1L << 26; -# endif -# endif -#else - flags += 1L << 24; -# ifdef NO_snprintf - flags += 1L << 25; -# ifdef HAS_sprintf_void - flags += 1L << 26; -# endif -# else -# ifdef HAS_snprintf_void - flags += 1L << 26; -# endif -# endif -#endif - return flags; -} - -#ifdef ZLIB_DEBUG - -# ifndef verbose -# define verbose 0 -# endif -int ZLIB_INTERNAL z_verbose = verbose; - -void ZLIB_INTERNAL z_error (m) - char *m; -{ - fprintf(stderr, "%s\n", m); - exit(1); -} -#endif - -/* exported to allow conversion of error code to string for compress() and - * uncompress() - */ -const char * ZEXPORT zError(err) - int err; -{ - return ERR_MSG(err); -} - -#if defined(_WIN32_WCE) && _WIN32_WCE < 0x800 - /* The Microsoft C Run-Time Library for Windows CE doesn't have - * errno. We define it as a global variable to simplify porting. - * Its value is always 0 and should not be used. - */ - int errno = 0; -#endif - -#ifndef HAVE_MEMCPY - -void ZLIB_INTERNAL zmemcpy(dest, source, len) - Bytef* dest; - const Bytef* source; - uInt len; -{ - if (len == 0) return; - do { - *dest++ = *source++; /* ??? to be unrolled */ - } while (--len != 0); -} - -int ZLIB_INTERNAL zmemcmp(s1, s2, len) - const Bytef* s1; - const Bytef* s2; - uInt len; -{ - uInt j; - - for (j = 0; j < len; j++) { - if (s1[j] != s2[j]) return 2*(s1[j] > s2[j])-1; - } - return 0; -} - -void ZLIB_INTERNAL zmemzero(dest, len) - Bytef* dest; - uInt len; -{ - if (len == 0) return; - do { - *dest++ = 0; /* ??? 
to be unrolled */ - } while (--len != 0); -} -#endif - -#ifndef Z_SOLO - -#ifdef SYS16BIT - -#ifdef __TURBOC__ -/* Turbo C in 16-bit mode */ - -# define MY_ZCALLOC - -/* Turbo C malloc() does not allow dynamic allocation of 64K bytes - * and farmalloc(64K) returns a pointer with an offset of 8, so we - * must fix the pointer. Warning: the pointer must be put back to its - * original form in order to free it, use zcfree(). - */ - -#define MAX_PTR 10 -/* 10*64K = 640K */ - -local int next_ptr = 0; - -typedef struct ptr_table_s { - voidpf org_ptr; - voidpf new_ptr; -} ptr_table; - -local ptr_table table[MAX_PTR]; -/* This table is used to remember the original form of pointers - * to large buffers (64K). Such pointers are normalized with a zero offset. - * Since MS-DOS is not a preemptive multitasking OS, this table is not - * protected from concurrent access. This hack doesn't work anyway on - * a protected system like OS/2. Use Microsoft C instead. - */ - -voidpf ZLIB_INTERNAL zcalloc (voidpf opaque, unsigned items, unsigned size) -{ - voidpf buf = opaque; /* just to make some compilers happy */ - ulg bsize = (ulg)items*size; - - /* If we allocate less than 65520 bytes, we assume that farmalloc - * will return a usable pointer which doesn't have to be normalized. - */ - if (bsize < 65520L) { - buf = farmalloc(bsize); - if (*(ush*)&buf != 0) return buf; - } else { - buf = farmalloc(bsize + 16L); - } - if (buf == NULL || next_ptr >= MAX_PTR) return NULL; - table[next_ptr].org_ptr = buf; - - /* Normalize the pointer to seg:0 */ - *((ush*)&buf+1) += ((ush)((uch*)buf-0) + 15) >> 4; - *(ush*)&buf = 0; - table[next_ptr++].new_ptr = buf; - return buf; -} - -void ZLIB_INTERNAL zcfree (voidpf opaque, voidpf ptr) -{ - int n; - if (*(ush*)&ptr != 0) { /* object < 64K */ - farfree(ptr); - return; - } - /* Find the original pointer */ - for (n = 0; n < next_ptr; n++) { - if (ptr != table[n].new_ptr) continue; - - farfree(table[n].org_ptr); - while (++n < next_ptr) { - table[n-1] = table[n]; - } - next_ptr--; - return; - } - ptr = opaque; /* just to make some compilers happy */ - Assert(0, "zcfree: ptr not found"); -} - -#endif /* __TURBOC__ */ - - -#ifdef M_I86 -/* Microsoft C in 16-bit mode */ - -# define MY_ZCALLOC - -#if (!defined(_MSC_VER) || (_MSC_VER <= 600)) -# define _halloc halloc -# define _hfree hfree -#endif - -voidpf ZLIB_INTERNAL zcalloc (voidpf opaque, uInt items, uInt size) -{ - if (opaque) opaque = 0; /* to make compiler happy */ - return _halloc((long)items, size); -} - -void ZLIB_INTERNAL zcfree (voidpf opaque, voidpf ptr) -{ - if (opaque) opaque = 0; /* to make compiler happy */ - _hfree(ptr); -} - -#endif /* M_I86 */ - -#endif /* SYS16BIT */ - - -#ifndef MY_ZCALLOC /* Any system without a special alloc function */ - -#ifndef STDC -extern voidp malloc OF((uInt size)); -extern voidp calloc OF((uInt items, uInt size)); -extern void free OF((voidpf ptr)); -#endif - -voidpf ZLIB_INTERNAL zcalloc (opaque, items, size) - voidpf opaque; - unsigned items; - unsigned size; -{ - if (opaque) items += size - size; /* make compiler happy */ - return sizeof(uInt) > 2 ? 
(voidpf)malloc(items * size) :
-                              (voidpf)calloc(items, size);
-}
-
-void ZLIB_INTERNAL zcfree (opaque, ptr)
-    voidpf opaque;
-    voidpf ptr;
-{
-    free(ptr);
-    if (opaque) return; /* make compiler happy */
-}
-
-#endif /* MY_ZCALLOC */
-
-#endif /* !Z_SOLO */
diff --git a/base/poco/Foundation/src/zutil.h b/base/poco/Foundation/src/zutil.h
deleted file mode 100644
index 4deb4ec0db1..00000000000
--- a/base/poco/Foundation/src/zutil.h
+++ /dev/null
@@ -1,237 +0,0 @@
-/* zutil.h -- internal interface and configuration of the compression library
- * Copyright (C) 1995-2013 Jean-loup Gailly.
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/* WARNING: this file should *not* be used by applications. It is
-   part of the implementation of the compression library and is
-   subject to change. Applications should only use zlib.h.
- */
-
-/* @(#) $Id: //poco/1.4/Foundation/src/zutil.h#3 $ */
-
-#ifndef ZUTIL_H
-#define ZUTIL_H
-
-#ifdef HAVE_HIDDEN
-#    define ZLIB_INTERNAL __attribute__((visibility("hidden")))
-#else
-#    define ZLIB_INTERNAL
-#endif
-
-#include "zlib.h"
-
-#if defined(STDC) && !defined(Z_SOLO)
-#    include <stddef.h>
-#    include <string.h>
-#    include <stdlib.h>
-#endif
-
-#ifdef Z_SOLO
-typedef long ptrdiff_t; /* guess -- will be caught if guess is wrong */
-#endif
-
-#ifndef local
-#    define local static
-#endif
-/* compile with -Dlocal if your debugger can't find static symbols */
-
-typedef unsigned char uch;
-typedef uch FAR uchf;
-typedef unsigned short ush;
-typedef ush FAR ushf;
-typedef unsigned long ulg;
-
-extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
-/* (size given to avoid silly warnings with Visual C++) */
-
-#define ERR_MSG(err) z_errmsg[Z_NEED_DICT - (err)]
-
-#define ERR_RETURN(strm, err) return (strm->msg = ERR_MSG(err), (err))
-/* To be used only when the state is known to be valid */
-
-/* common constants */
-
-#ifndef DEF_WBITS
-#    define DEF_WBITS MAX_WBITS
-#endif
-/* default windowBits for decompression. MAX_WBITS is for compression only */
-
-#if MAX_MEM_LEVEL >= 8
-#    define DEF_MEM_LEVEL 8
-#else
-#    define DEF_MEM_LEVEL MAX_MEM_LEVEL
-#endif
-/* default memLevel */
-
-#define STORED_BLOCK 0
-#define STATIC_TREES 1
-#define DYN_TREES 2
-/* The three kinds of block type */
-
-#define MIN_MATCH 3
-#define MAX_MATCH 258
-/* The minimum and maximum match lengths */
-
-#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */
-
-/* target dependencies */
-
-#ifdef AMIGA
-#    define OS_CODE 0x01
-#endif
-
-#if defined(VAXC) || defined(VMS)
-#    define OS_CODE 0x02
-#    define F_OPEN(name, mode) fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512")
-#endif
-
-#if defined(ATARI) || defined(atarist)
-#    define OS_CODE 0x05
-#endif
-
-#ifdef OS2
-#    define OS_CODE 0x06
-#    if defined(M_I86) && !defined(Z_SOLO)
-#        include <malloc.h>
-#    endif
-#endif
-
-#if defined(MACOS) || defined(TARGET_OS_MAC)
-#    define OS_CODE 0x07
-#    ifndef Z_SOLO
-#        if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os
-#            include <unix.h> /* for fdopen */
-#        else
-#            ifndef fdopen
-#                define fdopen(fd, mode) NULL /* No fdopen() */
-#            endif
-#        endif
-#    endif
-#endif
-
-#ifdef TOPS20
-#    define OS_CODE 0x0a
-#endif
-
-#ifdef WIN32
-#    define OS_CODE 0x0b
-#endif
-
-#ifdef __50SERIES /* Prime/PRIMOS */
-#    define OS_CODE 0x0f
-#endif
-
-#if defined(_BEOS_) || defined(RISCOS)
-#    define fdopen(fd, mode) NULL /* No fdopen() */
-#endif
-
-
-/* provide prototypes for these when building zlib without LFS */
-#if !defined(_WIN32) && (!defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE - 0 == 0)
-ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t));
-ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t));
-#endif
-
-/* common defaults */
-
-#ifndef OS_CODE
-#    define OS_CODE 0x03 /* assume Unix */
-#endif
-
-#ifndef F_OPEN
-#    define F_OPEN(name, mode) fopen((name), (mode))
-#endif
-
-/* functions */
-
-#if defined(pyr) || defined(Z_SOLO)
-#    define NO_MEMCPY
-#endif
-#if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__)
-/* Use our own functions for small and medium model with MSC <= 5.0.
- * You may have to use the same strategy for Borland C (untested).
- * The __SC__ check is for Symantec.
- */
-#    define NO_MEMCPY
-#endif
-#if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY)
-#    define HAVE_MEMCPY
-#endif
-#ifdef HAVE_MEMCPY
-#    ifdef SMALL_MEDIUM /* MS-DOS small or medium model */
-#        define zmemcpy _fmemcpy
-#        define zmemcmp _fmemcmp
-#        define zmemzero(dest, len) _fmemset(dest, 0, len)
-#    else
-#        define zmemcpy memcpy
-#        define zmemcmp memcmp
-#        define zmemzero(dest, len) memset(dest, 0, len)
-#    endif
-#else
-void ZLIB_INTERNAL zmemcpy OF((Bytef * dest, const Bytef * source, uInt len));
-int ZLIB_INTERNAL zmemcmp OF((const Bytef * s1, const Bytef * s2, uInt len));
-void ZLIB_INTERNAL zmemzero OF((Bytef * dest, uInt len));
-#endif
-
-/* Diagnostic functions */
-#ifdef ZLIB_DEBUG
-#    include <stdio.h>
extern int ZLIB_INTERNAL z_verbose;
-extern void ZLIB_INTERNAL z_error OF((char * m));
-#    define Assert(cond, msg) \
-        { \
-            if (!(cond)) \
-                z_error(msg); \
-        }
-#    define Trace(x) \
-        { \
-            if (z_verbose >= 0) \
-                fprintf x; \
-        }
-#    define Tracev(x) \
-        { \
-            if (z_verbose > 0) \
-                fprintf x; \
-        }
-#    define Tracevv(x) \
-        { \
-            if (z_verbose > 1) \
-                fprintf x; \
-        }
-#    define Tracec(c, x) \
-        { \
-            if (z_verbose > 0 && (c)) \
-                fprintf x; \
-        }
-#    define Tracecv(c, x) \
-        { \
-            if (z_verbose > 1 && (c)) \
-                fprintf x; \
-        }
-#else
-#    define Assert(cond, msg)
-#    define Trace(x)
-#    define Tracev(x)
-#    define Tracevv(x)
-#    define Tracec(c, x)
-#    define Tracecv(c, x)
-#endif
-
-#ifndef Z_SOLO
-voidpf ZLIB_INTERNAL zcalloc OF((voidpf opaque, unsigned items, unsigned size));
-void ZLIB_INTERNAL zcfree OF((voidpf opaque, voidpf ptr));
-#endif
-
-#define ZALLOC(strm, items, size) (*((strm)->zalloc))((strm)->opaque, (items), (size))
-#define ZFREE(strm, addr) (*((strm)->zfree))((strm)->opaque, (voidpf)(addr))
-#define TRY_FREE(s, p) \
-    { \
-        if (p) \
-            ZFREE(s, p); \
-    }
-
-/* Reverse the bytes in a 32-bit value */
-#define ZSWAP32(q) ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + (((q)&0xff00) << 8) + (((q)&0xff) << 24))
-
-#endif /* ZUTIL_H */
diff --git a/base/poco/MongoDB/include/Poco/MongoDB/Connection.h b/base/poco/MongoDB/include/Poco/MongoDB/Connection.h
index de669aa90dd..dcb813b75bc 100644
--- a/base/poco/MongoDB/include/Poco/MongoDB/Connection.h
+++ b/base/poco/MongoDB/include/Poco/MongoDB/Connection.h
@@ -90,6 +90,9 @@ namespace MongoDB
         Poco::Net::SocketAddress address() const;
         /// Returns the address of the MongoDB server.
+
+        const std::string & uri() const;
+        /// Returns the uri on which the connection was made.
 
         void connect(const std::string & hostAndPort);
         /// Connects to the given MongoDB server.
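The hunk above adds a uri() accessor recording which URI a connection was ultimately established over; this matters once connect() can try several comma-separated hosts, as the Connection.cpp changes below show. A minimal usage sketch — not part of the patch, and the multi-host connection string here is an illustrative assumption:

    // Connect through the existing Connection(uri, socketFactory) constructor,
    // which delegates to connect(), then report the host actually selected.
    #include "Poco/MongoDB/Connection.h"
    #include <iostream>

    int main()
    {
        Poco::MongoDB::Connection::SocketFactory factory;
        Poco::MongoDB::Connection connection(
            "mongodb://host1:27017,host2:27017/admin?readPreference=primary", factory);
        std::cout << connection.uri() << std::endl; // URI of the chosen host
        return 0;
    }
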
@@ -148,6 +151,7 @@ namespace MongoDB
     private:
         Poco::Net::SocketAddress _address;
         Poco::Net::StreamSocket _socket;
+        std::string _uri;
     };
 
@@ -158,6 +162,10 @@ namespace MongoDB
     {
         return _address;
     }
+    inline const std::string & Connection::uri() const
+    {
+        return _uri;
+    }
 }
diff --git a/base/poco/MongoDB/src/Connection.cpp b/base/poco/MongoDB/src/Connection.cpp
index 56bb192cec2..38c31d2250a 100644
--- a/base/poco/MongoDB/src/Connection.cpp
+++ b/base/poco/MongoDB/src/Connection.cpp
@@ -145,68 +145,155 @@ void Connection::connect(const Poco::Net::StreamSocket& socket)
 
 void Connection::connect(const std::string& uri, SocketFactory& socketFactory)
 {
-    Poco::URI theURI(uri);
-    if (theURI.getScheme() != "mongodb") throw Poco::UnknownURISchemeException(uri);
+    std::vector<std::string> strAddresses;
+    std::string newURI;
 
-    std::string userInfo = theURI.getUserInfo();
-    std::string host = theURI.getHost();
-    Poco::UInt16 port = theURI.getPort();
-    if (port == 0) port = 27017;
+    if (uri.find(',') != std::string::npos)
+    {
+        size_t pos;
+        size_t head = 0;
+        if ((pos = uri.find("@")) != std::string::npos)
+        {
+            head = pos + 1;
+        }
+        else if ((pos = uri.find("://")) != std::string::npos)
+        {
+            head = pos + 3;
+        }
 
-    std::string databaseName = theURI.getPath();
-    if (!databaseName.empty() && databaseName[0] == '/') databaseName.erase(0, 1);
-    if (databaseName.empty()) databaseName = "admin";
+        std::string tempstr;
+        std::string::const_iterator it = uri.begin();
+        it += head;
+        size_t tail = head;
+        for (;it != uri.end() && *it != '?' && *it != '/'; ++it)
+        {
+            tempstr += *it;
+            tail++;
+        }
 
-    bool ssl = false;
-    Poco::Timespan connectTimeout;
-    Poco::Timespan socketTimeout;
-    std::string authMechanism = Database::AUTH_SCRAM_SHA1;
+        it = tempstr.begin();
+        std::string token;
+        for (;it != tempstr.end(); ++it)
+        {
+            if (*it == ',')
+            {
+                newURI = uri.substr(0, head) + token + uri.substr(tail, uri.length());
+                strAddresses.push_back(newURI);
+                token = "";
+            }
+            else
+            {
+                token += *it;
+            }
+        }
+        newURI = uri.substr(0, head) + token + uri.substr(tail, uri.length());
+        strAddresses.push_back(newURI);
+    }
+    else
+    {
+        strAddresses.push_back(uri);
+    }
 
-    Poco::URI::QueryParameters params = theURI.getQueryParameters();
-    for (Poco::URI::QueryParameters::const_iterator it = params.begin(); it != params.end(); ++it)
-    {
-        if (it->first == "ssl")
-        {
-            ssl = (it->second == "true");
-        }
-        else if (it->first == "connectTimeoutMS")
-        {
-            connectTimeout = static_cast<Poco::Timespan::TimeDiff>(1000)*Poco::NumberParser::parse(it->second);
-        }
-        else if (it->first == "socketTimeoutMS")
-        {
-            socketTimeout = static_cast<Poco::Timespan::TimeDiff>(1000)*Poco::NumberParser::parse(it->second);
-        }
-        else if (it->first == "authMechanism")
-        {
-            authMechanism = it->second;
-        }
-    }
+    newURI = strAddresses.front();
+    Poco::URI theURI(newURI);
+    if (theURI.getScheme() != "mongodb") throw Poco::UnknownURISchemeException(uri);
 
-    connect(socketFactory.createSocket(host, port, connectTimeout, ssl));
+    std::string userInfo = theURI.getUserInfo();
+    std::string databaseName = theURI.getPath();
+    if (!databaseName.empty() && databaseName[0] == '/') databaseName.erase(0, 1);
+    if (databaseName.empty()) databaseName = "admin";
 
-    if (socketTimeout > 0)
-    {
-        _socket.setSendTimeout(socketTimeout);
-        _socket.setReceiveTimeout(socketTimeout);
-    }
+    bool ssl = false;
+    Poco::Timespan connectTimeout;
+    Poco::Timespan socketTimeout;
+    std::string authMechanism = Database::AUTH_SCRAM_SHA1;
+    std::string readPreference="primary";
 
-    if (!userInfo.empty())
-    {
-        std::string username;
-        std::string password;
-        std::string::size_type pos = userInfo.find(':');
-        if (pos != std::string::npos)
-        {
-            username.assign(userInfo, 0, pos++);
-            password.assign(userInfo, pos, userInfo.size() - pos);
-        }
-        else username = userInfo;
+    Poco::URI::QueryParameters params = theURI.getQueryParameters();
+    for (Poco::URI::QueryParameters::const_iterator it = params.begin(); it != params.end(); ++it)
+    {
+        if (it->first == "ssl")
+        {
+            ssl = (it->second == "true");
+        }
+        else if (it->first == "connectTimeoutMS")
+        {
+            connectTimeout = static_cast<Poco::Timespan::TimeDiff>(1000)*Poco::NumberParser::parse(it->second);
+        }
+        else if (it->first == "socketTimeoutMS")
+        {
+            socketTimeout = static_cast<Poco::Timespan::TimeDiff>(1000)*Poco::NumberParser::parse(it->second);
+        }
+        else if (it->first == "authMechanism")
+        {
+            authMechanism = it->second;
+        }
+        else if (it->first == "readPreference")
+        {
+            readPreference= it->second;
+        }
+    }
 
-        Database database(databaseName);
-        if (!database.authenticate(*this, username, password, authMechanism))
-            throw Poco::NoPermissionException(Poco::format("Access to MongoDB database %s denied for user %s", databaseName, username));
-    }
+    for (std::vector<std::string>::const_iterator it = strAddresses.cbegin();it != strAddresses.cend(); ++it)
+    {
+        newURI = *it;
+        theURI = Poco::URI(newURI);
+
+        std::string host = theURI.getHost();
+        Poco::UInt16 port = theURI.getPort();
+        if (port == 0) port = 27017;
+
+        connect(socketFactory.createSocket(host, port, connectTimeout, ssl));
+        _uri = newURI;
+        if (socketTimeout > 0)
+        {
+            _socket.setSendTimeout(socketTimeout);
+            _socket.setReceiveTimeout(socketTimeout);
+        }
+        if (strAddresses.size() > 1)
+        {
+            Poco::MongoDB::QueryRequest request("admin.$cmd");
+            request.setNumberToReturn(1);
+            request.selector().add("isMaster", 1);
+            Poco::MongoDB::ResponseMessage response;
+
+            sendRequest(request, response);
+            _uri = newURI;
+            if (!response.documents().empty())
+            {
+                Poco::MongoDB::Document::Ptr doc = response.documents()[0];
+                if (doc->get<bool>("ismaster") && readPreference == "primary")
+                {
+                    break;
+                }
+                else if (!doc->get<bool>("ismaster") && readPreference == "secondary")
+                {
+                    break;
+                }
+                else if (it + 1 == strAddresses.cend())
+                {
+                    throw Poco::URISyntaxException(uri);
+                }
+            }
+        }
+    }
+    if (!userInfo.empty())
+    {
+        std::string username;
+        std::string password;
+        std::string::size_type pos = userInfo.find(':');
+        if (pos != std::string::npos)
+        {
+            username.assign(userInfo, 0, pos++);
+            password.assign(userInfo, pos, userInfo.size() - pos);
+        }
+        else username = userInfo;
+
+        Database database(databaseName);
+
+        if (!database.authenticate(*this, username, password, authMechanism))
+            throw Poco::NoPermissionException(Poco::format("Access to MongoDB database %s denied for user %s", databaseName, username));
+    }
 }
diff --git a/base/poco/Net/include/Poco/Net/SMTPChannel.h b/base/poco/Net/include/Poco/Net/SMTPChannel.h
deleted file mode 100644
index d913ccbc808..00000000000
--- a/base/poco/Net/include/Poco/Net/SMTPChannel.h
+++ /dev/null
@@ -1,109 +0,0 @@
-//
-// SMTPChannel.h
-//
-// Library: Net
-// Package: Logging
-// Module: SMTPChannel
-//
-// Definition of the SMTPChannel class.
-//
-// Copyright (c) 2006, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef Net_SMTPChannel_INCLUDED
-#define Net_SMTPChannel_INCLUDED
-
-
-#include "Poco/Channel.h"
-#include "Poco/Net/Net.h"
-#include "Poco/String.h"
-
-
-namespace Poco
-{
-namespace Net
-{
-
-
-    class Net_API SMTPChannel : public Poco::Channel
-    /// This Channel implements SMTP (email) logging.
- { - public: - SMTPChannel(); - /// Creates a SMTPChannel. - - SMTPChannel(const std::string & mailhost, const std::string & sender, const std::string & recipient); - /// Creates a SMTPChannel with the given target mailhost, sender, and recipient. - - void open(); - /// Opens the SMTPChannel. - - void close(); - /// Closes the SMTPChannel. - - void log(const Message & msg); - /// Sends the message's text to the recipient. - - void setProperty(const std::string & name, const std::string & value); - /// Sets the property with the given value. - /// - /// The following properties are supported: - /// * mailhost: The SMTP server. Default is "localhost". - /// * sender: The sender address. - /// * recipient: The recipient address. - /// * local: If true, local time is used. Default is true. - /// * attachment: Filename of the file to attach. - /// * type: Content type of the file to attach. - /// * delete: Boolean value indicating whether to delete - /// the attachment file after sending. - /// * throw: Boolean value indicating whether to throw - /// exception upon failure. - - std::string getProperty(const std::string & name) const; - /// Returns the value of the property with the given name. - - static void registerChannel(); - /// Registers the channel with the global LoggingFactory. - - static const std::string PROP_MAILHOST; - static const std::string PROP_SENDER; - static const std::string PROP_RECIPIENT; - static const std::string PROP_LOCAL; - static const std::string PROP_ATTACHMENT; - static const std::string PROP_TYPE; - static const std::string PROP_DELETE; - static const std::string PROP_THROW; - - protected: - ~SMTPChannel(); - - private: - bool isTrue(const std::string & value) const; - - std::string _mailHost; - std::string _sender; - std::string _recipient; - bool _local; - std::string _attachment; - std::string _type; - bool _delete; - bool _throw; - }; - - - inline bool SMTPChannel::isTrue(const std::string & value) const - { - return ( - (0 == icompare(value, "true")) || (0 == icompare(value, "t")) || (0 == icompare(value, "yes")) || (0 == icompare(value, "y"))); - } - - -} -} // namespace Poco::Net - - -#endif // Net_SMTPChannel_INCLUDED diff --git a/base/poco/Net/include/Poco/Net/SocketImpl.h b/base/poco/Net/include/Poco/Net/SocketImpl.h index e08d49be7a2..082f258fa98 100644 --- a/base/poco/Net/include/Poco/Net/SocketImpl.h +++ b/base/poco/Net/include/Poco/Net/SocketImpl.h @@ -399,9 +399,12 @@ namespace Net bool initialized() const; /// Returns true iff the underlying socket is initialized. + static void error(int code); + /// Throws an appropriate exception for the given error code. + protected: - SocketImpl(); - /// Creates a SocketImpl. + SocketImpl(); + /// Creates a SocketImpl. SocketImpl(poco_socket_t sockfd); /// Creates a SocketImpl using the given native socket. @@ -446,9 +449,6 @@ namespace Net static void error(const std::string & arg); /// Throws an appropriate exception for the last error. - static void error(int code); - /// Throws an appropriate exception for the given error code. - static void error(int code, const std::string & arg); /// Throws an appropriate exception for the given error code. 
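The SocketImpl.h hunk above promotes the static error(int code) overload from the implementation-detail section to the public interface. A short sketch of what that enables — illustrative only; the helper function and the saved-code scenario are assumptions, not part of the patch:

    // Convert a previously saved socket error code into the matching
    // Poco exception (e.g. TimeoutException, ConnectionRefusedException).
    #include "Poco/Net/SocketImpl.h"

    void rethrowSavedSocketError(int savedErrorCode)
    {
        // error(int) throws the appropriate exception for a real error code.
        Poco::Net::SocketImpl::error(savedErrorCode);
    }
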
diff --git a/base/poco/Net/src/SMTPChannel.cpp b/base/poco/Net/src/SMTPChannel.cpp deleted file mode 100644 index a498179a44e..00000000000 --- a/base/poco/Net/src/SMTPChannel.cpp +++ /dev/null @@ -1,210 +0,0 @@ -// -// SMTPChannel.cpp -// -// Library: Net -// Package: Logging -// Module: SMTPChannel -// -// Copyright (c) 2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Net/SMTPChannel.h" -#include "Poco/Net/MailMessage.h" -#include "Poco/Net/MailRecipient.h" -#include "Poco/Net/SMTPClientSession.h" -#include "Poco/Net/StringPartSource.h" -#include "Poco/Message.h" -#include "Poco/DateTimeFormatter.h" -#include "Poco/DateTimeFormat.h" -#include "Poco/LocalDateTime.h" -#include "Poco/LoggingFactory.h" -#include "Poco/Instantiator.h" -#include "Poco/NumberFormatter.h" -#include "Poco/FileStream.h" -#include "Poco/File.h" -#include "Poco/Environment.h" - - -namespace Poco { -namespace Net { - - -const std::string SMTPChannel::PROP_MAILHOST("mailhost"); -const std::string SMTPChannel::PROP_SENDER("sender"); -const std::string SMTPChannel::PROP_RECIPIENT("recipient"); -const std::string SMTPChannel::PROP_LOCAL("local"); -const std::string SMTPChannel::PROP_ATTACHMENT("attachment"); -const std::string SMTPChannel::PROP_TYPE("type"); -const std::string SMTPChannel::PROP_DELETE("delete"); -const std::string SMTPChannel::PROP_THROW("throw"); - - -SMTPChannel::SMTPChannel(): - _mailHost("localhost"), - _local(true), - _type("text/plain"), - _delete(false), - _throw(false) -{ -} - - -SMTPChannel::SMTPChannel(const std::string& mailhost, const std::string& sender, const std::string& recipient): - _mailHost(mailhost), - _sender(sender), - _recipient(recipient), - _local(true), - _type("text/plain"), - _delete(false), - _throw(false) -{ -} - - -SMTPChannel::~SMTPChannel() -{ - try - { - close(); - } - catch (...) 
-    {
-        poco_unexpected();
-    }
-}
-
-
-void SMTPChannel::open()
-{
-}
-
-
-void SMTPChannel::close()
-{
-}
-
-
-void SMTPChannel::log(const Message& msg)
-{
-    try
-    {
-        MailMessage message;
-        message.setSender(_sender);
-        message.addRecipient(MailRecipient(MailRecipient::PRIMARY_RECIPIENT, _recipient));
-        message.setSubject("Log Message from " + _sender);
-        std::stringstream content;
-        content << "Log Message\r\n"
-            << "===========\r\n\r\n"
-            << "Host: " << Environment::nodeName() << "\r\n"
-            << "Logger: " << msg.getSource() << "\r\n";
-
-        if (_local)
-        {
-            DateTime dt(msg.getTime());
-            content << "Timestamp: " << DateTimeFormatter::format(LocalDateTime(dt), DateTimeFormat::RFC822_FORMAT) << "\r\n";
-        }
-        else
-            content << "Timestamp: " << DateTimeFormatter::format(msg.getTime(), DateTimeFormat::RFC822_FORMAT) << "\r\n";
-
-        content << "Priority: " << NumberFormatter::format(msg.getPriority()) << "\r\n"
-            << "Process ID: " << NumberFormatter::format(msg.getPid()) << "\r\n"
-            << "Thread: " << msg.getThread() << " (ID: " << msg.getTid() << ")\r\n"
-            << "Message text: " << msg.getText() << "\r\n\r\n";
-
-        message.addContent(new StringPartSource(content.str()));
-
-        if (!_attachment.empty())
-        {
-            {
-                Poco::FileInputStream fis(_attachment, std::ios::in | std::ios::binary | std::ios::ate);
-                if (fis.good())
-                {
-                    typedef std::allocator<char>::size_type SST;
-
-                    std::streamoff size = fis.tellg();
-                    poco_assert (std::numeric_limits<unsigned int>::max() >= size);
-                    poco_assert (std::numeric_limits<SST>::max() >= size);
-                    char* pMem = new char [static_cast<unsigned int>(size)];
-                    fis.seekg(std::ios::beg);
-                    fis.read(pMem, size);
-                    message.addAttachment(_attachment,
-                        new StringPartSource(std::string(pMem, static_cast<SST>(size)),
-                            _type,
-                            _attachment));
-
-                    delete [] pMem;
-                }
-            }
-            if (_delete) File(_attachment).remove();
-        }
-
-        SMTPClientSession session(_mailHost);
-        session.login();
-        session.sendMessage(message);
-        session.close();
-    }
-    catch (Exception&)
-    {
-        if (_throw) throw;
-    }
-}
-
-
-void SMTPChannel::setProperty(const std::string& name, const std::string& value)
-{
-    if (name == PROP_MAILHOST)
-        _mailHost = value;
-    else if (name == PROP_SENDER)
-        _sender = value;
-    else if (name == PROP_RECIPIENT)
-        _recipient = value;
-    else if (name == PROP_LOCAL)
-        _local = isTrue(value);
-    else if (name == PROP_ATTACHMENT)
-        _attachment = value;
-    else if (name == PROP_TYPE)
-        _type = value;
-    else if (name == PROP_DELETE)
-        _delete = isTrue(value);
-    else if (name == PROP_THROW)
-        _throw = isTrue(value);
-    else
-        Channel::setProperty(name, value);
-}
-
-
-std::string SMTPChannel::getProperty(const std::string& name) const
-{
-    if (name == PROP_MAILHOST)
-        return _mailHost;
-    else if (name == PROP_SENDER)
-        return _sender;
-    else if (name == PROP_RECIPIENT)
-        return _recipient;
-    else if (name == PROP_LOCAL)
-        return _local ? "true" : "false";
-    else if (name == PROP_ATTACHMENT)
-        return _attachment;
-    else if (name == PROP_TYPE)
-        return _type;
-    else if (name == PROP_DELETE)
-        return _delete ? "true" : "false";
-    else if (name == PROP_THROW)
-        return _throw ? "true" : "false";
-    else
-        return Channel::getProperty(name);
-}
-
-
-void SMTPChannel::registerChannel()
-{
-    Poco::LoggingFactory::defaultFactory().registerChannelClass("SMTPChannel",
-        new Poco::Instantiator<SMTPChannel, Poco::Channel>);
-}
-
-
-} } // namespace Poco::Net
diff --git a/base/poco/Util/include/Poco/Util/ConfigurationMapper.h b/base/poco/Util/include/Poco/Util/ConfigurationMapper.h
deleted file mode 100644
index dc1dd1fe86c..00000000000
--- a/base/poco/Util/include/Poco/Util/ConfigurationMapper.h
+++ /dev/null
@@ -1,97 +0,0 @@
-//
-// ConfigurationMapper.h
-//
-// Library: Util
-// Package: Configuration
-// Module: ConfigurationMapper
-//
-// Definition of the ConfigurationMapper class.
-//
-// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef Util_ConfigurationMapper_INCLUDED
-#define Util_ConfigurationMapper_INCLUDED
-
-
-#include "Poco/Util/AbstractConfiguration.h"
-#include "Poco/Util/Util.h"
-
-
-namespace Poco
-{
-namespace Util
-{
-
-
-    class Util_API ConfigurationMapper : public AbstractConfiguration
-    /// This configuration maps a property hierarchy into another
-    /// hierarchy.
-    ///
-    /// For example, given a configuration with the following properties:
-    ///     config.value1
-    ///     config.value2
-    ///     config.sub.value1
-    ///     config.sub.value2
-    /// and a ConfigurationView with fromPrefix == "config" and toPrefix == "root.conf", then
-    /// the above properties will be available via the mapper as
-    ///     root.conf.value1
-    ///     root.conf.value2
-    ///     root.conf.sub.value1
-    ///     root.conf.sub.value2
-    ///
-    /// FromPrefix can be empty, in which case, and given toPrefix == "root",
-    /// the properties will be available as
-    ///     root.config.value1
-    ///     root.config.value2
-    ///     root.config.sub.value1
-    ///     root.config.sub.value2
-    ///
-    /// This is equivalent to the functionality of the ConfigurationView class.
-    ///
-    /// Similarly, toPrefix can also be empty. Given fromPrefix == "config" and
-    /// toPrefix == "", the properties will be available as
-    ///     value1
-    ///     value2
-    ///     sub.value1
-    ///     sub.value2
-    ///
-    /// If both fromPrefix and toPrefix are empty, no mapping is performed.
-    ///
-    /// A ConfigurationMapper is most useful in combination with a
-    /// LayeredConfiguration.
-    {
-    public:
-        ConfigurationMapper(const std::string & fromPrefix, const std::string & toPrefix, AbstractConfiguration * pConfig);
-        /// Creates the ConfigurationMapper. The ConfigurationMapper does not take
-        /// ownership of the passed configuration.
- - protected: - bool getRaw(const std::string & key, std::string & value) const; - void setRaw(const std::string & key, const std::string & value); - void enumerate(const std::string & key, Keys & range) const; - void removeRaw(const std::string & key); - - std::string translateKey(const std::string & key) const; - - ~ConfigurationMapper(); - - private: - ConfigurationMapper(const ConfigurationMapper &); - ConfigurationMapper & operator=(const ConfigurationMapper &); - - std::string _fromPrefix; - std::string _toPrefix; - AbstractConfiguration * _pConfig; - }; - - -} -} // namespace Poco::Util - - -#endif // Util_ConfigurationMapper_INCLUDED diff --git a/base/poco/Util/include/Poco/Util/WinRegistryConfiguration.h b/base/poco/Util/include/Poco/Util/WinRegistryConfiguration.h deleted file mode 100644 index 43dc34f630b..00000000000 --- a/base/poco/Util/include/Poco/Util/WinRegistryConfiguration.h +++ /dev/null @@ -1,75 +0,0 @@ -// -// WinRegistryConfiguration.h -// -// Library: Util -// Package: Windows -// Module: WinRegistryConfiguration -// -// Definition of the WinRegistryConfiguration class. -// -// Copyright (c) 2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Util_WinRegistryConfiguration_INCLUDED -#define Util_WinRegistryConfiguration_INCLUDED - - -#include "Poco/String.h" -#include "Poco/Util/AbstractConfiguration.h" -#include "Poco/Util/Util.h" - - -namespace Poco -{ -namespace Util -{ - - - class Util_API WinRegistryConfiguration : public AbstractConfiguration - /// An implementation of AbstractConfiguration that stores configuration data - /// in the Windows registry. - /// - /// Removing key is not supported. An attempt to remove a key results - /// in a NotImplementedException being thrown. - { - public: - WinRegistryConfiguration(const std::string & rootPath, REGSAM extraSam = 0); - /// Creates the WinRegistryConfiguration. - /// The rootPath must start with one of the root key names - /// like HKEY_CLASSES_ROOT, e.g. HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Services. - /// All further keys are relative to the root path and can be - /// dot separated, e.g. the path MyService.ServiceName will be converted to - /// HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Services\MyService\ServiceName. - /// The extraSam parameter will be passed along to WinRegistryKey, to control - /// registry virtualization for example. - - protected: - ~WinRegistryConfiguration(); - /// Destroys the WinRegistryConfiguration. 
- - bool getRaw(const std::string & key, std::string & value) const; - void setRaw(const std::string & key, const std::string & value); - void enumerate(const std::string & key, Keys & range) const; - void removeRaw(const std::string & key); - - std::string convertToRegFormat(const std::string & key, std::string & keyName) const; - /// Takes a key in the format of A.B.C and converts it to - /// registry format A\B\C, the last entry is the keyName, the rest is returned as path - - friend class WinConfigurationTest; - - private: - std::string _rootPath; - REGSAM _extraSam; - }; - - -} -} // namespace Poco::Util - - -#endif // Util_WinRegistryConfiguration_INCLUDED diff --git a/base/poco/Util/include/Poco/Util/WinRegistryKey.h b/base/poco/Util/include/Poco/Util/WinRegistryKey.h deleted file mode 100644 index 9aa5e35ed8a..00000000000 --- a/base/poco/Util/include/Poco/Util/WinRegistryKey.h +++ /dev/null @@ -1,199 +0,0 @@ -// -// WinRegistryKey.h -// -// Library: Util -// Package: Windows -// Module: WinRegistryKey -// -// Definition of the WinRegistryKey class. -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Util_WinRegistryKey_INCLUDED -#define Util_WinRegistryKey_INCLUDED - - -#include <vector> -#include "Poco/UnWindows.h" -#include "Poco/Util/Util.h" - - -namespace Poco -{ -namespace Util -{ - - - class Util_API WinRegistryKey - /// This class implements a convenient interface to the - /// Windows Registry. - /// - /// This class is only available on Windows platforms. - { - public: - typedef std::vector<std::string> Keys; - typedef std::vector<std::string> Values; - - enum Type - { - REGT_NONE = 0, - REGT_STRING = 1, - REGT_STRING_EXPAND = 2, - REGT_BINARY = 3, - REGT_DWORD = 4, - REGT_DWORD_BIG_ENDIAN = 5, - REGT_LINK = 6, - REGT_MULTI_STRING = 7, - REGT_RESOURCE_LIST = 8, - REGT_FULL_RESOURCE_DESCRIPTOR = 9, - REGT_RESOURCE_REQUIREMENTS_LIST = 10, - REGT_QWORD = 11 - }; - - WinRegistryKey(const std::string & key, bool readOnly = false, REGSAM extraSam = 0); - /// Creates the WinRegistryKey. - /// - /// The key must start with one of the root key names - /// like HKEY_CLASSES_ROOT, e.g. HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Services. - /// - /// If readOnly is true, then only read access to the registry - /// is available and any attempt to write to the registry will - /// result in an exception. - /// - /// extraSam is used to pass extra flags (in addition to KEY_READ and KEY_WRITE) - /// to the samDesired argument of RegOpenKeyEx() or RegCreateKeyEx(). - - WinRegistryKey(HKEY hRootKey, const std::string & subKey, bool readOnly = false, REGSAM extraSam = 0); - /// Creates the WinRegistryKey. - /// - /// If readOnly is true, then only read access to the registry - /// is available and any attempt to write to the registry will - /// result in an exception. - /// - /// extraSam is used to pass extra flags (in addition to KEY_READ and KEY_WRITE) - /// to the samDesired argument of RegOpenKeyEx() or RegCreateKeyEx(). - - ~WinRegistryKey(); - /// Destroys the WinRegistryKey. - - void setString(const std::string & name, const std::string & value); - /// Sets the string value (REG_SZ) with the given name. - /// An empty name denotes the default value. - - std::string getString(const std::string & name); - /// Returns the string value (REG_SZ) with the given name. - /// An empty name denotes the default value. - /// - /// Throws a NotFoundException if the value does not exist.
- - void setStringExpand(const std::string & name, const std::string & value); - /// Sets the expandable string value (REG_EXPAND_SZ) with the given name. - /// An empty name denotes the default value. - - std::string getStringExpand(const std::string & name); - /// Returns the string value (REG_EXPAND_SZ) with the given name. - /// An empty name denotes the default value. - /// All references to environment variables (%VAR%) in the string - /// are expanded. - /// - /// Throws a NotFoundException if the value does not exist. - - void setBinary(const std::string & name, const std::vector<char> & value); - /// Sets the string value (REG_BINARY) with the given name. - /// An empty name denotes the default value. - - std::vector<char> getBinary(const std::string & name); - /// Returns the string value (REG_BINARY) with the given name. - /// An empty name denotes the default value. - /// - /// Throws a NotFoundException if the value does not exist. - - void setInt(const std::string & name, int value); - /// Sets the numeric (REG_DWORD) value with the given name. - /// An empty name denotes the default value. - - int getInt(const std::string & name); - /// Returns the numeric value (REG_DWORD) with the given name. - /// An empty name denotes the default value. - /// - /// Throws a NotFoundException if the value does not exist. - - - void setInt64(const std::string & name, Poco::Int64 value); - /// Sets the numeric (REG_QWORD) value with the given name. - /// An empty name denotes the default value. - - Poco::Int64 getInt64(const std::string & name); - /// Returns the numeric value (REG_QWORD) with the given name. - /// An empty name denotes the default value. - /// - /// Throws a NotFoundException if the value does not exist. - - - void deleteValue(const std::string & name); - /// Deletes the value with the given name. - /// - /// Throws a NotFoundException if the value does not exist. - - void deleteKey(); - /// Recursively deletes the key and all subkeys. - - bool exists(); - /// Returns true iff the key exists. - - Type type(const std::string & name); - /// Returns the type of the key value. - - bool exists(const std::string & name); - /// Returns true iff the given value exists under that key. - - void subKeys(Keys & keys); - /// Appends all subKey names to keys. - - void values(Values & vals); - /// Appends all value names to vals; - - bool isReadOnly() const; - /// Returns true iff the key has been opened for read-only access only. - - protected: - void open(); - void close(); - std::string key() const; - std::string key(const std::string & valueName) const; - HKEY handle(); - void handleSetError(const std::string & name); - static HKEY handleFor(const std::string & rootKey); - - private: - WinRegistryKey(); - WinRegistryKey(const WinRegistryKey &); - WinRegistryKey & operator=(const WinRegistryKey &); - - HKEY _hRootKey; - std::string _subKey; - HKEY _hKey; - bool _readOnly; - REGSAM _extraSam; - }; - - - // - // inlines - // - inline bool WinRegistryKey::isReadOnly() const - { - return _readOnly; - } - - -} -} // namespace Poco::Util - - -#endif // Util_WinRegistryKey_INCLUDED diff --git a/base/poco/Util/include/Poco/Util/WinService.h b/base/poco/Util/include/Poco/Util/WinService.h deleted file mode 100644 index 52377dfb67b..00000000000 --- a/base/poco/Util/include/Poco/Util/WinService.h +++ /dev/null @@ -1,140 +0,0 @@ -// -// WinService.h -// -// Library: Util -// Package: Windows -// Module: WinService -// -// Definition of the WinService class.
-// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Util_WinService_INCLUDED -#define Util_WinService_INCLUDED - - -#include "Poco/UnWindows.h" -#include "Poco/Util/Util.h" - - -# define POCO_LPQUERY_SERVICE_CONFIG LPQUERY_SERVICE_CONFIGA - - -namespace Poco -{ -namespace Util -{ - - - class Util_API WinService - /// This class provides an object-oriented interface to - /// the Windows Service Control Manager for registering, - /// unregistering, configuring, starting and stopping - /// services. - /// - /// This class is only available on Windows platforms. - { - public: - enum Startup - { - SVC_AUTO_START, - SVC_MANUAL_START, - SVC_DISABLED - }; - - WinService(const std::string & name); - /// Creates the WinService, using the given service name. - - ~WinService(); - /// Destroys the WinService. - - const std::string & name() const; - /// Returns the service name. - - std::string displayName() const; - /// Returns the service's display name. - - std::string path() const; - /// Returns the path to the service executable. - /// - /// Throws a NotFoundException if the service has not been registered. - - void registerService(const std::string & path, const std::string & displayName); - /// Creates a Windows service with the executable specified by path - /// and the given displayName. - /// - /// Throws a ExistsException if the service has already been registered. - - void registerService(const std::string & path); - /// Creates a Windows service with the executable specified by path - /// and the given displayName. The service name is used as display name. - /// - /// Throws a ExistsException if the service has already been registered. - - void unregisterService(); - /// Deletes the Windows service. - /// - /// Throws a NotFoundException if the service has not been registered. - - bool isRegistered() const; - /// Returns true if the service has been registered with the Service Control Manager. - - bool isRunning() const; - /// Returns true if the service is currently running. - - void start(); - /// Starts the service. - /// Does nothing if the service is already running. - /// - /// Throws a NotFoundException if the service has not been registered. - - void stop(); - /// Stops the service. - /// Does nothing if the service is not running. - /// - /// Throws a NotFoundException if the service has not been registered. - - void setStartup(Startup startup); - /// Sets the startup mode for the service. - - Startup getStartup() const; - /// Returns the startup mode for the service. - - void setDescription(const std::string & description); - /// Sets the service description in the registry. - - std::string getDescription() const; - /// Returns the service description from the registry. 
- - static const int STARTUP_TIMEOUT; - - protected: - static const std::string REGISTRY_KEY; - static const std::string REGISTRY_DESCRIPTION; - - private: - void open() const; - bool tryOpen() const; - void close() const; - POCO_LPQUERY_SERVICE_CONFIG config() const; - - WinService(); - WinService(const WinService &); - WinService & operator=(const WinService &); - - std::string _name; - SC_HANDLE _scmHandle; - mutable SC_HANDLE _svcHandle; - }; - - -} -} // namespace Poco::Util - - -#endif // Util_WinService_INCLUDED diff --git a/base/poco/Util/src/ConfigurationMapper.cpp b/base/poco/Util/src/ConfigurationMapper.cpp deleted file mode 100644 index d76f9c0b6da..00000000000 --- a/base/poco/Util/src/ConfigurationMapper.cpp +++ /dev/null @@ -1,101 +0,0 @@ -// -// ConfigurationMapper.cpp -// -// Library: Util -// Package: Configuration -// Module: ConfigurationMapper -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Util/ConfigurationMapper.h" - - -namespace Poco { -namespace Util { - - -ConfigurationMapper::ConfigurationMapper(const std::string& fromPrefix, const std::string& toPrefix, AbstractConfiguration* pConfig): - _fromPrefix(fromPrefix), - _toPrefix(toPrefix), - _pConfig(pConfig) -{ - poco_check_ptr (pConfig); - - if (!_fromPrefix.empty()) _fromPrefix += '.'; - if (!_toPrefix.empty()) _toPrefix += '.'; - - _pConfig->duplicate(); -} - - -ConfigurationMapper::~ConfigurationMapper() -{ - _pConfig->release(); -} - - -bool ConfigurationMapper::getRaw(const std::string& key, std::string& value) const -{ - std::string translatedKey = translateKey(key); - return _pConfig->getRaw(translatedKey, value); -} - - -void ConfigurationMapper::setRaw(const std::string& key, const std::string& value) -{ - std::string translatedKey = translateKey(key); - _pConfig->setRaw(translatedKey, value); -} - - -void ConfigurationMapper::enumerate(const std::string& key, Keys& range) const -{ - std::string cKey(key); - if (!cKey.empty()) cKey += '.'; - std::string::size_type keyLen = cKey.length(); - if (keyLen < _toPrefix.length()) - { - if (_toPrefix.compare(0, keyLen, cKey) == 0) - { - std::string::size_type pos = _toPrefix.find_first_of('.', keyLen); - poco_assert_dbg(pos != std::string::npos); - range.push_back(_toPrefix.substr(keyLen, pos - keyLen)); - } - } - else - { - std::string translatedKey; - if (cKey == _toPrefix) - { - translatedKey = _fromPrefix; - if (!translatedKey.empty()) - translatedKey.resize(translatedKey.length() - 1); - } - else translatedKey = translateKey(key); - _pConfig->enumerate(translatedKey, range); - } -} - - -void ConfigurationMapper::removeRaw(const std::string& key) -{ - std::string translatedKey = translateKey(key); - _pConfig->remove(translatedKey); -} - - -std::string ConfigurationMapper::translateKey(const std::string& key) const -{ - std::string result(key); - if (result.compare(0, _toPrefix.size(), _toPrefix) == 0) - result.replace(0, _toPrefix.size(), _fromPrefix); - return result; -} - - -} } // namespace Poco::Util diff --git a/base/poco/Util/src/XMLConfiguration.cpp b/base/poco/Util/src/XMLConfiguration.cpp index 974361044d7..e0d363cc870 100644 --- a/base/poco/Util/src/XMLConfiguration.cpp +++ b/base/poco/Util/src/XMLConfiguration.cpp @@ -27,7 +27,7 @@ #include "Poco/Exception.h" #include "Poco/NumberParser.h" #include "Poco/NumberFormatter.h" -#include +#include namespace Poco { diff --git a/base/readpassphrase/readpassphrase.c 
b/base/readpassphrase/readpassphrase.c index a84ec43767c..fbd582ffe79 100644 --- a/base/readpassphrase/readpassphrase.c +++ b/base/readpassphrase/readpassphrase.c @@ -27,9 +27,7 @@ #define _PATH_TTY "/dev/tty" #endif -#ifdef HAS_RESERVED_IDENTIFIER #pragma clang diagnostic ignored "-Wreserved-identifier" -#endif #include #include diff --git a/cmake/add_check.cmake b/cmake/add_check.cmake index c6abbcdb321..ba30ee8676f 100644 --- a/cmake/add_check.cmake +++ b/cmake/add_check.cmake @@ -5,11 +5,11 @@ if (NOT TARGET check) if (CMAKE_CONFIGURATION_TYPES) add_custom_target (check COMMAND ${CMAKE_CTEST_COMMAND} --force-new-ctest-process --output-on-failure --build-config "$" - WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) + WORKING_DIRECTORY ${PROJECT_BINARY_DIR}) else () add_custom_target (check COMMAND ${CMAKE_CTEST_COMMAND} --force-new-ctest-process --output-on-failure - WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) + WORKING_DIRECTORY ${PROJECT_BINARY_DIR}) endif () endif () diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index b52b2eda992..462529fbc13 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 54472) +SET(VERSION_REVISION 54474) SET(VERSION_MAJOR 23) -SET(VERSION_MINOR 3) +SET(VERSION_MINOR 5) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 52bf836e03a6ba7cf2d654eaaf73231701abc3a2) -SET(VERSION_DESCRIBE v23.3.1.2537-testing) -SET(VERSION_STRING 23.3.1.2537) +SET(VERSION_GITHASH 3920eb987f7ed837ada5de8907284adf123f0583) +SET(VERSION_DESCRIBE v23.5.1.1-testing) +SET(VERSION_STRING 23.5.1.1) # end of autochange diff --git a/cmake/ccache.cmake b/cmake/ccache.cmake index f0769f337d0..9a70e4aee32 100644 --- a/cmake/ccache.cmake +++ b/cmake/ccache.cmake @@ -1,5 +1,6 @@ # Setup integration with ccache to speed up builds, see https://ccache.dev/ +# Matches both ccache and sccache if (CMAKE_CXX_COMPILER_LAUNCHER MATCHES "ccache" OR CMAKE_C_COMPILER_LAUNCHER MATCHES "ccache") # custom compiler launcher already defined, most likely because cmake was invoked with like "-DCMAKE_CXX_COMPILER_LAUNCHER=ccache" or # via environment variable --> respect setting and trust that the launcher was specified correctly @@ -8,45 +9,57 @@ if (CMAKE_CXX_COMPILER_LAUNCHER MATCHES "ccache" OR CMAKE_C_COMPILER_LAUNCHER MA return() endif() -option(ENABLE_CCACHE "Speedup re-compilations using ccache (external tool)" ON) +set(COMPILER_CACHE "auto" CACHE STRING "Speedup re-compilations using the caching tools; valid options are 'auto' (ccache, then sccache), 'ccache', 'sccache', or 'disabled'") -if (NOT ENABLE_CCACHE) - message(STATUS "Using ccache: no (disabled via configuration)") +if(COMPILER_CACHE STREQUAL "auto") + find_program (CCACHE_EXECUTABLE ccache sccache) +elseif (COMPILER_CACHE STREQUAL "ccache") + find_program (CCACHE_EXECUTABLE ccache) +elseif(COMPILER_CACHE STREQUAL "sccache") + find_program (CCACHE_EXECUTABLE sccache) +elseif(COMPILER_CACHE STREQUAL "disabled") + message(STATUS "Using *ccache: no (disabled via configuration)") return() +else() + message(${RECONFIGURE_MESSAGE_LEVEL} "The COMPILER_CACHE must be one of (auto|ccache|sccache|disabled), value: '${COMPILER_CACHE}'") endif() -find_program (CCACHE_EXECUTABLE ccache) if (NOT CCACHE_EXECUTABLE) - message(${RECONFIGURE_MESSAGE_LEVEL} "Using ccache: no (Could not find find ccache. 
To significantly reduce compile times for the 2nd, 3rd, etc. build, it is highly recommended to install ccache. To suppress this message, run cmake with -DENABLE_CCACHE=0)") + message(${RECONFIGURE_MESSAGE_LEVEL} "Using *ccache: no (Could not find ccache or sccache. To significantly reduce compile times for the 2nd, 3rd, etc. build, it is highly recommended to install one of them. To suppress this message, run cmake with -DCOMPILER_CACHE=disabled)") return() endif() -execute_process(COMMAND ${CCACHE_EXECUTABLE} "-V" OUTPUT_VARIABLE CCACHE_VERSION) -string(REGEX REPLACE "ccache version ([0-9\\.]+).*" "\\1" CCACHE_VERSION ${CCACHE_VERSION}) +if (CCACHE_EXECUTABLE MATCHES "/ccache$") + execute_process(COMMAND ${CCACHE_EXECUTABLE} "-V" OUTPUT_VARIABLE CCACHE_VERSION) + string(REGEX REPLACE "ccache version ([0-9\\.]+).*" "\\1" CCACHE_VERSION ${CCACHE_VERSION}) -set (CCACHE_MINIMUM_VERSION 3.3) + set (CCACHE_MINIMUM_VERSION 3.3) -if (CCACHE_VERSION VERSION_LESS_EQUAL ${CCACHE_MINIMUM_VERSION}) - message(${RECONFIGURE_MESSAGE_LEVEL} "Using ccache: no (found ${CCACHE_EXECUTABLE} (version ${CCACHE_VERSION}), the minimum required version is ${CCACHE_MINIMUM_VERSION}") - return() -endif() + if (CCACHE_VERSION VERSION_LESS_EQUAL ${CCACHE_MINIMUM_VERSION}) + message(${RECONFIGURE_MESSAGE_LEVEL} "Using ccache: no (found ${CCACHE_EXECUTABLE} (version ${CCACHE_VERSION}), the minimum required version is ${CCACHE_MINIMUM_VERSION})") + return() + endif() -message(STATUS "Using ccache: ${CCACHE_EXECUTABLE} (version ${CCACHE_VERSION})") -set(LAUNCHER ${CCACHE_EXECUTABLE}) + message(STATUS "Using ccache: ${CCACHE_EXECUTABLE} (version ${CCACHE_VERSION})") + set(LAUNCHER ${CCACHE_EXECUTABLE}) -# Work around a well-intended but unfortunate behavior of ccache 4.0 & 4.1 with -# environment variable SOURCE_DATE_EPOCH. This variable provides an alternative -# to source-code embedded timestamps (__DATE__/__TIME__) and therefore helps with -# reproducible builds (*). SOURCE_DATE_EPOCH is set automatically by the -# distribution, e.g. Debian. Ccache 4.0 & 4.1 incorporate SOURCE_DATE_EPOCH into -# the hash calculation regardless they contain timestamps or not. This invalidates -# the cache whenever SOURCE_DATE_EPOCH changes. As a fix, ignore SOURCE_DATE_EPOCH. -# -# (*) https://reproducible-builds.org/specs/source-date-epoch/ -if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0" AND CCACHE_VERSION VERSION_LESS "4.2") - message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache 4.0 / 4.1") - set(LAUNCHER env -u SOURCE_DATE_EPOCH ${CCACHE_EXECUTABLE}) + # Work around a well-intended but unfortunate behavior of ccache 4.0 & 4.1 with + # environment variable SOURCE_DATE_EPOCH. This variable provides an alternative + # to source-code embedded timestamps (__DATE__/__TIME__) and therefore helps with + # reproducible builds (*). SOURCE_DATE_EPOCH is set automatically by the + # distribution, e.g. Debian. Ccache 4.0 & 4.1 incorporate SOURCE_DATE_EPOCH into + # the hash calculation regardless of whether the sources contain timestamps or not. This invalidates + # the cache whenever SOURCE_DATE_EPOCH changes. As a fix, ignore SOURCE_DATE_EPOCH.
+ # + # (*) https://reproducible-builds.org/specs/source-date-epoch/ + if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0" AND CCACHE_VERSION VERSION_LESS "4.2") + message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache 4.0 / 4.1") + set(LAUNCHER env -u SOURCE_DATE_EPOCH ${CCACHE_EXECUTABLE}) + endif() +elseif(CCACHE_EXECUTABLE MATCHES "/sccache$") + message(STATUS "Using sccache: ${CCACHE_EXECUTABLE}") + set(LAUNCHER ${CCACHE_EXECUTABLE}) endif() set (CMAKE_CXX_COMPILER_LAUNCHER ${LAUNCHER} ${CMAKE_CXX_COMPILER_LAUNCHER}) diff --git a/cmake/check_flags.cmake b/cmake/check_flags.cmake index 518f9ecf8de..294f135e8ee 100644 --- a/cmake/check_flags.cmake +++ b/cmake/check_flags.cmake @@ -1,7 +1,5 @@ include (CheckCXXCompilerFlag) include (CheckCCompilerFlag) -check_cxx_compiler_flag("-Wreserved-identifier" HAS_RESERVED_IDENTIFIER) -check_cxx_compiler_flag("-Wsuggest-destructor-override" HAS_SUGGEST_DESTRUCTOR_OVERRIDE) -check_cxx_compiler_flag("-Wsuggest-override" HAS_SUGGEST_OVERRIDE) -check_cxx_compiler_flag("-Xclang -fuse-ctor-homing" HAS_USE_CTOR_HOMING) +# Set/unset variable based on existence of compiler flags. Example: +# check_cxx_compiler_flag("-Wreserved-identifier" HAS_RESERVED_IDENTIFIER) diff --git a/cmake/clang_tidy.cmake b/cmake/clang_tidy.cmake index ceaafdaa9aa..96c295b6bb9 100644 --- a/cmake/clang_tidy.cmake +++ b/cmake/clang_tidy.cmake @@ -5,14 +5,14 @@ if (ENABLE_CLANG_TIDY) find_program (CLANG_TIDY_CACHE_PATH NAMES "clang-tidy-cache") if (CLANG_TIDY_CACHE_PATH) - find_program (_CLANG_TIDY_PATH NAMES "clang-tidy-15" "clang-tidy-14" "clang-tidy-13" "clang-tidy-12" "clang-tidy") + find_program (_CLANG_TIDY_PATH NAMES "clang-tidy-16" "clang-tidy-15" "clang-tidy-14" "clang-tidy") # Why do we use ';' here? # It's a cmake black magic: https://cmake.org/cmake/help/latest/prop_tgt/LANG_CLANG_TIDY.html#prop_tgt:%3CLANG%3E_CLANG_TIDY # The CLANG_TIDY_PATH is passed to CMAKE_CXX_CLANG_TIDY, which follows CXX_CLANG_TIDY syntax. 
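# A minimal sketch of what that ;-list means in practice (the paths here are
# hypothetical, not taken from this patch): with CLANG_TIDY_CACHE_PATH resolving to
# /usr/bin/clang-tidy-cache and _CLANG_TIDY_PATH to /usr/bin/clang-tidy-16,
# CMAKE_CXX_CLANG_TIDY receives "/usr/bin/clang-tidy-cache;/usr/bin/clang-tidy-16",
# and CMake runs the first list element as the tool with the remaining elements as
# its leading arguments, i.e. roughly:
#   /usr/bin/clang-tidy-cache /usr/bin/clang-tidy-16 <clang-tidy options> <source file>
# which is how the caching wrapper ends up driving the real clang-tidy binary.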
set (CLANG_TIDY_PATH "${CLANG_TIDY_CACHE_PATH};${_CLANG_TIDY_PATH}" CACHE STRING "A combined command to run clang-tidy with caching wrapper") else () - find_program (CLANG_TIDY_PATH NAMES "clang-tidy-15" "clang-tidy-14" "clang-tidy-13" "clang-tidy-12" "clang-tidy") + find_program (CLANG_TIDY_PATH NAMES "clang-tidy-16" "clang-tidy-15" "clang-tidy-14" "clang-tidy") endif () if (CLANG_TIDY_PATH) diff --git a/cmake/darwin/default_libs.cmake b/cmake/darwin/default_libs.cmake index 3e6e4907a71..812847e6201 100644 --- a/cmake/darwin/default_libs.cmake +++ b/cmake/darwin/default_libs.cmake @@ -2,13 +2,6 @@ set (DEFAULT_LIBS "-nodefaultlibs") set (DEFAULT_LIBS "${DEFAULT_LIBS} ${COVERAGE_OPTION} -lc -lm -lpthread -ldl") -if (COMPILER_GCC) - set (DEFAULT_LIBS "${DEFAULT_LIBS} -lgcc_eh") - if (ARCH_AARCH64) - set (DEFAULT_LIBS "${DEFAULT_LIBS} -lgcc") - endif () -endif () - message(STATUS "Default libraries: ${DEFAULT_LIBS}") set(CMAKE_CXX_STANDARD_LIBRARIES ${DEFAULT_LIBS}) diff --git a/cmake/freebsd/default_libs.cmake b/cmake/freebsd/default_libs.cmake index 3e1f22ef2e4..65bf296ee09 100644 --- a/cmake/freebsd/default_libs.cmake +++ b/cmake/freebsd/default_libs.cmake @@ -1,9 +1,5 @@ set (DEFAULT_LIBS "-nodefaultlibs") -if (NOT COMPILER_CLANG) - message (FATAL_ERROR "FreeBSD build is supported only for Clang") -endif () - if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "amd64") execute_process (COMMAND ${CMAKE_CXX_COMPILER} --print-file-name=libclang_rt.builtins-x86_64.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE) else () diff --git a/cmake/fuzzer.cmake b/cmake/fuzzer.cmake index 578a9757270..52f301ab8ad 100644 --- a/cmake/fuzzer.cmake +++ b/cmake/fuzzer.cmake @@ -7,10 +7,6 @@ if (FUZZER) set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link") - if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=fuzzer-no-link") - endif() - # NOTE: oss-fuzz can change LIB_FUZZING_ENGINE variable if (NOT LIB_FUZZING_ENGINE) set (LIB_FUZZING_ENGINE "-fsanitize=fuzzer") diff --git a/cmake/git.cmake b/cmake/git.cmake index 397ec3cd081..a4b3bd4bdab 100644 --- a/cmake/git.cmake +++ b/cmake/git.cmake @@ -5,14 +5,14 @@ if (Git_FOUND) # Commit hash + whether the building workspace was dirty or not execute_process(COMMAND "${GIT_EXECUTABLE}" rev-parse HEAD - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} OUTPUT_VARIABLE GIT_HASH ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) # Branch name execute_process(COMMAND "${GIT_EXECUTABLE}" rev-parse --abbrev-ref HEAD - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} OUTPUT_VARIABLE GIT_BRANCH ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) @@ -20,14 +20,14 @@ if (Git_FOUND) SET(ENV{TZ} "UTC") execute_process(COMMAND "${GIT_EXECUTABLE}" log -1 --format=%ad --date=iso-local - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} OUTPUT_VARIABLE GIT_DATE ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) # Subject of the commit execute_process(COMMAND "${GIT_EXECUTABLE}" log -1 --format=%s - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} OUTPUT_VARIABLE GIT_COMMIT_SUBJECT ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) @@ -35,7 +35,7 @@ if (Git_FOUND) execute_process( COMMAND ${GIT_EXECUTABLE} status - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE) + WORKING_DIRECTORY 
${PROJECT_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE) else() message(STATUS "Git could not be found.") endif() diff --git a/cmake/linux/default_libs.cmake b/cmake/linux/default_libs.cmake index 23c5fc3e14f..d42d587303a 100644 --- a/cmake/linux/default_libs.cmake +++ b/cmake/linux/default_libs.cmake @@ -11,8 +11,6 @@ if (COMPILER_CLANG) if (NOT EXISTS "${BUILTINS_LIBRARY}") set (BUILTINS_LIBRARY "-lgcc") endif () -else () - set (BUILTINS_LIBRARY "-lgcc") endif () if (OS_ANDROID) diff --git a/cmake/linux/toolchain-riscv64.cmake b/cmake/linux/toolchain-riscv64.cmake index 49a036c2972..7f876f88d72 100644 --- a/cmake/linux/toolchain-riscv64.cmake +++ b/cmake/linux/toolchain-riscv64.cmake @@ -21,8 +21,7 @@ set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") -set (CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=bfd") -set (CMAKE_SHARED_LINKER_FLAGS_INIT "-fuse-ld=bfd") +set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=bfd") # Currently, lld does not work with the error: # ld.lld: error: section size decrease is too large diff --git a/cmake/linux/toolchain-x86_64.cmake b/cmake/linux/toolchain-x86_64.cmake index e73d779284a..55b9df79f70 100644 --- a/cmake/linux/toolchain-x86_64.cmake +++ b/cmake/linux/toolchain-x86_64.cmake @@ -30,7 +30,6 @@ set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}/x86_64-linux-gnu/libc") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") -set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") set (CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") diff --git a/cmake/print_flags.cmake b/cmake/print_flags.cmake index 66f2a8bfbc7..869764602d4 100644 --- a/cmake/print_flags.cmake +++ b/cmake/print_flags.cmake @@ -7,6 +7,6 @@ message (STATUS "compiler CXX = ${CMAKE_CXX_COMPILER} ${FULL_CXX_FLAGS}") message (STATUS "LINKER_FLAGS = ${FULL_EXE_LINKER_FLAGS}") # Reproducible builds -string (REPLACE "${CMAKE_SOURCE_DIR}" "." FULL_C_FLAGS_NORMALIZED "${FULL_C_FLAGS}") -string (REPLACE "${CMAKE_SOURCE_DIR}" "." FULL_CXX_FLAGS_NORMALIZED "${FULL_CXX_FLAGS}") -string (REPLACE "${CMAKE_SOURCE_DIR}" "." FULL_EXE_LINKER_FLAGS_NORMALIZED "${FULL_EXE_LINKER_FLAGS}") +string (REPLACE "${PROJECT_SOURCE_DIR}" "." FULL_C_FLAGS_NORMALIZED "${FULL_C_FLAGS}") +string (REPLACE "${PROJECT_SOURCE_DIR}" "." FULL_CXX_FLAGS_NORMALIZED "${FULL_CXX_FLAGS}") +string (REPLACE "${PROJECT_SOURCE_DIR}" "." FULL_EXE_LINKER_FLAGS_NORMALIZED "${FULL_EXE_LINKER_FLAGS}") diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake index cf7f7606618..6ac46bb42fa 100644 --- a/cmake/sanitize.cmake +++ b/cmake/sanitize.cmake @@ -8,75 +8,42 @@ option (SANITIZE "Enable one of the code sanitizers" "") set (SAN_FLAGS "${SAN_FLAGS} -g -fno-omit-frame-pointer -DSANITIZER") -# gcc with -nodefaultlibs does not add sanitizer libraries -# with -static-libasan and similar -macro(add_explicit_sanitizer_library lib) - target_link_libraries(global-libs INTERFACE "-Wl,-static -l${lib} -Wl,-Bdynamic") -endmacro() +# It's possible to pass an ignore list to sanitizers (-fsanitize-ignorelist). 
Intentionally not doing this because +# 1. out-of-source suppressions are awkward, and 2. it seems ignore lists don't work after the Clang v16 upgrade (#49829) if (SANITIZE) if (SANITIZE STREQUAL "address") - # LLVM-15 has a bug in Address Sanitizer, preventing the usage of 'sanitize-address-use-after-scope', - # see https://github.com/llvm/llvm-project/issues/58633 - set (ASAN_FLAGS "-fsanitize=address -fno-sanitize-address-use-after-scope") + set (ASAN_FLAGS "-fsanitize=address -fsanitize-address-use-after-scope") + if (COMPILER_CLANG) + if (${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL 15 AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 16) + # LLVM-15 has a bug in Address Sanitizer, preventing the usage + # of 'sanitize-address-use-after-scope', see [1]. + # + # [1]: https://github.com/llvm/llvm-project/issues/58633 + set (ASAN_FLAGS "${ASAN_FLAGS} -fno-sanitize-address-use-after-scope") + endif() + endif() set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}") - if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${ASAN_FLAGS}") - endif() - if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libasan") - endif () - if (COMPILER_GCC) - add_explicit_sanitizer_library(asan) - endif() - elseif (SANITIZE STREQUAL "memory") # MemorySanitizer flags are set according to the official documentation: # https://clang.llvm.org/docs/MemorySanitizer.html#usage - # - # For now, it compiles with `cmake -DSANITIZE=memory -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_CXX_FLAGS_ADD="-O1" -DCMAKE_C_FLAGS_ADD="-O1"` - # Compiling with -DCMAKE_BUILD_TYPE=Debug leads to ld.lld failures because - # of large files (was not tested with ld.gold). This is why we compile with - # RelWithDebInfo, and downgrade optimizations to -O1 but not to -Og, to - # keep the binary size down. - # TODO: try compiling with -Og and with ld.gold. - set (MSAN_FLAGS "-fsanitize=memory -fsanitize-memory-use-after-dtor -fsanitize-memory-track-origins -fno-optimize-sibling-calls -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/msan_suppressions.txt") + # Linking can fail due to relocation overflows (see #49145), caused by too big object files / libraries. + # Work around this with position-independent builds (-fPIC and -fpie); this is slightly slower than non-PIC/PIE, but that's okay.
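# As a sketch, an MSan build of this tree has been configured along the lines of
# (this invocation is carried over from the comment deleted above; the exact flags
# may have drifted since):
#   cmake -DSANITIZE=memory -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_CXX_FLAGS_ADD="-O1" -DCMAKE_C_FLAGS_ADD="-O1"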
+ set (MSAN_FLAGS "-fsanitize=memory -fsanitize-memory-use-after-dtor -fsanitize-memory-track-origins -fno-optimize-sibling-calls -fPIC -fpie") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${MSAN_FLAGS}") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${MSAN_FLAGS}") - if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=memory") - endif() - if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libmsan") - endif () - elseif (SANITIZE STREQUAL "thread") set (TSAN_FLAGS "-fsanitize=thread") if (COMPILER_CLANG) - set (TSAN_FLAGS "${TSAN_FLAGS} -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/tsan_suppressions.txt") - else() - set (MESSAGE "TSAN suppressions was not passed to the compiler (since the compiler is not clang)\n") - set (MESSAGE "${MESSAGE}Use the following command to pass them manually:\n") - set (MESSAGE "${MESSAGE} export TSAN_OPTIONS=\"$TSAN_OPTIONS suppressions=${CMAKE_SOURCE_DIR}/tests/tsan_suppressions.txt\"") - message (WARNING "${MESSAGE}") + set (TSAN_FLAGS "${TSAN_FLAGS} -fsanitize-blacklist=${PROJECT_SOURCE_DIR}/tests/tsan_suppressions.txt") endif() - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${TSAN_FLAGS}") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${TSAN_FLAGS}") - if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=thread") - endif() - if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libtsan") - endif () - if (COMPILER_GCC) - add_explicit_sanitizer_library(tsan) - endif() elseif (SANITIZE STREQUAL "undefined") set (UBSAN_FLAGS "-fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero") @@ -90,25 +57,11 @@ if (SANITIZE) set(UBSAN_FLAGS "${UBSAN_FLAGS} -fno-sanitize=unsigned-integer-overflow") endif() if (COMPILER_CLANG) - set (UBSAN_FLAGS "${UBSAN_FLAGS} -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt") - else() - set (MESSAGE "UBSAN suppressions was not passed to the compiler (since the compiler is not clang)\n") - set (MESSAGE "${MESSAGE}Use the following command to pass them manually:\n") - set (MESSAGE "${MESSAGE} export UBSAN_OPTIONS=\"$UBSAN_OPTIONS suppressions=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt\"") - message (WARNING "${MESSAGE}") + set (UBSAN_FLAGS "${UBSAN_FLAGS} -fsanitize-blacklist=${PROJECT_SOURCE_DIR}/tests/ubsan_suppressions.txt") endif() set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}") - if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined") - endif() - if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libubsan") - endif () - if (COMPILER_GCC) - add_explicit_sanitizer_library(ubsan) - endif() # llvm-tblgen, that is used during LLVM build, doesn't work with UBSan. set (ENABLE_EMBEDDED_COMPILER 0 CACHE BOOL "") diff --git a/cmake/tools.cmake b/cmake/tools.cmake index 4d4d741cc3a..802907c9dda 100644 --- a/cmake/tools.cmake +++ b/cmake/tools.cmake @@ -1,8 +1,6 @@ # Compiler -if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set (COMPILER_GCC 1) -elseif (CMAKE_CXX_COMPILER_ID MATCHES "AppleClang") +if (CMAKE_CXX_COMPILER_ID MATCHES "AppleClang") set (COMPILER_CLANG 1) # Safe to treat AppleClang as a regular Clang, in general. 
elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang") set (COMPILER_CLANG 1) @@ -18,16 +16,8 @@ message (STATUS "Using compiler:\n${COMPILER_SELF_IDENTIFICATION}") set (CLANG_MINIMUM_VERSION 15) set (XCODE_MINIMUM_VERSION 12.0) set (APPLE_CLANG_MINIMUM_VERSION 12.0.0) -set (GCC_MINIMUM_VERSION 11) -if (COMPILER_GCC) - message (FATAL_ERROR "Compilation with GCC is unsupported. Please use Clang instead.") - - if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS ${GCC_MINIMUM_VERSION}) - message (FATAL_ERROR "Compilation with GCC version ${CMAKE_CXX_COMPILER_VERSION} is unsupported, the minimum required version is ${GCC_MINIMUM_VERSION}.") - endif () - -elseif (COMPILER_CLANG) +if (COMPILER_CLANG) if (CMAKE_CXX_COMPILER_ID MATCHES "AppleClang") # (Experimental!) Specify "-DALLOW_APPLECLANG=ON" when running CMake configuration step, if you want to experiment with using it. if (NOT ALLOW_APPLECLANG AND NOT DEFINED ENV{ALLOW_APPLECLANG}) @@ -50,70 +40,45 @@ endif () string (REGEX MATCHALL "[0-9]+" COMPILER_VERSION_LIST ${CMAKE_CXX_COMPILER_VERSION}) list (GET COMPILER_VERSION_LIST 0 COMPILER_VERSION_MAJOR) -# Example values: `lld-10`, `gold`. +# Example values: `lld-10` option (LINKER_NAME "Linker name or full path") -# s390x doesnt support lld -if (NOT ARCH_S390X) - if (NOT LINKER_NAME) - if (COMPILER_GCC) - find_program (LLD_PATH NAMES "ld.lld") - find_program (GOLD_PATH NAMES "ld.gold") - elseif (COMPILER_CLANG) - # llvm lld is a generic driver. - # Invoke ld.lld (Unix), ld64.lld (macOS), lld-link (Windows), wasm-ld (WebAssembly) instead - if (OS_LINUX) +if (LINKER_NAME MATCHES "gold") + message (FATAL_ERROR "Linking with gold is unsupported. Please use lld.") +endif () + +if (NOT LINKER_NAME) + if (COMPILER_CLANG) + if (OS_LINUX) + if (NOT ARCH_S390X) # s390x doesn't support lld find_program (LLD_PATH NAMES "ld.lld-${COMPILER_VERSION_MAJOR}" "ld.lld") - elseif (OS_DARWIN) - find_program (LLD_PATH NAMES "ld64.lld-${COMPILER_VERSION_MAJOR}" "ld64.lld") endif () - find_program (GOLD_PATH NAMES "ld.gold" "gold") + endif () + endif () + if (OS_LINUX) + if (LLD_PATH) + if (COMPILER_CLANG) + # Clang driver simply allows full linker path. + set (LINKER_NAME ${LLD_PATH}) + endif () endif () endif() endif() -if ((OS_LINUX OR OS_DARWIN) AND NOT LINKER_NAME) - # prefer lld linker over gold or ld on linux and macos - if (LLD_PATH) - if (COMPILER_GCC) - # GCC driver requires one of supported linker names like "lld". - set (LINKER_NAME "lld") - else () - # Clang driver simply allows full linker path. - set (LINKER_NAME ${LLD_PATH}) - endif () - endif () - - if (NOT LINKER_NAME) - if (GOLD_PATH) - message (FATAL_ERROR "Linking with gold is unsupported.
Please use lld.") - if (COMPILER_GCC) - set (LINKER_NAME "gold") - else () - set (LINKER_NAME ${GOLD_PATH}) - endif () - endif () - endif () -endif () -# TODO: allow different linker on != OS_LINUX - if (LINKER_NAME) - if (COMPILER_CLANG) - find_program (LLD_PATH NAMES ${LINKER_NAME}) - if (NOT LLD_PATH) - message (FATAL_ERROR "Using linker ${LINKER_NAME} but can't find its path.") - endif () - - # This a temporary quirk to emit .debug_aranges with ThinLTO + find_program (LLD_PATH NAMES ${LINKER_NAME}) + if (NOT LLD_PATH) + message (FATAL_ERROR "Using linker ${LINKER_NAME} but can't find its path.") + endif () + # This a temporary quirk to emit .debug_aranges with ThinLTO, it is only the case clang/llvm <16 + if (COMPILER_CLANG AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16) set (LLD_WRAPPER "${CMAKE_CURRENT_BINARY_DIR}/ld.lld") configure_file ("${CMAKE_CURRENT_SOURCE_DIR}/cmake/ld.lld.in" "${LLD_WRAPPER}" @ONLY) set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_WRAPPER}") - set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --ld-path=${LLD_WRAPPER}") else () - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}") - set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}") - endif () + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_PATH}") + endif() endif () @@ -125,9 +90,7 @@ endif() # Archiver -if (COMPILER_GCC) - find_program (LLVM_AR_PATH NAMES "llvm-ar" "llvm-ar-15" "llvm-ar-14" "llvm-ar-13" "llvm-ar-12") -else () +if (COMPILER_CLANG) find_program (LLVM_AR_PATH NAMES "llvm-ar-${COMPILER_VERSION_MAJOR}" "llvm-ar") endif () @@ -139,9 +102,7 @@ message(STATUS "Using archiver: ${CMAKE_AR}") # Ranlib -if (COMPILER_GCC) - find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib" "llvm-ranlib-15" "llvm-ranlib-14" "llvm-ranlib-13" "llvm-ranlib-12") -else () +if (COMPILER_CLANG) find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib-${COMPILER_VERSION_MAJOR}" "llvm-ranlib") endif () @@ -153,9 +114,7 @@ message(STATUS "Using ranlib: ${CMAKE_RANLIB}") # Install Name Tool -if (COMPILER_GCC) - find_program (LLVM_INSTALL_NAME_TOOL_PATH NAMES "llvm-install-name-tool" "llvm-install-name-tool-15" "llvm-install-name-tool-14" "llvm-install-name-tool-13" "llvm-install-name-tool-12") -else () +if (COMPILER_CLANG) find_program (LLVM_INSTALL_NAME_TOOL_PATH NAMES "llvm-install-name-tool-${COMPILER_VERSION_MAJOR}" "llvm-install-name-tool") endif () @@ -167,9 +126,7 @@ message(STATUS "Using install-name-tool: ${CMAKE_INSTALL_NAME_TOOL}") # Objcopy -if (COMPILER_GCC) - find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-15" "llvm-objcopy-14" "llvm-objcopy-13" "llvm-objcopy-12" "objcopy") -else () +if (COMPILER_CLANG) find_program (OBJCOPY_PATH NAMES "llvm-objcopy-${COMPILER_VERSION_MAJOR}" "llvm-objcopy" "objcopy") endif () @@ -181,9 +138,7 @@ endif () # Strip -if (COMPILER_GCC) - find_program (STRIP_PATH NAMES "llvm-strip" "llvm-strip-15" "llvm-strip-14" "llvm-strip-13" "llvm-strip-12" "strip") -else () +if (COMPILER_CLANG) find_program (STRIP_PATH NAMES "llvm-strip-${COMPILER_VERSION_MAJOR}" "llvm-strip" "strip") endif () diff --git a/cmake/version.cmake b/cmake/version.cmake index acaa772ff2f..9ca21556f4d 100644 --- a/cmake/version.cmake +++ b/cmake/version.cmake @@ -1,4 +1,4 @@ -include(${CMAKE_SOURCE_DIR}/cmake/autogenerated_versions.txt) +include(${PROJECT_SOURCE_DIR}/cmake/autogenerated_versions.txt) set(VERSION_EXTRA "" CACHE STRING "") set(VERSION_TWEAK "" CACHE STRING "") diff --git 
a/cmake/warnings.cmake b/cmake/warnings.cmake index 5d116b199cf..00fa32a6b7f 100644 --- a/cmake/warnings.cmake +++ b/cmake/warnings.cmake @@ -47,115 +47,4 @@ if (COMPILER_CLANG) no_warning(enum-constexpr-conversion) # breaks magic-enum library in clang-16 no_warning(unsafe-buffer-usage) # too aggressive # TODO Enable conversion, sign-conversion, double-promotion warnings. -elseif (COMPILER_GCC) - # Add compiler options only to c++ compiler - function(add_cxx_compile_options option) - add_compile_options("$<$,CXX>:${option}>") - endfunction() - # Warn about boolean expression compared with an integer value different from true/false - add_cxx_compile_options(-Wbool-compare) - # Warn whenever a pointer is cast such that the required alignment of the target is increased. - add_cxx_compile_options(-Wcast-align) - # Warn whenever a pointer is cast so as to remove a type qualifier from the target type. - add_cxx_compile_options(-Wcast-qual) - # Warn when deleting a pointer to incomplete type, which may cause undefined behavior at runtime - add_cxx_compile_options(-Wdelete-incomplete) - # Warn if a requested optimization pass is disabled. Code is too big or too complex - add_cxx_compile_options(-Wdisabled-optimization) - # Warn about duplicated conditions in an if-else-if chain - add_cxx_compile_options(-Wduplicated-cond) - # Warn about a comparison between values of different enumerated types - add_cxx_compile_options(-Wenum-compare) - # Warn about uninitialized variables that are initialized with themselves - add_cxx_compile_options(-Winit-self) - # Warn about logical not used on the left hand side operand of a comparison - add_cxx_compile_options(-Wlogical-not-parentheses) - # Warn about suspicious uses of logical operators in expressions - add_cxx_compile_options(-Wlogical-op) - # Warn if there exists a path from the function entry to a use of the variable that is uninitialized. 
- add_cxx_compile_options(-Wmaybe-uninitialized) - # Warn when the indentation of the code does not reflect the block structure - add_cxx_compile_options(-Wmisleading-indentation) - # Warn if a global function is defined without a previous declaration - disabled because of build times - # add_cxx_compile_options(-Wmissing-declarations) - # Warn if a user-supplied include directory does not exist - add_cxx_compile_options(-Wmissing-include-dirs) - # Obvious - add_cxx_compile_options(-Wnon-virtual-dtor) - # Obvious - add_cxx_compile_options(-Wno-return-local-addr) - # This warning is disabled due to false positives if compiled with libc++: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90037 - #add_cxx_compile_options(-Wnull-dereference) - # Obvious - add_cxx_compile_options(-Wodr) - # Obvious - add_cxx_compile_options(-Wold-style-cast) - # Warn when a function declaration hides virtual functions from a base class - # add_cxx_compile_options(-Woverloaded-virtual) - # Warn about placement new expressions with undefined behavior - add_cxx_compile_options(-Wplacement-new=2) - # Warn about anything that depends on the “size of” a function type or of void - add_cxx_compile_options(-Wpointer-arith) - # Warn if anything is declared more than once in the same scope - add_cxx_compile_options(-Wredundant-decls) - # Member initialization reordering - add_cxx_compile_options(-Wreorder) - # Obvious - add_cxx_compile_options(-Wshadow) - # Warn if left shifting a negative value - add_cxx_compile_options(-Wshift-negative-value) - # Warn about a definition of an unsized deallocation function - add_cxx_compile_options(-Wsized-deallocation) - # Warn when the sizeof operator is applied to a parameter that is declared as an array in a function definition - add_cxx_compile_options(-Wsizeof-array-argument) - # Warn for suspicious length parameters to certain string and memory built-in functions if the argument uses sizeof - add_cxx_compile_options(-Wsizeof-pointer-memaccess) - # Warn about overriding virtual functions that are not marked with the override keyword - add_cxx_compile_options(-Wsuggest-override) - # Warn whenever a switch statement has an index of boolean type and the case values are outside the range of a boolean type - add_cxx_compile_options(-Wswitch-bool) - # Warn if a self-comparison always evaluates to true or false - add_cxx_compile_options(-Wtautological-compare) - # Warn about trampolines generated for pointers to nested functions - add_cxx_compile_options(-Wtrampolines) - # Obvious - add_cxx_compile_options(-Wunused) - add_cxx_compile_options(-Wundef) - # Warn if vector operation is not implemented via SIMD capabilities of the architecture - add_cxx_compile_options(-Wvector-operation-performance) - # Warn when a literal 0 is used as null pointer constant. - add_cxx_compile_options(-Wzero-as-null-pointer-constant) - - # The following warnings are generally useful but had to be disabled because of compiler bugs with older GCCs. - # XXX: We should try again on more recent GCCs (--> see CMake variable GCC_MINIMUM_VERSION). 
- - # gcc10 stuck with this option while compiling GatherUtils code, anyway there are builds with clang that will warn - add_cxx_compile_options(-Wno-sequence-point) - # gcc10 false positive with this warning in MergeTreePartition.cpp - # inlined from 'void writeHexByteLowercase(UInt8, void*)' at ../src/Common/hex.h:39:11, - # inlined from 'DB::String DB::MergeTreePartition::getID(const DB::Block&) const' at ../src/Storages/MergeTree/MergeTreePartition.cpp:85:30: - # ../contrib/libc-headers/x86_64-linux-gnu/bits/string_fortified.h:34:33: error: writing 2 bytes into a region of size 0 [-Werror=stringop-overflow=] - # 34 | return __builtin___memcpy_chk (__dest, __src, __len, __bos0 (__dest)); - # For some reason (bug in gcc?) macro 'GCC diagnostic ignored "-Wstringop-overflow"' doesn't help. - add_cxx_compile_options(-Wno-stringop-overflow) - # reinterpretAs.cpp:182:31: error: ‘void* memcpy(void*, const void*, size_t)’ copying an object of non-trivial type - # ‘using ToFieldType = using FieldType = using UUID = struct StrongTypedef, DB::UUIDTag>’ - # {aka ‘struct StrongTypedef, DB::UUIDTag>’} from an array of ‘const char8_t’ - add_cxx_compile_options(-Wno-error=class-memaccess) - # Maybe false positive... - # In file included from /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/memory:673, - # In function ‘void std::__1::__libcpp_operator_delete(_Args ...) [with _Args = {void*, long unsigned int}]’, - # inlined from ‘void std::__1::__do_deallocate_handle_size(void*, size_t, _Args ...) [with _Args = {}]’ at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/new:271:34, - # inlined from ‘void std::__1::__libcpp_deallocate(void*, size_t, size_t)’ at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/new:285:41, - # inlined from ‘constexpr void std::__1::allocator<_Tp>::deallocate(_Tp*, size_t) [with _Tp = char]’ at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/memory:849:39, - # inlined from ‘static constexpr void std::__1::allocator_traits<_Alloc>::deallocate(std::__1::allocator_traits<_Alloc>::allocator_type&, std::__1::allocator_traits<_Alloc>::pointer, std::__1::allocator_traits<_Alloc>::size_type) [with _Alloc = std::__1::allocator]’ at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/__memory/allocator_traits.h:476:24, - # inlined from ‘std::__1::basic_string<_CharT, _Traits, _Allocator>::~basic_string() [with _CharT = char; _Traits = std::__1::char_traits; _Allocator = std::__1::allocator]’ at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/string:2219:35, - # inlined from ‘std::__1::basic_string<_CharT, _Traits, _Allocator>::~basic_string() [with _CharT = char; _Traits = std::__1::char_traits; _Allocator = std::__1::allocator]’ at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/string:2213:1, - # inlined from ‘DB::JSONBuilder::JSONMap::Pair::~Pair()’ at /home/jakalletti/ClickHouse/ClickHouse/src/Common/JSONBuilder.h:90:12, - # inlined from ‘void DB::JSONBuilder::JSONMap::add(std::__1::string, DB::JSONBuilder::ItemPtr)’ at /home/jakalletti/ClickHouse/ClickHouse/src/Common/JSONBuilder.h:97:68, - # inlined from ‘virtual void DB::ExpressionStep::describeActions(DB::JSONBuilder::JSONMap&) const’ at /home/jakalletti/ClickHouse/ClickHouse/src/Processors/QueryPlan/ExpressionStep.cpp:102:12: - # /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/new:247:20: error: ‘void operator delete(void*, size_t)’ called on a pointer to an unallocated object ‘7598543875853023301’ 
[-Werror=free-nonheap-object] - add_cxx_compile_options(-Wno-error=free-nonheap-object) - # AggregateFunctionAvg.h:203:100: error: ‘this’ pointer is null [-Werror=nonnull] - add_cxx_compile_options(-Wno-error=nonnull) endif () diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 695d7ec3f92..020fe1e1c5a 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -9,8 +9,6 @@ if (WITH_COVERAGE) # disable coverage for contib files and build with optimisations if (COMPILER_CLANG) add_compile_options(-O3 -DNDEBUG -finline-functions -finline-hint-functions ${WITHOUT_COVERAGE_LIST}) - else() - add_compile_options(-O3 -DNDEBUG -finline-functions ${WITHOUT_COVERAGE_LIST}) endif() endif() @@ -107,6 +105,7 @@ add_contrib (libfarmhash) add_contrib (icu-cmake icu) add_contrib (h3-cmake h3) add_contrib (mariadb-connector-c-cmake mariadb-connector-c) +add_contrib (libfiu-cmake libfiu) if (ENABLE_TESTS) add_contrib (googletest-cmake googletest) @@ -136,27 +135,28 @@ add_contrib (aws-cmake ) add_contrib (base64-cmake base64) +if (NOT ARCH_S390X) add_contrib (simdjson-cmake simdjson) +endif() add_contrib (rapidjson-cmake rapidjson) add_contrib (fastops-cmake fastops) add_contrib (libuv-cmake libuv) add_contrib (liburing-cmake liburing) add_contrib (amqpcpp-cmake AMQP-CPP) # requires: libuv add_contrib (cassandra-cmake cassandra) # requires: libuv - -if (ENABLE_CURL_BUILD) +if (NOT OS_DARWIN) add_contrib (curl-cmake curl) add_contrib (azure-cmake azure) add_contrib (sentry-native-cmake sentry-native) # requires: curl endif() - add_contrib (fmtlib-cmake fmtlib) add_contrib (krb5-cmake krb5) add_contrib (cyrus-sasl-cmake cyrus-sasl) # for krb5 add_contrib (libgsasl-cmake libgsasl) # requires krb5 add_contrib (librdkafka-cmake librdkafka) # requires: libgsasl add_contrib (nats-io-cmake nats-io) -add_contrib (libhdfs3-cmake libhdfs3) # requires: protobuf, krb5 +add_contrib (isa-l-cmake isa-l) +add_contrib (libhdfs3-cmake libhdfs3) # requires: protobuf, krb5, isa-l add_contrib (hive-metastore-cmake hive-metastore) # requires: thrift/avro/arrow/libhdfs3 add_contrib (cppkafka-cmake cppkafka) add_contrib (libpqxx-cmake libpqxx) @@ -178,19 +178,29 @@ endif() add_contrib (sqlite-cmake sqlite-amalgamation) add_contrib (s2geometry-cmake s2geometry) add_contrib (c-ares-cmake c-ares) -add_contrib (qpl-cmake qpl) -add_contrib (morton-nd-cmake morton-nd) +if (OS_LINUX AND ARCH_AMD64 AND ENABLE_SSE42) + option (ENABLE_QPL "Enable Intel® Query Processing Library" ${ENABLE_LIBRARIES}) +elseif(ENABLE_QPL) + message (${RECONFIGURE_MESSAGE_LEVEL} "QPL library is only supported on x86_64 arch with SSE 4.2 or higher") +endif() +if (ENABLE_QPL) + add_contrib (idxd-config-cmake idxd-config) + add_contrib (qpl-cmake qpl) # requires: idxd-config +else() + message(STATUS "Not using QPL") +endif () + +add_contrib (morton-nd-cmake morton-nd) if (ARCH_S390X) add_contrib(crc32-s390x-cmake crc32-s390x) endif() - add_contrib (annoy-cmake annoy) - add_contrib (xxHash-cmake xxHash) -add_contrib (google-benchmark-cmake google-benchmark) +add_contrib (libbcrypt-cmake libbcrypt) +add_contrib (google-benchmark-cmake google-benchmark) add_contrib (ulid-c-cmake ulid-c) # Put all targets defined here and in subdirectories under "contrib/" folders in GUI-based IDEs. 
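The QPL hunk above is the one place in contrib/CMakeLists.txt where the option is defined only on platforms that can build the library, while an explicit -DENABLE_QPL=ON anywhere else degrades to a configure-time message instead of a broken build. A minimal self-contained sketch of that gating pattern, using a hypothetical contrib named foo (ENABLE_LIBRARIES and RECONFIGURE_MESSAGE_LEVEL are the surrounding project's existing conventions):

if (OS_LINUX AND ARCH_AMD64 AND ENABLE_SSE42)
    # Define the option only where the library can actually be built.
    option (ENABLE_FOO "Enable the hypothetical foo library" ${ENABLE_LIBRARIES})
elseif (ENABLE_FOO)
    # Requested on an unsupported platform: report at configure time instead of failing later.
    message (${RECONFIGURE_MESSAGE_LEVEL} "foo is only supported on x86_64 with SSE 4.2 or higher")
endif()

if (ENABLE_FOO)
    add_contrib (foo-cmake foo)  # assumes contrib/foo and contrib/foo-cmake exist
else()
    message(STATUS "Not using foo")
endif()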
diff --git a/contrib/arrow b/contrib/arrow index d03245f801f..1f1b3d35fb6 160000 --- a/contrib/arrow +++ b/contrib/arrow @@ -1 +1 @@ -Subproject commit d03245f801f798c63ee9a7d2b8914a9e5c5cd666 +Subproject commit 1f1b3d35fb6eb73e6492d3afd8a85cde848d174f diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt index ae6f270a768..16198887075 100644 --- a/contrib/arrow-cmake/CMakeLists.txt +++ b/contrib/arrow-cmake/CMakeLists.txt @@ -115,6 +115,13 @@ configure_file("${ORC_SOURCE_SRC_DIR}/Adaptor.hh.in" "${ORC_BUILD_INCLUDE_DIR}/A # ARROW_ORC + adapters/orc/CMakefiles set(ORC_SRCS + "${CMAKE_CURRENT_BINARY_DIR}/orc_proto.pb.h" + "${ORC_SOURCE_SRC_DIR}/sargs/ExpressionTree.cc" + "${ORC_SOURCE_SRC_DIR}/sargs/Literal.cc" + "${ORC_SOURCE_SRC_DIR}/sargs/PredicateLeaf.cc" + "${ORC_SOURCE_SRC_DIR}/sargs/SargsApplier.cc" + "${ORC_SOURCE_SRC_DIR}/sargs/SearchArgument.cc" + "${ORC_SOURCE_SRC_DIR}/sargs/TruthValue.cc" "${ORC_SOURCE_SRC_DIR}/Exceptions.cc" "${ORC_SOURCE_SRC_DIR}/OrcFile.cc" "${ORC_SOURCE_SRC_DIR}/Reader.cc" @@ -129,13 +136,20 @@ set(ORC_SRCS "${ORC_SOURCE_SRC_DIR}/MemoryPool.cc" "${ORC_SOURCE_SRC_DIR}/RLE.cc" "${ORC_SOURCE_SRC_DIR}/RLEv1.cc" - "${ORC_SOURCE_SRC_DIR}/RLEv2.cc" + "${ORC_SOURCE_SRC_DIR}/RleDecoderV2.cc" + "${ORC_SOURCE_SRC_DIR}/RleEncoderV2.cc" + "${ORC_SOURCE_SRC_DIR}/RLEV2Util.cc" "${ORC_SOURCE_SRC_DIR}/Statistics.cc" "${ORC_SOURCE_SRC_DIR}/StripeStream.cc" "${ORC_SOURCE_SRC_DIR}/Timezone.cc" "${ORC_SOURCE_SRC_DIR}/TypeImpl.cc" "${ORC_SOURCE_SRC_DIR}/Vector.cc" "${ORC_SOURCE_SRC_DIR}/Writer.cc" + "${ORC_SOURCE_SRC_DIR}/Adaptor.cc" + "${ORC_SOURCE_SRC_DIR}/BloomFilter.cc" + "${ORC_SOURCE_SRC_DIR}/Murmur3.cc" + "${ORC_SOURCE_SRC_DIR}/BlockBuffer.cc" + "${ORC_SOURCE_SRC_DIR}/wrap/orc-proto-wrapper.cc" "${ORC_SOURCE_SRC_DIR}/io/InputStream.cc" "${ORC_SOURCE_SRC_DIR}/io/OutputStream.cc" "${ORC_ADDITION_SOURCE_DIR}/orc_proto.pb.cc" @@ -188,6 +202,7 @@ set(ARROW_SRCS "${LIBRARY_DIR}/builder.cc" "${LIBRARY_DIR}/buffer.cc" "${LIBRARY_DIR}/chunked_array.cc" + "${LIBRARY_DIR}/chunk_resolver.cc" "${LIBRARY_DIR}/compare.cc" "${LIBRARY_DIR}/config.cc" "${LIBRARY_DIR}/datum.cc" @@ -254,6 +269,10 @@ set(ARROW_SRCS "${LIBRARY_DIR}/util/uri.cc" "${LIBRARY_DIR}/util/utf8.cc" "${LIBRARY_DIR}/util/value_parsing.cc" + "${LIBRARY_DIR}/util/byte_size.cc" + "${LIBRARY_DIR}/util/debug.cc" + "${LIBRARY_DIR}/util/tracing.cc" + "${LIBRARY_DIR}/util/atfork_internal.cc" "${LIBRARY_DIR}/vendored/base64.cpp" "${LIBRARY_DIR}/vendored/datetime/tz.cpp" @@ -287,9 +306,11 @@ set(ARROW_SRCS "${LIBRARY_DIR}/compute/exec/source_node.cc" "${LIBRARY_DIR}/compute/exec/sink_node.cc" "${LIBRARY_DIR}/compute/exec/order_by_impl.cc" + "${LIBRARY_DIR}/compute/exec/partition_util.cc" "${LIBRARY_DIR}/compute/function.cc" "${LIBRARY_DIR}/compute/function_internal.cc" "${LIBRARY_DIR}/compute/kernel.cc" + "${LIBRARY_DIR}/compute/light_array.cc" "${LIBRARY_DIR}/compute/registry.cc" "${LIBRARY_DIR}/compute/kernels/aggregate_basic.cc" "${LIBRARY_DIR}/compute/kernels/aggregate_mode.cc" @@ -303,21 +324,28 @@ set(ARROW_SRCS "${LIBRARY_DIR}/compute/kernels/scalar_cast_boolean.cc" "${LIBRARY_DIR}/compute/kernels/scalar_cast_dictionary.cc" "${LIBRARY_DIR}/compute/kernels/scalar_cast_internal.cc" + "${LIBRARY_DIR}/compute/kernels/scalar_cast_extension.cc" "${LIBRARY_DIR}/compute/kernels/scalar_cast_nested.cc" "${LIBRARY_DIR}/compute/kernels/scalar_cast_numeric.cc" "${LIBRARY_DIR}/compute/kernels/scalar_cast_string.cc" "${LIBRARY_DIR}/compute/kernels/scalar_cast_temporal.cc" 
"${LIBRARY_DIR}/compute/kernels/scalar_compare.cc" "${LIBRARY_DIR}/compute/kernels/scalar_nested.cc" + "${LIBRARY_DIR}/compute/kernels/scalar_random.cc" + "${LIBRARY_DIR}/compute/kernels/scalar_round.cc" "${LIBRARY_DIR}/compute/kernels/scalar_set_lookup.cc" - "${LIBRARY_DIR}/compute/kernels/scalar_string.cc" "${LIBRARY_DIR}/compute/kernels/scalar_temporal_binary.cc" "${LIBRARY_DIR}/compute/kernels/scalar_temporal_unary.cc" "${LIBRARY_DIR}/compute/kernels/scalar_validity.cc" "${LIBRARY_DIR}/compute/kernels/scalar_if_else.cc" + "${LIBRARY_DIR}/compute/kernels/scalar_string_ascii.cc" + "${LIBRARY_DIR}/compute/kernels/scalar_string_utf8.cc" "${LIBRARY_DIR}/compute/kernels/util_internal.cc" "${LIBRARY_DIR}/compute/kernels/vector_array_sort.cc" + "${LIBRARY_DIR}/compute/kernels/vector_cumulative_ops.cc" "${LIBRARY_DIR}/compute/kernels/vector_hash.cc" + "${LIBRARY_DIR}/compute/kernels/vector_rank.cc" + "${LIBRARY_DIR}/compute/kernels/vector_select_k.cc" "${LIBRARY_DIR}/compute/kernels/vector_nested.cc" "${LIBRARY_DIR}/compute/kernels/vector_replace.cc" "${LIBRARY_DIR}/compute/kernels/vector_selection.cc" @@ -326,13 +354,15 @@ set(ARROW_SRCS "${LIBRARY_DIR}/compute/exec/union_node.cc" "${LIBRARY_DIR}/compute/exec/key_hash.cc" "${LIBRARY_DIR}/compute/exec/key_map.cc" - "${LIBRARY_DIR}/compute/exec/key_compare.cc" - "${LIBRARY_DIR}/compute/exec/key_encode.cc" "${LIBRARY_DIR}/compute/exec/util.cc" "${LIBRARY_DIR}/compute/exec/hash_join_dict.cc" "${LIBRARY_DIR}/compute/exec/hash_join.cc" "${LIBRARY_DIR}/compute/exec/hash_join_node.cc" "${LIBRARY_DIR}/compute/exec/task_util.cc" + "${LIBRARY_DIR}/compute/row/encode_internal.cc" + "${LIBRARY_DIR}/compute/row/grouper.cc" + "${LIBRARY_DIR}/compute/row/compare_internal.cc" + "${LIBRARY_DIR}/compute/row/row_internal.cc" "${LIBRARY_DIR}/ipc/dictionary.cc" "${LIBRARY_DIR}/ipc/feather.cc" @@ -343,7 +373,8 @@ set(ARROW_SRCS "${LIBRARY_DIR}/ipc/writer.cc" "${ARROW_SRC_DIR}/arrow/adapters/orc/adapter.cc" - "${ARROW_SRC_DIR}/arrow/adapters/orc/adapter_util.cc" + "${ARROW_SRC_DIR}/arrow/adapters/orc/util.cc" + "${ARROW_SRC_DIR}/arrow/adapters/orc/options.cc" ) add_definitions(-DARROW_WITH_LZ4) @@ -358,6 +389,9 @@ SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zlib.cc" ${ARROW_SRCS}) add_definitions(-DARROW_WITH_ZSTD) SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zstd.cc" ${ARROW_SRCS}) +add_definitions(-DARROW_WITH_BROTLI) +SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_brotli.cc" ${ARROW_SRCS}) + add_library(_arrow ${ARROW_SRCS}) @@ -372,6 +406,7 @@ target_link_libraries(_arrow PRIVATE ch_contrib::snappy ch_contrib::zlib ch_contrib::zstd + ch_contrib::brotli ) target_link_libraries(_arrow PUBLIC _orc) diff --git a/contrib/avro-cmake/CMakeLists.txt b/contrib/avro-cmake/CMakeLists.txt index 25474650d0e..63b3854eef9 100644 --- a/contrib/avro-cmake/CMakeLists.txt +++ b/contrib/avro-cmake/CMakeLists.txt @@ -6,7 +6,7 @@ if (NOT ENABLE_AVRO) return() endif() -set(AVROCPP_ROOT_DIR "${CMAKE_SOURCE_DIR}/contrib/avro/lang/c++") +set(AVROCPP_ROOT_DIR "${PROJECT_SOURCE_DIR}/contrib/avro/lang/c++") set(AVROCPP_INCLUDE_DIR "${AVROCPP_ROOT_DIR}/api") set(AVROCPP_SOURCE_DIR "${AVROCPP_ROOT_DIR}/impl") diff --git a/contrib/aws b/contrib/aws index ecccfc026a4..ca02358dcc7 160000 --- a/contrib/aws +++ b/contrib/aws @@ -1 +1 @@ -Subproject commit ecccfc026a42b30023289410a67024d561f4bf3e +Subproject commit ca02358dcc7ce3ab733dd4cbcc32734eecfa4ee3 diff --git a/contrib/aws-c-auth b/contrib/aws-c-auth index 30df6c407e2..97133a2b5db 160000 --- a/contrib/aws-c-auth +++ 
b/contrib/aws-c-auth @@ -1 +1 @@ -Subproject commit 30df6c407e2df43bd244e2c34c9b4a4b87372bfb +Subproject commit 97133a2b5dbca1ccdf88cd6f44f39d0531d27d12 diff --git a/contrib/aws-c-common b/contrib/aws-c-common index 324fd1d973c..45dcb2849c8 160000 --- a/contrib/aws-c-common +++ b/contrib/aws-c-common @@ -1 +1 @@ -Subproject commit 324fd1d973ccb25c813aa747bf1759cfde5121c5 +Subproject commit 45dcb2849c891dba2100b270b4676765c92949ff diff --git a/contrib/aws-c-event-stream b/contrib/aws-c-event-stream index 39bfa94a14b..2f9b60c42f9 160000 --- a/contrib/aws-c-event-stream +++ b/contrib/aws-c-event-stream @@ -1 +1 @@ -Subproject commit 39bfa94a14b7126bf0c1330286ef8db452d87e66 +Subproject commit 2f9b60c42f90840ec11822acda3d8cdfa97a773d diff --git a/contrib/aws-c-http b/contrib/aws-c-http index 2c5a2a7d555..dd344619879 160000 --- a/contrib/aws-c-http +++ b/contrib/aws-c-http @@ -1 +1 @@ -Subproject commit 2c5a2a7d5556600b9782ffa6c9d7e09964df1abc +Subproject commit dd34461987947672444d0bc872c5a733dfdb9711 diff --git a/contrib/aws-c-io b/contrib/aws-c-io index 5d32c453560..d58ed4f272b 160000 --- a/contrib/aws-c-io +++ b/contrib/aws-c-io @@ -1 +1 @@ -Subproject commit 5d32c453560d0823df521a686bf7fbacde7f9be3 +Subproject commit d58ed4f272b1cb4f89ac9196526ceebe5f2b0d89 diff --git a/contrib/aws-c-mqtt b/contrib/aws-c-mqtt index 882c689561a..33c3455cec8 160000 --- a/contrib/aws-c-mqtt +++ b/contrib/aws-c-mqtt @@ -1 +1 @@ -Subproject commit 882c689561a3db1466330ccfe3b63637e0a575d3 +Subproject commit 33c3455cec82b16feb940e12006cefd7b3ef4194 diff --git a/contrib/aws-c-s3 b/contrib/aws-c-s3 index a41255ece72..d7bfe602d69 160000 --- a/contrib/aws-c-s3 +++ b/contrib/aws-c-s3 @@ -1 +1 @@ -Subproject commit a41255ece72a7c887bba7f9d998ca3e14f4c8a1b +Subproject commit d7bfe602d6925948f1fff95784e3613cca6a3900 diff --git a/contrib/aws-c-sdkutils b/contrib/aws-c-sdkutils index 25bf5cf225f..208a701fa01 160000 --- a/contrib/aws-c-sdkutils +++ b/contrib/aws-c-sdkutils @@ -1 +1 @@ -Subproject commit 25bf5cf225f977c3accc6a05a0a7a181ef2a4a30 +Subproject commit 208a701fa01e99c7c8cc3dcebc8317da71362972 diff --git a/contrib/aws-checksums b/contrib/aws-checksums index 48e7c0e0147..ad53be196a2 160000 --- a/contrib/aws-checksums +++ b/contrib/aws-checksums @@ -1 +1 @@ -Subproject commit 48e7c0e01479232f225c8044d76c84e74192889d +Subproject commit ad53be196a25bbefa3700a01187fdce573a7d2d0 diff --git a/contrib/aws-cmake/CMakeLists.txt b/contrib/aws-cmake/CMakeLists.txt index 52533cd6483..950a0e06cd0 100644 --- a/contrib/aws-cmake/CMakeLists.txt +++ b/contrib/aws-cmake/CMakeLists.txt @@ -52,8 +52,8 @@ endif() # Directories. 
SET(AWS_SDK_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws") -SET(AWS_SDK_CORE_DIR "${AWS_SDK_DIR}/aws-cpp-sdk-core") -SET(AWS_SDK_S3_DIR "${AWS_SDK_DIR}/aws-cpp-sdk-s3") +SET(AWS_SDK_CORE_DIR "${AWS_SDK_DIR}/src/aws-cpp-sdk-core") +SET(AWS_SDK_S3_DIR "${AWS_SDK_DIR}/generated/src/aws-cpp-sdk-s3") SET(AWS_AUTH_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-auth") SET(AWS_CAL_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-cal") @@ -118,7 +118,7 @@ configure_file("${AWS_SDK_CORE_DIR}/include/aws/core/SDKConfig.h.in" list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_MAJOR=1") list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_MINOR=10") list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_PATCH=36") - + list(APPEND AWS_SOURCES ${AWS_SDK_CORE_SRC} ${AWS_SDK_CORE_NET_SRC} ${AWS_SDK_CORE_PLATFORM_SRC}) list(APPEND AWS_PUBLIC_INCLUDES diff --git a/contrib/aws-crt-cpp b/contrib/aws-crt-cpp index ec0bea288f4..8a301b7e842 160000 --- a/contrib/aws-crt-cpp +++ b/contrib/aws-crt-cpp @@ -1 +1 @@ -Subproject commit ec0bea288f451d884c0d80d534bc5c66241c39a4 +Subproject commit 8a301b7e842f1daed478090c869207300972379f diff --git a/contrib/aws-s2n-tls b/contrib/aws-s2n-tls index 0f1ba9e5c4a..71f4794b758 160000 --- a/contrib/aws-s2n-tls +++ b/contrib/aws-s2n-tls @@ -1 +1 @@ -Subproject commit 0f1ba9e5c4a67cb3898de0c0b4f911d4194dc8de +Subproject commit 71f4794b7580cf780eb4aca77d69eded5d3c7bb4 diff --git a/contrib/boost b/contrib/boost index 03d9ec9cd15..aec12eea7fc 160000 --- a/contrib/boost +++ b/contrib/boost @@ -1 +1 @@ -Subproject commit 03d9ec9cd159d14bd0b17c05138098451a1ea606 +Subproject commit aec12eea7fc762721ae16943d1361340c66c9c17 diff --git a/contrib/boost-cmake/CMakeLists.txt b/contrib/boost-cmake/CMakeLists.txt index 2a70c25ffe1..6f9dce0b042 100644 --- a/contrib/boost-cmake/CMakeLists.txt +++ b/contrib/boost-cmake/CMakeLists.txt @@ -92,6 +92,8 @@ add_library (boost::system ALIAS _boost_system) target_include_directories (_boost_system PRIVATE ${LIBRARY_DIR}) # context +option (BOOST_USE_UCONTEXT "Use ucontext_t for context switching of boost::fiber within boost::context" OFF) + enable_language(ASM) SET(ASM_OPTIONS "-x assembler-with-cpp") @@ -100,26 +102,20 @@ set (SRCS_CONTEXT "${LIBRARY_DIR}/libs/context/src/posix/stack_traits.cpp" ) -if (SANITIZE AND (SANITIZE STREQUAL "address" OR SANITIZE STREQUAL "thread")) - add_compile_definitions(BOOST_USE_UCONTEXT) - - if (SANITIZE STREQUAL "address") - add_compile_definitions(BOOST_USE_ASAN) - elseif (SANITIZE STREQUAL "thread") - add_compile_definitions(BOOST_USE_TSAN) - endif() - - set (SRCS_CONTEXT ${SRCS_CONTEXT} - "${LIBRARY_DIR}/libs/context/src/fiber.cpp" - "${LIBRARY_DIR}/libs/context/src/continuation.cpp" - ) -endif() if (ARCH_AARCH64) - set (SRCS_CONTEXT ${SRCS_CONTEXT} - "${LIBRARY_DIR}/libs/context/src/asm/jump_arm64_aapcs_elf_gas.S" - "${LIBRARY_DIR}/libs/context/src/asm/make_arm64_aapcs_elf_gas.S" - "${LIBRARY_DIR}/libs/context/src/asm/ontop_arm64_aapcs_elf_gas.S" - ) + if (OS_DARWIN) + set (SRCS_CONTEXT ${SRCS_CONTEXT} + "${LIBRARY_DIR}/libs/context/src/asm/jump_arm64_aapcs_macho_gas.S" + "${LIBRARY_DIR}/libs/context/src/asm/make_arm64_aapcs_macho_gas.S" + "${LIBRARY_DIR}/libs/context/src/asm/ontop_arm64_aapcs_macho_gas.S" + ) + else() + set (SRCS_CONTEXT ${SRCS_CONTEXT} + "${LIBRARY_DIR}/libs/context/src/asm/jump_arm64_aapcs_elf_gas.S" + "${LIBRARY_DIR}/libs/context/src/asm/make_arm64_aapcs_elf_gas.S" + "${LIBRARY_DIR}/libs/context/src/asm/ontop_arm64_aapcs_elf_gas.S" + ) + endif() elseif (ARCH_PPC64LE) set (SRCS_CONTEXT ${SRCS_CONTEXT} 
"${LIBRARY_DIR}/libs/context/src/asm/jump_ppc64_sysv_elf_gas.S" @@ -152,10 +148,27 @@ else() ) endif() +if (SANITIZE OR BOOST_USE_UCONTEXT) + list (APPEND SRCS_CONTEXT + "${LIBRARY_DIR}/libs/context/src/fiber.cpp" + "${LIBRARY_DIR}/libs/context/src/continuation.cpp" + ) +endif() + add_library (_boost_context ${SRCS_CONTEXT}) add_library (boost::context ALIAS _boost_context) target_include_directories (_boost_context PRIVATE ${LIBRARY_DIR}) +if (SANITIZE OR BOOST_USE_UCONTEXT) + target_compile_definitions(_boost_context PUBLIC BOOST_USE_UCONTEXT) +endif() + +if (SANITIZE STREQUAL "address") + target_compile_definitions(_boost_context PUBLIC BOOST_USE_ASAN) +elseif (SANITIZE STREQUAL "thread") + target_compile_definitions(_boost_context PUBLIC BOOST_USE_TSAN) +endif() + # coroutine set (SRCS_COROUTINE diff --git a/contrib/boringssl-cmake/CMakeLists.txt b/contrib/boringssl-cmake/CMakeLists.txt index 828919476a7..51137f6d04e 100644 --- a/contrib/boringssl-cmake/CMakeLists.txt +++ b/contrib/boringssl-cmake/CMakeLists.txt @@ -111,6 +111,8 @@ elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "mips") set(ARCH "generic") elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "ppc64le") set(ARCH "ppc64le") +elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "riscv64") + set(ARCH "riscv64") else() message(FATAL_ERROR "Unknown processor:" ${CMAKE_SYSTEM_PROCESSOR}) endif() diff --git a/contrib/cassandra-cmake/CMakeLists.txt b/contrib/cassandra-cmake/CMakeLists.txt index 59ff908b63a..32611e0e151 100644 --- a/contrib/cassandra-cmake/CMakeLists.txt +++ b/contrib/cassandra-cmake/CMakeLists.txt @@ -18,7 +18,7 @@ endif() # Need to use C++17 since the compilation is not possible with C++20 currently. set (CMAKE_CXX_STANDARD 17) -set(CASS_ROOT_DIR ${CMAKE_SOURCE_DIR}/contrib/cassandra) +set(CASS_ROOT_DIR ${PROJECT_SOURCE_DIR}/contrib/cassandra) set(CASS_SRC_DIR "${CASS_ROOT_DIR}/src") set(CASS_INCLUDE_DIR "${CASS_ROOT_DIR}/include") diff --git a/contrib/cctz b/contrib/cctz index 7c78edd52b4..5e05432420f 160000 --- a/contrib/cctz +++ b/contrib/cctz @@ -1 +1 @@ -Subproject commit 7c78edd52b4d65acc103c2f195818ffcabe6fe0d +Subproject commit 5e05432420f9692418e2e12aff09859e420b14a2 diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt index f1ef9b53f7d..10070fbd949 100644 --- a/contrib/cctz-cmake/CMakeLists.txt +++ b/contrib/cctz-cmake/CMakeLists.txt @@ -26,7 +26,7 @@ endif () # StorageSystemTimeZones.generated.cpp is autogenerated each time during a build # data in this file will be used to populate the system.time_zones table, this is specific to OS_LINUX # as the library that's built using embedded tzdata is also specific to OS_LINUX -set(SYSTEM_STORAGE_TZ_FILE "${CMAKE_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp") +set(SYSTEM_STORAGE_TZ_FILE "${PROJECT_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp") # remove existing copies so that its generated fresh on each build. 
file(REMOVE ${SYSTEM_STORAGE_TZ_FILE}) diff --git a/contrib/consistent-hashing/consistent_hashing.cpp b/contrib/consistent-hashing/consistent_hashing.cpp index 347456eede3..c21e57bdaef 100644 --- a/contrib/consistent-hashing/consistent_hashing.cpp +++ b/contrib/consistent-hashing/consistent_hashing.cpp @@ -8,7 +8,7 @@ /* * (all numbers are written in big-endian manner: the least significant digit on the right) - * (only bit representations are used - no hex or octal, leading zeroes are ommited) + * (only bit representations are used - no hex or octal, leading zeroes are omitted) * * Consistent hashing scheme: * diff --git a/contrib/croaring b/contrib/croaring index 2c867e9f9c9..f40ed52bcdd 160000 --- a/contrib/croaring +++ b/contrib/croaring @@ -1 +1 @@ -Subproject commit 2c867e9f9c9e2a3a7032791f94c4c7ae3013f6e0 +Subproject commit f40ed52bcdd635840a79877cef4857315dba817c diff --git a/contrib/croaring-cmake/CMakeLists.txt b/contrib/croaring-cmake/CMakeLists.txt index 0bb7d0bd221..794c0426b96 100644 --- a/contrib/croaring-cmake/CMakeLists.txt +++ b/contrib/croaring-cmake/CMakeLists.txt @@ -17,7 +17,8 @@ set(SRCS "${LIBRARY_DIR}/src/containers/run.c" "${LIBRARY_DIR}/src/roaring.c" "${LIBRARY_DIR}/src/roaring_priority_queue.c" - "${LIBRARY_DIR}/src/roaring_array.c") + "${LIBRARY_DIR}/src/roaring_array.c" + "${LIBRARY_DIR}/src/memory.c") add_library(_roaring ${SRCS}) diff --git a/contrib/curl b/contrib/curl index c12fb3ddaf4..b0edf0b7dae 160000 --- a/contrib/curl +++ b/contrib/curl @@ -1 +1 @@ -Subproject commit c12fb3ddaf48e709a7a4deaa55ec485e4df163ee +Subproject commit b0edf0b7dae44d9e66f270a257cf654b35d5263d diff --git a/contrib/curl-cmake/CMakeLists.txt b/contrib/curl-cmake/CMakeLists.txt index 8a570bd267c..70d9c2816dc 100644 --- a/contrib/curl-cmake/CMakeLists.txt +++ b/contrib/curl-cmake/CMakeLists.txt @@ -12,6 +12,9 @@ set (SRCS "${LIBRARY_DIR}/lib/noproxy.c" "${LIBRARY_DIR}/lib/idn.c" "${LIBRARY_DIR}/lib/cfilters.c" + "${LIBRARY_DIR}/lib/cf-socket.c" + "${LIBRARY_DIR}/lib/cf-haproxy.c" + "${LIBRARY_DIR}/lib/cf-https-connect.c" "${LIBRARY_DIR}/lib/file.c" "${LIBRARY_DIR}/lib/timeval.c" "${LIBRARY_DIR}/lib/base64.c" @@ -37,8 +40,8 @@ set (SRCS "${LIBRARY_DIR}/lib/strcase.c" "${LIBRARY_DIR}/lib/easy.c" "${LIBRARY_DIR}/lib/curl_fnmatch.c" + "${LIBRARY_DIR}/lib/curl_log.c" "${LIBRARY_DIR}/lib/fileinfo.c" - "${LIBRARY_DIR}/lib/wildcard.c" "${LIBRARY_DIR}/lib/krb5.c" "${LIBRARY_DIR}/lib/memdebug.c" "${LIBRARY_DIR}/lib/http_chunks.c" @@ -96,6 +99,7 @@ set (SRCS "${LIBRARY_DIR}/lib/rand.c" "${LIBRARY_DIR}/lib/curl_multibyte.c" "${LIBRARY_DIR}/lib/conncache.c" + "${LIBRARY_DIR}/lib/cf-h1-proxy.c" "${LIBRARY_DIR}/lib/http2.c" "${LIBRARY_DIR}/lib/smb.c" "${LIBRARY_DIR}/lib/curl_endian.c" @@ -113,12 +117,13 @@ set (SRCS "${LIBRARY_DIR}/lib/altsvc.c" "${LIBRARY_DIR}/lib/socketpair.c" "${LIBRARY_DIR}/lib/bufref.c" + "${LIBRARY_DIR}/lib/bufq.c" "${LIBRARY_DIR}/lib/dynbuf.c" + "${LIBRARY_DIR}/lib/dynhds.c" "${LIBRARY_DIR}/lib/hsts.c" "${LIBRARY_DIR}/lib/http_aws_sigv4.c" "${LIBRARY_DIR}/lib/mqtt.c" "${LIBRARY_DIR}/lib/rename.c" - "${LIBRARY_DIR}/lib/h2h3.c" "${LIBRARY_DIR}/lib/headers.c" "${LIBRARY_DIR}/lib/timediff.c" "${LIBRARY_DIR}/lib/vauth/vauth.c" @@ -133,6 +138,7 @@ set (SRCS "${LIBRARY_DIR}/lib/vauth/oauth2.c" "${LIBRARY_DIR}/lib/vauth/spnego_gssapi.c" "${LIBRARY_DIR}/lib/vauth/spnego_sspi.c" + "${LIBRARY_DIR}/lib/vquic/vquic.c" "${LIBRARY_DIR}/lib/vtls/openssl.c" "${LIBRARY_DIR}/lib/vtls/gtls.c" "${LIBRARY_DIR}/lib/vtls/vtls.c" @@ -147,9 +153,6 @@ set (SRCS "${LIBRARY_DIR}/lib/vtls/keylog.c" 
"${LIBRARY_DIR}/lib/vtls/x509asn1.c" "${LIBRARY_DIR}/lib/vtls/hostcheck.c" - "${LIBRARY_DIR}/lib/vquic/ngtcp2.c" - "${LIBRARY_DIR}/lib/vquic/quiche.c" - "${LIBRARY_DIR}/lib/vquic/msh3.c" "${LIBRARY_DIR}/lib/vssh/libssh2.c" "${LIBRARY_DIR}/lib/vssh/libssh.c" ) diff --git a/contrib/googletest-cmake/CMakeLists.txt b/contrib/googletest-cmake/CMakeLists.txt index 90fdde0c185..3905df03155 100644 --- a/contrib/googletest-cmake/CMakeLists.txt +++ b/contrib/googletest-cmake/CMakeLists.txt @@ -1,15 +1,30 @@ -set (SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/googletest/googletest") +set (SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/googletest") -add_library(_gtest "${SRC_DIR}/src/gtest-all.cc") +add_library(_gtest "${SRC_DIR}/googletest/src/gtest-all.cc") set_target_properties(_gtest PROPERTIES VERSION "1.0.0") target_compile_definitions (_gtest PUBLIC GTEST_HAS_POSIX_RE=0) -target_include_directories(_gtest SYSTEM PUBLIC "${SRC_DIR}/include") -target_include_directories(_gtest PRIVATE "${SRC_DIR}") +target_include_directories(_gtest SYSTEM PUBLIC "${SRC_DIR}/googletest/include") +target_include_directories(_gtest PRIVATE "${SRC_DIR}/googletest") -add_library(_gtest_main "${SRC_DIR}/src/gtest_main.cc") +add_library(_gtest_main "${SRC_DIR}/googletest/src/gtest_main.cc") set_target_properties(_gtest_main PROPERTIES VERSION "1.0.0") target_link_libraries(_gtest_main PUBLIC _gtest) add_library(_gtest_all INTERFACE) target_link_libraries(_gtest_all INTERFACE _gtest _gtest_main) add_library(ch_contrib::gtest_all ALIAS _gtest_all) + + +add_library(_gmock "${SRC_DIR}/googlemock/src/gmock-all.cc") +set_target_properties(_gmock PROPERTIES VERSION "1.0.0") +target_compile_definitions (_gmock PUBLIC GTEST_HAS_POSIX_RE=0) +target_include_directories(_gmock SYSTEM PUBLIC "${SRC_DIR}/googlemock/include" "${SRC_DIR}/googletest/include") +target_include_directories(_gmock PRIVATE "${SRC_DIR}/googlemock") + +add_library(_gmock_main "${SRC_DIR}/googlemock/src/gmock_main.cc") +set_target_properties(_gmock_main PROPERTIES VERSION "1.0.0") +target_link_libraries(_gmock_main PUBLIC _gmock) + +add_library(_gmock_all INTERFACE) +target_link_libraries(_gmock_all INTERFACE _gmock _gmock_main) +add_library(ch_contrib::gmock_all ALIAS _gmock_all) diff --git a/contrib/grpc-cmake/CMakeLists.txt b/contrib/grpc-cmake/CMakeLists.txt index b1ed7e464b6..09ed2fe3f80 100644 --- a/contrib/grpc-cmake/CMakeLists.txt +++ b/contrib/grpc-cmake/CMakeLists.txt @@ -1,9 +1,4 @@ -# disable grpc due to conflicts of abseil (required by grpc) dynamic annotations with libtsan.a -if (SANITIZE STREQUAL "thread" AND COMPILER_GCC) - set(ENABLE_GRPC_DEFAULT OFF) -else() - set(ENABLE_GRPC_DEFAULT ${ENABLE_LIBRARIES}) -endif() +set(ENABLE_GRPC_DEFAULT ${ENABLE_LIBRARIES}) option(ENABLE_GRPC "Use gRPC" ${ENABLE_GRPC_DEFAULT}) if(NOT ENABLE_GRPC) @@ -48,6 +43,9 @@ set(gRPC_ABSL_PROVIDER "clickhouse" CACHE STRING "" FORCE) # We don't want to build C# extensions. set(gRPC_BUILD_CSHARP_EXT OFF) +# TODO: Remove this. We generally like to compile with C++23 but grpc isn't ready yet. 
+set (CMAKE_CXX_STANDARD 20) + + set(_gRPC_CARES_LIBRARIES ch_contrib::c-ares) set(gRPC_CARES_PROVIDER "clickhouse" CACHE STRING "" FORCE) add_subdirectory("${_gRPC_SOURCE_DIR}" "${_gRPC_BINARY_DIR}") diff --git a/contrib/idxd-config-cmake/CMakeLists.txt b/contrib/idxd-config-cmake/CMakeLists.txt new file mode 100644 index 00000000000..030252ec8e6 --- /dev/null +++ b/contrib/idxd-config-cmake/CMakeLists.txt @@ -0,0 +1,23 @@ +## accel_config is the utility library required by the QPL-Deflate codec for controlling and configuring Intel® In-Memory Analytics Accelerator (Intel® IAA). +set (LIBACCEL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/idxd-config") +set (UUID_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl-cmake") +set (LIBACCEL_HEADER_DIR "${ClickHouse_SOURCE_DIR}/contrib/idxd-config-cmake/include") +set (SRCS + "${LIBACCEL_SOURCE_DIR}/accfg/lib/libaccfg.c" + "${LIBACCEL_SOURCE_DIR}/util/log.c" + "${LIBACCEL_SOURCE_DIR}/util/sysfs.c" +) + +add_library(_accel-config ${SRCS}) + +target_compile_options(_accel-config PRIVATE "-D_GNU_SOURCE") + +target_include_directories(_accel-config BEFORE + PRIVATE ${UUID_DIR} + PRIVATE ${LIBACCEL_HEADER_DIR} + PRIVATE ${LIBACCEL_SOURCE_DIR}) + +target_include_directories(_accel-config SYSTEM BEFORE + PUBLIC ${LIBACCEL_SOURCE_DIR}/accfg) + +add_library(ch_contrib::accel-config ALIAS _accel-config) diff --git a/contrib/qpl-cmake/idxd-header/config.h b/contrib/idxd-config-cmake/include/config.h similarity index 100% rename from contrib/qpl-cmake/idxd-header/config.h rename to contrib/idxd-config-cmake/include/config.h diff --git a/contrib/isa-l b/contrib/isa-l new file mode 160000 index 00000000000..9f2b68f0575 --- /dev/null +++ b/contrib/isa-l @@ -0,0 +1 @@ +Subproject commit 9f2b68f05752097f0f16632fc4a9a86950831efd diff --git a/contrib/isa-l-cmake/CMakeLists.txt b/contrib/isa-l-cmake/CMakeLists.txt new file mode 100644 index 00000000000..d4d6d648268 --- /dev/null +++ b/contrib/isa-l-cmake/CMakeLists.txt @@ -0,0 +1,203 @@ +option(ENABLE_ISAL_LIBRARY "Enable ISA-L library" ${ENABLE_LIBRARIES}) +if (ARCH_AARCH64) + # Disable the ISA-L library on aarch64. + set (ENABLE_ISAL_LIBRARY OFF) +endif () + +if (NOT ENABLE_ISAL_LIBRARY) + message(STATUS "Not using isa-l") + return() +endif() + +set(ISAL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/isa-l") + +# The YASM and NASM assemblers are somewhat mutually compatible. ISA-L specifically needs NASM. If only YASM is installed, then check_language(ASM_NASM) +# below happily finds YASM, leading to weird errors at build time. Therefore, do an explicit check for NASM here.
+find_program(NASM_PATH NAMES nasm) +if (NOT NASM_PATH) + message(FATAL_ERROR "Please install NASM from 'https://www.nasm.us/' because the NASM compiler cannot be found!") +endif () + +include(CheckLanguage) +check_language(ASM_NASM) +if(NOT CMAKE_ASM_NASM_COMPILER) + message(FATAL_ERROR "Please install NASM from 'https://www.nasm.us/' because the NASM compiler cannot be found!") +endif() + +enable_language(ASM_NASM) + +set(ISAL_C_SRC + ${ISAL_SOURCE_DIR}/crc/crc_base_aliases.c + ${ISAL_SOURCE_DIR}/crc/crc_base.c + ${ISAL_SOURCE_DIR}/crc/crc64_base.c + ${ISAL_SOURCE_DIR}/erasure_code/ec_base.c + ${ISAL_SOURCE_DIR}/erasure_code/ec_base_aliases.c + ${ISAL_SOURCE_DIR}/erasure_code/ec_highlevel_func.c + ${ISAL_SOURCE_DIR}/erasure_code/gen_rs_matrix_limits.c + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_dot_prod_1tbl.c + ${ISAL_SOURCE_DIR}/igzip/adler32_base.c + ${ISAL_SOURCE_DIR}/igzip/encode_df.c + ${ISAL_SOURCE_DIR}/igzip/flatten_ll.c + ${ISAL_SOURCE_DIR}/igzip/generate_custom_hufftables.c + ${ISAL_SOURCE_DIR}/igzip/generate_static_inflate.c + ${ISAL_SOURCE_DIR}/igzip/huff_codes.c + ${ISAL_SOURCE_DIR}/igzip/hufftables_c.c + ${ISAL_SOURCE_DIR}/igzip/igzip_base_aliases.c + ${ISAL_SOURCE_DIR}/igzip/igzip_base.c + ${ISAL_SOURCE_DIR}/igzip/igzip_icf_base.c + ${ISAL_SOURCE_DIR}/igzip/igzip_icf_body.c + ${ISAL_SOURCE_DIR}/igzip/igzip_inflate.c + ${ISAL_SOURCE_DIR}/igzip/igzip.c + ${ISAL_SOURCE_DIR}/mem/mem_zero_detect_base_aliases.c + ${ISAL_SOURCE_DIR}/mem/mem_zero_detect_base.c + ${ISAL_SOURCE_DIR}/programs/igzip_cli.c + ${ISAL_SOURCE_DIR}/raid/raid_base_aliases.c + ${ISAL_SOURCE_DIR}/raid/raid_base.c +) + +set(ISAL_ASM_SRC + ${ISAL_SOURCE_DIR}/crc/crc_multibinary.asm + ${ISAL_SOURCE_DIR}/crc/crc16_t10dif_01.asm + ${ISAL_SOURCE_DIR}/crc/crc16_t10dif_02.asm + ${ISAL_SOURCE_DIR}/crc/crc16_t10dif_by4.asm + ${ISAL_SOURCE_DIR}/crc/crc16_t10dif_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc16_t10dif_copy_by4_02.asm + ${ISAL_SOURCE_DIR}/crc/crc16_t10dif_copy_by4.asm + ${ISAL_SOURCE_DIR}/crc/crc32_gzip_refl_by8_02.asm + ${ISAL_SOURCE_DIR}/crc/crc32_gzip_refl_by8.asm + ${ISAL_SOURCE_DIR}/crc/crc32_gzip_refl_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc32_ieee_01.asm + ${ISAL_SOURCE_DIR}/crc/crc32_ieee_02.asm + ${ISAL_SOURCE_DIR}/crc/crc32_ieee_by4.asm + ${ISAL_SOURCE_DIR}/crc/crc32_ieee_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc32_iscsi_00.asm + ${ISAL_SOURCE_DIR}/crc/crc32_iscsi_01.asm + ${ISAL_SOURCE_DIR}/crc/crc32_iscsi_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc64_ecma_norm_by8.asm + ${ISAL_SOURCE_DIR}/crc/crc64_ecma_norm_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc64_ecma_refl_by8.asm + ${ISAL_SOURCE_DIR}/crc/crc64_ecma_refl_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc64_iso_norm_by8.asm + ${ISAL_SOURCE_DIR}/crc/crc64_iso_norm_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc64_iso_refl_by8.asm + ${ISAL_SOURCE_DIR}/crc/crc64_iso_refl_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc64_jones_norm_by8.asm + ${ISAL_SOURCE_DIR}/crc/crc64_jones_norm_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc64_jones_refl_by8.asm + ${ISAL_SOURCE_DIR}/crc/crc64_jones_refl_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc64_multibinary.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_dot_prod_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_dot_prod_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_dot_prod_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_dot_prod_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/ec_multibinary.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_mad_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_mad_avx2.asm +
${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_mad_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_mad_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_dot_prod_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_dot_prod_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_dot_prod_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_dot_prod_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_mad_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_mad_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_mad_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_mad_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_dot_prod_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_dot_prod_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_dot_prod_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_dot_prod_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_mad_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_mad_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_mad_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_mad_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_dot_prod_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_dot_prod_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_dot_prod_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_dot_prod_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_mad_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_mad_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_mad_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_mad_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_dot_prod_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_dot_prod_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_dot_prod_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_dot_prod_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_mad_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_mad_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_mad_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_mad_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_dot_prod_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_dot_prod_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_dot_prod_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_dot_prod_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mad_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mad_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mad_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mad_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mul_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mul_sse.asm + ${ISAL_SOURCE_DIR}/igzip/adler32_avx2_4.asm + ${ISAL_SOURCE_DIR}/igzip/adler32_sse.asm + ${ISAL_SOURCE_DIR}/igzip/bitbuf2.asm + ${ISAL_SOURCE_DIR}/igzip/encode_df_04.asm + ${ISAL_SOURCE_DIR}/igzip/encode_df_06.asm + ${ISAL_SOURCE_DIR}/igzip/heap_macros.asm + ${ISAL_SOURCE_DIR}/igzip/huffman.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_body.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_compare_types.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_decode_block_stateless_01.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_decode_block_stateless_04.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_deflate_hash.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_finish.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_gen_icf_map_lh1_04.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_gen_icf_map_lh1_06.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_icf_body_h1_gr_bt.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_icf_finish.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_inflate_multibinary.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_multibinary.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_set_long_icf_fg_04.asm + 
${ISAL_SOURCE_DIR}/igzip/igzip_set_long_icf_fg_06.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_update_histogram_01.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_update_histogram_04.asm + ${ISAL_SOURCE_DIR}/igzip/lz0a_const.asm + ${ISAL_SOURCE_DIR}/igzip/options.asm + ${ISAL_SOURCE_DIR}/igzip/proc_heap.asm + ${ISAL_SOURCE_DIR}/igzip/rfc1951_lookup.asm + ${ISAL_SOURCE_DIR}/igzip/stdmac.asm + ${ISAL_SOURCE_DIR}/mem/mem_multibinary.asm + ${ISAL_SOURCE_DIR}/mem/mem_zero_detect_avx.asm + ${ISAL_SOURCE_DIR}/mem/mem_zero_detect_avx2.asm + ${ISAL_SOURCE_DIR}/mem/mem_zero_detect_avx512.asm + ${ISAL_SOURCE_DIR}/mem/mem_zero_detect_sse.asm + ${ISAL_SOURCE_DIR}/raid/pq_check_sse.asm + ${ISAL_SOURCE_DIR}/raid/pq_gen_avx.asm + ${ISAL_SOURCE_DIR}/raid/pq_gen_avx2.asm + ${ISAL_SOURCE_DIR}/raid/pq_gen_avx512.asm + ${ISAL_SOURCE_DIR}/raid/pq_gen_sse.asm + ${ISAL_SOURCE_DIR}/raid/raid_multibinary.asm + ${ISAL_SOURCE_DIR}/raid/xor_check_sse.asm + ${ISAL_SOURCE_DIR}/raid/xor_gen_avx.asm + ${ISAL_SOURCE_DIR}/raid/xor_gen_avx512.asm + ${ISAL_SOURCE_DIR}/raid/xor_gen_sse.asm +) + +# Adding ISA-L library target +add_library(_isal ${ISAL_C_SRC} ${ISAL_ASM_SRC}) + +# Setting external and internal interfaces for ISA-L library +target_include_directories(_isal + PUBLIC ${ISAL_SOURCE_DIR}/include + PUBLIC ${ISAL_SOURCE_DIR}/igzip + PUBLIC ${ISAL_SOURCE_DIR}/crc + PUBLIC ${ISAL_SOURCE_DIR}/erasure_code) + +# We must remove "-fno-sanitize=undefined" from COMPILE_OPTIONS here. +# Otherwise the nasm compiler fails because it does not recognize "-fno-sanitize=undefined". +if (SANITIZE STREQUAL "undefined") + get_target_property(target_options _isal COMPILE_OPTIONS) + list(REMOVE_ITEM target_options "-fno-sanitize=undefined") + set_property(TARGET _isal PROPERTY COMPILE_OPTIONS ${target_options}) +endif() + +add_library(ch_contrib::isal ALIAS _isal) diff --git a/contrib/krb5 b/contrib/krb5 index f8262a1b548..b56ce6ba690 160000 --- a/contrib/krb5 +++ b/contrib/krb5 @@ -1 +1 @@ -Subproject commit f8262a1b548eb29d97e059260042036255d07f8d +Subproject commit b56ce6ba690e1f320df1a64afa34980c3e462617 diff --git a/contrib/krb5-cmake/CMakeLists.txt b/contrib/krb5-cmake/CMakeLists.txt index ceaa270ad85..44058456ed4 100644 --- a/contrib/krb5-cmake/CMakeLists.txt +++ b/contrib/krb5-cmake/CMakeLists.txt @@ -15,10 +15,6 @@ if(NOT AWK_PROGRAM) message(FATAL_ERROR "You need the awk program to build ClickHouse with krb5 enabled.") endif() -if (NOT (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC)) - add_compile_definitions(USE_BORINGSSL=1) -endif () - set(KRB5_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/krb5/src") set(KRB5_ET_BIN_DIR "${CMAKE_CURRENT_BINARY_DIR}/include_private") @@ -160,6 +156,13 @@ set(ALL_SRCS # "${KRB5_SOURCE_DIR}/lib/gssapi/spnego/negoex_trace.c" + "${KRB5_SOURCE_DIR}/lib/crypto/builtin/kdf.c" + "${KRB5_SOURCE_DIR}/lib/crypto/builtin/cmac.c" + "${KRB5_SOURCE_DIR}/lib/crypto/builtin/des/des_keys.c" + "${KRB5_SOURCE_DIR}/lib/crypto/builtin/des/f_parity.c" + "${KRB5_SOURCE_DIR}/lib/crypto/builtin/enc_provider/rc4.c" + "${KRB5_SOURCE_DIR}/lib/crypto/builtin/hash_provider/hash_md4.c" + "${KRB5_SOURCE_DIR}/lib/crypto/builtin/md4/md4.c" "${KRB5_SOURCE_DIR}/lib/crypto/krb/prng.c" "${KRB5_SOURCE_DIR}/lib/crypto/krb/enc_dk_cmac.c" # "${KRB5_SOURCE_DIR}/lib/crypto/krb/crc32.c" @@ -183,7 +186,6 @@ set(ALL_SRCS "${KRB5_SOURCE_DIR}/lib/crypto/krb/block_size.c" "${KRB5_SOURCE_DIR}/lib/crypto/krb/string_to_key.c" "${KRB5_SOURCE_DIR}/lib/crypto/krb/verify_checksum.c" - "${KRB5_SOURCE_DIR}/lib/crypto/krb/crypto_libinit.c"
"${KRB5_SOURCE_DIR}/lib/crypto/krb/derive.c" "${KRB5_SOURCE_DIR}/lib/crypto/krb/random_to_key.c" "${KRB5_SOURCE_DIR}/lib/crypto/krb/verify_checksum_iov.c" @@ -217,9 +219,7 @@ set(ALL_SRCS "${KRB5_SOURCE_DIR}/lib/crypto/krb/s2k_rc4.c" "${KRB5_SOURCE_DIR}/lib/crypto/krb/valid_cksumtype.c" "${KRB5_SOURCE_DIR}/lib/crypto/krb/nfold.c" - "${KRB5_SOURCE_DIR}/lib/crypto/krb/prng_fortuna.c" "${KRB5_SOURCE_DIR}/lib/crypto/krb/encrypt_length.c" - "${KRB5_SOURCE_DIR}/lib/crypto/krb/cmac.c" "${KRB5_SOURCE_DIR}/lib/crypto/krb/keyblocks.c" "${KRB5_SOURCE_DIR}/lib/crypto/krb/prf_rc4.c" "${KRB5_SOURCE_DIR}/lib/crypto/krb/s2k_pbkdf2.c" @@ -227,12 +227,11 @@ set(ALL_SRCS # "${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/des.c" "${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/rc4.c" "${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/des3.c" - #"${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/camellia.c" + "${KRB5_SOURCE_DIR}/lib/crypto/openssl/cmac.c" "${KRB5_SOURCE_DIR}/lib/crypto/openssl/sha256.c" "${KRB5_SOURCE_DIR}/lib/crypto/openssl/hmac.c" + "${KRB5_SOURCE_DIR}/lib/crypto/openssl/kdf.c" "${KRB5_SOURCE_DIR}/lib/crypto/openssl/pbkdf2.c" - "${KRB5_SOURCE_DIR}/lib/crypto/openssl/init.c" - "${KRB5_SOURCE_DIR}/lib/crypto/openssl/stubs.c" # "${KRB5_SOURCE_DIR}/lib/crypto/openssl/hash_provider/hash_crc32.c" "${KRB5_SOURCE_DIR}/lib/crypto/openssl/hash_provider/hash_evp.c" "${KRB5_SOURCE_DIR}/lib/crypto/openssl/des/des_keys.c" @@ -312,7 +311,6 @@ set(ALL_SRCS "${KRB5_SOURCE_DIR}/lib/krb5/krb/allow_weak.c" "${KRB5_SOURCE_DIR}/lib/krb5/krb/mk_rep.c" "${KRB5_SOURCE_DIR}/lib/krb5/krb/mk_priv.c" - "${KRB5_SOURCE_DIR}/lib/krb5/krb/s4u_authdata.c" "${KRB5_SOURCE_DIR}/lib/krb5/krb/preauth_otp.c" "${KRB5_SOURCE_DIR}/lib/krb5/krb/init_keyblock.c" "${KRB5_SOURCE_DIR}/lib/krb5/krb/ser_addr.c" @@ -476,6 +474,14 @@ set(ALL_SRCS "${KRB5_SOURCE_DIR}/lib/krb5/krb5_libinit.c" ) +if (NOT (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC)) + add_compile_definitions(USE_BORINGSSL=1) +else() + set(ALL_SRCS ${ALL_SRCS} + "${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/camellia.c" + ) +endif() + add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/compile_et" COMMAND /bin/sh @@ -675,6 +681,7 @@ target_include_directories(_krb5 PRIVATE "${KRB5_SOURCE_DIR}/lib/gssapi/krb5" "${KRB5_SOURCE_DIR}/lib/gssapi/spnego" "${KRB5_SOURCE_DIR}/util/et" + "${KRB5_SOURCE_DIR}/lib/crypto/builtin/md4" "${KRB5_SOURCE_DIR}/lib/crypto/openssl" "${KRB5_SOURCE_DIR}/lib/crypto/krb" "${KRB5_SOURCE_DIR}/util/profile" @@ -688,6 +695,7 @@ target_include_directories(_krb5 PRIVATE target_compile_definitions(_krb5 PRIVATE KRB5_PRIVATE + CRYPTO_OPENSSL _GSS_STATIC_LINK=1 KRB5_DEPRECATED=1 LOCALEDIR="/usr/local/share/locale" diff --git a/contrib/libbcrypt b/contrib/libbcrypt new file mode 160000 index 00000000000..8aa32ad94eb --- /dev/null +++ b/contrib/libbcrypt @@ -0,0 +1 @@ +Subproject commit 8aa32ad94ebe06b76853b0767c910c9fbf7ccef4 diff --git a/contrib/libbcrypt-cmake/CMakeLists.txt b/contrib/libbcrypt-cmake/CMakeLists.txt new file mode 100644 index 00000000000..d40d7f9195e --- /dev/null +++ b/contrib/libbcrypt-cmake/CMakeLists.txt @@ -0,0 +1,19 @@ +option(ENABLE_BCRYPT "Enable bcrypt" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_BCRYPT) + message(STATUS "Not using bcrypt") + return() +endif() + +set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/libbcrypt") + +set(SRCS + "${LIBRARY_DIR}/bcrypt.c" + "${LIBRARY_DIR}/crypt_blowfish/crypt_blowfish.c" + "${LIBRARY_DIR}/crypt_blowfish/crypt_gensalt.c" + "${LIBRARY_DIR}/crypt_blowfish/wrapper.c" +) + 
+add_library(_bcrypt ${SRCS}) +target_include_directories(_bcrypt SYSTEM PUBLIC "${LIBRARY_DIR}") +add_library(ch_contrib::bcrypt ALIAS _bcrypt) diff --git a/contrib/libcxx-cmake/CMakeLists.txt b/contrib/libcxx-cmake/CMakeLists.txt index 21ed76f8b6f..a13e4f0f60a 100644 --- a/contrib/libcxx-cmake/CMakeLists.txt +++ b/contrib/libcxx-cmake/CMakeLists.txt @@ -69,11 +69,6 @@ if (USE_MUSL) target_compile_definitions(cxx PUBLIC -D_LIBCPP_HAS_MUSL_LIBC=1) endif () -# Override the deduced attribute support that causes error. -if (OS_DARWIN AND COMPILER_GCC) - add_compile_definitions(_LIBCPP_INIT_PRIORITY_MAX) -endif () - target_compile_options(cxx PUBLIC $<$<COMPILE_LANGUAGE:CXX>:-nostdinc++>) # Third party library may have substandard code. @@ -84,11 +79,6 @@ target_compile_definitions(cxx PUBLIC -D_LIBCPP_ENABLE_THREAD_SAFETY_ANNOTATIONS target_link_libraries(cxx PUBLIC cxxabi) -# For __udivmodti4, __divmodti4. -if (OS_DARWIN AND COMPILER_GCC) - target_link_libraries(cxx PRIVATE gcc) -endif () - install( TARGETS cxx EXPORT global diff --git a/contrib/libfarmhash/CMakeLists.txt b/contrib/libfarmhash/CMakeLists.txt index a0533a93f17..436bc3d0108 100644 --- a/contrib/libfarmhash/CMakeLists.txt +++ b/contrib/libfarmhash/CMakeLists.txt @@ -6,6 +6,10 @@ if (MSVC) target_compile_definitions (_farmhash PRIVATE FARMHASH_NO_BUILTIN_EXPECT=1) endif () +if (ARCH_S390X) + add_compile_definitions(WORDS_BIGENDIAN) +endif () + target_include_directories (_farmhash BEFORE PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) add_library(ch_contrib::farmhash ALIAS _farmhash) diff --git a/contrib/libfiu b/contrib/libfiu new file mode 160000 index 00000000000..b85edbde4cf --- /dev/null +++ b/contrib/libfiu @@ -0,0 +1 @@ +Subproject commit b85edbde4cf974b1b40d27828a56f0505f4e2ee5 diff --git a/contrib/libfiu-cmake/CMakeLists.txt b/contrib/libfiu-cmake/CMakeLists.txt new file mode 100644 index 00000000000..e805491edbb --- /dev/null +++ b/contrib/libfiu-cmake/CMakeLists.txt @@ -0,0 +1,20 @@ +if (NOT ENABLE_FIU) + message (STATUS "Not using fiu") + return () +endif () + +set(FIU_DIR "${ClickHouse_SOURCE_DIR}/contrib/libfiu/") + +set(FIU_SOURCES + ${FIU_DIR}/libfiu/fiu.c + ${FIU_DIR}/libfiu/fiu-rc.c + ${FIU_DIR}/libfiu/backtrace.c + ${FIU_DIR}/libfiu/wtable.c +) + +set(FIU_HEADERS "${FIU_DIR}/libfiu") + +add_library(_fiu ${FIU_SOURCES}) +target_compile_definitions(_fiu PUBLIC DUMMY_BACKTRACE) +target_include_directories(_fiu PUBLIC ${FIU_HEADERS}) +add_library(ch_contrib::fiu ALIAS _fiu) diff --git a/contrib/libhdfs3 b/contrib/libhdfs3 index 9ee3ce77215..164b89253fa 160000 --- a/contrib/libhdfs3 +++ b/contrib/libhdfs3 @@ -1 +1 @@ -Subproject commit 9ee3ce77215fca83b7fdfcfe2186a3db0d0bdb74 +Subproject commit 164b89253fad7991bce77882f01b51ab81d19f3d diff --git a/contrib/libhdfs3-cmake/CMake/CMakeTestCompileStrerror.c b/contrib/libhdfs3-cmake/CMake/CMakeTestCompileStrerror.c deleted file mode 100644 index 0ef4eda583e..00000000000 --- a/contrib/libhdfs3-cmake/CMake/CMakeTestCompileStrerror.c +++ /dev/null @@ -1,10 +0,0 @@ -#include <string.h> - -int main() -{ - // We can't test "char *p = strerror_r()" because that only causes a - // compiler warning when strerror_r returns an integer.
- char *buf = 0; - int i = strerror_r(0, buf, 100); - return i; -} diff --git a/contrib/libhdfs3-cmake/CMake/Functions.cmake b/contrib/libhdfs3-cmake/CMake/Functions.cmake deleted file mode 100644 index a771b6043fb..00000000000 --- a/contrib/libhdfs3-cmake/CMake/Functions.cmake +++ /dev/null @@ -1,46 +0,0 @@ -FUNCTION(AUTO_SOURCES RETURN_VALUE PATTERN SOURCE_SUBDIRS) - - IF ("${SOURCE_SUBDIRS}" STREQUAL "RECURSE") - SET(PATH ".") - IF (${ARGC} EQUAL 4) - LIST(GET ARGV 3 PATH) - ENDIF () - ENDIF() - - IF ("${SOURCE_SUBDIRS}" STREQUAL "RECURSE") - UNSET(${RETURN_VALUE}) - FILE(GLOB SUBDIR_FILES "${PATH}/${PATTERN}") - LIST(APPEND ${RETURN_VALUE} ${SUBDIR_FILES}) - - FILE(GLOB SUBDIRS RELATIVE ${PATH} ${PATH}/*) - - FOREACH(DIR ${SUBDIRS}) - IF (IS_DIRECTORY ${PATH}/${DIR}) - IF (NOT "${DIR}" STREQUAL "CMAKEFILES") - FILE(GLOB_RECURSE SUBDIR_FILES "${PATH}/${DIR}/${PATTERN}") - LIST(APPEND ${RETURN_VALUE} ${SUBDIR_FILES}) - ENDIF() - ENDIF() - ENDFOREACH() - ELSE () - FILE(GLOB ${RETURN_VALUE} "${PATTERN}") - - FOREACH (PATH ${SOURCE_SUBDIRS}) - FILE(GLOB SUBDIR_FILES "${PATH}/${PATTERN}") - LIST(APPEND ${RETURN_VALUE} ${SUBDIR_FILES}) - ENDFOREACH(PATH ${SOURCE_SUBDIRS}) - ENDIF () - - IF (${FILTER_OUT}) - LIST(REMOVE_ITEM ${RETURN_VALUE} ${FILTER_OUT}) - ENDIF() - - SET(${RETURN_VALUE} ${${RETURN_VALUE}} PARENT_SCOPE) -ENDFUNCTION(AUTO_SOURCES) - -FUNCTION(CONTAINS_STRING FILE SEARCH RETURN_VALUE) - FILE(STRINGS ${FILE} FILE_CONTENTS REGEX ".*${SEARCH}.*") - IF (FILE_CONTENTS) - SET(${RETURN_VALUE} TRUE PARENT_SCOPE) - ENDIF() -ENDFUNCTION(CONTAINS_STRING) diff --git a/contrib/libhdfs3-cmake/CMake/Options.cmake b/contrib/libhdfs3-cmake/CMake/Options.cmake deleted file mode 100644 index 933b24fb9b5..00000000000 --- a/contrib/libhdfs3-cmake/CMake/Options.cmake +++ /dev/null @@ -1,44 +0,0 @@ -OPTION(ENABLE_SSE "enable SSE4.2 builtin function" ON) - -INCLUDE (CheckFunctionExists) -CHECK_FUNCTION_EXISTS(dladdr HAVE_DLADDR) -CHECK_FUNCTION_EXISTS(nanosleep HAVE_NANOSLEEP) - -SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-aliasing") -SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-strict-aliasing") - -IF(ENABLE_SSE STREQUAL ON AND ARCH_AMD64) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2") -ENDIF() - -IF(NOT TEST_HDFS_PREFIX) -SET(TEST_HDFS_PREFIX "./" CACHE STRING "default directory prefix used for test." 
FORCE) -ENDIF(NOT TEST_HDFS_PREFIX) - -ADD_DEFINITIONS(-DTEST_HDFS_PREFIX="${TEST_HDFS_PREFIX}") -ADD_DEFINITIONS(-D__STDC_FORMAT_MACROS) -ADD_DEFINITIONS(-D_GNU_SOURCE) -ADD_DEFINITIONS(-D_GLIBCXX_USE_NANOSLEEP) - -TRY_COMPILE(STRERROR_R_RETURN_INT - ${CMAKE_CURRENT_BINARY_DIR} - "${CMAKE_CURRENT_SOURCE_DIR}/CMake/CMakeTestCompileStrerror.c" - CMAKE_FLAGS "-DCMAKE_CXX_LINK_EXECUTABLE='echo not linking now...'" - OUTPUT_VARIABLE OUTPUT) - -MESSAGE(STATUS "Checking whether strerror_r returns an int") - -IF(STRERROR_R_RETURN_INT) - MESSAGE(STATUS "Checking whether strerror_r returns an int -- yes") -ELSE(STRERROR_R_RETURN_INT) - MESSAGE(STATUS "Checking whether strerror_r returns an int -- no") -ENDIF(STRERROR_R_RETURN_INT) - -set(HAVE_STEADY_CLOCK 1) -set(HAVE_NESTED_EXCEPTION 1) - -SET(HAVE_BOOST_CHRONO 0) -SET(HAVE_BOOST_ATOMIC 0) - -SET(HAVE_STD_CHRONO 1) -SET(HAVE_STD_ATOMIC 1) diff --git a/contrib/libhdfs3-cmake/CMake/Platform.cmake b/contrib/libhdfs3-cmake/CMake/Platform.cmake deleted file mode 100644 index fec1d974519..00000000000 --- a/contrib/libhdfs3-cmake/CMake/Platform.cmake +++ /dev/null @@ -1,42 +0,0 @@ -IF(CMAKE_SYSTEM_NAME STREQUAL "Linux") - SET(OS_LINUX true CACHE INTERNAL "Linux operating system") -ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Darwin") - SET(OS_MACOSX true CACHE INTERNAL "Mac Darwin operating system") -ELSE(CMAKE_SYSTEM_NAME STREQUAL "Linux") - MESSAGE(FATAL_ERROR "Unsupported OS: \"${CMAKE_SYSTEM_NAME}\"") -ENDIF(CMAKE_SYSTEM_NAME STREQUAL "Linux") - -IF(CMAKE_COMPILER_IS_GNUCXX) - EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} -dumpfullversion OUTPUT_VARIABLE GCC_COMPILER_VERSION) - - IF (NOT GCC_COMPILER_VERSION) - EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_COMPILER_VERSION) - - IF (NOT GCC_COMPILER_VERSION) - MESSAGE(FATAL_ERROR "Cannot get gcc version") - ENDIF (NOT GCC_COMPILER_VERSION) - ENDIF (NOT GCC_COMPILER_VERSION) - - STRING(REGEX MATCHALL "[0-9]+" GCC_COMPILER_VERSION ${GCC_COMPILER_VERSION}) - - LIST(LENGTH GCC_COMPILER_VERSION GCC_COMPILER_VERSION_LENGTH) - LIST(GET GCC_COMPILER_VERSION 0 GCC_COMPILER_VERSION_MAJOR) - if (GCC_COMPILER_VERSION_LENGTH GREATER 1) - LIST(GET GCC_COMPILER_VERSION 1 GCC_COMPILER_VERSION_MINOR) - else () - set (GCC_COMPILER_VERSION_MINOR 0) - endif () - - SET(GCC_COMPILER_VERSION_MAJOR ${GCC_COMPILER_VERSION_MAJOR} CACHE INTERNAL "gcc major version") - SET(GCC_COMPILER_VERSION_MINOR ${GCC_COMPILER_VERSION_MINOR} CACHE INTERNAL "gcc minor version") - - MESSAGE(STATUS "checking compiler: GCC (${GCC_COMPILER_VERSION_MAJOR}.${GCC_COMPILER_VERSION_MINOR}.${GCC_COMPILER_VERSION_PATCH})") -ELSE(CMAKE_COMPILER_IS_GNUCXX) - EXECUTE_PROCESS(COMMAND ${CMAKE_C_COMPILER} --version OUTPUT_VARIABLE COMPILER_OUTPUT) - IF(COMPILER_OUTPUT MATCHES "clang") - SET(CMAKE_COMPILER_IS_CLANG true CACHE INTERNAL "using clang as compiler") - MESSAGE(STATUS "checking compiler: CLANG") - ELSE(COMPILER_OUTPUT MATCHES "clang") - MESSAGE(FATAL_ERROR "Unsupported compiler: \"${CMAKE_CXX_COMPILER}\"") - ENDIF(COMPILER_OUTPUT MATCHES "clang") -ENDIF(CMAKE_COMPILER_IS_GNUCXX) diff --git a/contrib/libhdfs3-cmake/CMakeLists.txt b/contrib/libhdfs3-cmake/CMakeLists.txt index c22cac731fe..e2f122e282a 100644 --- a/contrib/libhdfs3-cmake/CMakeLists.txt +++ b/contrib/libhdfs3-cmake/CMakeLists.txt @@ -21,10 +21,17 @@ set(HDFS3_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/libhdfs3") set(HDFS3_SOURCE_DIR "${HDFS3_ROOT_DIR}/src") set(HDFS3_COMMON_DIR "${HDFS3_SOURCE_DIR}/common") -# module -set(CMAKE_MODULE_PATH 
"${CMAKE_CURRENT_SOURCE_DIR}/CMake" ${CMAKE_MODULE_PATH}) -include(Platform) -include(Options) +ADD_DEFINITIONS(-DTEST_HDFS_PREFIX="${TEST_HDFS_PREFIX}") +ADD_DEFINITIONS(-D__STDC_FORMAT_MACROS) +ADD_DEFINITIONS(-D_GNU_SOURCE) +ADD_DEFINITIONS(-D_GLIBCXX_USE_NANOSLEEP) +ADD_DEFINITIONS(-DHAVE_NANOSLEEP) +set(HAVE_STEADY_CLOCK 1) +set(HAVE_NESTED_EXCEPTION 1) +SET(HAVE_BOOST_CHRONO 0) +SET(HAVE_BOOST_ATOMIC 0) +SET(HAVE_STD_CHRONO 1) +SET(HAVE_STD_ATOMIC 1) # source set(PROTO_FILES @@ -70,6 +77,30 @@ set(SRCS "${HDFS3_SOURCE_DIR}/client/Token.cpp" "${HDFS3_SOURCE_DIR}/client/PacketPool.cpp" "${HDFS3_SOURCE_DIR}/client/OutputStream.cpp" + "${HDFS3_SOURCE_DIR}/client/AbstractNativeRawDecoder.cpp" + "${HDFS3_SOURCE_DIR}/client/AbstractNativeRawEncoder.cpp" + "${HDFS3_SOURCE_DIR}/client/ByteBufferDecodingState.cpp" + "${HDFS3_SOURCE_DIR}/client/ByteBufferEncodingState.cpp" + "${HDFS3_SOURCE_DIR}/client/CoderUtil.cpp" + "${HDFS3_SOURCE_DIR}/client/ECChunk.cpp" + "${HDFS3_SOURCE_DIR}/client/ErasureCoderOptions.cpp" + "${HDFS3_SOURCE_DIR}/client/GF256.cpp" + "${HDFS3_SOURCE_DIR}/client/GaloisField.cpp" + "${HDFS3_SOURCE_DIR}/client/NativeRSRawDecoder.cpp" + "${HDFS3_SOURCE_DIR}/client/NativeRSRawEncoder.cpp" + "${HDFS3_SOURCE_DIR}/client/Preconditions.cpp" + "${HDFS3_SOURCE_DIR}/client/RSUtil.cpp" + "${HDFS3_SOURCE_DIR}/client/RawErasureCoderFactory.cpp" + "${HDFS3_SOURCE_DIR}/client/RawErasureDecoder.cpp" + "${HDFS3_SOURCE_DIR}/client/RawErasureEncoder.cpp" + "${HDFS3_SOURCE_DIR}/client/StatefulStripeReader.cpp" + "${HDFS3_SOURCE_DIR}/client/StripeReader.cpp" + "${HDFS3_SOURCE_DIR}/client/StripedBlockUtil.cpp" + "${HDFS3_SOURCE_DIR}/client/StripedInputStreamImpl.cpp" + "${HDFS3_SOURCE_DIR}/client/StripedOutputStreamImpl.cpp" + "${HDFS3_SOURCE_DIR}/client/SystemECPolicies.cpp" + "${HDFS3_SOURCE_DIR}/client/dump.cpp" + "${HDFS3_SOURCE_DIR}/client/erasure_coder.cpp" "${HDFS3_SOURCE_DIR}/rpc/RpcChannelKey.cpp" "${HDFS3_SOURCE_DIR}/rpc/RpcProtocolInfo.cpp" "${HDFS3_SOURCE_DIR}/rpc/RpcClient.cpp" @@ -148,6 +179,11 @@ if (TARGET OpenSSL::SSL) target_link_libraries(_hdfs3 PRIVATE OpenSSL::Crypto OpenSSL::SSL) endif() +if (TARGET ch_contrib::isal) + target_link_libraries(_hdfs3 PRIVATE ch_contrib::isal) + add_definitions(-DHADOOP_ISAL_LIBRARY) +endif() + add_library(ch_contrib::hdfs ALIAS _hdfs3) if (ENABLE_CLICKHOUSE_BENCHMARK) diff --git a/contrib/libpqxx b/contrib/libpqxx index a4e83483927..bdd6540fb95 160000 --- a/contrib/libpqxx +++ b/contrib/libpqxx @@ -1 +1 @@ -Subproject commit a4e834839270a8c1f7ff1db351ba85afced3f0e2 +Subproject commit bdd6540fb95ff56c813691ceb5da5a3266cf235d diff --git a/contrib/libuv-cmake/CMakeLists.txt b/contrib/libuv-cmake/CMakeLists.txt index 7ca2cef2251..928fdcdd7e6 100644 --- a/contrib/libuv-cmake/CMakeLists.txt +++ b/contrib/libuv-cmake/CMakeLists.txt @@ -1,13 +1,7 @@ -# once fixed, please remove similar places in CMakeLists of libuv users (search "ch_contrib::uv") -if (OS_DARWIN AND COMPILER_GCC) - message (WARNING "libuv cannot be built with GCC in macOS due to a bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93082") - return() -endif() - # This file is a modified version of contrib/libuv/CMakeLists.txt -set (SOURCE_DIR "${CMAKE_SOURCE_DIR}/contrib/libuv") -set (BINARY_DIR "${CMAKE_BINARY_DIR}/contrib/libuv") +set (SOURCE_DIR "${PROJECT_SOURCE_DIR}/contrib/libuv") +set (BINARY_DIR "${PROJECT_BINARY_DIR}/contrib/libuv") set(uv_sources src/fs-poll.c diff --git a/contrib/llvm-project b/contrib/llvm-project index a8bf69e9cd3..d857c707fcc 160000 --- 
a/contrib/llvm-project +++ b/contrib/llvm-project @@ -1 +1 @@ -Subproject commit a8bf69e9cd39a23140a2b633c172d201484172da +Subproject commit d857c707fccd50423bea1c4710dc469cf89607a9 diff --git a/contrib/mariadb-connector-c-cmake/CMakeLists.txt b/contrib/mariadb-connector-c-cmake/CMakeLists.txt index 50287c54ac1..18d1510a57b 100644 --- a/contrib/mariadb-connector-c-cmake/CMakeLists.txt +++ b/contrib/mariadb-connector-c-cmake/CMakeLists.txt @@ -15,7 +15,7 @@ endif() # This is the LGPL libmariadb project. -set(CC_SOURCE_DIR ${CMAKE_SOURCE_DIR}/contrib/mariadb-connector-c) +set(CC_SOURCE_DIR ${PROJECT_SOURCE_DIR}/contrib/mariadb-connector-c) set(CC_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) set(WITH_SSL ON) diff --git a/contrib/murmurhash/src/MurmurHash2.cpp b/contrib/murmurhash/src/MurmurHash2.cpp index 1c4469b0a02..0bd0a352dc4 100644 --- a/contrib/murmurhash/src/MurmurHash2.cpp +++ b/contrib/murmurhash/src/MurmurHash2.cpp @@ -31,6 +31,40 @@ #define BIG_CONSTANT(x) (x##LLU) #endif // !defined(_MSC_VER) +// +//----------------------------------------------------------------------------- +// Block read - on little-endian machines this is a single load, +// while on big-endian or unknown machines the byte accesses should +// still get optimized into the most efficient instruction. +static inline uint32_t getblock ( const uint32_t * p ) +{ +#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + return *p; +#else + const uint8_t *c = (const uint8_t *)p; + return (uint32_t)c[0] | + (uint32_t)c[1] << 8 | + (uint32_t)c[2] << 16 | + (uint32_t)c[3] << 24; +#endif +} + +static inline uint64_t getblock ( const uint64_t * p ) +{ +#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + return *p; +#else + const uint8_t *c = (const uint8_t *)p; + return (uint64_t)c[0] | + (uint64_t)c[1] << 8 | + (uint64_t)c[2] << 16 | + (uint64_t)c[3] << 24 | + (uint64_t)c[4] << 32 | + (uint64_t)c[5] << 40 | + (uint64_t)c[6] << 48 | + (uint64_t)c[7] << 56; +#endif +} //----------------------------------------------------------------------------- @@ -52,7 +86,7 @@ uint32_t MurmurHash2 ( const void * key, size_t len, uint32_t seed ) while(len >= 4) { - uint32_t k = *(uint32_t*)data; + uint32_t k = getblock((const uint32_t *)data); k *= m; k ^= k >> r; @@ -105,7 +139,7 @@ uint64_t MurmurHash64A ( const void * key, size_t len, uint64_t seed ) while(data != end) { - uint64_t k = *data++; + uint64_t k = getblock(data++); k *= m; k ^= k >> r; @@ -151,12 +185,12 @@ uint64_t MurmurHash64B ( const void * key, size_t len, uint64_t seed ) while(len >= 8) { - uint32_t k1 = *data++; + uint32_t k1 = getblock(data++); k1 *= m; k1 ^= k1 >> r; k1 *= m; h1 *= m; h1 ^= k1; len -= 4; - uint32_t k2 = *data++; + uint32_t k2 = getblock(data++); k2 *= m; k2 ^= k2 >> r; k2 *= m; h2 *= m; h2 ^= k2; len -= 4; @@ -164,7 +198,7 @@ uint64_t MurmurHash64B ( const void * key, size_t len, uint64_t seed ) if(len >= 4) { - uint32_t k1 = *data++; + uint32_t k1 = getblock(data++); k1 *= m; k1 ^= k1 >> r; k1 *= m; h1 *= m; h1 ^= k1; len -= 4; @@ -215,7 +249,7 @@ uint32_t MurmurHash2A ( const void * key, size_t len, uint32_t seed ) while(len >= 4) { - uint32_t k = *(uint32_t*)data; + uint32_t k = getblock((const uint32_t *)data); mmix(h,k); @@ -278,7 +312,7 @@ public: while(len >= 4) { - uint32_t k = *(uint32_t*)data; + uint32_t k = getblock((const uint32_t *)data); mmix(m_hash,k); @@ -427,7 +461,7 @@ uint32_t MurmurHashAligned2 ( const void * key, size_t len, uint32_t seed ) while(len >= 4) { - d = *(uint32_t *)data; + d = 
getblock((const uint32_t *)data); t = (t >> sr) | (d << sl); uint32_t k = t; @@ -492,7 +526,7 @@ uint32_t MurmurHashAligned2 ( const void * key, size_t len, uint32_t seed ) { while(len >= 4) { - uint32_t k = *(uint32_t *)data; + uint32_t k = getblock((const uint32_t *)data); MIX(h,k,m); diff --git a/contrib/murmurhash/src/MurmurHash3.cpp b/contrib/murmurhash/src/MurmurHash3.cpp index cf5158e97ad..6573c470be3 100644 --- a/contrib/murmurhash/src/MurmurHash3.cpp +++ b/contrib/murmurhash/src/MurmurHash3.cpp @@ -55,14 +55,32 @@ inline uint64_t rotl64 ( uint64_t x, int8_t r ) FORCE_INLINE uint32_t getblock32 ( const uint32_t * p, int i ) { - uint32_t res; - memcpy(&res, p + i, sizeof(res)); - return res; +#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + return p[i]; +#else + const uint8_t *c = (const uint8_t *)&p[i]; + return (uint32_t)c[0] | + (uint32_t)c[1] << 8 | + (uint32_t)c[2] << 16 | + (uint32_t)c[3] << 24; +#endif } FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i ) { +#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) return p[i]; +#else + const uint8_t *c = (const uint8_t *)&p[i]; + return (uint64_t)c[0] | + (uint64_t)c[1] << 8 | + (uint64_t)c[2] << 16 | + (uint64_t)c[3] << 24 | + (uint64_t)c[4] << 32 | + (uint64_t)c[5] << 40 | + (uint64_t)c[6] << 48 | + (uint64_t)c[7] << 56; +#endif } //----------------------------------------------------------------------------- @@ -329,9 +347,13 @@ void MurmurHash3_x64_128 ( const void * key, const size_t len, h1 += h2; h2 += h1; - +#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) ((uint64_t*)out)[0] = h1; ((uint64_t*)out)[1] = h2; +#else + ((uint64_t*)out)[0] = h2; + ((uint64_t*)out)[1] = h1; +#endif } //----------------------------------------------------------------------------- diff --git a/contrib/orc b/contrib/orc index f9a393ed243..c5d7755ba0b 160000 --- a/contrib/orc +++ b/contrib/orc @@ -1 +1 @@ -Subproject commit f9a393ed2433a60034795284f82d093b348f2102 +Subproject commit c5d7755ba0b9a95631c8daea4d094101f26ec761 diff --git a/contrib/qpl b/contrib/qpl index d75a29d95d8..3f8f5cea277 160000 --- a/contrib/qpl +++ b/contrib/qpl @@ -1 +1 @@ -Subproject commit d75a29d95d8a548297fce3549d21020005364dc8 +Subproject commit 3f8f5cea27739f5261e8fd577dc233ffe88bf679 diff --git a/contrib/qpl-cmake/CMakeLists.txt b/contrib/qpl-cmake/CMakeLists.txt index fc5548b0652..4e6c66fe731 100644 --- a/contrib/qpl-cmake/CMakeLists.txt +++ b/contrib/qpl-cmake/CMakeLists.txt @@ -1,36 +1,5 @@ ## The Intel® QPL provides high performance implementations of data processing functions for existing hardware accelerator, and/or software path in case if hardware accelerator is not available. -if (OS_LINUX AND ARCH_AMD64 AND (ENABLE_AVX2 OR ENABLE_AVX512)) - option (ENABLE_QPL "Enable Intel® Query Processing Library" ${ENABLE_LIBRARIES}) -elseif(ENABLE_QPL) - message (${RECONFIGURE_MESSAGE_LEVEL} "QPL library is only supported on x86_64 arch with avx2/avx512 support") -endif() - -if (NOT ENABLE_QPL) - message(STATUS "Not using QPL") - return() -endif() - -## QPL has build dependency on libaccel-config. Here is to build libaccel-config which is required by QPL. -## libaccel-config is the utility library for controlling and configuring Intel® In-Memory Analytics Accelerator (Intel® IAA). 
-set (LIBACCEL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/idxd-config") set (UUID_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl-cmake") -set (LIBACCEL_HEADER_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl-cmake/idxd-header") -set (SRCS - "${LIBACCEL_SOURCE_DIR}/accfg/lib/libaccfg.c" - "${LIBACCEL_SOURCE_DIR}/util/log.c" - "${LIBACCEL_SOURCE_DIR}/util/sysfs.c" -) - -add_library(accel-config ${SRCS}) - -target_compile_options(accel-config PRIVATE "-D_GNU_SOURCE") - -target_include_directories(accel-config BEFORE - PRIVATE ${UUID_DIR} - PRIVATE ${LIBACCEL_HEADER_DIR} - PRIVATE ${LIBACCEL_SOURCE_DIR}) - -## QPL build start here. set (QPL_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl") set (QPL_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl/sources") set (QPL_BINARY_DIR "${ClickHouse_BINARY_DIR}/build/contrib/qpl") @@ -40,9 +9,10 @@ set (LOG_HW_INIT OFF) set (SANITIZE_MEMORY OFF) set (SANITIZE_THREADS OFF) set (LIB_FUZZING_ENGINE OFF) +set (DYNAMIC_LOADING_LIBACCEL_CONFIG OFF) function(GetLibraryVersion _content _outputVar) - string(REGEX MATCHALL "Qpl VERSION (.+) LANGUAGES" VERSION_REGEX "${_content}") + string(REGEX MATCHALL "QPL VERSION (.+) LANGUAGES" VERSION_REGEX "${_content}") SET(${_outputVar} ${CMAKE_MATCH_1} PARENT_SCOPE) endfunction() @@ -52,8 +22,11 @@ GetLibraryVersion("${HEADER_CONTENT}" QPL_VERSION) message(STATUS "Intel QPL version: ${QPL_VERSION}") # There are 5 source subdirectories under $QPL_SRC_DIR: isal, c_api, core-sw, middle-layer, c_api. -# Generate 7 library targets: middle_layer_lib, isal, isal_asm, qplcore_px, qplcore_avx512, core_iaa, middle_layer_lib. -# Output ch_contrib::qpl by linking with 7 library targets. +# Generate 8 library targets: middle_layer_lib, isal, isal_asm, qplcore_px, qplcore_avx512, qplcore_sw_dispatcher, core_iaa, middle_layer_lib. +# Output ch_contrib::qpl by linking with 8 library targets. + +# The qpl submodule comes with its own version of isal. It contains code which does not exist in upstream isal. It would be nice to link +# only upstream isal (ch_contrib::isal) but at this point we can't. include("${QPL_PROJECT_DIR}/cmake/CompileOptions.cmake") @@ -118,31 +91,36 @@ set(ISAL_ASM_SRC ${QPL_SRC_DIR}/isal/igzip/igzip_body.asm add_library(isal OBJECT ${ISAL_C_SRC}) add_library(isal_asm OBJECT ${ISAL_ASM_SRC}) +set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS + $) + +set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS + $) + # Setting external and internal interfaces for ISA-L library target_include_directories(isal - PUBLIC $ - PRIVATE ${QPL_SRC_DIR}/isal/include - PUBLIC ${QPL_SRC_DIR}/isal/igzip) + PUBLIC $ + PRIVATE ${QPL_SRC_DIR}/isal/include + PUBLIC ${QPL_SRC_DIR}/isal/igzip) + +set_target_properties(isal PROPERTIES + CXX_STANDARD 11 + C_STANDARD 99) target_compile_options(isal PRIVATE "$<$:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}>" "$<$:>" "$<$:>") +# AS_FEATURE_LEVEL=10 means "Check SIMD capabilities of the target system at runtime and use up to AVX512 if available". +# HAVE_KNOWS_AVX512 means rely on AVX512 being available on the target system. target_compile_options(isal_asm PRIVATE "-I${QPL_SRC_DIR}/isal/include/" PRIVATE "-I${QPL_SRC_DIR}/isal/igzip/" PRIVATE "-I${QPL_SRC_DIR}/isal/crc/" + PRIVATE "-DHAVE_AS_KNOWS_AVX512" + PRIVATE "-DAS_FEATURE_LEVEL=10" PRIVATE "-DQPL_LIB") -# AS_FEATURE_LEVEL=10 means "Check SIMD capabilities of the target system at runtime and use up to AVX512 if available". -# AS_FEATURE_LEVEL=5 means "Check SIMD capabilities of the target system at runtime and use up to AVX2 if available". 
-# HAVE_KNOWS_AVX512 means rely on AVX512 being available on the target system. -if (ENABLE_AVX512) - target_compile_options(isal_asm PRIVATE "-DHAVE_AS_KNOWS_AVX512" "-DAS_FEATURE_LEVEL=10") -else() - target_compile_options(isal_asm PRIVATE "-DAS_FEATURE_LEVEL=5") -endif() - # Here must remove "-fno-sanitize=undefined" from COMPILE_OPTIONS. # Otherwise nasm compiler would fail to proceed due to unrecognition of "-fno-sanitize=undefined" if (SANITIZE STREQUAL "undefined") @@ -156,74 +134,97 @@ target_compile_definitions(isal PUBLIC NDEBUG) # [SUBDIR]core-sw -# Two libraries:qplcore_avx512/qplcore_px for SW fallback will be created which are implemented by AVX512 and non-AVX512 instructions respectively. +# Create set of libraries corresponding to supported platforms for SW fallback which are implemented by AVX512 and non-AVX512 instructions respectively. # The upper level QPL API will check SIMD capabilities of the target system at runtime and decide to call AVX512 function or non-AVX512 function. -# Hence, here we don't need put qplcore_avx512 under an ENABLE_AVX512 CMake switch. -# Actually, if we do that, some undefined symbols errors would happen because both of AVX512 function and non-AVX512 function are referenced by QPL API. -# PLATFORM=2 means AVX512 implementation; PLATFORM=0 means non-AVX512 implementation. +# Hence, here we don't need put ENABLE_AVX512 CMake switch. -# Find Core Sources -file(GLOB SOURCES - ${QPL_SRC_DIR}/core-sw/src/checksums/*.c - ${QPL_SRC_DIR}/core-sw/src/filtering/*.c - ${QPL_SRC_DIR}/core-sw/src/other/*.c - ${QPL_SRC_DIR}/core-sw/src/compression/*.c) +get_list_of_supported_optimizations(PLATFORMS_LIST) -file(GLOB DATA_SOURCES - ${QPL_SRC_DIR}/core-sw/src/data/*.c) +foreach(PLATFORM_ID IN LISTS PLATFORMS_LIST) + # Find Core Sources + file(GLOB SOURCES + ${QPL_SRC_DIR}/core-sw/src/checksums/*.c + ${QPL_SRC_DIR}/core-sw/src/filtering/*.c + ${QPL_SRC_DIR}/core-sw/src/other/*.c + ${QPL_SRC_DIR}/core-sw/src/compression/*.c) -# Create avx512 library -add_library(qplcore_avx512 OBJECT ${SOURCES}) + file(GLOB DATA_SOURCES + ${QPL_SRC_DIR}/core-sw/src/data/*.c) -target_compile_definitions(qplcore_avx512 PRIVATE PLATFORM=2) + # Create library + add_library(qplcore_${PLATFORM_ID} OBJECT ${SOURCES}) -target_include_directories(qplcore_avx512 - PUBLIC $ - PUBLIC $ - PUBLIC $ - PRIVATE $) + set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS + $) -set_target_properties(qplcore_avx512 PROPERTIES - $<$:C_STANDARD 17>) + target_include_directories(qplcore_${PLATFORM_ID} + PUBLIC $ + PUBLIC $ + PUBLIC $ + PUBLIC $ + PRIVATE $) -target_link_libraries(qplcore_avx512 ${CMAKE_DL_LIBS} isal) + set_target_properties(qplcore_${PLATFORM_ID} PROPERTIES + $<$:C_STANDARD 17>) -target_compile_options(qplcore_avx512 - PRIVATE ${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS} - PRIVATE -march=skylake-avx512 - PRIVATE "$<$:>" - PRIVATE "$<$:-O3;-D_FORTIFY_SOURCE=2>") + target_compile_options(qplcore_${PLATFORM_ID} + PRIVATE ${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS} + PRIVATE "$<$:>" + PRIVATE "$<$:-O3;-D_FORTIFY_SOURCE=2>") + # Set specific compiler options and/or definitions based on a platform + if (${PLATFORM_ID} MATCHES "avx512") + target_compile_definitions(qplcore_${PLATFORM_ID} PRIVATE PLATFORM=2) + target_compile_options(qplcore_${PLATFORM_ID} PRIVATE -march=skylake-avx512) + else() # Create default px library + target_compile_definitions(qplcore_${PLATFORM_ID} PRIVATE PLATFORM=0) + endif() -target_compile_definitions(qplcore_avx512 PUBLIC QPL_BADARG_CHECK) + 
target_link_libraries(qplcore_${PLATFORM_ID} isal) +endforeach() # -# Create px library +# Create dispatcher between platforms and auto-generated wrappers # -#set(CMAKE_INCLUDE_CURRENT_DIR ON) +file(GLOB SW_DISPATCHER_SOURCES ${QPL_SRC_DIR}/core-sw/dispatcher/*.cpp) -# Create library -add_library(qplcore_px OBJECT ${SOURCES} ${DATA_SOURCES}) +add_library(qplcore_sw_dispatcher OBJECT ${SW_DISPATCHER_SOURCES}) -target_compile_definitions(qplcore_px PRIVATE PLATFORM=0) +set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS + $) -target_include_directories(qplcore_px - PUBLIC $ - PUBLIC $ - PUBLIC $ - PRIVATE $) +target_include_directories(qplcore_sw_dispatcher + PUBLIC $) -set_target_properties(qplcore_px PROPERTIES - $<$:C_STANDARD 17>) +# Generate kernel wrappers +generate_unpack_kernel_arrays(${QPL_BINARY_DIR} "${PLATFORMS_LIST}") -target_link_libraries(qplcore_px isal ${CMAKE_DL_LIBS}) +foreach(PLATFORM_ID IN LISTS PLATFORMS_LIST) + file(GLOB GENERATED_${PLATFORM_ID}_TABLES_SRC ${QPL_BINARY_DIR}/generated/${PLATFORM_ID}_*.cpp) -target_compile_options(qplcore_px - PRIVATE ${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS} - PRIVATE "$<$:>" - PRIVATE "$<$:-O3;-D_FORTIFY_SOURCE=2>") + target_sources(qplcore_sw_dispatcher PRIVATE ${GENERATED_${PLATFORM_ID}_TABLES_SRC}) -target_compile_definitions(qplcore_px PUBLIC QPL_BADARG_CHECK) + # Set specific compiler options and/or definitions based on a platform + if (${PLATFORM_ID} MATCHES "avx512") + set_source_files_properties(${GENERATED_${PLATFORM_ID}_TABLES_SRC} PROPERTIES COMPILE_DEFINITIONS PLATFORM=2) + else() + set_source_files_properties(${GENERATED_${PLATFORM_ID}_TABLES_SRC} PROPERTIES COMPILE_DEFINITIONS PLATFORM=0) + endif() + + target_include_directories(qplcore_sw_dispatcher + PUBLIC $) +endforeach() + +set_target_properties(qplcore_sw_dispatcher PROPERTIES CXX_STANDARD 17) + +# w/a for build compatibility with ISAL codebase +target_compile_definitions(qplcore_sw_dispatcher PUBLIC -DQPL_LIB) + +target_compile_options(qplcore_sw_dispatcher + PRIVATE $<$:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}; + ${QPL_LINUX_TOOLCHAIN_DYNAMIC_LIBRARY_FLAGS}; + $<$:-O3;-D_FORTIFY_SOURCE=2>> + PRIVATE $<$:${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS}>) # [SUBDIR]core-iaa file(GLOB HW_PATH_SRC ${QPL_SRC_DIR}/core-iaa/sources/aecs/*.c @@ -237,11 +238,20 @@ file(GLOB HW_PATH_SRC ${QPL_SRC_DIR}/core-iaa/sources/aecs/*.c # Create library add_library(core_iaa OBJECT ${HW_PATH_SRC}) +set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS + $) + target_include_directories(core_iaa PRIVATE ${UUID_DIR} PUBLIC $ - PRIVATE $ - PRIVATE $) + PUBLIC $ + PRIVATE $ # status.h in own_checkers.h + PRIVATE $ # own_checkers.h + PRIVATE $) + +set_target_properties(core_iaa PROPERTIES + $<$:C_STANDARD 17> + CXX_STANDARD 17) target_compile_options(core_iaa PRIVATE $<$:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}; @@ -251,11 +261,10 @@ target_compile_features(core_iaa PRIVATE c_std_11) target_compile_definitions(core_iaa PRIVATE QPL_BADARG_CHECK PRIVATE $<$: BLOCK_ON_FAULT_ENABLED> - PRIVATE $<$:LOG_HW_INIT>) + PRIVATE $<$:LOG_HW_INIT> + PRIVATE $<$:DYNAMIC_LOADING_LIBACCEL_CONFIG>) # [SUBDIR]middle-layer -generate_unpack_kernel_arrays(${QPL_BINARY_DIR}) - file(GLOB MIDDLE_LAYER_SRC ${QPL_SRC_DIR}/middle-layer/analytics/*.cpp ${QPL_SRC_DIR}/middle-layer/c_wrapper/*.cpp @@ -270,14 +279,12 @@ file(GLOB MIDDLE_LAYER_SRC ${QPL_SRC_DIR}/middle-layer/inflate/*.cpp ${QPL_SRC_DIR}/core-iaa/sources/accelerator/*.cpp) # todo -file(GLOB GENERATED_PX_TABLES_SRC ${QPL_BINARY_DIR}/generated/px_*.cpp) -file(GLOB 
GENERATED_AVX512_TABLES_SRC ${QPL_BINARY_DIR}/generated/avx512_*.cpp)
-
add_library(middle_layer_lib OBJECT
-    ${GENERATED_PX_TABLES_SRC}
-    ${GENERATED_AVX512_TABLES_SRC}
    ${MIDDLE_LAYER_SRC})

+set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS
+    $<TARGET_OBJECTS:middle_layer_lib>)
+
target_compile_options(middle_layer_lib
    PRIVATE $<$:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS};
    ${QPL_LINUX_TOOLCHAIN_DYNAMIC_LIBRARY_FLAGS};
@@ -288,41 +295,39 @@ target_compile_definitions(middle_layer_lib
    PUBLIC QPL_VERSION="${QPL_VERSION}"
    PUBLIC $<$:LOG_HW_INIT>
    PUBLIC $<$:QPL_EFFICIENT_WAIT>
-    PUBLIC QPL_BADARG_CHECK)
+    PUBLIC QPL_BADARG_CHECK
+    PUBLIC $<$:DYNAMIC_LOADING_LIBACCEL_CONFIG>)

-set_source_files_properties(${GENERATED_PX_TABLES_SRC} PROPERTIES COMPILE_DEFINITIONS PLATFORM=0)
-set_source_files_properties(${GENERATED_AVX512_TABLES_SRC} PROPERTIES COMPILE_DEFINITIONS PLATFORM=2)
+set_target_properties(middle_layer_lib PROPERTIES CXX_STANDARD 17)

target_include_directories(middle_layer_lib
    PRIVATE ${UUID_DIR}
    PUBLIC $
    PUBLIC $
-    PUBLIC $
-    PUBLIC $
+    PUBLIC $
    PUBLIC $
    PUBLIC $)

target_compile_definitions(middle_layer_lib PUBLIC -DQPL_LIB)

# [SUBDIR]c_api
-file(GLOB_RECURSE QPL_C_API_SRC
+file(GLOB_RECURSE QPL_C_API_SRC
    ${QPL_SRC_DIR}/c_api/*.c
    ${QPL_SRC_DIR}/c_api/*.cpp)

-add_library(_qpl STATIC ${QPL_C_API_SRC}
-    $
-    $
-    $
-    $
-    $
-    $
-    $)
+get_property(LIB_DEPS GLOBAL PROPERTY QPL_LIB_DEPS)
+
+add_library(_qpl STATIC ${QPL_C_API_SRC} ${LIB_DEPS})

target_include_directories(_qpl
-    PUBLIC $
+    PUBLIC $ $
    PRIVATE $
    PRIVATE $)

+set_target_properties(_qpl PROPERTIES
+    $<$:C_STANDARD 17>
+    CXX_STANDARD 17)
+
target_compile_options(_qpl
    PRIVATE $<$:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS};
    ${QPL_LINUX_TOOLCHAIN_DYNAMIC_LIBRARY_FLAGS};
@@ -332,11 +337,15 @@ target_compile_options(_qpl
target_compile_definitions(_qpl
    PRIVATE -DQPL_LIB
    PRIVATE -DQPL_BADARG_CHECK
+    PRIVATE $<$:DYNAMIC_LOADING_LIBACCEL_CONFIG>
    PUBLIC -DENABLE_QPL_COMPRESSION)

target_link_libraries(_qpl
-    PRIVATE accel-config
-    PRIVATE ${CMAKE_DL_LIBS})
+    PRIVATE ch_contrib::accel-config
+    PRIVATE ch_contrib::isal)
+
+target_include_directories(_qpl SYSTEM BEFORE
+    PUBLIC "${QPL_PROJECT_DIR}/include"
+    PUBLIC ${UUID_DIR})

add_library (ch_contrib::qpl ALIAS _qpl)
-target_include_directories(_qpl SYSTEM BEFORE PUBLIC "${QPL_PROJECT_DIR}/include")
diff --git a/contrib/qpl-cmake/benchmark_sample/client_scripts/allin1_ssb.sh b/contrib/qpl-cmake/benchmark_sample/client_scripts/allin1_ssb.sh
new file mode 100644
index 00000000000..31017b565b6
--- /dev/null
+++ b/contrib/qpl-cmake/benchmark_sample/client_scripts/allin1_ssb.sh
@@ -0,0 +1,530 @@
+#!/bin/bash
+ckhost="localhost"
+ckport=("9000" "9001" "9002" "9003")
+WORKING_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.."
+OUTPUT_DIR="${WORKING_DIR}/output"
+LOG_DIR="${OUTPUT_DIR}/log"
+RAWDATA_DIR="${WORKING_DIR}/rawdata_dir"
+database_dir="${WORKING_DIR}/database_dir"
+CLIENT_SCRIPTS_DIR="${WORKING_DIR}/client_scripts"
+LOG_PACK_FILE="$(date +%Y-%m-%d-%H-%M-%S)"
+QUERY_FILE="queries_ssb.sql"
+SERVER_BIND_CMD[0]="numactl -m 0 -N 0"
+SERVER_BIND_CMD[1]="numactl -m 0 -N 0"
+SERVER_BIND_CMD[2]="numactl -m 1 -N 1"
+SERVER_BIND_CMD[3]="numactl -m 1 -N 1"
+CLIENT_BIND_CMD=""
+SSB_GEN_FACTOR=20
+TABLE_NAME="lineorder_flat"
+TABLE_ROWS="119994608"
+CODEC_CONFIG="lz4 deflate zstd"
+
+# define instance number
+inst_num=$1
+if [ -z "$1" ]; then
+    echo "Please specify an instance number: 1, 2, 3 or 4"
+    exit 1
+else
+    echo "Benchmarking with instance number:$1"
+fi
+
+if [ ! -d "$OUTPUT_DIR" ]; then
+    mkdir $OUTPUT_DIR
+fi
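The arrays above hard-code the benchmark topology: two server instances per NUMA node via numactl, and consecutive TCP ports starting at 9000. A small hypothetical sketch of the same layout, handy when adapting the script to machines with more sockets:

```python
# Hypothetical sketch of the instance layout hard-coded above:
# two instances per NUMA node, TCP ports 9000, 9001, ...

def instance_layout(inst_num, instances_per_node=2, base_port=9000):
    layout = []
    for i in range(inst_num):
        node = i // instances_per_node
        layout.append({
            "port": base_port + i,
            "bind_cmd": f"numactl -m {node} -N {node}",
        })
    return layout

for inst in instance_layout(4):
    print(inst)
```

+if [ ! 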
-d "$LOG_DIR" ]; then +mkdir $LOG_DIR +fi +if [ ! -d "$RAWDATA_DIR" ]; then +mkdir $RAWDATA_DIR +fi + +# define different directories +dir_server=("" "_s2" "_s3" "_s4") +ckreadSql=" + CREATE TABLE customer + ( + C_CUSTKEY UInt32, + C_NAME String, + C_ADDRESS String, + C_CITY LowCardinality(String), + C_NATION LowCardinality(String), + C_REGION LowCardinality(String), + C_PHONE String, + C_MKTSEGMENT LowCardinality(String) + ) + ENGINE = MergeTree ORDER BY (C_CUSTKEY); + + CREATE TABLE lineorder + ( + LO_ORDERKEY UInt32, + LO_LINENUMBER UInt8, + LO_CUSTKEY UInt32, + LO_PARTKEY UInt32, + LO_SUPPKEY UInt32, + LO_ORDERDATE Date, + LO_ORDERPRIORITY LowCardinality(String), + LO_SHIPPRIORITY UInt8, + LO_QUANTITY UInt8, + LO_EXTENDEDPRICE UInt32, + LO_ORDTOTALPRICE UInt32, + LO_DISCOUNT UInt8, + LO_REVENUE UInt32, + LO_SUPPLYCOST UInt32, + LO_TAX UInt8, + LO_COMMITDATE Date, + LO_SHIPMODE LowCardinality(String) + ) + ENGINE = MergeTree PARTITION BY toYear(LO_ORDERDATE) ORDER BY (LO_ORDERDATE, LO_ORDERKEY); + + CREATE TABLE part + ( + P_PARTKEY UInt32, + P_NAME String, + P_MFGR LowCardinality(String), + P_CATEGORY LowCardinality(String), + P_BRAND LowCardinality(String), + P_COLOR LowCardinality(String), + P_TYPE LowCardinality(String), + P_SIZE UInt8, + P_CONTAINER LowCardinality(String) + ) + ENGINE = MergeTree ORDER BY P_PARTKEY; + + CREATE TABLE supplier + ( + S_SUPPKEY UInt32, + S_NAME String, + S_ADDRESS String, + S_CITY LowCardinality(String), + S_NATION LowCardinality(String), + S_REGION LowCardinality(String), + S_PHONE String + ) + ENGINE = MergeTree ORDER BY S_SUPPKEY; +" +supplier_table=" + CREATE TABLE supplier + ( + S_SUPPKEY UInt32, + S_NAME String, + S_ADDRESS String, + S_CITY LowCardinality(String), + S_NATION LowCardinality(String), + S_REGION LowCardinality(String), + S_PHONE String + ) + ENGINE = MergeTree ORDER BY S_SUPPKEY; +" +part_table=" + CREATE TABLE part + ( + P_PARTKEY UInt32, + P_NAME String, + P_MFGR LowCardinality(String), + P_CATEGORY LowCardinality(String), + P_BRAND LowCardinality(String), + P_COLOR LowCardinality(String), + P_TYPE LowCardinality(String), + P_SIZE UInt8, + P_CONTAINER LowCardinality(String) + ) + ENGINE = MergeTree ORDER BY P_PARTKEY; +" +lineorder_table=" + CREATE TABLE lineorder + ( + LO_ORDERKEY UInt32, + LO_LINENUMBER UInt8, + LO_CUSTKEY UInt32, + LO_PARTKEY UInt32, + LO_SUPPKEY UInt32, + LO_ORDERDATE Date, + LO_ORDERPRIORITY LowCardinality(String), + LO_SHIPPRIORITY UInt8, + LO_QUANTITY UInt8, + LO_EXTENDEDPRICE UInt32, + LO_ORDTOTALPRICE UInt32, + LO_DISCOUNT UInt8, + LO_REVENUE UInt32, + LO_SUPPLYCOST UInt32, + LO_TAX UInt8, + LO_COMMITDATE Date, + LO_SHIPMODE LowCardinality(String) + ) + ENGINE = MergeTree PARTITION BY toYear(LO_ORDERDATE) ORDER BY (LO_ORDERDATE, LO_ORDERKEY); +" +customer_table=" + CREATE TABLE customer + ( + C_CUSTKEY UInt32, + C_NAME String, + C_ADDRESS String, + C_CITY LowCardinality(String), + C_NATION LowCardinality(String), + C_REGION LowCardinality(String), + C_PHONE String, + C_MKTSEGMENT LowCardinality(String) + ) + ENGINE = MergeTree ORDER BY (C_CUSTKEY); +" + +lineorder_flat_table=" + SET max_memory_usage = 20000000000; + CREATE TABLE lineorder_flat + ENGINE = MergeTree + PARTITION BY toYear(LO_ORDERDATE) + ORDER BY (LO_ORDERDATE, LO_ORDERKEY) AS + SELECT + l.LO_ORDERKEY AS LO_ORDERKEY, + l.LO_LINENUMBER AS LO_LINENUMBER, + l.LO_CUSTKEY AS LO_CUSTKEY, + l.LO_PARTKEY AS LO_PARTKEY, + l.LO_SUPPKEY AS LO_SUPPKEY, + l.LO_ORDERDATE AS LO_ORDERDATE, + l.LO_ORDERPRIORITY AS LO_ORDERPRIORITY, + l.LO_SHIPPRIORITY AS 
LO_SHIPPRIORITY, + l.LO_QUANTITY AS LO_QUANTITY, + l.LO_EXTENDEDPRICE AS LO_EXTENDEDPRICE, + l.LO_ORDTOTALPRICE AS LO_ORDTOTALPRICE, + l.LO_DISCOUNT AS LO_DISCOUNT, + l.LO_REVENUE AS LO_REVENUE, + l.LO_SUPPLYCOST AS LO_SUPPLYCOST, + l.LO_TAX AS LO_TAX, + l.LO_COMMITDATE AS LO_COMMITDATE, + l.LO_SHIPMODE AS LO_SHIPMODE, + c.C_NAME AS C_NAME, + c.C_ADDRESS AS C_ADDRESS, + c.C_CITY AS C_CITY, + c.C_NATION AS C_NATION, + c.C_REGION AS C_REGION, + c.C_PHONE AS C_PHONE, + c.C_MKTSEGMENT AS C_MKTSEGMENT, + s.S_NAME AS S_NAME, + s.S_ADDRESS AS S_ADDRESS, + s.S_CITY AS S_CITY, + s.S_NATION AS S_NATION, + s.S_REGION AS S_REGION, + s.S_PHONE AS S_PHONE, + p.P_NAME AS P_NAME, + p.P_MFGR AS P_MFGR, + p.P_CATEGORY AS P_CATEGORY, + p.P_BRAND AS P_BRAND, + p.P_COLOR AS P_COLOR, + p.P_TYPE AS P_TYPE, + p.P_SIZE AS P_SIZE, + p.P_CONTAINER AS P_CONTAINER + FROM lineorder AS l + INNER JOIN customer AS c ON c.C_CUSTKEY = l.LO_CUSTKEY + INNER JOIN supplier AS s ON s.S_SUPPKEY = l.LO_SUPPKEY + INNER JOIN part AS p ON p.P_PARTKEY = l.LO_PARTKEY; + show settings ilike 'max_memory_usage'; +" + +function insert_data(){ + echo "insert_data:$1" + create_table_prefix="clickhouse client --host ${ckhost} --port $2 --multiquery -q" + insert_data_prefix="clickhouse client --query " + case $1 in + all) + clickhouse client --host ${ckhost} --port $2 --multiquery -q"$ckreadSql" && { + ${insert_data_prefix} "INSERT INTO customer FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/customer.tbl --port=$2 + ${insert_data_prefix} "INSERT INTO part FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/part.tbl --port=$2 + ${insert_data_prefix} "INSERT INTO supplier FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/supplier.tbl --port=$2 + ${insert_data_prefix} "INSERT INTO lineorder FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/lineorder.tbl --port=$2 + } + ${create_table_prefix}"${lineorder_flat_table}" + ;; + customer) + echo ${create_table_prefix}\"${customer_table}\" + ${create_table_prefix}"${customer_table}" && { + echo "${insert_data_prefix} \"INSERT INTO $1 FORMAT CSV\" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2" + ${insert_data_prefix} "INSERT INTO $1 FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2 + } + ;; + part) + echo ${create_table_prefix}\"${part_table}\" + ${create_table_prefix}"${part_table}" && { + echo "${insert_data_prefix} \"INSERT INTO $1 FORMAT CSV\" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2" + ${insert_data_prefix} "INSERT INTO $1 FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2 + } + ;; + supplier) + echo ${create_table_prefix}"${supplier_table}" + ${create_table_prefix}"${supplier_table}" && { + echo "${insert_data_prefix} \"INSERT INTO $1 FORMAT CSV\" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2" + ${insert_data_prefix} "INSERT INTO $1 FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2 + } + ;; + lineorder) + echo ${create_table_prefix}"${lineorder_table}" + ${create_table_prefix}"${lineorder_table}" && { + echo "${insert_data_prefix} \"INSERT INTO $1 FORMAT CSV\" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2" + ${insert_data_prefix} "INSERT INTO $1 FORMAT CSV" < ${RAWDATA_DIR}/ssb-dbgen/$1.tbl --port=$2 + } + ;; + lineorder_flat) + echo ${create_table_prefix}"${lineorder_flat_table}" + ${create_table_prefix}"${lineorder_flat_table}" + return 0 + ;; + *) + exit 0 + ;; + + esac +} + +function check_sql(){ + select_sql="select * from "$1" limit 1" + clickhouse client --host ${ckhost} --port $2 --multiquery -q"${select_sql}" +} + +function check_table(){ + checknum=0 + source_tables="customer part supplier lineorder lineorder_flat" + 
test_tables=${1:-${source_tables}}
+    echo "Checking table data required in server..."
+    for i in $(seq 0 $((inst_num-1)))
+    do
+        for j in ${test_tables}
+        do
+            check_sql $j ${ckport[i]} &> /dev/null || {
+                let checknum+=1 && insert_data "$j" ${ckport[i]}
+            }
+        done
+    done
+
+    for i in $(seq 0 $((inst_num-1)))
+    do
+        echo "clickhouse client --host ${ckhost} --port ${ckport[i]} -m -q\"select count() from ${TABLE_NAME};\""
+        var=$(clickhouse client --host ${ckhost} --port ${ckport[i]} -m -q"select count() from ${TABLE_NAME};")
+        if [ $var -eq $TABLE_ROWS ];then
+            echo "Instance_${i} Table data integrity check OK -> Rows:$var"
+        else
+            echo "Instance_${i} Table data integrity check Failed -> Rows:$var"
+            exit 1
+        fi
+    done
+    if [ $checknum -gt 0 ];then
+        echo "Need to sleep 10s after first table data insertion...$checknum"
+        sleep 10
+    fi
+}
+
+function check_instance(){
+instance_alive=0
+for i in {1..10}
+do
+    sleep 1
+    netstat -nltp | grep ${1} > /dev/null
+    if [ $? -eq 0 ];then
+        instance_alive=1
+        break
+    fi
+
+done
+
+if [ $instance_alive -eq 0 ];then
+    echo "check_instance -> clickhouse server instance failed to launch due to 10s timeout!"
+    exit 1
+else
+    echo "check_instance -> clickhouse server instance launched successfully!"
+fi
+}
+
+function start_clickhouse_for_insertion(){
+    echo "start_clickhouse_for_insertion"
+    for i in $(seq 0 $((inst_num-1)))
+    do
+        echo "cd ${database_dir}/$1${dir_server[i]}"
+        echo "${SERVER_BIND_CMD[i]} clickhouse server -C config_${1}${dir_server[i]}.xml >&${LOG_DIR}/${1}_${i}_server_log& > /dev/null"
+
+        cd ${database_dir}/$1${dir_server[i]}
+        ${SERVER_BIND_CMD[i]} clickhouse server -C config_${1}${dir_server[i]}.xml >&${LOG_DIR}/${1}_${i}_server_log& > /dev/null
+        check_instance ${ckport[i]}
+    done
+}
+
+function start_clickhouse_for_stressing(){
+    echo "start_clickhouse_for_stressing"
+    for i in $(seq 0 $((inst_num-1)))
+    do
+        echo "cd ${database_dir}/$1${dir_server[i]}"
+        echo "${SERVER_BIND_CMD[i]} clickhouse server -C config_${1}${dir_server[i]}.xml >&/dev/null&"
+
+        cd ${database_dir}/$1${dir_server[i]}
+        ${SERVER_BIND_CMD[i]} clickhouse server -C config_${1}${dir_server[i]}.xml >&/dev/null&
+        check_instance ${ckport[i]}
+    done
+}
+yum -y install git make gcc sudo net-tools &> /dev/null
+pip3 install clickhouse_driver numpy &> /dev/null
+test -d ${RAWDATA_DIR}/ssb-dbgen || git clone https://github.com/vadimtk/ssb-dbgen.git ${RAWDATA_DIR}/ssb-dbgen && cd ${RAWDATA_DIR}/ssb-dbgen
+
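check_instance above decides liveness by grepping netstat output once per second for ten seconds. As a hedged, hypothetical alternative (not part of the patch), the same readiness check can be done in Python by probing the TCP port directly, which drops the net-tools dependency:

```python
# Hypothetical alternative to the netstat loop in check_instance:
# poll the TCP port until the server accepts connections.
import socket
import time

def wait_for_port(port, host="localhost", timeout=10.0):
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with socket.create_connection((host, port), timeout=1.0):
                return True
        except OSError:
            time.sleep(1.0)
    return False

if not wait_for_port(9000):
    raise SystemExit("clickhouse server instance failed to launch within 10s")
```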
+if [ ! -f ${RAWDATA_DIR}/ssb-dbgen/dbgen ];then
+    make && {
+    test -f ${RAWDATA_DIR}/ssb-dbgen/customer.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T c
+    test -f ${RAWDATA_DIR}/ssb-dbgen/part.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T p
+    test -f ${RAWDATA_DIR}/ssb-dbgen/supplier.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T s
+    test -f ${RAWDATA_DIR}/ssb-dbgen/date.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T d
+    test -f ${RAWDATA_DIR}/ssb-dbgen/lineorder.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T l
+    }
+else
+    test -f ${RAWDATA_DIR}/ssb-dbgen/customer.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T c
+    test -f ${RAWDATA_DIR}/ssb-dbgen/part.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T p
+    test -f ${RAWDATA_DIR}/ssb-dbgen/supplier.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T s
+    test -f ${RAWDATA_DIR}/ssb-dbgen/date.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T d
+    test -f ${RAWDATA_DIR}/ssb-dbgen/lineorder.tbl || echo y | ./dbgen -s ${SSB_GEN_FACTOR} -T l
+
+fi
+
+filenum=`find ${RAWDATA_DIR}/ssb-dbgen/ -name "*.tbl" | wc -l`
+
+if [ $filenum -ne 5 ];then
+    echo "generating ssb data files (*.tbl) failed"
+    exit 1
+fi
+
+function kill_instance(){
+instance_alive=1
+for i in {1..2}
+do
+    pkill clickhouse && sleep 5
+    instance_alive=0
+    for j in $(seq 0 $((inst_num-1)))
+    do
+        netstat -nltp | grep ${ckport[j]} > /dev/null
+        if [ $? -eq 0 ];then
+            instance_alive=1
+            break;
+        fi
+    done
+    if [ $instance_alive -eq 0 ];then
+        break;
+    fi
+done
+if [ $instance_alive -eq 0 ];then
+    echo "kill_instance OK!"
+else
+    echo "kill_instance Failed -> clickhouse server instance still alive after 10s timeout"
+    exit 1
+fi
+}
+
+function run_test(){
+is_xml=0
+for i in $(seq 0 $((inst_num-1)))
+do
+    if [ -f ${database_dir}/${1}${dir_server[i]}/config_${1}${dir_server[i]}.xml ]; then
+        is_xml=$((is_xml+1))
+    fi
+done
+if [ $is_xml -eq $inst_num ];then
+    echo "Benchmark with $inst_num instance"
+    start_clickhouse_for_insertion ${1}
+
+    for i in $(seq 0 $((inst_num-1)))
+    do
+        clickhouse client --host ${ckhost} --port ${ckport[i]} -m -q"show databases;" >/dev/null
+    done
+
+    if [ $? -eq 0 ];then
+        check_table
+    fi
+    kill_instance
+
+    if [ $1 == "deflate" ];then
+        test -f ${LOG_DIR}/${1}_server_log && deflatemsg=`cat ${LOG_DIR}/${1}_server_log | grep DeflateJobHWPool`
+        if [ -n "$deflatemsg" ];then
+            echo ------------------------------------------------------
+            echo $deflatemsg
+            echo ------------------------------------------------------
+        fi
+    fi
+    echo "Check table data required in server_${1} -> Done! "
+
+    start_clickhouse_for_stressing ${1}
+    for i in $(seq 0 $((inst_num-1)))
+    do
+        clickhouse client --host ${ckhost} --port ${ckport[i]} -m -q"show databases;" >/dev/null
+    done
+    if [ $? -eq 0 ];then
+        test -d ${CLIENT_SCRIPTS_DIR} && cd ${CLIENT_SCRIPTS_DIR}
+        echo "Client stressing... "
+        echo "${CLIENT_BIND_CMD} python3 client_stressing_test.py ${QUERY_FILE} $inst_num &> ${LOG_DIR}/${1}.log"
+        ${CLIENT_BIND_CMD} python3 client_stressing_test.py ${QUERY_FILE} $inst_num &> ${LOG_DIR}/${1}.log
+        echo "Completed client stressing, checking log..."
+        finish_log=`grep "Finished" ${LOG_DIR}/${1}.log | wc -l`
+        if [ $finish_log -eq 1 ] ;then
+            kill_instance
+            test -f ${LOG_DIR}/${1}.log && echo "${1}.log ===> ${LOG_DIR}/${1}.log"
+        else
+            kill_instance
+            echo "'Finished' not found in client log -> performance test may have failed"
+            exit 1
+
+        fi
+
+    else
+        echo "${1} clickhouse server failed to start"
+        exit 1
+    fi
+else
+    echo "clickhouse server failed to start -> please check the xml files required in ${database_dir} for each instance"
+    exit 1
+
+fi
+}
+function clear_log(){
+    if [ -d "$LOG_DIR" ]; then
+        cd ${LOG_DIR} && rm -rf *
+    fi
+}
+
+function gather_log_for_codec(){
+    cd ${OUTPUT_DIR} && mkdir -p ${LOG_PACK_FILE}/${1}
+    cp -rf ${LOG_DIR} ${OUTPUT_DIR}/${LOG_PACK_FILE}/${1}
+}
+
+function pack_log(){
+    if [ -e "${OUTPUT_DIR}/run.log" ]; then
+        cp ${OUTPUT_DIR}/run.log ${OUTPUT_DIR}/${LOG_PACK_FILE}/
+    fi
+    echo "Please check all log information in ${OUTPUT_DIR}/${LOG_PACK_FILE}"
+}
+
+function setup_check(){
+    iax_dev_num=`accel-config list | grep iax | wc -l`
+    if [ $iax_dev_num -eq 0 ] ;then
+        echo "No IAA devices available -> Please check IAA hardware setup manually!"
+        exit 1
+    else
+        echo "IAA enabled devices number:$iax_dev_num"
+    fi
+    libaccel_version=`accel-config -v`
+    clickhouse_server_version=`clickhouse server --version`
+    kernel_idxd_log=`dmesg | grep idxd`
+    echo "libaccel_version:$libaccel_version"
+    echo "clickhouse_server_version:$clickhouse_server_version"
+    echo -e "idxd section in kernel log:\n$kernel_idxd_log"
+}
+
+setup_check
+export CLICKHOUSE_WATCHDOG_ENABLE=0
+for i in ${CODEC_CONFIG[@]}
+do
+    clear_log
+    codec=${i}
+    echo "run test------------$codec"
+    run_test $codec
+    gather_log_for_codec $codec
+done
+
+pack_log
+echo "Done."
\ No newline at end of file
diff --git a/contrib/qpl-cmake/benchmark_sample/client_scripts/client_stressing_test.py b/contrib/qpl-cmake/benchmark_sample/client_scripts/client_stressing_test.py
new file mode 100644
index 00000000000..f12381a198c
--- /dev/null
+++ b/contrib/qpl-cmake/benchmark_sample/client_scripts/client_stressing_test.py
@@ -0,0 +1,278 @@
+import os
+import time
+import sys
+from clickhouse_driver import Client
+import numpy as np
+import multiprocessing
+
+warmup_runs = 10
+calculated_runs = 10
+seconds = 30
+max_instances_number = 8
+retest_number = 3
+retest_tolerance = 10
+
+
+def checkInt(s):
+    try:
+        int(s)
+        return True
+    except ValueError:
+        return False
+
+
+def setup_client(index):
+    # Instances 0-3 are expected on ports 9000-9003; any further
+    # instances are expected on ports 9008 and up.
+    if index < 4:
+        port_idx = index
+    else:
+        port_idx = index + 4
+    client = Client(
+        host="localhost",
+        database="default",
+        user="default",
+        password="",
+        port="900%d" % port_idx,
+    )
+    union_mode_query = "SET union_default_mode='DISTINCT'"
+    client.execute(union_mode_query)
+    return client
+
+
+def warm_client(clientN, clientL, query, loop):
+    for c_idx in range(clientN):
+        for _ in range(loop):
+            clientL[c_idx].execute(query)
+
+
+def read_queries(queries_list):
+    queries = list()
+    queries_id = list()
+    with open(queries_list, "r") as f:
+        for line in f:
+            line = line.rstrip()
+            # Split on the first '$' only, so the SQL text may contain '$'.
+            line = line.split("$", 1)
+            queries_id.append(line[0])
+            queries.append(line[1])
+    return queries_id, queries
+
+
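read_queries expects one `<id>$<sql>` pair per line, as in the queries_ssb.sql file shipped next to this script. A tiny self-contained sketch of the format (the sample line is shortened from that file):

```python
# Format sketch: query ID before the first '$', SQL text after it.
sample = "Q1.1$SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue FROM lineorder_flat"
qid, sql = sample.split("$", 1)
assert qid == "Q1.1" and sql.startswith("SELECT")
```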
+def run_task(client, cname, query, loop, query_latency):
+    start_time = time.time()
+    for i in range(loop):
+        client.execute(query)
+        query_latency.append(client.last_query.elapsed)
+
+    end_time = time.time()
+    p95 = np.percentile(query_latency, 95)
+    print(
+        "CLIENT: {0} end. -> P95: {1:f}, qps: {2:f}".format(
+            cname, p95, loop / (end_time - start_time)
+        )
+    )
+
+
+def run_multi_clients(clientN, clientList, query, loop):
+    client_pids = {}
+    start_time = time.time()
+    manager = multiprocessing.Manager()
+    # One shared latency list per possible client process, instead of
+    # eight hand-written copies of the same branch.
+    query_latency_lists = [manager.list() for _ in range(max_instances_number)]
+
+    if clientN > max_instances_number:
+        print("ERROR: CLIENT number mismatch!!")
+        sys.exit()
+
+    for c_idx in range(clientN):
+        client_name = "Role_%d" % c_idx
+        client_pids[c_idx] = multiprocessing.Process(
+            target=run_task,
+            args=(clientList[c_idx], client_name, query, loop, query_latency_lists[c_idx]),
+        )
+        print("CLIENT: %s start" % client_name)
+        client_pids[c_idx].start()
+
+    for c_idx in range(clientN):
+        client_pids[c_idx].join()
+    end_time = time.time()
+    totalT = end_time - start_time
+
+    query_latencyTotal = list()
+    for latency_list in query_latency_lists[:clientN]:
+        query_latencyTotal.extend(latency_list)
+
+    totalP95 = np.percentile(query_latencyTotal, 95) * 1000
+    return totalT, totalP95
+
+
+def run_task_calculated(client, cname, query, loop):
+    # Calibration-only variant of run_task: measures but does not report.
+    query_latency = list()
+    start_time = time.time()
+    for i in range(loop):
+        client.execute(query)
+        query_latency.append(client.last_query.elapsed)
+    end_time = time.time()
+    p95 = np.percentile(query_latency, 95)
+
+
+def run_multi_clients_calculated(clientN, clientList, query, loop):
+    client_pids = {}
+    start_time = time.time()
+    for c_idx in range(clientN):
+        client_name = "Role_%d" % c_idx
+        client_pids[c_idx] = multiprocessing.Process(
+            target=run_task_calculated,
+            args=(clientList[c_idx], client_name, query, loop),
+        )
+        client_pids[c_idx].start()
+    for c_idx in range(clientN):
+        client_pids[c_idx].join()
+    end_time = time.time()
+    totalT = end_time - start_time
+    return totalT
+
+
+if __name__ == "__main__":
+    client_number = 1
+    queries = list()
+    queries_id = list()
+
+    if len(sys.argv) != 3:
+        print(
+            "usage: python3 client_stressing_test.py [queries_file_path] [client_number]"
+        )
+        sys.exit()
+    else:
+        queries_list = sys.argv[1]
+        if not os.path.isfile(queries_list) or not os.access(queries_list, os.R_OK):
+            print("please check the path to the queries file")
+            sys.exit()
+        # Validate the client number before converting it, so a non-integer
+        # argument is reported instead of raising ValueError.
+        if (
+            not checkInt(sys.argv[2])
+            or int(sys.argv[2]) > max_instances_number
+            or int(sys.argv[2]) < 1
+        ):
+            print("client_number should be in [1~%d]" % max_instances_number)
+            sys.exit()
+        client_number = int(sys.argv[2])
+        print(
+            "queries_file_path: %s, client_number: %d" % (queries_list, client_number)
+        )
+
+    client_list = {}
+    queries_id, queries = read_queries(queries_list)
+
+    for c_idx in range(client_number):
+        client_list[c_idx] = setup_client(c_idx)
+    # clear cache
+    os.system("sync; echo 3 > /proc/sys/vm/drop_caches")
+
+    print("###Pilot Run Begin")
+    for i in queries:
+        warm_client(client_number, client_list, i, 1)
+    print("###Pilot Run End -> Start stressing....")
+
+    query_index = 0
+    for q in queries:
+        print(
+            "\n###START -> Index: %d, ID: %s, Query: %s"
+            % (query_index, queries_id[query_index], q)
+        )
+        warm_client(client_number, client_list, q, warmup_runs)
+        print("###Warm Done!")
+        for j in range(0, retest_number):
+            totalT = run_multi_clients_calculated(
+                client_number, client_list, q, calculated_runs
+            )
+            curr_loop = int(seconds * calculated_runs / totalT) + 1
+            print(
+                "###Calculation Done! -> loopN: %d, expected seconds:%d"
+                % (curr_loop, seconds)
+            )
+
+            print("###Stress Running! -> %d iterations......" % curr_loop)
+
+            totalT, totalP95 = run_multi_clients(
+                client_number, client_list, q, curr_loop
+            )
+
+            if (seconds - retest_tolerance) < totalT < (seconds + retest_tolerance):
+                break
+            else:
+                print(
+                    "###totalT:%d is far away from expected seconds:%d. Run again ->j:%d!"
+                    % (totalT, seconds, j)
+                )
+
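The retest loop first times a short calibration pass of calculated_runs iterations, then scales the iteration count so the stress run lasts roughly `seconds`. A worked example of that arithmetic, with a hypothetical calibration time:

```python
# Worked example of the loop-count calibration used above.
seconds = 30          # desired stress duration
calculated_runs = 10  # iterations in the calibration pass
totalT = 6.0          # hypothetical: calibration pass took 6 s

# One iteration costs totalT / calculated_runs = 0.6 s, so:
curr_loop = int(seconds * calculated_runs / totalT) + 1  # = 51 iterations
assert curr_loop == 51
print("stress loop count:", curr_loop)
```

+        print(
+            "###Completed! 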
-> ID: %s, clientN: %d, totalT: %.2f s, latencyAVG: %.2f ms, P95: %.2f ms, QPS_Final: %.2f" + % ( + queries_id[query_index], + client_number, + totalT, + totalT * 1000 / (curr_loop * client_number), + totalP95, + ((curr_loop * client_number) / totalT), + ) + ) + query_index += 1 + print("###Finished!") diff --git a/contrib/qpl-cmake/benchmark_sample/client_scripts/queries_ssb.sql b/contrib/qpl-cmake/benchmark_sample/client_scripts/queries_ssb.sql new file mode 100644 index 00000000000..abf2df6503a --- /dev/null +++ b/contrib/qpl-cmake/benchmark_sample/client_scripts/queries_ssb.sql @@ -0,0 +1,10 @@ +Q1.1$SELECT sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue FROM lineorder_flat WHERE toYear(LO_ORDERDATE) = 1993 AND LO_DISCOUNT BETWEEN 1 AND 3 AND LO_QUANTITY < 25; +Q2.1$SELECT sum(LO_REVENUE),toYear(LO_ORDERDATE) AS year,P_BRAND FROM lineorder_flat WHERE P_CATEGORY = 'MFGR#12' AND S_REGION = 'AMERICA' GROUP BY year,P_BRAND ORDER BY year,P_BRAND; +Q2.2$SELECT sum(LO_REVENUE),toYear(LO_ORDERDATE) AS year,P_BRAND FROM lineorder_flat WHERE P_BRAND >= 'MFGR#2221' AND P_BRAND <= 'MFGR#2228' AND S_REGION = 'ASIA' GROUP BY year,P_BRAND ORDER BY year,P_BRAND; +Q2.3$SELECT sum(LO_REVENUE),toYear(LO_ORDERDATE) AS year,P_BRAND FROM lineorder_flat WHERE P_BRAND = 'MFGR#2239' AND S_REGION = 'EUROPE' GROUP BY year,P_BRAND ORDER BY year,P_BRAND; +Q3.1$SELECT C_NATION,S_NATION,toYear(LO_ORDERDATE) AS year,sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE C_REGION = 'ASIA' AND S_REGION = 'ASIA' AND year >= 1992 AND year <= 1997 GROUP BY C_NATION,S_NATION,year ORDER BY year ASC,revenue DESC; +Q3.2$SELECT C_CITY,S_CITY,toYear(LO_ORDERDATE) AS year,sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE C_NATION = 'UNITED STATES' AND S_NATION = 'UNITED STATES' AND year >= 1992 AND year <= 1997 GROUP BY C_CITY,S_CITY,year ORDER BY year ASC,revenue DESC; +Q3.3$SELECT C_CITY,S_CITY,toYear(LO_ORDERDATE) AS year,sum(LO_REVENUE) AS revenue FROM lineorder_flat WHERE (C_CITY = 'UNITED KI1' OR C_CITY = 'UNITED KI5') AND (S_CITY = 'UNITED KI1' OR S_CITY = 'UNITED KI5') AND year >= 1992 AND year <= 1997 GROUP BY C_CITY,S_CITY,year ORDER BY year ASC,revenue DESC; +Q4.1$SELECT toYear(LO_ORDERDATE) AS year,C_NATION,sum(LO_REVENUE - LO_SUPPLYCOST) AS profit FROM lineorder_flat WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2') GROUP BY year,C_NATION ORDER BY year ASC,C_NATION ASC; +Q4.2$SELECT toYear(LO_ORDERDATE) AS year,S_NATION,P_CATEGORY,sum(LO_REVENUE - LO_SUPPLYCOST) AS profit FROM lineorder_flat WHERE C_REGION = 'AMERICA' AND S_REGION = 'AMERICA' AND (year = 1997 OR year = 1998) AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2') GROUP BY year,S_NATION,P_CATEGORY ORDER BY year ASC,S_NATION ASC,P_CATEGORY ASC; +Q4.3$SELECT toYear(LO_ORDERDATE) AS year,S_CITY,P_BRAND,sum(LO_REVENUE - LO_SUPPLYCOST) AS profit FROM lineorder_flat WHERE S_NATION = 'UNITED STATES' AND (year = 1997 OR year = 1998) AND P_CATEGORY = 'MFGR#14' GROUP BY year,S_CITY,P_BRAND ORDER BY year ASC,S_CITY ASC,P_BRAND ASC; diff --git a/contrib/qpl-cmake/benchmark_sample/client_scripts/run_ssb.sh b/contrib/qpl-cmake/benchmark_sample/client_scripts/run_ssb.sh new file mode 100644 index 00000000000..6067b1058f2 --- /dev/null +++ b/contrib/qpl-cmake/benchmark_sample/client_scripts/run_ssb.sh @@ -0,0 +1,6 @@ +WORKING_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.." +if [ ! 
-d "${WORKING_DIR}/output" ]; then +mkdir ${WORKING_DIR}/output +fi +bash allin1_ssb.sh 2 > ${WORKING_DIR}/output/run.log +echo "Please check log in: ${WORKING_DIR}/output/run.log" \ No newline at end of file diff --git a/contrib/qpl-cmake/benchmark_sample/database_dir/deflate/config_deflate.xml b/contrib/qpl-cmake/benchmark_sample/database_dir/deflate/config_deflate.xml new file mode 100644 index 00000000000..ab77a9cdcbe --- /dev/null +++ b/contrib/qpl-cmake/benchmark_sample/database_dir/deflate/config_deflate.xml @@ -0,0 +1,49 @@ + + + + + trace + true + + + 8123 + 9000 + 9004 + + ./ + + 8589934592 + 5368709120 + true + + + + deflate_qpl + + + + + + + + + ::/0 + + + default + default + 1 + + + + + + + + + + + diff --git a/contrib/qpl-cmake/benchmark_sample/database_dir/deflate_s2/config_deflate_s2.xml b/contrib/qpl-cmake/benchmark_sample/database_dir/deflate_s2/config_deflate_s2.xml new file mode 100644 index 00000000000..b71456486f5 --- /dev/null +++ b/contrib/qpl-cmake/benchmark_sample/database_dir/deflate_s2/config_deflate_s2.xml @@ -0,0 +1,49 @@ + + + + + trace + true + + + 8124 + 9001 + 9005 + + ./ + + 8589934592 + 5368709120 + true + + + + deflate_qpl + + + + + + + + + ::/0 + + + default + default + 1 + + + + + + + + + + + diff --git a/contrib/qpl-cmake/benchmark_sample/database_dir/lz4/config_lz4.xml b/contrib/qpl-cmake/benchmark_sample/database_dir/lz4/config_lz4.xml new file mode 100644 index 00000000000..f4dc59b60aa --- /dev/null +++ b/contrib/qpl-cmake/benchmark_sample/database_dir/lz4/config_lz4.xml @@ -0,0 +1,49 @@ + + + + + trace + true + + + 8123 + 9000 + 9004 + + ./ + + 8589934592 + 5368709120 + true + + + + lz4 + + + + + + + + + ::/0 + + + default + default + 1 + + + + + + + + + + + diff --git a/contrib/qpl-cmake/benchmark_sample/database_dir/lz4_s2/config_lz4_s2.xml b/contrib/qpl-cmake/benchmark_sample/database_dir/lz4_s2/config_lz4_s2.xml new file mode 100644 index 00000000000..357db8942d7 --- /dev/null +++ b/contrib/qpl-cmake/benchmark_sample/database_dir/lz4_s2/config_lz4_s2.xml @@ -0,0 +1,49 @@ + + + + + trace + true + + + 8124 + 9001 + 9005 + + ./ + + 8589934592 + 5368709120 + true + + + + lz4 + + + + + + + + + ::/0 + + + default + default + 1 + + + + + + + + + + + diff --git a/contrib/qpl-cmake/benchmark_sample/database_dir/zstd/config_zstd.xml b/contrib/qpl-cmake/benchmark_sample/database_dir/zstd/config_zstd.xml new file mode 100644 index 00000000000..1c4c738edaf --- /dev/null +++ b/contrib/qpl-cmake/benchmark_sample/database_dir/zstd/config_zstd.xml @@ -0,0 +1,49 @@ + + + + + trace + true + + + 8123 + 9000 + 9004 + + ./ + + 8589934592 + 5368709120 + true + + + + zstd + + + + + + + + + ::/0 + + + default + default + 1 + + + + + + + + + + + diff --git a/contrib/qpl-cmake/benchmark_sample/database_dir/zstd_s2/config_zstd_s2.xml b/contrib/qpl-cmake/benchmark_sample/database_dir/zstd_s2/config_zstd_s2.xml new file mode 100644 index 00000000000..f3db01b7739 --- /dev/null +++ b/contrib/qpl-cmake/benchmark_sample/database_dir/zstd_s2/config_zstd_s2.xml @@ -0,0 +1,49 @@ + + + + + trace + true + + + 8124 + 9001 + 9005 + + ./ + + 8589934592 + 5368709120 + true + + + + zstd + + + + + + + + + ::/0 + + + default + default + 1 + + + + + + + + + + + diff --git a/contrib/snappy-cmake/CMakeLists.txt b/contrib/snappy-cmake/CMakeLists.txt index 50cdc8732a1..f406de0e343 100644 --- a/contrib/snappy-cmake/CMakeLists.txt +++ b/contrib/snappy-cmake/CMakeLists.txt @@ -1,4 +1,4 @@ -set (SOURCE_DIR "${CMAKE_SOURCE_DIR}/contrib/snappy") +set (SOURCE_DIR 
"${PROJECT_SOURCE_DIR}/contrib/snappy") if (ARCH_S390X) set (SNAPPY_IS_BIG_ENDIAN 1) diff --git a/contrib/sparse-checkout/setup-sparse-checkout.sh b/contrib/sparse-checkout/setup-sparse-checkout.sh new file mode 100755 index 00000000000..3feba6c5adf --- /dev/null +++ b/contrib/sparse-checkout/setup-sparse-checkout.sh @@ -0,0 +1,19 @@ +#!/bin/sh + +set -e + +git config submodule."contrib/llvm-project".update '!../sparse-checkout/update-llvm-project.sh' +git config submodule."contrib/croaring".update '!../sparse-checkout/update-croaring.sh' +git config submodule."contrib/aws".update '!../sparse-checkout/update-aws.sh' +git config submodule."contrib/openssl".update '!../sparse-checkout/update-openssl.sh' +git config submodule."contrib/boringssl".update '!../sparse-checkout/update-boringssl.sh' +git config submodule."contrib/arrow".update '!../sparse-checkout/update-arrow.sh' +git config submodule."contrib/grpc".update '!../sparse-checkout/update-grpc.sh' +git config submodule."contrib/orc".update '!../sparse-checkout/update-orc.sh' +git config submodule."contrib/h3".update '!../sparse-checkout/update-h3.sh' +git config submodule."contrib/icu".update '!../sparse-checkout/update-icu.sh' +git config submodule."contrib/boost".update '!../sparse-checkout/update-boost.sh' +git config submodule."contrib/aws-s2n-tls".update '!../sparse-checkout/update-aws-s2n-tls.sh' +git config submodule."contrib/protobuf".update '!../sparse-checkout/update-protobuf.sh' +git config submodule."contrib/libxml2".update '!../sparse-checkout/update-libxml2.sh' +git config submodule."contrib/brotli".update '!../sparse-checkout/update-brotli.sh' diff --git a/contrib/sparse-checkout/update-arrow.sh b/contrib/sparse-checkout/update-arrow.sh new file mode 100755 index 00000000000..e004b60da02 --- /dev/null +++ b/contrib/sparse-checkout/update-arrow.sh @@ -0,0 +1,12 @@ +#!/bin/sh + +echo "Using sparse checkout for arrow" + +FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout +echo '/*' > $FILES_TO_CHECKOUT +echo '!/*/*' >> $FILES_TO_CHECKOUT +echo '/cpp/*' >> $FILES_TO_CHECKOUT + +git config core.sparsecheckout true +git checkout $1 +git read-tree -mu HEAD diff --git a/contrib/sparse-checkout/update-aws-s2n-tls.sh b/contrib/sparse-checkout/update-aws-s2n-tls.sh new file mode 100755 index 00000000000..4d65dc4b81d --- /dev/null +++ b/contrib/sparse-checkout/update-aws-s2n-tls.sh @@ -0,0 +1,13 @@ +#!/bin/sh + +echo "Using sparse checkout for aws-s2n-tls" + +FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout +echo '/*' > $FILES_TO_CHECKOUT +echo '!/test/*' >> $FILES_TO_CHECKOUT +echo '!/docs/*' >> $FILES_TO_CHECKOUT +echo '!/compliance/*' >> $FILES_TO_CHECKOUT + +git config core.sparsecheckout true +git checkout $1 +git read-tree -mu HEAD diff --git a/contrib/sparse-checkout/update-aws.sh b/contrib/sparse-checkout/update-aws.sh new file mode 100755 index 00000000000..f86acb54d95 --- /dev/null +++ b/contrib/sparse-checkout/update-aws.sh @@ -0,0 +1,13 @@ +#!/bin/sh + +echo "Using sparse checkout for aws" + +FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout +echo '/*' > $FILES_TO_CHECKOUT +echo '!/*/*' >> $FILES_TO_CHECKOUT +echo '/src/aws-cpp-sdk-core/*' >> $FILES_TO_CHECKOUT +echo '/generated/src/aws-cpp-sdk-s3/*' >> $FILES_TO_CHECKOUT + +git config core.sparsecheckout true +git checkout $1 +git read-tree -mu HEAD diff --git a/contrib/sparse-checkout/update-boost.sh b/contrib/sparse-checkout/update-boost.sh new file mode 100755 index 00000000000..9bd1f6c1796 --- /dev/null +++ 
b/contrib/sparse-checkout/update-boost.sh
@@ -0,0 +1,85 @@
+#!/bin/sh
+
+echo "Using sparse checkout for boost"
+
+FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
+echo '/*' > $FILES_TO_CHECKOUT
+echo '!/*/*' >> $FILES_TO_CHECKOUT
+echo '/boost/*' >> $FILES_TO_CHECKOUT
+echo '!/boost/*/*' >> $FILES_TO_CHECKOUT
+echo '/boost/algorithm/*' >> $FILES_TO_CHECKOUT
+echo '/boost/any/*' >> $FILES_TO_CHECKOUT
+echo '/boost/atomic/*' >> $FILES_TO_CHECKOUT
+echo '/boost/assert/*' >> $FILES_TO_CHECKOUT
+echo '/boost/bind/*' >> $FILES_TO_CHECKOUT
+echo '/boost/concept/*' >> $FILES_TO_CHECKOUT
+echo '/boost/config/*' >> $FILES_TO_CHECKOUT
+echo '/boost/container/*' >> $FILES_TO_CHECKOUT
+echo '/boost/container_hash/*' >> $FILES_TO_CHECKOUT
+echo '/boost/context/*' >> $FILES_TO_CHECKOUT
+echo '/boost/convert/*' >> $FILES_TO_CHECKOUT
+echo '/boost/coroutine/*' >> $FILES_TO_CHECKOUT
+echo '/boost/core/*' >> $FILES_TO_CHECKOUT
+echo '/boost/detail/*' >> $FILES_TO_CHECKOUT
+echo '/boost/dynamic_bitset/*' >> $FILES_TO_CHECKOUT
+echo '/boost/exception/*' >> $FILES_TO_CHECKOUT
+echo '/boost/filesystem/*' >> $FILES_TO_CHECKOUT
+echo '/boost/functional/*' >> $FILES_TO_CHECKOUT
+echo '/boost/function/*' >> $FILES_TO_CHECKOUT
+echo '/boost/geometry/*' >> $FILES_TO_CHECKOUT
+echo '/boost/graph/*' >> $FILES_TO_CHECKOUT
+echo '/boost/heap/*' >> $FILES_TO_CHECKOUT
+echo '/boost/integer/*' >> $FILES_TO_CHECKOUT
+echo '/boost/intrusive/*' >> $FILES_TO_CHECKOUT
+echo '/boost/iostreams/*' >> $FILES_TO_CHECKOUT
+echo '/boost/io/*' >> $FILES_TO_CHECKOUT
+echo '/boost/iterator/*' >> $FILES_TO_CHECKOUT
+echo '/boost/math/*' >> $FILES_TO_CHECKOUT
+echo '/boost/move/*' >> $FILES_TO_CHECKOUT
+echo '/boost/mpl/*' >> $FILES_TO_CHECKOUT
+echo '/boost/multi_index/*' >> $FILES_TO_CHECKOUT
+echo '/boost/multiprecision/*' >> $FILES_TO_CHECKOUT
+echo '/boost/numeric/*' >> $FILES_TO_CHECKOUT
+echo '/boost/predef/*' >> $FILES_TO_CHECKOUT
+echo '/boost/preprocessor/*' >> $FILES_TO_CHECKOUT
+echo '/boost/program_options/*' >> $FILES_TO_CHECKOUT
+echo '/boost/range/*' >> $FILES_TO_CHECKOUT
+echo '/boost/regex/*' >> $FILES_TO_CHECKOUT
+echo '/boost/smart_ptr/*' >> $FILES_TO_CHECKOUT
+echo '/boost/type_index/*' >> $FILES_TO_CHECKOUT
+echo '/boost/type_traits/*' >> $FILES_TO_CHECKOUT
+echo '/boost/system/*' >> $FILES_TO_CHECKOUT
+echo '/boost/tti/*' >> $FILES_TO_CHECKOUT
+echo '/boost/utility/*' >> $FILES_TO_CHECKOUT
+echo '/boost/lexical_cast/*' >> $FILES_TO_CHECKOUT
+echo '/boost/optional/*' >> $FILES_TO_CHECKOUT
+echo '/boost/property_map/*' >> $FILES_TO_CHECKOUT
+echo '/boost/pending/*' >> $FILES_TO_CHECKOUT
+echo '/boost/multi_array/*' >> $FILES_TO_CHECKOUT
+echo '/boost/tuple/*' >> $FILES_TO_CHECKOUT
+echo '/boost/icl/*' >> $FILES_TO_CHECKOUT
+echo '/boost/unordered/*' >> $FILES_TO_CHECKOUT
+echo '/boost/typeof/*' >> $FILES_TO_CHECKOUT
+echo '/boost/parameter/*' >> $FILES_TO_CHECKOUT
+echo '/boost/mp11/*' >> $FILES_TO_CHECKOUT
+echo '/boost/archive/*' >> $FILES_TO_CHECKOUT
+echo '/boost/function_types/*' >> $FILES_TO_CHECKOUT
+echo '/boost/serialization/*' >> $FILES_TO_CHECKOUT
+echo '/boost/fusion/*' >> $FILES_TO_CHECKOUT
+echo '/boost/variant/*' >> $FILES_TO_CHECKOUT
+echo '/boost/format/*' >> $FILES_TO_CHECKOUT
+echo '/boost/locale/*' >> $FILES_TO_CHECKOUT
+echo '/boost/random/*' >> $FILES_TO_CHECKOUT
+echo '/boost/spirit/*' >> $FILES_TO_CHECKOUT
+echo '/boost/uuid/*' >> $FILES_TO_CHECKOUT
+echo '/boost/xpressive/*' >> $FILES_TO_CHECKOUT
+echo '/boost/asio/*' >> $FILES_TO_CHECKOUT
+echo '/boost/circular_buffer/*' >> $FILES_TO_CHECKOUT
+echo '/boost/proto/*' >> $FILES_TO_CHECKOUT
+echo '/boost/qvm/*' >> $FILES_TO_CHECKOUT
+echo '/boost/property_tree/*' >> $FILES_TO_CHECKOUT
+echo '/libs/*' >> $FILES_TO_CHECKOUT
+
+git config core.sparsecheckout true
+git checkout $1
+git read-tree -mu HEAD
\ No newline at end of file
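Each update-*.sh script builds $GIT_DIR/info/sparse-checkout as an ordered pattern list in which later patterns override earlier ones (which is why the `/boost/*` line above must append with `>>` rather than truncate the file). A hedged Python sketch of the same generation step, with an illustrative pattern set:

```python
# Hypothetical generator for a git sparse-checkout pattern file.
# Later patterns override earlier ones, so ordering matters.
import subprocess
from pathlib import Path

def write_sparse_checkout(patterns):
    git_dir = subprocess.check_output(
        ["git", "rev-parse", "--git-dir"], text=True
    ).strip()
    target = Path(git_dir) / "info" / "sparse-checkout"
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text("\n".join(patterns) + "\n")

# Example: keep top-level files, drop subdirectories, opt back in per directory.
write_sparse_checkout(["/*", "!/*/*", "/boost/*", "/libs/*"])
```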
diff --git a/contrib/sparse-checkout/update-boringssl.sh b/contrib/sparse-checkout/update-boringssl.sh
new file mode 100755
index 00000000000..f877a78afed
--- /dev/null
+++ b/contrib/sparse-checkout/update-boringssl.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+echo "Using sparse checkout for boringssl"
+
+FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
+echo '/*' > $FILES_TO_CHECKOUT
+echo '!/fuzz/*' >> $FILES_TO_CHECKOUT
+echo '!/crypto/cipher_extra/test/*' >> $FILES_TO_CHECKOUT
+echo '!/third_party/wycheproof_testvectors/*' >> $FILES_TO_CHECKOUT
+echo '!/third_party/googletest/*' >> $FILES_TO_CHECKOUT
+
+git config core.sparsecheckout true
+git checkout $1
+git read-tree -mu HEAD
diff --git a/contrib/sparse-checkout/update-brotli.sh b/contrib/sparse-checkout/update-brotli.sh
new file mode 100755
index 00000000000..8784f5e4125
--- /dev/null
+++ b/contrib/sparse-checkout/update-brotli.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+echo "Using sparse checkout for brotli"
+
+FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
+echo '/*' > $FILES_TO_CHECKOUT
+echo '!/*/*' >> $FILES_TO_CHECKOUT
+echo '/c/*' >> $FILES_TO_CHECKOUT
+
+git config core.sparsecheckout true
+git checkout $1
+git read-tree -mu HEAD
diff --git a/contrib/sparse-checkout/update-croaring.sh b/contrib/sparse-checkout/update-croaring.sh
new file mode 100755
index 00000000000..9b7bba19df4
--- /dev/null
+++ b/contrib/sparse-checkout/update-croaring.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+echo "Using sparse checkout for croaring"
+
+FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
+echo '/*' > $FILES_TO_CHECKOUT
+echo '!/benchmarks/*' >> $FILES_TO_CHECKOUT
+echo '!/tests/*' >> $FILES_TO_CHECKOUT
+
+git config core.sparsecheckout true
+git checkout $1
+git read-tree -mu HEAD
diff --git a/contrib/sparse-checkout/update-grpc.sh b/contrib/sparse-checkout/update-grpc.sh
new file mode 100755
index 00000000000..38934fdbc1b
--- /dev/null
+++ b/contrib/sparse-checkout/update-grpc.sh
@@ -0,0 +1,22 @@
+#!/bin/sh
+
+echo "Using sparse checkout for grpc"
+
+FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
+echo '/*' > $FILES_TO_CHECKOUT
+echo '!/test/*' >> $FILES_TO_CHECKOUT
+echo '/test/build/*' >> $FILES_TO_CHECKOUT
+echo '!/tools/*' >> $FILES_TO_CHECKOUT
+echo '/tools/codegen/*' >> $FILES_TO_CHECKOUT
+echo '!/examples/*' >> $FILES_TO_CHECKOUT
+echo '!/doc/*' >> $FILES_TO_CHECKOUT
+# FIXME why do we need csharp?
+#echo '!/src/csharp/*' >> $FILES_TO_CHECKOUT +echo '!/src/python/*' >> $FILES_TO_CHECKOUT +echo '!/src/objective-c/*' >> $FILES_TO_CHECKOUT +echo '!/src/php/*' >> $FILES_TO_CHECKOUT +echo '!/src/ruby/*' >> $FILES_TO_CHECKOUT + +git config core.sparsecheckout true +git checkout $1 +git read-tree -mu HEAD diff --git a/contrib/sparse-checkout/update-h3.sh b/contrib/sparse-checkout/update-h3.sh new file mode 100755 index 00000000000..127885f89cc --- /dev/null +++ b/contrib/sparse-checkout/update-h3.sh @@ -0,0 +1,12 @@ +#!/bin/sh + +echo "Using sparse checkout for h3" + +FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout +echo '/*' > $FILES_TO_CHECKOUT +echo '!/tests/*' >> $FILES_TO_CHECKOUT +echo '!/website/*' >> $FILES_TO_CHECKOUT + +git config core.sparsecheckout true +git checkout $1 +git read-tree -mu HEAD diff --git a/contrib/sparse-checkout/update-icu.sh b/contrib/sparse-checkout/update-icu.sh new file mode 100755 index 00000000000..76af39f07a4 --- /dev/null +++ b/contrib/sparse-checkout/update-icu.sh @@ -0,0 +1,12 @@ +#!/bin/sh + +echo "Using sparse checkout for icu" + +FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout +echo '/*' > $FILES_TO_CHECKOUT +echo '!/*/*' >> $FILES_TO_CHECKOUT +echo '/icu4c/*' >> $FILES_TO_CHECKOUT + +git config core.sparsecheckout true +git checkout $1 +git read-tree -mu HEAD \ No newline at end of file diff --git a/contrib/sparse-checkout/update-libxml2.sh b/contrib/sparse-checkout/update-libxml2.sh new file mode 100755 index 00000000000..24faf11eec9 --- /dev/null +++ b/contrib/sparse-checkout/update-libxml2.sh @@ -0,0 +1,16 @@ +#!/bin/sh + +echo "Using sparse checkout for libxml2" + +FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout +echo '/*' > $FILES_TO_CHECKOUT +echo '!/result/*' >> $FILES_TO_CHECKOUT +echo '!/test/*' >> $FILES_TO_CHECKOUT +echo '!/doc/*' >> $FILES_TO_CHECKOUT +echo '!/os400/*' >> $FILES_TO_CHECKOUT +echo '!/fuzz/*' >> $FILES_TO_CHECKOUT +echo '!/python/*' >> $FILES_TO_CHECKOUT + +git config core.sparsecheckout true +git checkout $1 +git read-tree -mu HEAD diff --git a/contrib/sparse-checkout/update-llvm-project.sh b/contrib/sparse-checkout/update-llvm-project.sh new file mode 100755 index 00000000000..53c3b691d3a --- /dev/null +++ b/contrib/sparse-checkout/update-llvm-project.sh @@ -0,0 +1,27 @@ +#!/bin/sh + +echo "Using sparse checkout for llvm-project" + +FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout +echo '/*' > $FILES_TO_CHECKOUT +echo '!/*/*' >> $FILES_TO_CHECKOUT +echo '/llvm/*' >> $FILES_TO_CHECKOUT +echo '!/llvm/*/*' >> $FILES_TO_CHECKOUT +echo '/llvm/cmake/*' >> $FILES_TO_CHECKOUT +echo '/llvm/projects/*' >> $FILES_TO_CHECKOUT +echo '/llvm/include/*' >> $FILES_TO_CHECKOUT +echo '/llvm/lib/*' >> $FILES_TO_CHECKOUT +echo '/llvm/utils/TableGen/*' >> $FILES_TO_CHECKOUT +echo '/libcxxabi/*' >> $FILES_TO_CHECKOUT +echo '!/libcxxabi/test/*' >> $FILES_TO_CHECKOUT +echo '/libcxx/*' >> $FILES_TO_CHECKOUT +echo '!/libcxx/test/*' >> $FILES_TO_CHECKOUT +echo '/libunwind/*' >> $FILES_TO_CHECKOUT +echo '!/libunwind/test/*' >> $FILES_TO_CHECKOUT +echo '/compiler-rt/*' >> $FILES_TO_CHECKOUT +echo '!/compiler-rt/test/*' >> $FILES_TO_CHECKOUT +echo '/cmake/*' >> $FILES_TO_CHECKOUT + +git config core.sparsecheckout true +git checkout $1 +git read-tree -mu HEAD diff --git a/contrib/sparse-checkout/update-openssl.sh b/contrib/sparse-checkout/update-openssl.sh new file mode 100755 index 00000000000..33e19f43cb7 --- /dev/null +++ b/contrib/sparse-checkout/update-openssl.sh @@ -0,0 
+1,15 @@ +#!/bin/sh + +echo "Using sparse checkout for openssl" + +FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout +echo '/*' > $FILES_TO_CHECKOUT +echo '!/fuzz/*' >> $FILES_TO_CHECKOUT +echo '!/test/*' >> $FILES_TO_CHECKOUT +echo '!/doc/*' >> $FILES_TO_CHECKOUT +echo '!/providers/*' >> $FILES_TO_CHECKOUT +echo '!/apps/*' >> $FILES_TO_CHECKOUT + +git config core.sparsecheckout true +git checkout $1 +git read-tree -mu HEAD diff --git a/contrib/sparse-checkout/update-orc.sh b/contrib/sparse-checkout/update-orc.sh new file mode 100755 index 00000000000..57ab57a8d52 --- /dev/null +++ b/contrib/sparse-checkout/update-orc.sh @@ -0,0 +1,13 @@ +#!/bin/sh + +echo "Using sparse checkout for orc" + +FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout +echo '/*' > $FILES_TO_CHECKOUT +echo '!/*/*' >> $FILES_TO_CHECKOUT +echo '/c++/*' >> $FILES_TO_CHECKOUT +echo '/proto/*' >> $FILES_TO_CHECKOUT + +git config core.sparsecheckout true +git checkout $1 +git read-tree -mu HEAD diff --git a/contrib/sparse-checkout/update-protobuf.sh b/contrib/sparse-checkout/update-protobuf.sh new file mode 100755 index 00000000000..31c037c2cf5 --- /dev/null +++ b/contrib/sparse-checkout/update-protobuf.sh @@ -0,0 +1,13 @@ +#!/bin/sh + +echo "Using sparse checkout for protobuf" + +FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout +echo '!/*' > $FILES_TO_CHECKOUT +echo '/*/*' >> $FILES_TO_CHECKOUT +echo '/src/*' >> $FILES_TO_CHECKOUT +echo '/cmake/*' >> $FILES_TO_CHECKOUT + +git config core.sparsecheckout true +git checkout $1 +git read-tree -mu HEAD diff --git a/contrib/sqlite-amalgamation b/contrib/sqlite-amalgamation index 400ad7152a0..20598079891 160000 --- a/contrib/sqlite-amalgamation +++ b/contrib/sqlite-amalgamation @@ -1 +1 @@ -Subproject commit 400ad7152a0c7ee07756d96ab4f6a8f6d1080916 +Subproject commit 20598079891d27ef1a3ad3f66bbfa3f983c25268 diff --git a/contrib/sysroot b/contrib/sysroot index f0081b2649b..e0d1b64da66 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit f0081b2649b94837855f3bc7d05ef326b100bad8 +Subproject commit e0d1b64da666afbfaa6f1ee0487c33f3fd2cd5cb diff --git a/contrib/update-submodules.sh b/contrib/update-submodules.sh new file mode 100755 index 00000000000..c94681e6240 --- /dev/null +++ b/contrib/update-submodules.sh @@ -0,0 +1,11 @@ +#!/bin/sh + +set -e + +WORKDIR=$(dirname "$0") +WORKDIR=$(readlink -f "${WORKDIR}") + +"$WORKDIR/sparse-checkout/setup-sparse-checkout.sh" +git submodule init +git submodule sync +git submodule update --depth=1 diff --git a/contrib/vectorscan b/contrib/vectorscan index f6250ae3e5a..38431d11178 160000 --- a/contrib/vectorscan +++ b/contrib/vectorscan @@ -1 +1 @@ -Subproject commit f6250ae3e5a3085000239313ad0689cc1e00cdc2 +Subproject commit 38431d111781843741a781a57a6381a527d900a4 diff --git a/contrib/zlib-ng-cmake/CMakeLists.txt b/contrib/zlib-ng-cmake/CMakeLists.txt index aa067ba37e0..79f343bfc75 100644 --- a/contrib/zlib-ng-cmake/CMakeLists.txt +++ b/contrib/zlib-ng-cmake/CMakeLists.txt @@ -1,4 +1,4 @@ -set (SOURCE_DIR ${CMAKE_SOURCE_DIR}/contrib/zlib-ng) +set (SOURCE_DIR ${PROJECT_SOURCE_DIR}/contrib/zlib-ng) add_definitions(-DZLIB_COMPAT) add_definitions(-DWITH_GZFILEOP) diff --git a/contrib/zstd b/contrib/zstd index 945f27758c0..63779c79823 160000 --- a/contrib/zstd +++ b/contrib/zstd @@ -1 +1 @@ -Subproject commit 945f27758c0fd67b636103a38dbf050266c6b90a +Subproject commit 63779c798237346c2b245c546c40b72a5a5913fe diff --git a/docker/bare/README.md b/docker/bare/README.md index 
0a610b1de90..4baf308f1e1 100644 --- a/docker/bare/README.md +++ b/docker/bare/README.md @@ -1,6 +1,6 @@ ## The bare minimum ClickHouse Docker image. -It is intented as a showcase to check the amount of implicit dependencies of ClickHouse from the OS in addition to the OS kernel. +It is intended as a showcase to check the amount of implicit dependencies of ClickHouse from the OS in addition to the OS kernel. Example usage: diff --git a/docker/docs/builder/Dockerfile b/docker/docs/builder/Dockerfile index 5946687dbef..3ca2bdafcb3 100644 --- a/docker/docs/builder/Dockerfile +++ b/docker/docs/builder/Dockerfile @@ -1,4 +1,3 @@ -# rebuild in #36968 # docker build -t clickhouse/docs-builder . # nodejs 17 prefers ipv6 and is broken in our environment FROM node:16-alpine diff --git a/docker/images.json b/docker/images.json index 508138d79af..b4f3e755bd1 100644 --- a/docker/images.json +++ b/docker/images.json @@ -123,7 +123,8 @@ "docker/test/stateless", "docker/test/integration/base", "docker/test/fuzzer", - "docker/test/keeper-jepsen" + "docker/test/keeper-jepsen", + "docker/test/server-jepsen" ] }, "docker/test/integration/kerberized_hadoop": { @@ -139,6 +140,10 @@ "name": "clickhouse/keeper-jepsen-test", "dependent": [] }, + "docker/test/server-jepsen": { + "name": "clickhouse/server-jepsen-test", + "dependent": [] + }, "docker/test/install/deb": { "name": "clickhouse/install-deb-test", "dependent": [] @@ -151,5 +156,9 @@ "name": "clickhouse/docs-builder", "dependent": [ ] + }, + "docker/test/sqllogic": { + "name": "clickhouse/sqllogic-test", + "dependent": [] } } diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 09395befdad..73da4515ff4 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -1,3 +1,6 @@ +# The Dockerfile.ubuntu exists for the tests/ci/docker_server.py script +# If the image is built from Dockerfile.alpine, then the `-alpine` suffix is added automatically, +# so the only purpose of Dockerfile.ubuntu is to push `latest`, `head` and so on w/o suffixes FROM ubuntu:20.04 AS glibc-donor ARG TARGETARCH @@ -29,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ esac ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" -ARG VERSION="23.2.1.2537" +ARG VERSION="23.4.2.11" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. 
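images.json above maps each Dockerfile directory to an image name plus the list of images that must be rebuilt when it changes. A hypothetical consistency check (run from the repository root) that every "dependent" entry is itself defined in the file:

```python
# Hypothetical consistency check for docker/images.json: every image
# listed as "dependent" must itself be described in the file.
import json

with open("docker/images.json") as f:
    images = json.load(f)

for path, desc in images.items():
    for dep in desc.get("dependent", []):
        assert dep in images, f"{path}: dependent {dep} is not defined"
print(f"{len(images)} image definitions OK")
```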
diff --git a/docker/keeper/Dockerfile.ubuntu b/docker/keeper/Dockerfile.ubuntu new file mode 120000 index 00000000000..1d1fe94df49 --- /dev/null +++ b/docker/keeper/Dockerfile.ubuntu @@ -0,0 +1 @@ +Dockerfile \ No newline at end of file diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 62e6d47c183..fa860b2207f 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -69,13 +69,14 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \ libc6 \ libc6-dev \ libc6-dev-arm64-cross \ + python3-boto3 \ yasm \ zstd \ && apt-get clean \ && rm -rf /var/lib/apt/lists # Download toolchain and SDK for Darwin -RUN wget -nv https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX11.0.sdk.tar.xz +RUN curl -sL -O https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX11.0.sdk.tar.xz # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH @@ -97,7 +98,7 @@ ENV PATH="$PATH:/usr/local/go/bin" ENV GOPATH=/workdir/go ENV GOCACHE=/workdir/ -ARG CLANG_TIDY_SHA1=03644275e794b0587849bfc2ec6123d5ae0bdb1c +ARG CLANG_TIDY_SHA1=c191254ea00d47ade11d7170ef82fe038c213774 RUN curl -Lo /usr/bin/clang-tidy-cache \ "https://raw.githubusercontent.com/matus-chochlik/ctcache/$CLANG_TIDY_SHA1/clang-tidy-cache" \ && chmod +x /usr/bin/clang-tidy-cache diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 24dca72e946..2cd0a011013 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -6,6 +6,7 @@ exec &> >(ts) ccache_status () { ccache --show-config ||: ccache --show-stats ||: + SCCACHE_NO_DAEMON=1 sccache --show-stats ||: } [ -O /build ] || git config --global --add safe.directory /build diff --git a/docker/packager/packager b/docker/packager/packager index 58dd299fd6d..a894fe2d8e9 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -5,13 +5,19 @@ import os import argparse import logging import sys -from typing import List +from pathlib import Path +from typing import List, Optional -SCRIPT_PATH = os.path.realpath(__file__) +SCRIPT_PATH = Path(__file__).absolute() IMAGE_TYPE = "binary" +IMAGE_NAME = f"clickhouse/{IMAGE_TYPE}-builder" -def check_image_exists_locally(image_name): +class BuildException(Exception): + pass + + +def check_image_exists_locally(image_name: str) -> bool: try: output = subprocess.check_output( f"docker images -q {image_name} 2> /dev/null", shell=True @@ -21,17 +27,17 @@ def check_image_exists_locally(image_name): return False -def pull_image(image_name): +def pull_image(image_name: str) -> bool: try: subprocess.check_call(f"docker pull {image_name}", shell=True) return True except subprocess.CalledProcessError: - logging.info(f"Cannot pull image {image_name}".format()) + logging.info("Cannot pull image %s", image_name) return False -def build_image(image_name, filepath): - context = os.path.dirname(filepath) +def build_image(image_name: str, filepath: Path) -> None: + context = filepath.parent build_cmd = f"docker build --network=host -t {image_name} -f {filepath} {context}" logging.info("Will build image with cmd: '%s'", build_cmd) subprocess.check_call( @@ -40,7 +46,7 @@ def build_image(image_name, filepath): ) -def pre_build(repo_path: str, env_variables: List[str]): +def pre_build(repo_path: Path, env_variables: List[str]): if "WITH_PERFORMANCE=1" in env_variables: current_branch = subprocess.check_output( "git branch --show-current", shell=True, encoding="utf-8" @@ -56,7 +62,9 @@ def pre_build(repo_path: 
str, env_variables: List[str]): # conclusion is: in the current state the easiest way to go is to force # unshallow repository for performance artifacts. # To change it we need to rework our performance tests docker image - raise Exception("shallow repository is not suitable for performance builds") + raise BuildException( + "shallow repository is not suitable for performance builds" + ) if current_branch != "master": cmd = ( f"git -C {repo_path} fetch --no-recurse-submodules " @@ -67,14 +75,14 @@ def pre_build(repo_path: str, env_variables: List[str]): def run_docker_image_with_env( - image_name, - as_root, - output, - env_variables, - ch_root, - ccache_dir, - docker_image_version, + image_name: str, + as_root: bool, + output_dir: Path, + env_variables: List[str], + ch_root: Path, + ccache_dir: Optional[Path], ): + output_dir.mkdir(parents=True, exist_ok=True) env_part = " -e ".join(env_variables) if env_part: env_part = " -e " + env_part @@ -89,10 +97,14 @@ def run_docker_image_with_env( else: user = f"{os.geteuid()}:{os.getegid()}" + ccache_mount = f"--volume={ccache_dir}:/ccache" + if ccache_dir is None: + ccache_mount = "" + cmd = ( - f"docker run --network=host --user={user} --rm --volume={output}:/output " - f"--volume={ch_root}:/build --volume={ccache_dir}:/ccache {env_part} " - f"{interactive} {image_name}:{docker_image_version}" + f"docker run --network=host --user={user} --rm {ccache_mount}" + f"--volume={output_dir}:/output --volume={ch_root}:/build {env_part} " + f"{interactive} {image_name}" ) logging.info("Will build ClickHouse pkg with cmd: '%s'", cmd) @@ -100,24 +112,25 @@ def run_docker_image_with_env( subprocess.check_call(cmd, shell=True) -def is_release_build(build_type, package_type, sanitizer): +def is_release_build(build_type: str, package_type: str, sanitizer: str) -> bool: return build_type == "" and package_type == "deb" and sanitizer == "" def parse_env_variables( - build_type, - compiler, - sanitizer, - package_type, - cache, - distcc_hosts, - clang_tidy, - version, - author, - official, - additional_pkgs, - with_coverage, - with_binaries, + build_type: str, + compiler: str, + sanitizer: str, + package_type: str, + cache: str, + s3_bucket: str, + s3_directory: str, + s3_rw_access: bool, + clang_tidy: bool, + version: str, + official: bool, + additional_pkgs: bool, + with_coverage: bool, + with_binaries: str, ): DARWIN_SUFFIX = "-darwin" DARWIN_ARM_SUFFIX = "-darwin-aarch64" @@ -243,32 +256,43 @@ def parse_env_variables( else: result.append("BUILD_TYPE=None") - if cache == "distcc": - result.append(f"CCACHE_PREFIX={cache}") + if not cache: + cmake_flags.append("-DCOMPILER_CACHE=disabled") - if cache: + if cache == "ccache": + cmake_flags.append("-DCOMPILER_CACHE=ccache") result.append("CCACHE_DIR=/ccache") result.append("CCACHE_COMPRESSLEVEL=5") result.append("CCACHE_BASEDIR=/build") result.append("CCACHE_NOHASHDIR=true") result.append("CCACHE_COMPILERCHECK=content") - cache_maxsize = "15G" - if clang_tidy: - # 15G is not enough for tidy build - cache_maxsize = "25G" + result.append("CCACHE_MAXSIZE=15G") - # `CTCACHE_DIR` has the same purpose as the `CCACHE_DIR` above. 
- # It's there to have the clang-tidy cache embedded into our standard `CCACHE_DIR` + if cache == "sccache": + cmake_flags.append("-DCOMPILER_CACHE=sccache") + # see https://github.com/mozilla/sccache/blob/main/docs/S3.md + result.append(f"SCCACHE_BUCKET={s3_bucket}") + sccache_dir = "sccache" + if s3_directory: + sccache_dir = f"{s3_directory}/{sccache_dir}" + result.append(f"SCCACHE_S3_KEY_PREFIX={sccache_dir}") + if not s3_rw_access: + result.append("SCCACHE_S3_NO_CREDENTIALS=true") + + if clang_tidy: + # `CTCACHE_DIR` has the same purpose as the `CCACHE_DIR` above. + # It's there to have the clang-tidy cache embedded into our standard `CCACHE_DIR` + if cache == "ccache": result.append("CTCACHE_DIR=/ccache/clang-tidy-cache") - result.append(f"CCACHE_MAXSIZE={cache_maxsize}") - - if distcc_hosts: - hosts_with_params = [f"{host}/24,lzo" for host in distcc_hosts] + [ - "localhost/`nproc`" - ] - result.append('DISTCC_HOSTS="' + " ".join(hosts_with_params) + '"') - elif cache == "distcc": - result.append('DISTCC_HOSTS="localhost/`nproc`"') + if s3_bucket: + # see https://github.com/matus-chochlik/ctcache#environment-variables + ctcache_dir = "clang-tidy-cache" + if s3_directory: + ctcache_dir = f"{s3_directory}/{ctcache_dir}" + result.append(f"CTCACHE_S3_BUCKET={s3_bucket}") + result.append(f"CTCACHE_S3_FOLDER={ctcache_dir}") + if not s3_rw_access: + result.append("CTCACHE_S3_NO_CREDENTIALS=true") if additional_pkgs: # NOTE: This are the env for packages/build script @@ -300,9 +324,6 @@ def parse_env_variables( if version: result.append(f"VERSION_STRING='{version}'") - if author: - result.append(f"AUTHOR='{author}'") - if official: cmake_flags.append("-DCLICKHOUSE_OFFICIAL_BUILD=1") @@ -312,14 +333,14 @@ def parse_env_variables( return result -def dir_name(name: str) -> str: - if not os.path.isabs(name): - name = os.path.abspath(os.path.join(os.getcwd(), name)) - return name +def dir_name(name: str) -> Path: + path = Path(name) + if not path.is_absolute(): + path = Path.cwd() / name + return path -if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") +def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="ClickHouse building script using prebuilt Docker image", @@ -331,7 +352,7 @@ if __name__ == "__main__": ) parser.add_argument( "--clickhouse-repo-path", - default=os.path.join(os.path.dirname(SCRIPT_PATH), os.pardir, os.pardir), + default=SCRIPT_PATH.parents[2], type=dir_name, help="ClickHouse git repository", ) @@ -341,17 +362,16 @@ if __name__ == "__main__": parser.add_argument( "--compiler", choices=( - "clang-15", - "clang-15-darwin", - "clang-15-darwin-aarch64", - "clang-15-aarch64", - "clang-15-aarch64-v80compat", - "clang-15-ppc64le", - "clang-15-amd64-compat", - "clang-15-freebsd", - "gcc-11", + "clang-16", + "clang-16-darwin", + "clang-16-darwin-aarch64", + "clang-16-aarch64", + "clang-16-aarch64-v80compat", + "clang-16-ppc64le", + "clang-16-amd64-compat", + "clang-16-freebsd", ), - default="clang-15", + default="clang-16", help="a compiler to use", ) parser.add_argument( @@ -361,17 +381,34 @@ if __name__ == "__main__": ) parser.add_argument("--clang-tidy", action="store_true") - parser.add_argument("--cache", choices=("ccache", "distcc", ""), default="") parser.add_argument( - "--ccache_dir", - default=os.getenv("HOME", "") + "/.ccache", + "--cache", + choices=("ccache", "sccache", ""), + default="", + help="ccache or sccache for objects caching; 
sccache uses only S3 buckets", + ) + parser.add_argument( + "--ccache-dir", + default=Path.home() / ".ccache", + type=dir_name, + help="a directory with ccache", + ) - parser.add_argument("--distcc-hosts", nargs="+") + parser.add_argument( + "--s3-bucket", + help="an S3 bucket used for sccache and clang-tidy-cache", + ) + parser.add_argument( + "--s3-directory", + default="ccache", + help="an S3 directory prefix used for sccache and clang-tidy-cache", + ) + parser.add_argument( + "--s3-rw-access", + action="store_true", + help="if set, the build fails on errors writing cache to S3", + ) parser.add_argument("--force-build-image", action="store_true") parser.add_argument("--version") - parser.add_argument("--author", default="clickhouse", help="a package author") parser.add_argument("--official", action="store_true") parser.add_argument("--additional-pkgs", action="store_true") parser.add_argument("--with-coverage", action="store_true") @@ -387,34 +424,54 @@ if __name__ == "__main__": args = parser.parse_args() - image_name = f"clickhouse/{IMAGE_TYPE}-builder" + if args.additional_pkgs and args.package_type != "deb": + raise argparse.ArgumentTypeError( + "Can build additional packages only in deb build" + ) + + if args.cache != "ccache": + args.ccache_dir = None + + if args.with_binaries != "": + if args.package_type != "deb": + raise argparse.ArgumentTypeError( + "Can add additional binaries only in deb build" + ) + logging.info("Should place %s to output", args.with_binaries) + + if args.cache == "sccache": + if not args.s3_bucket: + raise argparse.ArgumentTypeError("sccache must have --s3-bucket set") + + return args + + +def main(): + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") + args = parse_args() ch_root = args.clickhouse_repo_path - if args.additional_pkgs and args.package_type != "deb": - raise Exception("Can build additional packages only in deb build") + dockerfile = ch_root / "docker/packager" / IMAGE_TYPE / "Dockerfile" + image_with_version = IMAGE_NAME + ":" + args.docker_image_version + if args.force_build_image: + build_image(image_with_version, dockerfile) + elif not ( + check_image_exists_locally(image_with_version) or pull_image(image_with_version) + ): + build_image(image_with_version, dockerfile) - if args.with_binaries != "" and args.package_type != "deb": - raise Exception("Can add additional binaries only in deb build") - - if args.with_binaries != "" and args.package_type == "deb": - logging.info("Should place %s to output", args.with_binaries) - - dockerfile = os.path.join(ch_root, "docker/packager", IMAGE_TYPE, "Dockerfile") - image_with_version = image_name + ":" + args.docker_image_version - if not check_image_exists_locally(image_name) or args.force_build_image: - if not pull_image(image_with_version) or args.force_build_image: - build_image(image_with_version, dockerfile) env_prepared = parse_env_variables( args.build_type, args.compiler, args.sanitizer, args.package_type, args.cache, - args.distcc_hosts, + args.s3_bucket, + args.s3_directory, + args.s3_rw_access, args.clang_tidy, args.version, - args.author, args.official, args.additional_pkgs, args.with_coverage, @@ -423,12 +480,15 @@ if __name__ == "__main__": pre_build(args.clickhouse_repo_path, env_prepared) run_docker_image_with_env( - image_name, + image_with_version, args.as_root, args.output_dir, env_prepared, ch_root, args.ccache_dir, - args.docker_image_version, ) logging.info("Output placed into %s", args.output_dir) + + +if __name__ == "__main__": + main() diff --git 
a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 472f25eed2d..1a5d2071f6b 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.2.1.2537" +ARG VERSION="23.4.2.11" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 5dbb244c298..8792d419a16 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -22,7 +22,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.2.1.2537" +ARG VERSION="23.4.2.11" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index c6c9fbca421..f6836804454 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -18,13 +18,13 @@ RUN apt-get update \ # and MEMORY_LIMIT_EXCEEDED exceptions in Functional tests (total memory limit in Functional tests is ~55.24 GiB). # TSAN will flush shadow memory when reaching this limit. # It may cause false-negatives, but it's better than OOM. -RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7 memory_limit_mb=46080'" >> /etc/environment +RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'" >> /etc/environment RUN echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment RUN echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'" >> /etc/environment RUN echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt'" >> /etc/environment # Sanitizer options for current shell (not current, but the one that will be spawned on "docker run") # (but w/o verbosity for TSAN, otherwise test.reference will not match) -ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080' +ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1' ENV UBSAN_OPTIONS='print_stacktrace=1' ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1' diff --git a/docker/test/codebrowser/Dockerfile b/docker/test/codebrowser/Dockerfile index b76b8234c81..8136fd1fbbc 100644 --- a/docker/test/codebrowser/Dockerfile +++ b/docker/test/codebrowser/Dockerfile @@ -10,53 +10,21 @@ RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list RUN apt-get update && apt-get --yes --allow-unauthenticated install libclang-${LLVM_VERSION}-dev libmlir-${LLVM_VERSION}-dev -# libclang-15-dev does not contain proper symlink: -# -# This is what cmake will search for: -# -# # readlink -f /usr/lib/llvm-15/lib/libclang-15.so.1 -# /usr/lib/x86_64-linux-gnu/libclang-15.so.1 -# -# This is what exists: -# -# # ls -l /usr/lib/x86_64-linux-gnu/libclang-15* -# lrwxrwxrwx 1 root root 16 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so -> libclang-15.so.1 -# lrwxrwxrwx 1 root root 21 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so.15 -> libclang-15.so.15.0.0 -# -rw-r--r-- 1 root root 31835760 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so.15.0.0 
-# ARG TARGETARCH RUN arch=${TARGETARCH:-amd64} \ && case $arch in \ amd64) rarch=x86_64 ;; \ arm64) rarch=aarch64 ;; \ *) exit 1 ;; \ - esac \ - && ln -rsf /usr/lib/$rarch-linux-gnu/libclang-15.so.15 /usr/lib/$rarch-linux-gnu/libclang-15.so.1 + esac # repo versions doesn't work correctly with C++17 # also we push reports to s3, so we add index.html to subfolder urls -# https://github.com/ClickHouse-Extras/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b -RUN git clone https://github.com/ClickHouse/woboq_codebrowser \ - && cd woboq_codebrowser \ - && cmake . -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=clang-${LLVM_VERSION} \ - && ninja \ - && cd .. \ - && rm -rf woboq_codebrowser +# https://github.com/ClickHouse/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b +RUN git clone --branch=master --depth=1 https://github.com/ClickHouse/woboq_codebrowser /woboq_codebrowser \ + && cd /woboq_codebrowser \ + && cmake . -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=clang-${LLVM_VERSION} -DCLANG_BUILTIN_HEADERS_DIR=/usr/lib/llvm-${LLVM_VERSION}/lib/clang/${LLVM_VERSION}/include \ + && ninja -ENV CODEGEN=/woboq_codebrowser/generator/codebrowser_generator -ENV CODEINDEX=/woboq_codebrowser/indexgenerator/codebrowser_indexgenerator -ENV STATIC_DATA=/woboq_codebrowser/data - -ENV SOURCE_DIRECTORY=/repo_folder -ENV BUILD_DIRECTORY=/build -ENV HTML_RESULT_DIRECTORY=$BUILD_DIRECTORY/html_report -ENV SHA=nosha -ENV DATA="https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data" - -CMD mkdir -p $BUILD_DIRECTORY && cd $BUILD_DIRECTORY && \ - cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=/usr/bin/clang-${LLVM_VERSION} -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_S3=0 && \ - mkdir -p $HTML_RESULT_DIRECTORY && \ - $CODEGEN -b $BUILD_DIRECTORY -a -o $HTML_RESULT_DIRECTORY -p ClickHouse:$SOURCE_DIRECTORY:$SHA -d $DATA | ts '%Y-%m-%d %H:%M:%S' && \ - cp -r $STATIC_DATA $HTML_RESULT_DIRECTORY/ &&\ - $CODEINDEX $HTML_RESULT_DIRECTORY -d "$DATA" | ts '%Y-%m-%d %H:%M:%S' && \ - mv $HTML_RESULT_DIRECTORY /test_output +COPY build.sh / +CMD ["bash", "-c", "/build.sh 2>&1"] diff --git a/docker/test/codebrowser/build.sh b/docker/test/codebrowser/build.sh new file mode 100755 index 00000000000..d76d0c3a039 --- /dev/null +++ b/docker/test/codebrowser/build.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +set -x -e + + +STATIC_DATA=${STATIC_DATA:-/woboq_codebrowser/data} +SOURCE_DIRECTORY=${SOURCE_DIRECTORY:-/build} +BUILD_DIRECTORY=${BUILD_DIRECTORY:-/workdir/build} +OUTPUT_DIRECTORY=${OUTPUT_DIRECTORY:-/workdir/output} +HTML_RESULT_DIRECTORY=${HTML_RESULT_DIRECTORY:-$OUTPUT_DIRECTORY/html_report} +SHA=${SHA:-nosha} +DATA=${DATA:-https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data} +nproc=$(($(nproc) + 2)) # increase parallelism + +read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}" + +mkdir -p "$BUILD_DIRECTORY" && cd "$BUILD_DIRECTORY" +cmake "$SOURCE_DIRECTORY" -DCMAKE_CXX_COMPILER="/usr/bin/clang++-${LLVM_VERSION}" -DCMAKE_C_COMPILER="/usr/bin/clang-${LLVM_VERSION}" -DENABLE_WOBOQ_CODEBROWSER=ON "${CMAKE_FLAGS[@]}" +mkdir -p "$HTML_RESULT_DIRECTORY" +echo 'Filter out too noisy "Error: filename" lines and keep them in full codebrowser_generator.log' +/woboq_codebrowser/generator/codebrowser_generator -b "$BUILD_DIRECTORY" -a \ + -o "$HTML_RESULT_DIRECTORY" --execute-concurrency="$nproc" -p 
"ClickHouse:$SOURCE_DIRECTORY:$SHA" \ + -d "$DATA" \ + |& ts '%Y-%m-%d %H:%M:%S' \ + | tee "$OUTPUT_DIRECTORY/codebrowser_generator.log" \ + | grep --line-buffered -v ':[0-9]* Error: ' +cp -r "$STATIC_DATA" "$HTML_RESULT_DIRECTORY/" +/woboq_codebrowser/indexgenerator/codebrowser_indexgenerator "$HTML_RESULT_DIRECTORY" \ + -d "$DATA" |& ts '%Y-%m-%d %H:%M:%S' diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 32546b71eb8..ffb13fc774d 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -20,12 +20,6 @@ RUN apt-get update \ zstd \ --yes --no-install-recommends -# Install CMake 3.20+ for Rust compilation -RUN apt purge cmake --yes -RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null -RUN apt-add-repository 'deb https://apt.kitware.com/ubuntu/ focal main' -RUN apt update && apt install cmake --yes - RUN pip3 install numpy scipy pandas Jinja2 ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz" diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 086276bed55..dab873377ce 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -9,14 +9,15 @@ trap 'kill $(jobs -pr) ||:' EXIT stage=${stage:-} # Compiler version, normally set by Dockerfile -export LLVM_VERSION=${LLVM_VERSION:-13} +export LLVM_VERSION=${LLVM_VERSION:-16} # A variable to pass additional flags to CMake. # Here we explicitly default it to nothing so that bash doesn't complain about # it being undefined. Also read it as array so that we can pass an empty list # of additional variable to cmake properly, and it doesn't generate an extra # empty parameter. -read -ra FASTTEST_CMAKE_FLAGS <<< "${FASTTEST_CMAKE_FLAGS:-}" +# Read it as CMAKE_FLAGS to not lose exported FASTTEST_CMAKE_FLAGS on subsequential launch +read -ra CMAKE_FLAGS <<< "${FASTTEST_CMAKE_FLAGS:-}" # Run only matching tests. 
FASTTEST_FOCUS=${FASTTEST_FOCUS:-""} @@ -37,6 +38,13 @@ export FASTTEST_DATA export FASTTEST_OUT export PATH +function ccache_status +{ + ccache --show-config ||: + ccache --show-stats ||: + SCCACHE_NO_DAEMON=1 sccache --show-stats ||: +} + function start_server { set -m # Spawn server in its own process groups @@ -139,6 +147,7 @@ function clone_submodules contrib/xxHash contrib/simdjson contrib/liburing + contrib/libfiu ) git submodule sync @@ -171,14 +180,14 @@ function run_cmake export CCACHE_COMPILERCHECK=content export CCACHE_MAXSIZE=15G - ccache --show-stats ||: + ccache_status ccache --zero-stats ||: mkdir "$FASTTEST_BUILD" ||: ( cd "$FASTTEST_BUILD" - cmake "$FASTTEST_SOURCE" -DCMAKE_CXX_COMPILER="clang++-${LLVM_VERSION}" -DCMAKE_C_COMPILER="clang-${LLVM_VERSION}" "${CMAKE_LIBS_CONFIG[@]}" "${FASTTEST_CMAKE_FLAGS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/cmake_log.txt" + cmake "$FASTTEST_SOURCE" -DCMAKE_CXX_COMPILER="clang++-${LLVM_VERSION}" -DCMAKE_C_COMPILER="clang-${LLVM_VERSION}" "${CMAKE_LIBS_CONFIG[@]}" "${CMAKE_FLAGS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/cmake_log.txt" ) } @@ -186,14 +195,19 @@ function build { ( cd "$FASTTEST_BUILD" - time ninja clickhouse-bundle 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/build_log.txt" + TIMEFORMAT=$'\nreal\t%3R\nuser\t%3U\nsys\t%3S' + ( time ninja clickhouse-bundle) |& ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/build_log.txt" + BUILD_SECONDS_ELAPSED=$(awk '/^....-..-.. ..:..:.. real\t[0-9]/ {print $4}' < "$FASTTEST_OUTPUT/build_log.txt") + echo "build_clickhouse_fasttest_binary: [ OK ] $BUILD_SECONDS_ELAPSED sec." \ + | ts '%Y-%m-%d %H:%M:%S' \ + | tee "$FASTTEST_OUTPUT/test_result.txt" if [ "$COPY_CLICKHOUSE_BINARY_TO_OUTPUT" -eq "1" ]; then cp programs/clickhouse "$FASTTEST_OUTPUT/clickhouse" strip programs/clickhouse -o "$FASTTEST_OUTPUT/clickhouse-stripped" zstd --threads=0 "$FASTTEST_OUTPUT/clickhouse-stripped" fi - ccache --show-stats ||: + ccache_status ccache --evict-older-than 1d ||: ) } @@ -243,7 +257,7 @@ function run_tests ) time clickhouse-test "${test_opts[@]}" -- "$FASTTEST_FOCUS" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ - | tee "$FASTTEST_OUTPUT/test_result.txt" + | tee -a "$FASTTEST_OUTPUT/test_result.txt" set -e clickhouse stop --pid-path "$FASTTEST_DATA" diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 75f2a0af358..d2c8de7a211 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -15,7 +15,7 @@ stage=${stage:-} script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" echo "$script_dir" repo_dir=ch -BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-15_debug_none_unsplitted_disable_False_binary"} +BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-16_debug_none_unsplitted_disable_False_binary"} BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"} function git_clone_with_retry diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index ce5bae2a031..14c97e479f6 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -32,6 +32,7 @@ RUN apt-get update \ libssl-dev \ libcurl4-openssl-dev \ gdb \ + default-jdk \ software-properties-common \ libkrb5-dev \ krb5-user \ @@ -46,10 +47,9 @@ ENV TZ=Etc/UTC RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV 
DOCKER_CHANNEL stable -RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - -RUN add-apt-repository "deb https://download.docker.com/linux/ubuntu $(lsb_release -c -s) ${DOCKER_CHANNEL}" - -RUN apt-get update \ +RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - \ + && add-apt-repository "deb https://download.docker.com/linux/ubuntu $(lsb_release -c -s) ${DOCKER_CHANNEL}" \ + && apt-get update \ && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ docker-ce \ && rm -rf \ @@ -60,7 +60,7 @@ RUN apt-get update \ RUN dockerd --version; docker --version -RUN python3 -m pip install \ +RUN python3 -m pip install --no-cache-dir \ PyMySQL \ aerospike==4.0.0 \ avro==1.10.2 \ @@ -92,15 +92,25 @@ RUN python3 -m pip install \ tzlocal==2.1 \ urllib3 \ requests-kerberos \ + pyspark==3.3.2 \ + delta-spark==2.2.0 \ pyhdfs \ azure-storage-blob \ - meilisearch==0.18.3 + meilisearch==0.18.3 COPY modprobe.sh /usr/local/bin/modprobe COPY dockerd-entrypoint.sh /usr/local/bin/ COPY compose/ /compose/ COPY misc/ /misc/ +RUN curl -fsSL -O https://dlcdn.apache.org/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz \ + && tar xzvf spark-3.3.2-bin-hadoop3.tgz -C / \ + && rm spark-3.3.2-bin-hadoop3.tgz + +# download spark and packages +# if you change packages, don't forget to update them in tests/integration/helpers/cluster.py +RUN echo ":quit" | /spark-3.3.2-bin-hadoop3/bin/spark-shell --packages "org.apache.hudi:hudi-spark3.3-bundle_2.12:0.13.0,io.delta:delta-core_2.12:2.2.0,org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.1.0" > /dev/null + RUN set -x \ && addgroup --system dockremap \ && adduser --system dockremap \ @@ -108,6 +118,12 @@ RUN set -x \ && echo 'dockremap:165536:65536' >> /etc/subuid \ && echo 'dockremap:165536:65536' >> /etc/subgid +# Same options as in test/base/Dockerfile +# (in case you need to override them in tests) +ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1' +ENV UBSAN_OPTIONS='print_stacktrace=1' +ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1' + EXPOSE 2375 ENTRYPOINT ["dockerd-entrypoint.sh"] CMD ["sh", "-c", "pytest $PYTEST_OPTS"] diff --git a/docker/test/integration/runner/dockerd-entrypoint.sh b/docker/test/integration/runner/dockerd-entrypoint.sh index c16b2bf1087..fe47fc90951 100755 --- a/docker/test/integration/runner/dockerd-entrypoint.sh +++ b/docker/test/integration/runner/dockerd-entrypoint.sh @@ -16,7 +16,9 @@ echo '{ # and on hung you can simply press Ctrl-C and it will spawn a python pdb, # but on SIGINT dockerd will exit, so ignore it to preserve the daemon. trap '' INT -dockerd --host=unix:///var/run/docker.sock --host=tcp://0.0.0.0:2375 --default-address-pool base=172.17.0.0/12,size=24 &>/ClickHouse/tests/integration/dockerd.log & +# Binding to an IP address without --tlsverify is deprecated. 
Startup is intentionally being slowed +# unless --tls=false or --tlsverify=false is set +dockerd --host=unix:///var/run/docker.sock --tls=false --host=tcp://0.0.0.0:2375 --default-address-pool base=172.17.0.0/12,size=24 &>/ClickHouse/tests/integration/dockerd.log & set +e reties=0 @@ -37,6 +39,12 @@ set -e docker ps --all --quiet | xargs --no-run-if-empty docker rm || true } +java_path="$(update-alternatives --config java | sed -n 's/.*(providing \/usr\/bin\/java): //p')" +export JAVA_PATH=$java_path +export SPARK_HOME="/spark-3.3.2-bin-hadoop3" +export PATH=$SPARK_HOME/bin:$PATH +export JAVA_TOOL_OPTIONS="-Djdk.attach.allowAttachSelf=true" + echo "Start tests" export CLICKHOUSE_TESTS_SERVER_BIN_PATH=/clickhouse export CLICKHOUSE_TESTS_CLIENT_BIN_PATH=/clickhouse diff --git a/docker/test/keeper-jepsen/run.sh b/docker/test/keeper-jepsen/run.sh index 5e321b7c347..694d7fcd916 100644 --- a/docker/test/keeper-jepsen/run.sh +++ b/docker/test/keeper-jepsen/run.sh @@ -2,7 +2,7 @@ set -euo pipefail -CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-15_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"} +CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-16_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"} CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""} diff --git a/docker/test/performance-comparison/download.sh b/docker/test/performance-comparison/download.sh index e3df98c7da1..aee11030068 100755 --- a/docker/test/performance-comparison/download.sh +++ b/docker/test/performance-comparison/download.sh @@ -3,7 +3,9 @@ set -ex set -o pipefail trap "exit" INT TERM trap 'kill $(jobs -pr) ||:' EXIT +S3_URL=${S3_URL:="https://clickhouse-builds.s3.amazonaws.com"} BUILD_NAME=${BUILD_NAME:-package_release} +export S3_URL BUILD_NAME mkdir db0 ||: mkdir left ||: @@ -28,8 +30,9 @@ function download # Historically there were various paths for the performance test package. # Test all of them. declare -a urls_to_try=( - "https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/$BUILD_NAME/performance.tar.zst" - "https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/$BUILD_NAME/performance.tgz" + "$S3_URL/PRs/$left_pr/$left_sha/$BUILD_NAME/performance.tar.zst" + "$S3_URL/$left_pr/$left_sha/$BUILD_NAME/performance.tar.zst" + "$S3_URL/$left_pr/$left_sha/$BUILD_NAME/performance.tgz" ) for path in "${urls_to_try[@]}" diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh index 75b25412ac4..74571777be0 100755 --- a/docker/test/performance-comparison/entrypoint.sh +++ b/docker/test/performance-comparison/entrypoint.sh @@ -6,11 +6,7 @@ export CHPC_CHECK_START_TIMESTAMP S3_URL=${S3_URL:="https://clickhouse-builds.s3.amazonaws.com"} BUILD_NAME=${BUILD_NAME:-package_release} - -COMMON_BUILD_PREFIX="/clickhouse_build_check" -if [[ $S3_URL == *"s3.amazonaws.com"* ]]; then - COMMON_BUILD_PREFIX="" -fi +export S3_URL BUILD_NAME # Sometimes AWS responde with DNS error and it's impossible to retry it with # current curl version options. @@ -66,8 +62,9 @@ function find_reference_sha # test all of them. 
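# For example (illustrative values, not real SHAs): with BUILD_NAME=package_release
# and REF_SHA=1234abcd, the list below is probed in order:
#
#   $S3_URL/PRs/0/1234abcd/package_release/performance.tar.zst   # current layout
#   $S3_URL/0/1234abcd/package_release/performance.tar.zst       # older layout
#   $S3_URL/0/1234abcd/package_release/performance.tgz           # oldest layout
#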
unset found declare -a urls_to_try=( - "https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/$BUILD_NAME/performance.tar.zst" - "https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/$BUILD_NAME/performance.tgz" + "$S3_URL/PRs/0/$REF_SHA/$BUILD_NAME/performance.tar.zst" + "$S3_URL/0/$REF_SHA/$BUILD_NAME/performance.tar.zst" + "$S3_URL/0/$REF_SHA/$BUILD_NAME/performance.tgz" ) for path in "${urls_to_try[@]}" do @@ -92,10 +89,15 @@ chmod 777 workspace output cd workspace # Download the package for the version we are going to test. -if curl_with_retry "$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/$BUILD_NAME/performance.tar.zst" -then - right_path="$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/$BUILD_NAME/performance.tar.zst" -fi +# A temporary solution for migrating into PRs directory +for prefix in "$S3_URL/PRs" "$S3_URL"; +do + if curl_with_retry "$prefix/$PR_TO_TEST/$SHA_TO_TEST/$BUILD_NAME/performance.tar.zst" + then + right_path="$prefix/$PR_TO_TEST/$SHA_TO_TEST/$BUILD_NAME/performance.tar.zst" + break + fi +done mkdir right wget -nv -nd -c "$right_path" -O- | tar -C right --no-same-owner --strip-components=1 --zstd --extract --verbose diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py index 65bf49c2914..7a4e6386d0d 100755 --- a/docker/test/performance-comparison/perf.py +++ b/docker/test/performance-comparison/perf.py @@ -26,6 +26,7 @@ logging.basicConfig( total_start_seconds = time.perf_counter() stage_start_seconds = total_start_seconds + # Thread executor that does not hides exception that happens during function # execution, and rethrows it after join() class SafeThread(Thread): @@ -158,6 +159,7 @@ for e in subst_elems: available_parameters[name] = values + # Takes parallel lists of templates, substitutes them with all combos of # parameters. The set of parameters is determined based on the first list. 
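# For example (hypothetical values): a template "SELECT count() FROM {table}"
# with available_parameters = {"table": ["hits_10m", "hits_100m"]} expands into
# one query per parameter combination:
#   SELECT count() FROM hits_10m
#   SELECT count() FROM hits_100m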
# Note: keep the order of queries -- sometimes we have DROP IF EXISTS diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index 782cf29863c..214f2d550b4 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -670,7 +670,6 @@ if args.report == "main": ) elif args.report == "all-queries": - print((header_template.format())) add_tested_commits() diff --git a/docker/test/server-jepsen/Dockerfile b/docker/test/server-jepsen/Dockerfile index 958dbfa066a..a212427b2a1 100644 --- a/docker/test/server-jepsen/Dockerfile +++ b/docker/test/server-jepsen/Dockerfile @@ -16,6 +16,11 @@ ENV TESTS_TO_RUN="8" ENV TIME_LIMIT="30" ENV KEEPER_NODE="" +ENV NEMESIS="" +ENV WORKLOAD="" +ENV WITH_LOCAL_BINARY="" +ENV RATE="" +ENV CONCURRENCY="" # volumes diff --git a/docker/test/server-jepsen/run.sh b/docker/test/server-jepsen/run.sh index 4a966d50f74..0c3768df813 100644 --- a/docker/test/server-jepsen/run.sh +++ b/docker/test/server-jepsen/run.sh @@ -2,7 +2,7 @@ set -euo pipefail -CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-15_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"} +CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-16_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"} CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""} @@ -15,8 +15,38 @@ if [ -z "$CLICKHOUSE_REPO_PATH" ]; then ls -lath ||: fi +clickhouse_source="--clickhouse-source $CLICKHOUSE_PACKAGE" +if [ -n "$WITH_LOCAL_BINARY" ]; then + clickhouse_source="--clickhouse-source /clickhouse" +fi + +tests_count="--test-count $TESTS_TO_RUN" +tests_to_run="test-all" +workload="" +if [ -n "$WORKLOAD" ]; then + tests_to_run="test" + workload="--workload $WORKLOAD" + tests_count="" +fi + +nemesis="" +if [ -n "$NEMESIS" ]; then + nemesis="--nemesis $NEMESIS" +fi + +rate="" +if [ -n "$RATE" ]; then + rate="--rate $RATE" +fi + +concurrency="" +if [ -n "$CONCURRENCY" ]; then + concurrency="--concurrency $CONCURRENCY" +fi + + cd "$CLICKHOUSE_REPO_PATH/tests/jepsen.clickhouse" -(lein run server test-all --keeper "$KEEPER_NODE" --nodes-file "$NODES_FILE_PATH" --username "$NODES_USERNAME" --logging-json --password "$NODES_PASSWORD" --time-limit "$TIME_LIMIT" --concurrency 50 -r 50 --clickhouse-source "$CLICKHOUSE_PACKAGE" --test-count "$TESTS_TO_RUN" || true) | tee "$TEST_OUTPUT/jepsen_run_all_tests.log" +(lein run server $tests_to_run $workload --keeper "$KEEPER_NODE" $concurrency $nemesis $rate --nodes-file "$NODES_FILE_PATH" --username "$NODES_USERNAME" --logging-json --password "$NODES_PASSWORD" --time-limit "$TIME_LIMIT" --concurrency 50 $clickhouse_source $tests_count --reuse-binary || true) | tee "$TEST_OUTPUT/jepsen_run_all_tests.log" mv store "$TEST_OUTPUT/" diff --git a/docker/test/sqllogic/Dockerfile b/docker/test/sqllogic/Dockerfile new file mode 100644 index 00000000000..83dcf7e1f56 --- /dev/null +++ b/docker/test/sqllogic/Dockerfile @@ -0,0 +1,45 @@ +# docker build -t clickhouse/sqllogic-test . 
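# A usage sketch (the host paths are assumptions, not fixed by this image):
#   docker build -t clickhouse/sqllogic-test docker/test/sqllogic
#   docker run --volume=/path/to/debs:/package_folder \
#              --volume=/path/to/ClickHouse/tests:/clickhouse-tests \
#              --volume=/tmp/output:/test_output clickhouse/sqllogic-test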
+ARG FROM_TAG=latest +FROM clickhouse/test-base:$FROM_TAG + +RUN apt-get update --yes \ + && env DEBIAN_FRONTEND=noninteractive \ + apt-get install --yes --no-install-recommends \ + wget \ + git \ + python3 \ + python3-dev \ + python3-pip \ + sqlite3 \ + unixodbc \ + unixodbc-dev \ + sudo \ + && apt-get clean + +RUN pip3 install \ + numpy \ + pyodbc \ + deepdiff + +ARG odbc_repo="https://github.com/ClickHouse/clickhouse-odbc.git" + +RUN git clone --recursive ${odbc_repo} \ + && mkdir -p /clickhouse-odbc/build \ + && cmake -S /clickhouse-odbc -B /clickhouse-odbc/build \ + && ls /clickhouse-odbc/build/driver \ + && make -j 10 -C /clickhouse-odbc/build \ + && ls /clickhouse-odbc/build/driver \ + && mkdir -p /usr/local/lib64/ && cp /clickhouse-odbc/build/driver/lib*.so /usr/local/lib64/ \ + && odbcinst -i -d -f /clickhouse-odbc/packaging/odbcinst.ini.sample \ + && odbcinst -i -s -l -f /clickhouse-odbc/packaging/odbc.ini.sample + +ENV TZ=Europe/Amsterdam +ENV MAX_RUN_TIME=900 +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + +ARG sqllogic_test_repo="https://github.com/gregrahn/sqllogictest.git" + +RUN git clone --recursive ${sqllogic_test_repo} + +COPY run.sh / +CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/sqllogic/run.sh b/docker/test/sqllogic/run.sh new file mode 100755 index 00000000000..8d0252e3c98 --- /dev/null +++ b/docker/test/sqllogic/run.sh @@ -0,0 +1,100 @@ +#!/bin/bash +set -exu +trap "exit" INT TERM + +echo "ENV" +env + +# fail on errors, verbose and export all env variables +set -e -x -a + +echo "Current directory" +pwd +echo "Files in current directory" +ls -la ./ +echo "Files in root directory" +ls -la / +echo "Files in /clickhouse-tests directory" +ls -la /clickhouse-tests +echo "Files in /clickhouse-tests/sqllogic directory" +ls -la /clickhouse-tests/sqllogic +echo "Files in /package_folder directory" +ls -la /package_folder +echo "Files in /test_output" +ls -la /test_output +echo "File in /sqllogictest" +ls -la /sqllogictest + +dpkg -i package_folder/clickhouse-common-static_*.deb +dpkg -i package_folder/clickhouse-common-static-dbg_*.deb +dpkg -i package_folder/clickhouse-server_*.deb +dpkg -i package_folder/clickhouse-client_*.deb + +# install test configs +# /clickhouse-tests/config/install.sh + +sudo clickhouse start + +sleep 5 +for _ in $(seq 1 60); do if [[ $(wget --timeout=1 -q 'localhost:8123' -O-) == 'Ok.' 
]]; then break ; else sleep 1; fi ; done + +function run_tests() +{ + set -x + + cd /test_output + + /clickhouse-tests/sqllogic/runner.py --help 2>&1 \ + | ts '%Y-%m-%d %H:%M:%S' + + mkdir -p /test_output/self-test + /clickhouse-tests/sqllogic/runner.py --log-file /test_output/runner-self-test.log \ + self-test \ + --self-test-dir /clickhouse-tests/sqllogic/self-test \ + --out-dir /test_output/self-test \ + 2>&1 \ + | ts '%Y-%m-%d %H:%M:%S' + + cat /test_output/self-test/check_status.tsv >> /test_output/check_status.tsv + cat /test_output/self-test/test_results.tsv >> /test_output/test_results.tsv ||: + tar -zcvf self-test.tar.gz self-test 1>/dev/null + + if [ -d /sqllogictest ] + then + mkdir -p /test_output/statements-test + /clickhouse-tests/sqllogic/runner.py \ + --log-file /test_output/runner-statements-test.log \ + --log-level info \ + statements-test \ + --input-dir /sqllogictest \ + --out-dir /test_output/statements-test \ + 2>&1 \ + | ts '%Y-%m-%d %H:%M:%S' + + cat /test_output/statements-test/check_status.tsv >> /test_output/check_status.tsv + cat /test_output/statements-test/test_results.tsv >> /test_output/test_results.tsv + tar -zcvf statements-check.tar.gz statements-test 1>/dev/null + fi +} + +export -f run_tests + +timeout "${MAX_RUN_TIME:-9000}" bash -c run_tests || echo "timeout reached" >&2 + +#/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv + +clickhouse-client -q "system flush logs" ||: + +# Stop server so we can safely read data with clickhouse-local. +# Why do we read data with clickhouse-local? +# Because it's the simplest way to read it when server has crashed. +sudo clickhouse stop ||: + +for _ in $(seq 1 60); do if [[ $(wget --timeout=1 -q 'localhost:8123' -O-) == 'Ok.' 
]]; then sleep 1 ; else break; fi ; done + +grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||: +pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.gz & + +# Compressed (FIXME: remove once only github actions will be left) +rm /var/log/clickhouse-server/clickhouse-server.log +mv /var/log/clickhouse-server/stderr.log /test_output/ ||: diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index e7a400b8216..c973b6c6ec6 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -128,7 +128,7 @@ function run_tests() set +e if [[ -n "$USE_PARALLEL_REPLICAS" ]] && [[ "$USE_PARALLEL_REPLICAS" -eq 1 ]]; then - clickhouse-test --client="clickhouse-client --use_hedged_requests=0 --allow_experimental_parallel_reading_from_replicas=1 \ + clickhouse-test --client="clickhouse-client --use_hedged_requests=0 --allow_experimental_parallel_reading_from_replicas=1 --parallel_replicas_for_non_replicated_merge_tree=1 \ --max_parallel_replicas=100 --cluster_for_parallel_replicas='parallel_replicas'" \ -j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --no-parallel-replicas --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \ "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt diff --git a/docker/test/stateful/s3downloader b/docker/test/stateful/s3downloader index b1302877d6a..96f2aa96dd5 100755 --- a/docker/test/stateful/s3downloader +++ b/docker/test/stateful/s3downloader @@ -10,31 +10,38 @@ import requests import tempfile -DEFAULT_URL = 'https://clickhouse-datasets.s3.amazonaws.com' +DEFAULT_URL = "https://clickhouse-datasets.s3.amazonaws.com" AVAILABLE_DATASETS = { - 'hits': 'hits_v1.tar', - 'visits': 'visits_v1.tar', + "hits": "hits_v1.tar", + "visits": "visits_v1.tar", } RETRIES_COUNT = 5 + def _get_temp_file_name(): - return os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())) + return os.path.join( + tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()) + ) + def build_url(base_url, dataset): - return os.path.join(base_url, dataset, 'partitions', AVAILABLE_DATASETS[dataset]) + return os.path.join(base_url, dataset, "partitions", AVAILABLE_DATASETS[dataset]) + def dowload_with_progress(url, path): logging.info("Downloading from %s to temp path %s", url, path) for i in range(RETRIES_COUNT): try: - with open(path, 'wb') as f: + with open(path, "wb") as f: response = requests.get(url, stream=True) response.raise_for_status() - total_length = response.headers.get('content-length') + total_length = response.headers.get("content-length") if total_length is None or int(total_length) == 0: - logging.info("No content-length, will download file without progress") + logging.info( + "No content-length, will download file without progress" + ) f.write(response.content) else: dl = 0 @@ -46,7 +53,11 @@ def dowload_with_progress(url, path): if sys.stdout.isatty(): done = int(50 * dl / total_length) percent = int(100 * float(dl) / total_length) - sys.stdout.write("\r[{}{}] {}%".format('=' * done, ' ' * (50-done), percent)) + sys.stdout.write( + "\r[{}{}] {}%".format( + "=" * done, " " * (50 - done), percent + ) + ) sys.stdout.flush() break except Exception as ex: @@ -56,14 +67,21 @@ def dowload_with_progress(url, path): if os.path.exists(path): os.remove(path) else: - raise Exception("Cannot download dataset from {}, all retries exceeded".format(url)) + raise Exception( + "Cannot download dataset from {}, all retries 
exceeded".format(url) + ) sys.stdout.write("\n") logging.info("Downloading finished") + def unpack_to_clickhouse_directory(tar_path, clickhouse_path): - logging.info("Will unpack data from temp path %s to clickhouse db %s", tar_path, clickhouse_path) - with tarfile.open(tar_path, 'r') as comp_file: + logging.info( + "Will unpack data from temp path %s to clickhouse db %s", + tar_path, + clickhouse_path, + ) + with tarfile.open(tar_path, "r") as comp_file: comp_file.extractall(path=clickhouse_path) logging.info("Unpack finished") @@ -72,15 +90,21 @@ if __name__ == "__main__": logging.basicConfig(level=logging.INFO) parser = argparse.ArgumentParser( - description="Simple tool for dowloading datasets for clickhouse from S3") + description="Simple tool for dowloading datasets for clickhouse from S3" + ) - parser.add_argument('--dataset-names', required=True, nargs='+', choices=list(AVAILABLE_DATASETS.keys())) - parser.add_argument('--url-prefix', default=DEFAULT_URL) - parser.add_argument('--clickhouse-data-path', default='/var/lib/clickhouse/') + parser.add_argument( + "--dataset-names", + required=True, + nargs="+", + choices=list(AVAILABLE_DATASETS.keys()), + ) + parser.add_argument("--url-prefix", default=DEFAULT_URL) + parser.add_argument("--clickhouse-data-path", default="/var/lib/clickhouse/") args = parser.parse_args() datasets = args.dataset_names - logging.info("Will fetch following datasets: %s", ', '.join(datasets)) + logging.info("Will fetch following datasets: %s", ", ".join(datasets)) for dataset in datasets: logging.info("Processing %s", dataset) temp_archive_path = _get_temp_file_name() @@ -92,10 +116,11 @@ if __name__ == "__main__": logging.info("Some exception occured %s", str(ex)) raise finally: - logging.info("Will remove downloaded file %s from filesystem if it exists", temp_archive_path) + logging.info( + "Will remove downloaded file %s from filesystem if it exists", + temp_archive_path, + ) if os.path.exists(temp_archive_path): os.remove(temp_archive_path) logging.info("Processing of %s finished", dataset) logging.info("Fetch finished, enjoy your tables!") - - diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index ade59224035..5d0a7b50741 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -132,6 +132,9 @@ function run_tests() ADDITIONAL_OPTIONS+=('--report-logs-stats') + clickhouse-test "00001_select_1" > /dev/null ||: + clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" ||: + set +e clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ @@ -170,6 +173,7 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] fi rg -Fa "" /var/log/clickhouse-server/clickhouse-server.log ||: +rg -A50 -Fa "============" /var/log/clickhouse-server/stderr.log ||: zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst & # Compress tables. diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 15f58d6c3a3..4926967d2d2 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -20,30 +20,31 @@ install_packages package_folder # Thread Fuzzer allows to check more permutations of possible thread scheduling # and find more potential issues. 
-# Temporarily disable ThreadFuzzer with tsan because of https://github.com/google/sanitizers/issues/1540 -is_tsan_build=$(clickhouse local -q "select value like '% -fsanitize=thread %' from system.build_options where name='CXX_FLAGS'") -if [ "$is_tsan_build" -eq "0" ]; then - export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000 - export THREAD_FUZZER_SLEEP_PROBABILITY=0.1 - export THREAD_FUZZER_SLEEP_TIME_US=100000 +export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000 +export THREAD_FUZZER_SLEEP_PROBABILITY=0.1 +export THREAD_FUZZER_SLEEP_TIME_US=100000 - export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1 - export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1 - export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1 - export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1 +export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1 +export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1 +export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1 +export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1 - export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001 - export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001 - export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001 - export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001 - export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000 +export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001 +export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001 +export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001 +export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001 +export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000 - export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000 - export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000 - export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000 -fi +export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000 +export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000 +export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000 + +export THREAD_FUZZER_EXPLICIT_SLEEP_PROBABILITY=0.01 +export THREAD_FUZZER_EXPLICIT_MEMORY_EXCEPTION_PROBABILITY=0.01 export ZOOKEEPER_FAULT_INJECTION=1 +# Initial run without S3 to create system.*_log on local file system to make it +# available for dump via clickhouse-local configure azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index e8c5e17024c..746cc7bb2d5 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -11,13 +11,14 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ aspell \ curl \ git \ + file \ libxml2-utils \ moreutils \ python3-fuzzywuzzy \ python3-pip \ shellcheck \ yamllint \ - && pip3 install black==22.8.0 boto3 codespell==2.2.1 dohq-artifactory mypy PyGithub unidiff pylint==2.6.2 \ + && pip3 install black==23.1.0 boto3 codespell==2.2.1 dohq-artifactory mypy PyGithub unidiff pylint==2.6.2 \ && apt-get clean \ && rm -rf /root/.cache/pip diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index df32e2833e7..bd0c59a12cd 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -49,12 +49,42 @@ echo -e "Successfully cloned previous release tests$OK" >> /test_output/test_res 
echo -e "Successfully downloaded previous release packages$OK" >> /test_output/test_results.tsv # Make upgrade check more funny by forcing Ordinary engine for system database -mkdir /var/lib/clickhouse/metadata +mkdir -p /var/lib/clickhouse/metadata echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/system.sql # Install previous release packages install_packages previous_release_package_folder +# Initial run without S3 to create system.*_log on local file system to make it +# available for dump via clickhouse-local +configure + +# local_blob_storage disk type does not exist in older versions +sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \ + | sed "s|local_blob_storage|local|" \ + > /etc/clickhouse-server/config.d/storage_conf.xml.tmp +sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml + +# it contains some new settings, but we can safely remove it +rm /etc/clickhouse-server/config.d/merge_tree.xml + +start +stop +mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log + +# force_sync=false doesn't work correctly on some older versions +sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \ + | sed "s|false|true|" \ + > /etc/clickhouse-server/config.d/keeper_port.xml.tmp +sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml + +# But we still need default disk because some tables loaded only into it +sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \ + | sed "s|
<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \ + > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml +sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml +sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml + # Start server from previous release # Let's enable S3 storage by default export USE_S3_STORAGE_FOR_MERGE_TREE=1 @@ -62,12 +92,13 @@ export USE_S3_STORAGE_FOR_MERGE_TREE=1 export ZOOKEEPER_FAULT_INJECTION=0 configure -# But we still need default disk because some tables loaded only into it -sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \ - | sed "s|
<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \ - > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml -sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml -sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml +sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \ + | sed "s|local_blob_storage|local|" \ + > /etc/clickhouse-server/config.d/storage_conf.xml.tmp +sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml + +# it contains some new settings, but we can safely remove it +rm /etc/clickhouse-server/config.d/merge_tree.xml start @@ -95,8 +126,7 @@ mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/c # Install and start new server install_packages package_folder -# Disable fault injections on start (we don't test them here, and it can lead to tons of requests in case of huge number of tables). -export ZOOKEEPER_FAULT_INJECTION=0 +export ZOOKEEPER_FAULT_INJECTION=1 configure start 500 clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \ @@ -161,8 +191,10 @@ rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \ -e "Authentication failed" \ -e "Cannot flush" \ -e "Container already exists" \ - /var/log/clickhouse-server/clickhouse-server.upgrade.log | zgrep -Fa "<Fatal>" > /test_output/upgrade_error_messages.txt \ && echo -e "Error message in clickhouse-server.log (see upgrade_error_messages.txt)$FAIL$(head_escaped /test_output/bc_check_error_messages.txt)" \ + clickhouse-server.upgrade.log \ + | grep -av -e "_repl_01111_.*Mapping for table with UUID" \ + | zgrep -Fa "<Fatal>" > /test_output/upgrade_error_messages.txt \ + && echo -e "Error message in clickhouse-server.log (see upgrade_error_messages.txt)$FAIL$(head_escaped /test_output/upgrade_error_messages.txt)" \ >> /test_output/test_results.tsv \ || echo -e "No Error messages after server upgrade$OK" >> /test_output/test_results.tsv @@ -176,8 +208,6 @@ tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||: collect_query_and_trace_logs -check_oom_in_dmesg - mv /var/log/clickhouse-server/stderr.log /test_output/ # Write check result into check_status.tsv diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index 0ee426f4e4d..a49278e960b 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -6,7 +6,7 @@ ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list # 15.0.2 -ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=15 +ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=16 RUN apt-get update \ && apt-get install \ @@ -52,6 +52,7 @@ RUN apt-get update \ lld-${LLVM_VERSION} \ llvm-${LLVM_VERSION} \ llvm-${LLVM_VERSION}-dev \ + libclang-${LLVM_VERSION}-dev \ moreutils \ nasm \ ninja-build \ @@ -92,4 +93,17 @@ RUN mkdir /tmp/ccache \ && cd / \ && rm -rf /tmp/ccache +ARG TARGETARCH +ARG SCCACHE_VERSION=v0.4.1 +RUN arch=${TARGETARCH:-amd64} \ + && case $arch in \ amd64) rarch=x86_64 ;; \ arm64) rarch=aarch64 ;; \ esac \ + && curl -Ls "https://github.com/mozilla/sccache/releases/download/$SCCACHE_VERSION/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl.tar.gz" | \ tar xz -C /tmp \ + && mv "/tmp/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl/sccache" /usr/bin \ + && rm 
"/tmp/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl" -r + + COPY process_functional_tests_result.py / diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py index da58db8e45d..c75a3500831 100755 --- a/docker/test/util/process_functional_tests_result.py +++ b/docker/test/util/process_functional_tests_result.py @@ -18,7 +18,7 @@ SUCCESS_FINISH_SIGNS = ["All tests have finished", "No tests were run"] RETRIES_SIGN = "Some tests were restarted" -def process_test_log(log_path): +def process_test_log(log_path, broken_tests): total = 0 skipped = 0 unknown = 0 @@ -59,11 +59,19 @@ def process_test_log(log_path): total += 1 if TIMEOUT_SIGN in line: - failed += 1 - test_results.append((test_name, "Timeout", test_time, [])) + if test_name in broken_tests: + success += 1 + test_results.append((test_name, "BROKEN", test_time, [])) + else: + failed += 1 + test_results.append((test_name, "Timeout", test_time, [])) elif FAIL_SIGN in line: - failed += 1 - test_results.append((test_name, "FAIL", test_time, [])) + if test_name in broken_tests: + success += 1 + test_results.append((test_name, "BROKEN", test_time, [])) + else: + failed += 1 + test_results.append((test_name, "FAIL", test_time, [])) elif UNKNOWN_SIGN in line: unknown += 1 test_results.append((test_name, "FAIL", test_time, [])) @@ -71,8 +79,19 @@ def process_test_log(log_path): skipped += 1 test_results.append((test_name, "SKIPPED", test_time, [])) else: - success += int(OK_SIGN in line) - test_results.append((test_name, "OK", test_time, [])) + if OK_SIGN in line and test_name in broken_tests: + skipped += 1 + test_results.append( + ( + test_name, + "NOT_FAILED", + test_time, + ["This test passed. Update broken_tests.txt.\n"], + ) + ) + else: + success += int(OK_SIGN in line) + test_results.append((test_name, "OK", test_time, [])) test_end = False elif ( len(test_results) > 0 and test_results[-1][1] == "FAIL" and not test_end @@ -110,7 +129,7 @@ def process_test_log(log_path): ) -def process_result(result_path): +def process_result(result_path, broken_tests): test_results = [] state = "success" description = "" @@ -134,7 +153,7 @@ def process_result(result_path): success_finish, retries, test_results, - ) = process_test_log(result_path) + ) = process_test_log(result_path, broken_tests) is_flacky_check = 1 < int(os.environ.get("NUM_TRIES", 1)) logging.info("Is flaky check: %s", is_flacky_check) # If no tests were run (success == 0) it indicates an error (e.g. 
server did not start or crashed immediately) @@ -186,9 +205,17 @@ if __name__ == "__main__": parser.add_argument("--in-results-dir", default="/test_output/") parser.add_argument("--out-results-file", default="/test_output/test_results.tsv") parser.add_argument("--out-status-file", default="/test_output/check_status.tsv") + parser.add_argument("--broken-tests", default="/broken_tests.txt") args = parser.parse_args() - state, description, test_results = process_result(args.in_results_dir) + broken_tests = list() + if os.path.exists(args.broken_tests): + logging.info(f"File {args.broken_tests} with broken tests found") + with open(args.broken_tests) as f: + broken_tests = f.read().splitlines() + logging.info(f"Broken tests in the list: {len(broken_tests)}") + + state, description, test_results = process_result(args.in_results_dir, broken_tests) logging.info("Result parsed") status = (state, description) write_results(args.out_results_file, args.out_status_file, test_results, status) diff --git a/docs/.gitignore b/docs/.gitignore index 378eac25d31..509538d9051 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -1 +1,2 @@ build +clickhouse-docs diff --git a/docs/README.md b/docs/README.md index 9bfd3d2b897..0cd35a4e3ec 100644 --- a/docs/README.md +++ b/docs/README.md @@ -40,6 +40,8 @@ The documentation contains information about all the aspects of the ClickHouse l At the moment, [documentation](https://clickhouse.com/docs) exists in English, Russian, and Chinese. We store the reference documentation besides the ClickHouse source code in the [GitHub repository](https://github.com/ClickHouse/ClickHouse/tree/master/docs), and user guides in a separate repo [Clickhouse/clickhouse-docs](https://github.com/ClickHouse/clickhouse-docs). +To get the latter, run the `get-clickhouse-docs.sh` script. + Each language lies in the corresponding folder. Files that are not translated from English are symbolic links to the English ones. diff --git a/docs/_includes/install/universal.sh b/docs/_includes/install/universal.sh index 403aab6f4e6..1699be138c8 100755 --- a/docs/_includes/install/universal.sh +++ b/docs/_includes/install/universal.sh @@ -85,9 +85,9 @@ echo echo "Successfully downloaded the ClickHouse binary, you can run it as: ./${clickhouse}" -#if [ "${OS}" = "Linux" ] -#then - #echo - #echo "You can also install it: - #sudo ./${clickhouse} install" -#fi +if [ "${OS}" = "Linux" ] +then + echo + echo "You can also install it: + sudo ./${clickhouse} install" +fi diff --git a/docs/changelogs/v21.10.1.8013-prestable.md b/docs/changelogs/v21.10.1.8013-prestable.md index 02ea593e02a..1c40aa67711 100644 --- a/docs/changelogs/v21.10.1.8013-prestable.md +++ b/docs/changelogs/v21.10.1.8013-prestable.md @@ -108,7 +108,7 @@ sidebar_label: 2022 * Print out git status information at CMake configure stage. [#28047](https://github.com/ClickHouse/ClickHouse/pull/28047) ([Braulio Valdivielso Martínez](https://github.com/BraulioVM)). * Add new log level `` for testing environments. [#28559](https://github.com/ClickHouse/ClickHouse/pull/28559) ([alesapin](https://github.com/alesapin)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Fix handling null value with type of Nullable(String) in function JSONExtract. This fixes [#27929](https://github.com/ClickHouse/ClickHouse/issues/27929) and [#27930](https://github.com/ClickHouse/ClickHouse/issues/27930) . 
This was introduced in https://github.com/ClickHouse/ClickHouse/pull/25452 . [#27939](https://github.com/ClickHouse/ClickHouse/pull/27939) ([Amos Bird](https://github.com/amosbird)). * Fix extremely rare segfaults on shutdown due to incorrect order of context/config reloader shutdown. [#28088](https://github.com/ClickHouse/ClickHouse/pull/28088) ([nvartolomei](https://github.com/nvartolomei)). diff --git a/docs/changelogs/v21.10.2.15-stable.md b/docs/changelogs/v21.10.2.15-stable.md index 4ae5c8f5072..42402808260 100644 --- a/docs/changelogs/v21.10.2.15-stable.md +++ b/docs/changelogs/v21.10.2.15-stable.md @@ -17,7 +17,7 @@ sidebar_label: 2022 * Backported in [#29970](https://github.com/ClickHouse/ClickHouse/issues/29970): Fix shutdown of `AccessControlManager`. Now there can't be reloading of the configuration after AccessControlManager has been destroyed. This PR fixes the flaky test [test_user_directories/test.py::test_relative_path](https://clickhouse-test-reports.s3.yandex.net/0/f0e3122507ed8bea3f177495531c7d56bcb32466/integration_tests_(thread).html). [#29951](https://github.com/ClickHouse/ClickHouse/pull/29951) ([Vitaly Baranov](https://github.com/vitlibar)). * Backported in [#30051](https://github.com/ClickHouse/ClickHouse/issues/30051): Fix releasing query ID and session ID at the end of query processing while handing gRPC call. This PR fixes flaky test [test_grpc_protocol/test.py::test_session](https://clickhouse-test-reports.s3.yandex.net/0/1ac03811a2df9717fa7c633d1af03def821d24b6/integration_tests_(memory).html). [#29954](https://github.com/ClickHouse/ClickHouse/pull/29954) ([Vitaly Baranov](https://github.com/vitlibar)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#29054](https://github.com/ClickHouse/ClickHouse/issues/29054): Fix invalid constant type conversion when nullable or lowcardinality primary key is used. [#28636](https://github.com/ClickHouse/ClickHouse/pull/28636) ([Amos Bird](https://github.com/amosbird)). * Backported in [#28795](https://github.com/ClickHouse/ClickHouse/issues/28795): - Fix the number of arguments required by s2RectAdd and s2RectContains functions. [#28663](https://github.com/ClickHouse/ClickHouse/pull/28663) ([Bharat Nallan](https://github.com/bharatnc)). diff --git a/docs/changelogs/v21.10.3.9-stable.md b/docs/changelogs/v21.10.3.9-stable.md index d0384d58e23..327e34ca64c 100644 --- a/docs/changelogs/v21.10.3.9-stable.md +++ b/docs/changelogs/v21.10.3.9-stable.md @@ -18,11 +18,11 @@ sidebar_label: 2022 * Backported in [#30620](https://github.com/ClickHouse/ClickHouse/issues/30620): Fix reading from empty file on encrypted disk. [#30494](https://github.com/ClickHouse/ClickHouse/pull/30494) ([Vitaly Baranov](https://github.com/vitlibar)). * Backported in [#31369](https://github.com/ClickHouse/ClickHouse/issues/31369): Fix SHOW GRANTS when partial revokes are used. This PR fixes [#31138](https://github.com/ClickHouse/ClickHouse/issues/31138). [#31249](https://github.com/ClickHouse/ClickHouse/pull/31249) ([Vitaly Baranov](https://github.com/vitlibar)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release +#### Bug Fix (user-visible misbehaviour in official stable release * Backported in [#30915](https://github.com/ClickHouse/ClickHouse/issues/30915): Fix `ORDER BY ... WITH FILL` with set `TO` and `FROM` and no rows in result set. 
[#30888](https://github.com/ClickHouse/ClickHouse/pull/30888) ([Anton Popov](https://github.com/CurtizJ)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#30824](https://github.com/ClickHouse/ClickHouse/issues/30824): Fix "Column is not under aggregate function and not in GROUP BY" with PREWHERE (Fixes: [#28461](https://github.com/ClickHouse/ClickHouse/issues/28461)). [#28502](https://github.com/ClickHouse/ClickHouse/pull/28502) ([Azat Khuzhin](https://github.com/azat)). * Backported in [#30766](https://github.com/ClickHouse/ClickHouse/issues/30766): Fix hanging DDL queries on Replicated database while adding a new replica. [#29328](https://github.com/ClickHouse/ClickHouse/pull/29328) ([Kevin Michel](https://github.com/kmichel-aiven)). diff --git a/docs/changelogs/v21.10.4.26-stable.md b/docs/changelogs/v21.10.4.26-stable.md index 7d1cc93bb98..267f2109f6f 100644 --- a/docs/changelogs/v21.10.4.26-stable.md +++ b/docs/changelogs/v21.10.4.26-stable.md @@ -13,7 +13,7 @@ sidebar_label: 2022 #### Bug Fix * Backported in [#31573](https://github.com/ClickHouse/ClickHouse/issues/31573): Quota limit was not reached, but the limit was exceeded. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31337](https://github.com/ClickHouse/ClickHouse/pull/31337) ([sunny](https://github.com/sunny19930321)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#31518](https://github.com/ClickHouse/ClickHouse/issues/31518): Remove not like function into RPNElement. [#31169](https://github.com/ClickHouse/ClickHouse/pull/31169) ([sundyli](https://github.com/sundy-li)). * Backported in [#31554](https://github.com/ClickHouse/ClickHouse/issues/31554): Resolve `nullptr` in STS credentials provider for S3. [#31409](https://github.com/ClickHouse/ClickHouse/pull/31409) ([Vladimir Chebotarev](https://github.com/excitoon)). diff --git a/docs/changelogs/v21.10.5.3-stable.md b/docs/changelogs/v21.10.5.3-stable.md index 88d3d70028e..7c717dfe838 100644 --- a/docs/changelogs/v21.10.5.3-stable.md +++ b/docs/changelogs/v21.10.5.3-stable.md @@ -11,7 +11,7 @@ sidebar_label: 2022 * Backported in [#32252](https://github.com/ClickHouse/ClickHouse/issues/32252): Fix skipping columns while writing protobuf. This PR fixes [#31160](https://github.com/ClickHouse/ClickHouse/issues/31160), see the comment [#31160](https://github.com/ClickHouse/ClickHouse/issues/31160)#issuecomment-980595318. [#31988](https://github.com/ClickHouse/ClickHouse/pull/31988) ([Vitaly Baranov](https://github.com/vitlibar)). * Backported in [#32346](https://github.com/ClickHouse/ClickHouse/issues/32346): Fix bug when remove unneeded columns in subquery. If there is an aggregation function in query without group by, do not remove if it is unneeded. [#32289](https://github.com/ClickHouse/ClickHouse/pull/32289) ([dongyifeng](https://github.com/dyf6372)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#32151](https://github.com/ClickHouse/ClickHouse/issues/32151): Fix crash when function `dictGet` with type is used for dictionary attribute when type is `Nullable`. Fixes [#30980](https://github.com/ClickHouse/ClickHouse/issues/30980). 
[#31800](https://github.com/ClickHouse/ClickHouse/pull/31800) ([Maksim Kita](https://github.com/kitaisreal)). * Backported in [#32093](https://github.com/ClickHouse/ClickHouse/issues/32093): Some `GET_PART` entry might hang in replication queue if part is lost on all replicas and there are no other parts in the same partition. It's fixed in cases when partition key contains only columns of integer types or `Date[Time]`. Fixes [#31485](https://github.com/ClickHouse/ClickHouse/issues/31485). [#31887](https://github.com/ClickHouse/ClickHouse/pull/31887) ([Alexander Tokmakov](https://github.com/tavplubix)). diff --git a/docs/changelogs/v21.10.6.2-stable.md b/docs/changelogs/v21.10.6.2-stable.md index 74f037b2f8f..0e8e934e2fa 100644 --- a/docs/changelogs/v21.10.6.2-stable.md +++ b/docs/changelogs/v21.10.6.2-stable.md @@ -10,7 +10,7 @@ sidebar_label: 2022 #### Bug Fix * Backported in [#32692](https://github.com/ClickHouse/ClickHouse/issues/32692): Quota limit was not reached, but the limit was exceeded. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31656](https://github.com/ClickHouse/ClickHouse/pull/31656) ([sunny](https://github.com/sunny19930321)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#32680](https://github.com/ClickHouse/ClickHouse/issues/32680): Fix unexpected projection removal when detaching parts. [#32067](https://github.com/ClickHouse/ClickHouse/pull/32067) ([Amos Bird](https://github.com/amosbird)). * Backported in [#32285](https://github.com/ClickHouse/ClickHouse/issues/32285): Dictionaries fix cases when `{condition}` does not work for custom database queries. [#32117](https://github.com/ClickHouse/ClickHouse/pull/32117) ([Maksim Kita](https://github.com/kitaisreal)). @@ -23,7 +23,7 @@ sidebar_label: 2022 * Backported in [#33182](https://github.com/ClickHouse/ClickHouse/issues/33182): Server might fail to start if database with `MySQL` engine cannot connect to MySQL server, it's fixed. Fixes [#14441](https://github.com/ClickHouse/ClickHouse/issues/14441). [#32802](https://github.com/ClickHouse/ClickHouse/pull/32802) ([Alexander Tokmakov](https://github.com/tavplubix)). * Backported in [#33655](https://github.com/ClickHouse/ClickHouse/issues/33655): Fix hdfs url check that didn't allow using HA namenode address. Bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/31042. [#32976](https://github.com/ClickHouse/ClickHouse/pull/32976) ([Kruglov Pavel](https://github.com/Avogar)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release): +#### Bug Fix (user-visible misbehaviour in official stable release): * Backported in [#32657](https://github.com/ClickHouse/ClickHouse/issues/32657): Fix possible crash (or incorrect result) in case of `LowCardinality` arguments of window function. Fixes [#31114](https://github.com/ClickHouse/ClickHouse/issues/31114). [#31888](https://github.com/ClickHouse/ClickHouse/pull/31888) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). diff --git a/docs/changelogs/v21.11.1.8636-prestable.md b/docs/changelogs/v21.11.1.8636-prestable.md index 407a5196c1d..d6a435dd3ce 100644 --- a/docs/changelogs/v21.11.1.8636-prestable.md +++ b/docs/changelogs/v21.11.1.8636-prestable.md @@ -124,11 +124,11 @@ sidebar_label: 2022 * Recursive submodules are no longer needed for ClickHouse. 
[#30315](https://github.com/ClickHouse/ClickHouse/pull/30315) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Added docker image to build docs. [#30499](https://github.com/ClickHouse/ClickHouse/pull/30499) ([Ilya Yatsishin](https://github.com/qoega)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release +#### Bug Fix (user-visible misbehaviour in official stable release * Fix `ORDER BY ... WITH FILL` with set `TO` and `FROM` and no rows in result set. [#30888](https://github.com/ClickHouse/ClickHouse/pull/30888) ([Anton Popov](https://github.com/CurtizJ)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Fix hanging DDL queries on Replicated database while adding a new replica. [#29328](https://github.com/ClickHouse/ClickHouse/pull/29328) ([Kevin Michel](https://github.com/kmichel-aiven)). * Fix vertical merges of projection parts. This fixes [#29253](https://github.com/ClickHouse/ClickHouse/issues/29253) . This PR also fixes several projection merge/mutation issues introduced in https://github.com/ClickHouse/ClickHouse/pull/25165. [#29337](https://github.com/ClickHouse/ClickHouse/pull/29337) ([Amos Bird](https://github.com/amosbird)). diff --git a/docs/changelogs/v21.11.11.1-stable.md b/docs/changelogs/v21.11.11.1-stable.md index 85a8975c6e7..76cd5239cba 100644 --- a/docs/changelogs/v21.11.11.1-stable.md +++ b/docs/changelogs/v21.11.11.1-stable.md @@ -7,6 +7,6 @@ sidebar_label: 2022 ### ClickHouse release v21.11.11.1-stable FIXME as compared to v21.11.10.1-stable -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#33656](https://github.com/ClickHouse/ClickHouse/issues/33656): Fix hdfs url check that didn't allow using HA namenode address. Bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/31042. [#32976](https://github.com/ClickHouse/ClickHouse/pull/32976) ([Kruglov Pavel](https://github.com/Avogar)). diff --git a/docs/changelogs/v21.11.2.2-stable.md b/docs/changelogs/v21.11.2.2-stable.md index bf02de235e4..44938addad5 100644 --- a/docs/changelogs/v21.11.2.2-stable.md +++ b/docs/changelogs/v21.11.2.2-stable.md @@ -7,7 +7,7 @@ sidebar_label: 2022 ### ClickHouse release v21.11.2.2-stable FIXME as compared to v21.11.1.8636-prestable -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#31154](https://github.com/ClickHouse/ClickHouse/issues/31154): Skip max_partition_size_to_drop check in case of ATTACH PARTITION ... FROM and MOVE PARTITION ... [#30995](https://github.com/ClickHouse/ClickHouse/pull/30995) ([Amr Alaa](https://github.com/amralaa-MSFT)). * Backported in [#31027](https://github.com/ClickHouse/ClickHouse/issues/31027): Using `formatRow` function with not row formats led to segfault. Don't allow to use this function with such formats (because it doesn't make sense). [#31001](https://github.com/ClickHouse/ClickHouse/pull/31001) ([Kruglov Pavel](https://github.com/Avogar)). 
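The `process_functional_tests_result.py` hunks earlier in this diff add a broken-tests allowlist, and the control flow is easier to see outside the patch format. Below is a minimal, self-contained sketch of the classification rule as I read it from the diff; `classify` and the sample test names are hypothetical, while the statuses (`BROKEN`, `NOT_FAILED`) and the `/broken_tests.txt` file come from the patch.

```python
# Hypothetical standalone sketch of the broken-tests rule added to
# process_functional_tests_result.py in this diff; classify() is not
# part of the real script.

def classify(test_name, raw_status, broken_tests):
    """Map a raw test status to the status written to test_results.tsv."""
    if raw_status in ("FAIL", "Timeout"):
        # Known-broken tests are downgraded: they are reported as BROKEN
        # instead of failing the check.
        if test_name in broken_tests:
            return "BROKEN"
        return raw_status
    if raw_status == "OK" and test_name in broken_tests:
        # A listed test that passes is suspicious: report NOT_FAILED
        # as a reminder to update broken_tests.txt.
        return "NOT_FAILED"
    return raw_status


broken = {"01111_replicated_test"}  # hypothetical contents of /broken_tests.txt
assert classify("01111_replicated_test", "FAIL", broken) == "BROKEN"
assert classify("01111_replicated_test", "OK", broken) == "NOT_FAILED"
assert classify("00001_select_one", "FAIL", broken) == "FAIL"
```

In the actual script a BROKEN result still increments the success counter and NOT_FAILED increments the skipped counter, so a stale allowlist never fails the check outright.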
diff --git a/docs/changelogs/v21.11.3.6-stable.md b/docs/changelogs/v21.11.3.6-stable.md index e3886c4efac..f9cc64e2c2b 100644 --- a/docs/changelogs/v21.11.3.6-stable.md +++ b/docs/changelogs/v21.11.3.6-stable.md @@ -10,7 +10,7 @@ sidebar_label: 2022 #### Bug Fix * Backported in [#31246](https://github.com/ClickHouse/ClickHouse/issues/31246): Memory amount was incorrectly estimated when ClickHouse is run in containers with cgroup limits. [#31157](https://github.com/ClickHouse/ClickHouse/pull/31157) ([Pavel Medvedev](https://github.com/pmed)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#31206](https://github.com/ClickHouse/ClickHouse/issues/31206): Fix possible assert in `hdfs` table function/engine, add test. [#31036](https://github.com/ClickHouse/ClickHouse/pull/31036) ([Kruglov Pavel](https://github.com/Avogar)). * Backported in [#31202](https://github.com/ClickHouse/ClickHouse/issues/31202): Fix abort in debug server and `DB::Exception: std::out_of_range: basic_string` error in release server in case of bad hdfs url by adding additional check of hdfs url structure. [#31042](https://github.com/ClickHouse/ClickHouse/pull/31042) ([Kruglov Pavel](https://github.com/Avogar)). diff --git a/docs/changelogs/v21.11.4.14-stable.md b/docs/changelogs/v21.11.4.14-stable.md index f05f43d9de1..8882832d1fe 100644 --- a/docs/changelogs/v21.11.4.14-stable.md +++ b/docs/changelogs/v21.11.4.14-stable.md @@ -10,7 +10,7 @@ sidebar_label: 2022 #### Bug Fix * Backported in [#31370](https://github.com/ClickHouse/ClickHouse/issues/31370): Fix SHOW GRANTS when partial revokes are used. This PR fixes [#31138](https://github.com/ClickHouse/ClickHouse/issues/31138). [#31249](https://github.com/ClickHouse/ClickHouse/pull/31249) ([Vitaly Baranov](https://github.com/vitlibar)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#31282](https://github.com/ClickHouse/ClickHouse/issues/31282): Fix some corner cases with intersect/except. Closes [#30803](https://github.com/ClickHouse/ClickHouse/issues/30803). [#30965](https://github.com/ClickHouse/ClickHouse/pull/30965) ([Kseniia Sumarokova](https://github.com/kssenii)). * Backported in [#31237](https://github.com/ClickHouse/ClickHouse/issues/31237): Fix bug which broke select queries if they happened after dropping materialized view. Found in [#30691](https://github.com/ClickHouse/ClickHouse/issues/30691). [#30997](https://github.com/ClickHouse/ClickHouse/pull/30997) ([Kseniia Sumarokova](https://github.com/kssenii)). diff --git a/docs/changelogs/v21.11.5.33-stable.md b/docs/changelogs/v21.11.5.33-stable.md index 3780d5a2530..11e7f24dbb1 100644 --- a/docs/changelogs/v21.11.5.33-stable.md +++ b/docs/changelogs/v21.11.5.33-stable.md @@ -13,7 +13,7 @@ sidebar_label: 2022 #### Bug Fix * Backported in [#31572](https://github.com/ClickHouse/ClickHouse/issues/31572): Quota limit was not reached, but the limit was exceeded. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31337](https://github.com/ClickHouse/ClickHouse/pull/31337) ([sunny](https://github.com/sunny19930321)). 
-#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#31517](https://github.com/ClickHouse/ClickHouse/issues/31517): Remove not like function into RPNElement. [#31169](https://github.com/ClickHouse/ClickHouse/pull/31169) ([sundyli](https://github.com/sundy-li)). * Backported in [#31551](https://github.com/ClickHouse/ClickHouse/issues/31551): Resolve `nullptr` in STS credentials provider for S3. [#31409](https://github.com/ClickHouse/ClickHouse/pull/31409) ([Vladimir Chebotarev](https://github.com/excitoon)). diff --git a/docs/changelogs/v21.11.6.7-stable.md b/docs/changelogs/v21.11.6.7-stable.md index 1f1935d1865..cddd472076a 100644 --- a/docs/changelogs/v21.11.6.7-stable.md +++ b/docs/changelogs/v21.11.6.7-stable.md @@ -11,7 +11,7 @@ sidebar_label: 2022 * Backported in [#32254](https://github.com/ClickHouse/ClickHouse/issues/32254): Fix skipping columns while writing protobuf. This PR fixes [#31160](https://github.com/ClickHouse/ClickHouse/issues/31160), see the comment [#31160](https://github.com/ClickHouse/ClickHouse/issues/31160)#issuecomment-980595318. [#31988](https://github.com/ClickHouse/ClickHouse/pull/31988) ([Vitaly Baranov](https://github.com/vitlibar)). * Backported in [#32345](https://github.com/ClickHouse/ClickHouse/issues/32345): Fix bug when remove unneeded columns in subquery. If there is an aggregation function in query without group by, do not remove if it is unneeded. [#32289](https://github.com/ClickHouse/ClickHouse/pull/32289) ([dongyifeng](https://github.com/dyf6372)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#32152](https://github.com/ClickHouse/ClickHouse/issues/32152): Fix crash when function `dictGet` with type is used for dictionary attribute when type is `Nullable`. Fixes [#30980](https://github.com/ClickHouse/ClickHouse/issues/30980). [#31800](https://github.com/ClickHouse/ClickHouse/pull/31800) ([Maksim Kita](https://github.com/kitaisreal)). * Backported in [#32298](https://github.com/ClickHouse/ClickHouse/issues/32298): Fix recursive user defined functions crash. Closes [#30856](https://github.com/ClickHouse/ClickHouse/issues/30856). [#31820](https://github.com/ClickHouse/ClickHouse/pull/31820) ([Maksim Kita](https://github.com/kitaisreal)). diff --git a/docs/changelogs/v21.11.7.9-stable.md b/docs/changelogs/v21.11.7.9-stable.md index baa6b0290a5..596d16a28ef 100644 --- a/docs/changelogs/v21.11.7.9-stable.md +++ b/docs/changelogs/v21.11.7.9-stable.md @@ -10,13 +10,13 @@ sidebar_label: 2022 #### Bug Fix * Backported in [#32691](https://github.com/ClickHouse/ClickHouse/issues/32691): Quota limit was not reached, but the limit was exceeded. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31656](https://github.com/ClickHouse/ClickHouse/pull/31656) ([sunny](https://github.com/sunny19930321)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#32711](https://github.com/ClickHouse/ClickHouse/issues/32711): Fix failures in queries that are trying to use skipping indices, which are not materialized yet. Fixes [#32292](https://github.com/ClickHouse/ClickHouse/issues/32292) and [#30343](https://github.com/ClickHouse/ClickHouse/issues/30343). 
[#32359](https://github.com/ClickHouse/ClickHouse/pull/32359) ([Anton Popov](https://github.com/CurtizJ)). * Backported in [#32568](https://github.com/ClickHouse/ClickHouse/issues/32568): Fix crash in `JoinCommon::removeColumnNullability`, close [#32458](https://github.com/ClickHouse/ClickHouse/issues/32458). [#32508](https://github.com/ClickHouse/ClickHouse/pull/32508) ([Vladimir C](https://github.com/vdimir)). * Backported in [#32732](https://github.com/ClickHouse/ClickHouse/issues/32732): Fix surprisingly bad code in function `file`. [#32640](https://github.com/ClickHouse/ClickHouse/pull/32640) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release): +#### Bug Fix (user-visible misbehaviour in official stable release): * Backported in [#32617](https://github.com/ClickHouse/ClickHouse/issues/32617): Fix possible crash (or incorrect result) in case of `LowCardinality` arguments of window function. Fixes [#31114](https://github.com/ClickHouse/ClickHouse/issues/31114). [#31888](https://github.com/ClickHouse/ClickHouse/pull/31888) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). diff --git a/docs/changelogs/v21.11.8.4-stable.md b/docs/changelogs/v21.11.8.4-stable.md index bd71374e870..28d413dd2c5 100644 --- a/docs/changelogs/v21.11.8.4-stable.md +++ b/docs/changelogs/v21.11.8.4-stable.md @@ -7,7 +7,7 @@ sidebar_label: 2022 ### ClickHouse release v21.11.8.4-stable FIXME as compared to v21.11.7.9-stable -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#32679](https://github.com/ClickHouse/ClickHouse/issues/32679): Fix unexpected projection removal when detaching parts. [#32067](https://github.com/ClickHouse/ClickHouse/pull/32067) ([Amos Bird](https://github.com/amosbird)). * Backported in [#32543](https://github.com/ClickHouse/ClickHouse/issues/32543): Some replication queue entries might hang for `temporary_directories_lifetime` (1 day by default) with `Directory tmp_merge_` or `Part ... (state Deleting) already exists, but it will be deleted soon` or similar error. It's fixed. Fixes [#29616](https://github.com/ClickHouse/ClickHouse/issues/29616). [#32201](https://github.com/ClickHouse/ClickHouse/pull/32201) ([Alexander Tokmakov](https://github.com/tavplubix)). diff --git a/docs/changelogs/v21.11.9.1-stable.md b/docs/changelogs/v21.11.9.1-stable.md index 1473a4a152c..ea36479c943 100644 --- a/docs/changelogs/v21.11.9.1-stable.md +++ b/docs/changelogs/v21.11.9.1-stable.md @@ -7,7 +7,7 @@ sidebar_label: 2022 ### ClickHouse release v21.11.9.1-stable FIXME as compared to v21.11.8.4-stable -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#33181](https://github.com/ClickHouse/ClickHouse/issues/33181): Server might fail to start if database with `MySQL` engine cannot connect to MySQL server, it's fixed. Fixes [#14441](https://github.com/ClickHouse/ClickHouse/issues/14441). [#32802](https://github.com/ClickHouse/ClickHouse/pull/32802) ([Alexander Tokmakov](https://github.com/tavplubix)). 
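Another change worth isolating is the sccache installation added to docker/test/util/Dockerfile earlier in this diff, which maps Docker's `TARGETARCH` onto the architecture names used in sccache release tarballs before downloading. Here is a sketch of the same mapping, assuming Python in place of the shell `case` statement the Dockerfile actually uses; the URL pattern and version are taken from the patch.

```python
# Hypothetical Python rendering of the TARGETARCH -> release-arch mapping
# from the sccache install step in docker/test/util/Dockerfile.
SCCACHE_VERSION = "v0.4.1"
RELEASE_ARCH = {"amd64": "x86_64", "arm64": "aarch64"}

def sccache_url(targetarch: str = "amd64") -> str:
    # Dockerfile default: arch=${TARGETARCH:-amd64}
    rarch = RELEASE_ARCH[targetarch]
    name = f"sccache-{SCCACHE_VERSION}-{rarch}-unknown-linux-musl"
    return (
        "https://github.com/mozilla/sccache/releases/download/"
        f"{SCCACHE_VERSION}/{name}.tar.gz"
    )

print(sccache_url("arm64"))
```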
diff --git a/docs/changelogs/v21.12.1.9017-prestable.md b/docs/changelogs/v21.12.1.9017-prestable.md index e8f2ca283a4..88b8260e312 100644 --- a/docs/changelogs/v21.12.1.9017-prestable.md +++ b/docs/changelogs/v21.12.1.9017-prestable.md @@ -132,7 +132,7 @@ sidebar_label: 2022 * Build rpm and tgz packages in master and release branches workfolw. [#32048](https://github.com/ClickHouse/ClickHouse/pull/32048) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Fix broken symlink for sysroot/linux-riscv64/usr/lib. [#32071](https://github.com/ClickHouse/ClickHouse/pull/32071) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Fix some corner cases with intersect/except. Closes [#30803](https://github.com/ClickHouse/ClickHouse/issues/30803). [#30965](https://github.com/ClickHouse/ClickHouse/pull/30965) ([Kseniia Sumarokova](https://github.com/kssenii)). * Skip max_partition_size_to_drop check in case of ATTACH PARTITION ... FROM and MOVE PARTITION ... [#30995](https://github.com/ClickHouse/ClickHouse/pull/30995) ([Amr Alaa](https://github.com/amralaa-MSFT)). diff --git a/docs/changelogs/v21.12.2.17-stable.md b/docs/changelogs/v21.12.2.17-stable.md index 94cfc1b88a8..67761ce0e08 100644 --- a/docs/changelogs/v21.12.2.17-stable.md +++ b/docs/changelogs/v21.12.2.17-stable.md @@ -10,7 +10,7 @@ sidebar_label: 2022 #### Bug Fix * Backported in [#32693](https://github.com/ClickHouse/ClickHouse/issues/32693): Quota limit was not reached, but the limit was exceeded. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31656](https://github.com/ClickHouse/ClickHouse/pull/31656) ([sunny](https://github.com/sunny19930321)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#32681](https://github.com/ClickHouse/ClickHouse/issues/32681): Fix unexpected projection removal when detaching parts. [#32067](https://github.com/ClickHouse/ClickHouse/pull/32067) ([Amos Bird](https://github.com/amosbird)). * Backported in [#32483](https://github.com/ClickHouse/ClickHouse/issues/32483): Fix 'APPLY lambda' parsing which could lead to client/server crash. [#32138](https://github.com/ClickHouse/ClickHouse/pull/32138) ([Kruglov Pavel](https://github.com/Avogar)). @@ -23,7 +23,7 @@ sidebar_label: 2022 * Backported in [#32733](https://github.com/ClickHouse/ClickHouse/issues/32733): Fix surprisingly bad code in function `file`. [#32640](https://github.com/ClickHouse/ClickHouse/pull/32640) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Backported in [#32793](https://github.com/ClickHouse/ClickHouse/issues/32793): fix crash when used fuzzBits with multiply same FixedString, Close [#32737](https://github.com/ClickHouse/ClickHouse/issues/32737). [#32755](https://github.com/ClickHouse/ClickHouse/pull/32755) ([SuperDJY](https://github.com/cmsxbc)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release): +#### Bug Fix (user-visible misbehaviour in official stable release): * Backported in [#32616](https://github.com/ClickHouse/ClickHouse/issues/32616): Fix possible crash (or incorrect result) in case of `LowCardinality` arguments of window function. Fixes [#31114](https://github.com/ClickHouse/ClickHouse/issues/31114). 
[#31888](https://github.com/ClickHouse/ClickHouse/pull/31888) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). diff --git a/docs/changelogs/v21.12.3.32-stable.md b/docs/changelogs/v21.12.3.32-stable.md index ea11efa46c5..c8c423a77b9 100644 --- a/docs/changelogs/v21.12.3.32-stable.md +++ b/docs/changelogs/v21.12.3.32-stable.md @@ -10,7 +10,7 @@ sidebar_label: 2022 #### Bug Fix * Backported in [#33018](https://github.com/ClickHouse/ClickHouse/issues/33018): - ClickHouse Keeper handler should remove operation when response sent. [#32988](https://github.com/ClickHouse/ClickHouse/pull/32988) ([JackyWoo](https://github.com/JackyWoo)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#32890](https://github.com/ClickHouse/ClickHouse/issues/32890): Fix LOGICAL_ERROR when the target of a materialized view is a JOIN or a SET table. [#32669](https://github.com/ClickHouse/ClickHouse/pull/32669) ([Raúl Marín](https://github.com/Algunenano)). * Backported in [#33183](https://github.com/ClickHouse/ClickHouse/issues/33183): Server might fail to start if database with `MySQL` engine cannot connect to MySQL server, it's fixed. Fixes [#14441](https://github.com/ClickHouse/ClickHouse/issues/14441). [#32802](https://github.com/ClickHouse/ClickHouse/pull/32802) ([Alexander Tokmakov](https://github.com/tavplubix)). diff --git a/docs/changelogs/v21.12.4.1-stable.md b/docs/changelogs/v21.12.4.1-stable.md index bd38dbd0c59..3345f76b317 100644 --- a/docs/changelogs/v21.12.4.1-stable.md +++ b/docs/changelogs/v21.12.4.1-stable.md @@ -13,7 +13,7 @@ sidebar_label: 2022 #### Bug Fix * Backported in [#33551](https://github.com/ClickHouse/ClickHouse/issues/33551): Fix null pointer dereference in low cardinality data when deserializing LowCardinality data in the Native format. [#33021](https://github.com/ClickHouse/ClickHouse/pull/33021) ([Harry Lee](https://github.com/HarryLeeIBM)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#33537](https://github.com/ClickHouse/ClickHouse/issues/33537): Fix ORC stripe reading. [#32929](https://github.com/ClickHouse/ClickHouse/pull/32929) ([Ernest Zaslavsky](https://github.com/kreuzerkrieg)). * Backported in [#33654](https://github.com/ClickHouse/ClickHouse/issues/33654): Fix segfault in Avro that appears after the second insert into file. [#33566](https://github.com/ClickHouse/ClickHouse/pull/33566) ([Kruglov Pavel](https://github.com/Avogar)). diff --git a/docs/changelogs/v21.3.16.5-lts.md b/docs/changelogs/v21.3.16.5-lts.md index 123b27097d4..6aedeff5acb 100644 --- a/docs/changelogs/v21.3.16.5-lts.md +++ b/docs/changelogs/v21.3.16.5-lts.md @@ -25,7 +25,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * Backported in [#28075](https://github.com/ClickHouse/ClickHouse/issues/28075): Temporarily switched ubuntu apt repository to mirror ru.archive.ubuntu.com as default one(archive.ubuntu.com) is not responding from our CI. [#28016](https://github.com/ClickHouse/ClickHouse/pull/28016) ([Ilya Yatsishin](https://github.com/qoega)). 
-#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#28181](https://github.com/ClickHouse/ClickHouse/issues/28181): Fixed possible excessive number of conditions moved from `WHERE` to `PREWHERE` (optimization controlled by settings `optimize_move_to_prewhere`). [#28139](https://github.com/ClickHouse/ClickHouse/pull/28139) ([lthaooo](https://github.com/lthaooo)). * Backported in [#28293](https://github.com/ClickHouse/ClickHouse/issues/28293): Fix inconsistent result in queries with `ORDER BY` and `Merge` tables with enabled setting `optimize_read_in_order`. [#28266](https://github.com/ClickHouse/ClickHouse/pull/28266) ([Anton Popov](https://github.com/CurtizJ)). diff --git a/docs/changelogs/v21.3.17.2-lts.md b/docs/changelogs/v21.3.17.2-lts.md index 6c288b5a0d8..9104ae7aa0a 100644 --- a/docs/changelogs/v21.3.17.2-lts.md +++ b/docs/changelogs/v21.3.17.2-lts.md @@ -10,7 +10,7 @@ sidebar_label: 2022 #### Bug Fix * Backported in [#28647](https://github.com/ClickHouse/ClickHouse/issues/28647): Fix a rare bug in `DROP PART` which can lead to the error `Unexpected merged part intersects drop range`. [#27807](https://github.com/ClickHouse/ClickHouse/pull/27807) ([alesapin](https://github.com/alesapin)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#28569](https://github.com/ClickHouse/ClickHouse/issues/28569): Fix bug which can lead to error `Existing table metadata in ZooKeeper differs in sorting key expression.` after alter of `ReplicatedVersionedCollapsingMergeTree`. Fixes [#28515](https://github.com/ClickHouse/ClickHouse/issues/28515). [#28528](https://github.com/ClickHouse/ClickHouse/pull/28528) ([alesapin](https://github.com/alesapin)). * Backported in [#28857](https://github.com/ClickHouse/ClickHouse/issues/28857): Fix benign race condition in ReplicatedMergeTreeQueue. Shouldn't be visible for user, but can lead to subtle bugs. [#28734](https://github.com/ClickHouse/ClickHouse/pull/28734) ([alesapin](https://github.com/alesapin)). diff --git a/docs/changelogs/v21.3.18.4-lts.md b/docs/changelogs/v21.3.18.4-lts.md index d14dddfb1a5..33f4b86d81c 100644 --- a/docs/changelogs/v21.3.18.4-lts.md +++ b/docs/changelogs/v21.3.18.4-lts.md @@ -14,7 +14,7 @@ sidebar_label: 2022 #### Bug Fix * Backported in [#30041](https://github.com/ClickHouse/ClickHouse/issues/30041): Fix shutdown of `AccessControlManager`. Now there can't be reloading of the configuration after AccessControlManager has been destroyed. This PR fixes the flaky test [test_user_directories/test.py::test_relative_path](https://clickhouse-test-reports.s3.yandex.net/0/f0e3122507ed8bea3f177495531c7d56bcb32466/integration_tests_(thread).html). [#29951](https://github.com/ClickHouse/ClickHouse/pull/29951) ([Vitaly Baranov](https://github.com/vitlibar)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#29260](https://github.com/ClickHouse/ClickHouse/issues/29260): Fix invalid constant type conversion when nullable or lowcardinality primary key is used. [#28636](https://github.com/ClickHouse/ClickHouse/pull/28636) ([Amos Bird](https://github.com/amosbird)). 
* Backported in [#29026](https://github.com/ClickHouse/ClickHouse/issues/29026): Fix the number of threads used in `GLOBAL IN` subquery (it was executed in single threads since [#19414](https://github.com/ClickHouse/ClickHouse/issues/19414) bugfix). [#28997](https://github.com/ClickHouse/ClickHouse/pull/28997) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). diff --git a/docs/changelogs/v21.3.19.1-lts.md b/docs/changelogs/v21.3.19.1-lts.md index 4d4404077a5..26c36725610 100644 --- a/docs/changelogs/v21.3.19.1-lts.md +++ b/docs/changelogs/v21.3.19.1-lts.md @@ -14,11 +14,11 @@ sidebar_label: 2022 * Backported in [#31577](https://github.com/ClickHouse/ClickHouse/issues/31577): Quota limit was not reached, but the limit was exceeded. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31337](https://github.com/ClickHouse/ClickHouse/pull/31337) ([sunny](https://github.com/sunny19930321)). * Backported in [#32347](https://github.com/ClickHouse/ClickHouse/issues/32347): Fix bug when remove unneeded columns in subquery. If there is an aggregation function in query without group by, do not remove if it is unneeded. [#32289](https://github.com/ClickHouse/ClickHouse/pull/32289) ([dongyifeng](https://github.com/dyf6372)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release +#### Bug Fix (user-visible misbehaviour in official stable release * Backported in [#30913](https://github.com/ClickHouse/ClickHouse/issues/30913): Fix `ORDER BY ... WITH FILL` with set `TO` and `FROM` and no rows in result set. [#30888](https://github.com/ClickHouse/ClickHouse/pull/30888) ([Anton Popov](https://github.com/CurtizJ)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#30750](https://github.com/ClickHouse/ClickHouse/issues/30750): Functions for case-insensitive search in UTF8 strings like `positionCaseInsensitiveUTF8` and `countSubstringsCaseInsensitiveUTF8` might find substrings that actually does not match, it's fixed. [#30663](https://github.com/ClickHouse/ClickHouse/pull/30663) ([Alexander Tokmakov](https://github.com/tavplubix)). * Backported in [#31038](https://github.com/ClickHouse/ClickHouse/issues/31038): Using `formatRow` function with not row formats led to segfault. Don't allow to use this function with such formats (because it doesn't make sense). [#31001](https://github.com/ClickHouse/ClickHouse/pull/31001) ([Kruglov Pavel](https://github.com/Avogar)). diff --git a/docs/changelogs/v21.3.20.1-lts.md b/docs/changelogs/v21.3.20.1-lts.md index f9ce3cba78b..1b235556faf 100644 --- a/docs/changelogs/v21.3.20.1-lts.md +++ b/docs/changelogs/v21.3.20.1-lts.md @@ -11,7 +11,7 @@ sidebar_label: 2022 * Backported in [#32690](https://github.com/ClickHouse/ClickHouse/issues/32690): Quota limit was not reached, but the limit was exceeded. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31656](https://github.com/ClickHouse/ClickHouse/pull/31656) ([sunny](https://github.com/sunny19930321)). * Backported in [#33727](https://github.com/ClickHouse/ClickHouse/issues/33727): Fix null pointer dereference in low cardinality data when deserializing LowCardinality data in the Native format. [#33021](https://github.com/ClickHouse/ClickHouse/pull/33021) ([Harry Lee](https://github.com/HarryLeeIBM)). 
-#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#32791](https://github.com/ClickHouse/ClickHouse/issues/32791): fix crash when used fuzzBits with multiply same FixedString, Close [#32737](https://github.com/ClickHouse/ClickHouse/issues/32737). [#32755](https://github.com/ClickHouse/ClickHouse/pull/32755) ([SuperDJY](https://github.com/cmsxbc)). diff --git a/docs/changelogs/v21.6.9.7-stable.md b/docs/changelogs/v21.6.9.7-stable.md index ca1edeb1722..0a989e4d6b7 100644 --- a/docs/changelogs/v21.6.9.7-stable.md +++ b/docs/changelogs/v21.6.9.7-stable.md @@ -40,7 +40,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * Backported in [#28030](https://github.com/ClickHouse/ClickHouse/issues/28030): Temporarily switched ubuntu apt repository to mirror ru.archive.ubuntu.com as default one(archive.ubuntu.com) is not responding from our CI. [#28016](https://github.com/ClickHouse/ClickHouse/pull/28016) ([Ilya Yatsishin](https://github.com/qoega)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#28119](https://github.com/ClickHouse/ClickHouse/issues/28119): Fix extremely rare segfaults on shutdown due to incorrect order of context/config reloader shutdown. [#28088](https://github.com/ClickHouse/ClickHouse/pull/28088) ([nvartolomei](https://github.com/nvartolomei)). * Backported in [#28179](https://github.com/ClickHouse/ClickHouse/issues/28179): Fixed possible excessive number of conditions moved from `WHERE` to `PREWHERE` (optimization controlled by settings `optimize_move_to_prewhere`). [#28139](https://github.com/ClickHouse/ClickHouse/pull/28139) ([lthaooo](https://github.com/lthaooo)). diff --git a/docs/changelogs/v21.7.10.4-stable.md b/docs/changelogs/v21.7.10.4-stable.md index daa063ebf5a..238dfd651eb 100644 --- a/docs/changelogs/v21.7.10.4-stable.md +++ b/docs/changelogs/v21.7.10.4-stable.md @@ -14,7 +14,7 @@ sidebar_label: 2022 * Backported in [#27925](https://github.com/ClickHouse/ClickHouse/issues/27925): Fix PostgreSQL-style cast (`::` operator) with negative numbers. [#27876](https://github.com/ClickHouse/ClickHouse/pull/27876) ([Anton Popov](https://github.com/CurtizJ)). * Backported in [#28752](https://github.com/ClickHouse/ClickHouse/issues/28752): Fix transformation of disjunctions chain to `IN` (controlled by settings `optimize_min_equality_disjunction_chain_length`) in distributed queries with settings `legacy_column_name_of_tuple_literal = 0`. [#28658](https://github.com/ClickHouse/ClickHouse/pull/28658) ([Anton Popov](https://github.com/CurtizJ)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#28509](https://github.com/ClickHouse/ClickHouse/issues/28509): Fixed possible ZooKeeper watches leak on background processing of distributed DDL queue. Closes [#26036](https://github.com/ClickHouse/ClickHouse/issues/26036). [#28446](https://github.com/ClickHouse/ClickHouse/pull/28446) ([Alexander Tokmakov](https://github.com/tavplubix)). * Backported in [#28570](https://github.com/ClickHouse/ClickHouse/issues/28570): Fix bug which can lead to error `Existing table metadata in ZooKeeper differs in sorting key expression.` after alter of `ReplicatedVersionedCollapsingMergeTree`. 
Fixes [#28515](https://github.com/ClickHouse/ClickHouse/issues/28515). [#28528](https://github.com/ClickHouse/ClickHouse/pull/28528) ([alesapin](https://github.com/alesapin)). diff --git a/docs/changelogs/v21.7.11.3-stable.md b/docs/changelogs/v21.7.11.3-stable.md index b3d1c9a44fd..8ccc31657de 100644 --- a/docs/changelogs/v21.7.11.3-stable.md +++ b/docs/changelogs/v21.7.11.3-stable.md @@ -7,7 +7,7 @@ sidebar_label: 2022 ### ClickHouse release v21.7.11.3-stable FIXME as compared to v21.7.10.4-stable -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#29024](https://github.com/ClickHouse/ClickHouse/issues/29024): Fix the number of threads used in `GLOBAL IN` subquery (it was executed in single threads since [#19414](https://github.com/ClickHouse/ClickHouse/issues/19414) bugfix). [#28997](https://github.com/ClickHouse/ClickHouse/pull/28997) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Backported in [#29195](https://github.com/ClickHouse/ClickHouse/issues/29195): Fix segfault while inserting into column with type LowCardinality(Nullable) in Avro input format. [#29132](https://github.com/ClickHouse/ClickHouse/pull/29132) ([Kruglov Pavel](https://github.com/Avogar)). diff --git a/docs/changelogs/v21.7.9.7-stable.md b/docs/changelogs/v21.7.9.7-stable.md index ac985f7af37..7aaab54af6b 100644 --- a/docs/changelogs/v21.7.9.7-stable.md +++ b/docs/changelogs/v21.7.9.7-stable.md @@ -23,7 +23,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * Backported in [#28032](https://github.com/ClickHouse/ClickHouse/issues/28032): Temporarily switched ubuntu apt repository to mirror ru.archive.ubuntu.com as default one(archive.ubuntu.com) is not responding from our CI. [#28016](https://github.com/ClickHouse/ClickHouse/pull/28016) ([Ilya Yatsishin](https://github.com/qoega)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#28116](https://github.com/ClickHouse/ClickHouse/issues/28116): Fix extremely rare segfaults on shutdown due to incorrect order of context/config reloader shutdown. [#28088](https://github.com/ClickHouse/ClickHouse/pull/28088) ([nvartolomei](https://github.com/nvartolomei)). * Backported in [#28183](https://github.com/ClickHouse/ClickHouse/issues/28183): Fixed possible excessive number of conditions moved from `WHERE` to `PREWHERE` (optimization controlled by settings `optimize_move_to_prewhere`). [#28139](https://github.com/ClickHouse/ClickHouse/pull/28139) ([lthaooo](https://github.com/lthaooo)). diff --git a/docs/changelogs/v21.8.10.19-lts.md b/docs/changelogs/v21.8.10.19-lts.md index 56c682ddbd2..5873f2b2ff9 100644 --- a/docs/changelogs/v21.8.10.19-lts.md +++ b/docs/changelogs/v21.8.10.19-lts.md @@ -10,7 +10,7 @@ sidebar_label: 2022 #### Improvement * Backported in [#30452](https://github.com/ClickHouse/ClickHouse/issues/30452): Allow symlinks to files in user_files directory for file table function. [#30309](https://github.com/ClickHouse/ClickHouse/pull/30309) ([Kseniia Sumarokova](https://github.com/kssenii)). 
-#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#29724](https://github.com/ClickHouse/ClickHouse/issues/29724): Fix null deference for `GROUP BY WITH TOTALS HAVING` (when the column from `HAVING` wasn't selected). [#29553](https://github.com/ClickHouse/ClickHouse/pull/29553) ([Azat Khuzhin](https://github.com/azat)). * Backported in [#30233](https://github.com/ClickHouse/ClickHouse/issues/30233): Fix INSERT SELECT incorrectly fills MATERIALIZED column based of Nullable column. [#30189](https://github.com/ClickHouse/ClickHouse/pull/30189) ([Azat Khuzhin](https://github.com/azat)). diff --git a/docs/changelogs/v21.8.11.4-lts.md b/docs/changelogs/v21.8.11.4-lts.md index d88d191bae2..81bbea4ee21 100644 --- a/docs/changelogs/v21.8.11.4-lts.md +++ b/docs/changelogs/v21.8.11.4-lts.md @@ -16,11 +16,11 @@ sidebar_label: 2022 #### Bug Fix * Backported in [#31368](https://github.com/ClickHouse/ClickHouse/issues/31368): Fix SHOW GRANTS when partial revokes are used. This PR fixes [#31138](https://github.com/ClickHouse/ClickHouse/issues/31138). [#31249](https://github.com/ClickHouse/ClickHouse/pull/31249) ([Vitaly Baranov](https://github.com/vitlibar)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release +#### Bug Fix (user-visible misbehaviour in official stable release * Backported in [#30914](https://github.com/ClickHouse/ClickHouse/issues/30914): Fix `ORDER BY ... WITH FILL` with set `TO` and `FROM` and no rows in result set. [#30888](https://github.com/ClickHouse/ClickHouse/pull/30888) ([Anton Popov](https://github.com/CurtizJ)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#28756](https://github.com/ClickHouse/ClickHouse/issues/28756): Fix NOT-IN index optimization when not all key columns are used. This fixes [#28120](https://github.com/ClickHouse/ClickHouse/issues/28120). [#28315](https://github.com/ClickHouse/ClickHouse/pull/28315) ([Amos Bird](https://github.com/amosbird)). * Backported in [#30825](https://github.com/ClickHouse/ClickHouse/issues/30825): Fix "Column is not under aggregate function and not in GROUP BY" with PREWHERE (Fixes: [#28461](https://github.com/ClickHouse/ClickHouse/issues/28461)). [#28502](https://github.com/ClickHouse/ClickHouse/pull/28502) ([Azat Khuzhin](https://github.com/azat)). diff --git a/docs/changelogs/v21.8.12.29-lts.md b/docs/changelogs/v21.8.12.29-lts.md index bd1f0c7fe60..8b68a6a3af8 100644 --- a/docs/changelogs/v21.8.12.29-lts.md +++ b/docs/changelogs/v21.8.12.29-lts.md @@ -13,7 +13,7 @@ sidebar_label: 2022 #### Bug Fix * Backported in [#31575](https://github.com/ClickHouse/ClickHouse/issues/31575): Quota limit was not reached, but the limit was exceeded. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31337](https://github.com/ClickHouse/ClickHouse/pull/31337) ([sunny](https://github.com/sunny19930321)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#31204](https://github.com/ClickHouse/ClickHouse/issues/31204): Fix abort in debug server and `DB::Exception: std::out_of_range: basic_string` error in release server in case of bad hdfs url by adding additional check of hdfs url structure. 
[#31042](https://github.com/ClickHouse/ClickHouse/pull/31042) ([Kruglov Pavel](https://github.com/Avogar)). * Backported in [#31253](https://github.com/ClickHouse/ClickHouse/issues/31253): Fix bug in Keeper which can lead to inability to start when some coordination logs was lost and we have more fresh snapshot than our latest log. [#31150](https://github.com/ClickHouse/ClickHouse/pull/31150) ([alesapin](https://github.com/alesapin)). diff --git a/docs/changelogs/v21.8.13.6-lts.md b/docs/changelogs/v21.8.13.6-lts.md index 63ac956c3d5..205628c6330 100644 --- a/docs/changelogs/v21.8.13.6-lts.md +++ b/docs/changelogs/v21.8.13.6-lts.md @@ -11,7 +11,7 @@ sidebar_label: 2022 * Backported in [#32688](https://github.com/ClickHouse/ClickHouse/issues/32688): Quota limit was not reached, but the limit was exceeded. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31656](https://github.com/ClickHouse/ClickHouse/pull/31656) ([sunny](https://github.com/sunny19930321)). * Backported in [#32343](https://github.com/ClickHouse/ClickHouse/issues/32343): Fix bug when remove unneeded columns in subquery. If there is an aggregation function in query without group by, do not remove if it is unneeded. [#32289](https://github.com/ClickHouse/ClickHouse/pull/32289) ([dongyifeng](https://github.com/dyf6372)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#32108](https://github.com/ClickHouse/ClickHouse/issues/32108): Fix crash with empty result on odbc query. Closes [#31465](https://github.com/ClickHouse/ClickHouse/issues/31465). [#31766](https://github.com/ClickHouse/ClickHouse/pull/31766) ([Kseniia Sumarokova](https://github.com/kssenii)). * Backported in [#32150](https://github.com/ClickHouse/ClickHouse/issues/32150): Fix crash when function `dictGet` with type is used for dictionary attribute when type is `Nullable`. Fixes [#30980](https://github.com/ClickHouse/ClickHouse/issues/30980). [#31800](https://github.com/ClickHouse/ClickHouse/pull/31800) ([Maksim Kita](https://github.com/kitaisreal)). @@ -26,7 +26,7 @@ sidebar_label: 2022 * Backported in [#33048](https://github.com/ClickHouse/ClickHouse/issues/33048): Fix possible exception at RabbitMQ storage startup by delaying channel creation. [#32584](https://github.com/ClickHouse/ClickHouse/pull/32584) ([Kseniia Sumarokova](https://github.com/kssenii)). * Backported in [#32795](https://github.com/ClickHouse/ClickHouse/issues/32795): fix crash when used fuzzBits with multiply same FixedString, Close [#32737](https://github.com/ClickHouse/ClickHouse/issues/32737). [#32755](https://github.com/ClickHouse/ClickHouse/pull/32755) ([SuperDJY](https://github.com/cmsxbc)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release): +#### Bug Fix (user-visible misbehaviour in official stable release): * Backported in [#32659](https://github.com/ClickHouse/ClickHouse/issues/32659): Fix possible crash (or incorrect result) in case of `LowCardinality` arguments of window function. Fixes [#31114](https://github.com/ClickHouse/ClickHouse/issues/31114). [#31888](https://github.com/ClickHouse/ClickHouse/pull/31888) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
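The long run of docs/changelogs/*.md hunks surrounding this point all apply one mechanical edit: the Bug Fix section heading drops the words "or prestable". Matching on the phrase rather than the full heading also covers the malformed variants that are missing a closing parenthesis (see the v21.11.1.8636 and v21.8.11.4 hunks above). A sketch of how such a mass rename could be scripted follows; the diff itself does not say how it was generated, so the use of Python (rather than, say, sed) is an assumption.

```python
# Hypothetical script reproducing the mechanical rename applied across
# docs/changelogs/*.md in this diff.
from pathlib import Path

OLD = "user-visible misbehaviour in official stable or prestable release"
NEW = "user-visible misbehaviour in official stable release"

for path in Path("docs/changelogs").glob("*.md"):
    text = path.read_text(encoding="utf-8")
    if OLD in text:
        # Phrase-level replace catches headings with and without the
        # trailing ")" or ":".
        path.write_text(text.replace(OLD, NEW), encoding="utf-8")
```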
diff --git a/docs/changelogs/v21.8.14.5-lts.md b/docs/changelogs/v21.8.14.5-lts.md index 1012d9c5784..75d966ec9e7 100644 --- a/docs/changelogs/v21.8.14.5-lts.md +++ b/docs/changelogs/v21.8.14.5-lts.md @@ -7,7 +7,7 @@ sidebar_label: 2022 ### ClickHouse release v21.8.14.5-lts FIXME as compared to v21.8.13.6-lts -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#33184](https://github.com/ClickHouse/ClickHouse/issues/33184): Server might fail to start if database with `MySQL` engine cannot connect to MySQL server, it's fixed. Fixes [#14441](https://github.com/ClickHouse/ClickHouse/issues/14441). [#32802](https://github.com/ClickHouse/ClickHouse/pull/32802) ([Alexander Tokmakov](https://github.com/tavplubix)). * Backported in [#33659](https://github.com/ClickHouse/ClickHouse/issues/33659): Fix hdfs url check that didn't allow using HA namenode address. Bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/31042. [#32976](https://github.com/ClickHouse/ClickHouse/pull/32976) ([Kruglov Pavel](https://github.com/Avogar)). diff --git a/docs/changelogs/v21.8.15.7-lts.md b/docs/changelogs/v21.8.15.7-lts.md index eb6bf39d7be..1522c28016a 100644 --- a/docs/changelogs/v21.8.15.7-lts.md +++ b/docs/changelogs/v21.8.15.7-lts.md @@ -7,7 +7,7 @@ sidebar_label: 2022 ### ClickHouse release v21.8.15.7-lts FIXME as compared to v21.8.14.5-lts -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#34121](https://github.com/ClickHouse/ClickHouse/issues/34121): Fix usage of functions `array` and `tuple` with literal arguments in distributed queries. Previously it could lead to `Not found columns` exception. [#33938](https://github.com/ClickHouse/ClickHouse/pull/33938) ([Anton Popov](https://github.com/CurtizJ)). * Backported in [#34097](https://github.com/ClickHouse/ClickHouse/issues/34097): Fix segfault while parsing ORC file with corrupted footer. Closes [#33797](https://github.com/ClickHouse/ClickHouse/issues/33797). [#33984](https://github.com/ClickHouse/ClickHouse/pull/33984) ([Kruglov Pavel](https://github.com/Avogar)). diff --git a/docs/changelogs/v21.8.5.7-lts.md b/docs/changelogs/v21.8.5.7-lts.md index 00c6c6e46a7..fa459e093f7 100644 --- a/docs/changelogs/v21.8.5.7-lts.md +++ b/docs/changelogs/v21.8.5.7-lts.md @@ -25,7 +25,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * Backported in [#28031](https://github.com/ClickHouse/ClickHouse/issues/28031): Temporarily switched ubuntu apt repository to mirror ru.archive.ubuntu.com as default one(archive.ubuntu.com) is not responding from our CI. [#28016](https://github.com/ClickHouse/ClickHouse/pull/28016) ([Ilya Yatsishin](https://github.com/qoega)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#27974](https://github.com/ClickHouse/ClickHouse/issues/27974): Fix handling null value with type of Nullable(String) in function JSONExtract. This fixes [#27929](https://github.com/ClickHouse/ClickHouse/issues/27929) and [#27930](https://github.com/ClickHouse/ClickHouse/issues/27930) . This was introduced in https://github.com/ClickHouse/ClickHouse/pull/25452 . [#27939](https://github.com/ClickHouse/ClickHouse/pull/27939) ([Amos Bird](https://github.com/amosbird)). 
* Backported in [#28117](https://github.com/ClickHouse/ClickHouse/issues/28117): Fix extremely rare segfaults on shutdown due to incorrect order of context/config reloader shutdown. [#28088](https://github.com/ClickHouse/ClickHouse/pull/28088) ([nvartolomei](https://github.com/nvartolomei)). diff --git a/docs/changelogs/v21.8.6.15-lts.md b/docs/changelogs/v21.8.6.15-lts.md index dc8ae4ec9b5..aa51dc1ac1e 100644 --- a/docs/changelogs/v21.8.6.15-lts.md +++ b/docs/changelogs/v21.8.6.15-lts.md @@ -16,7 +16,7 @@ sidebar_label: 2022 * Backported in [#27923](https://github.com/ClickHouse/ClickHouse/issues/27923): Fix PostgreSQL-style cast (`::` operator) with negative numbers. [#27876](https://github.com/ClickHouse/ClickHouse/pull/27876) ([Anton Popov](https://github.com/CurtizJ)). * Backported in [#28753](https://github.com/ClickHouse/ClickHouse/issues/28753): Fix transformation of disjunctions chain to `IN` (controlled by the setting `optimize_min_equality_disjunction_chain_length`) in distributed queries with settings `legacy_column_name_of_tuple_literal = 0`. [#28658](https://github.com/ClickHouse/ClickHouse/pull/28658) ([Anton Popov](https://github.com/CurtizJ)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#28644](https://github.com/ClickHouse/ClickHouse/issues/28644): Fix rare case when changes of `clickhouse-keeper` settings may lead to lost logs and a server hang. [#28360](https://github.com/ClickHouse/ClickHouse/pull/28360) ([alesapin](https://github.com/alesapin)). * Backported in [#28508](https://github.com/ClickHouse/ClickHouse/issues/28508): Fix lack of quotes for table names in MaterializedPostgreSQL engine. Closes [#28316](https://github.com/ClickHouse/ClickHouse/issues/28316). [#28433](https://github.com/ClickHouse/ClickHouse/pull/28433) ([Kseniia Sumarokova](https://github.com/kssenii)). diff --git a/docs/changelogs/v21.8.7.22-lts.md b/docs/changelogs/v21.8.7.22-lts.md index b6c5b70e096..7a751be4132 100644 --- a/docs/changelogs/v21.8.7.22-lts.md +++ b/docs/changelogs/v21.8.7.22-lts.md @@ -7,7 +7,7 @@ sidebar_label: 2022 ### ClickHouse release v21.8.7.22-lts FIXME as compared to v21.8.6.15-lts -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#29121](https://github.com/ClickHouse/ClickHouse/issues/29121): Better check for connection usability and also catch any exception in RabbitMQ shutdown just in case. [#28797](https://github.com/ClickHouse/ClickHouse/pull/28797) ([Kseniia Sumarokova](https://github.com/kssenii)). * Backported in [#29027](https://github.com/ClickHouse/ClickHouse/issues/29027): Fix the number of threads used in `GLOBAL IN` subquery (it was executed in a single thread since the [#19414](https://github.com/ClickHouse/ClickHouse/issues/19414) bugfix). [#28997](https://github.com/ClickHouse/ClickHouse/pull/28997) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). diff --git a/docs/changelogs/v21.8.8.29-lts.md b/docs/changelogs/v21.8.8.29-lts.md index 160d30a6aa9..e988c3c6801 100644 --- a/docs/changelogs/v21.8.8.29-lts.md +++ b/docs/changelogs/v21.8.8.29-lts.md @@ -10,7 +10,7 @@ sidebar_label: 2022 #### Bug Fix * Backported in [#29128](https://github.com/ClickHouse/ClickHouse/issues/29128): Fix bug in `clickhouse-keeper-converter` which can lead to incorrect ZooKeeper log deserialization.
[#29071](https://github.com/ClickHouse/ClickHouse/pull/29071) ([小路](https://github.com/nicelulu)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#29262](https://github.com/ClickHouse/ClickHouse/issues/29262): Fix invalid constant type conversion when nullable or lowcardinality primary key is used. [#28636](https://github.com/ClickHouse/ClickHouse/pull/28636) ([Amos Bird](https://github.com/amosbird)). * Backported in [#29106](https://github.com/ClickHouse/ClickHouse/issues/29106): Fix waiting for mutation with `mutations_sync=2`. [#28889](https://github.com/ClickHouse/ClickHouse/pull/28889) ([Azat Khuzhin](https://github.com/azat)). diff --git a/docs/changelogs/v21.8.9.13-lts.md b/docs/changelogs/v21.8.9.13-lts.md index a48ca30080e..71919c48c47 100644 --- a/docs/changelogs/v21.8.9.13-lts.md +++ b/docs/changelogs/v21.8.9.13-lts.md @@ -14,7 +14,7 @@ sidebar_label: 2022 * Backported in [#29817](https://github.com/ClickHouse/ClickHouse/issues/29817): Allow using a materialized column as the sharding key in a distributed table even if `insert_allow_materialized_columns=0`:. [#28637](https://github.com/ClickHouse/ClickHouse/pull/28637) ([Vitaly Baranov](https://github.com/vitlibar)). * Backported in [#29973](https://github.com/ClickHouse/ClickHouse/issues/29973): Fix shutdown of `AccessControlManager`. Now there can't be reloading of the configuration after AccessControlManager has been destroyed. This PR fixes the flaky test [test_user_directories/test.py::test_relative_path](https://clickhouse-test-reports.s3.yandex.net/0/f0e3122507ed8bea3f177495531c7d56bcb32466/integration_tests_(thread).html). [#29951](https://github.com/ClickHouse/ClickHouse/pull/29951) ([Vitaly Baranov](https://github.com/vitlibar)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#29676](https://github.com/ClickHouse/ClickHouse/issues/29676): Fix vertical merges of projection parts. This fixes [#29253](https://github.com/ClickHouse/ClickHouse/issues/29253) . This PR also fixes several projection merge/mutation issues introduced in https://github.com/ClickHouse/ClickHouse/pull/25165. [#29337](https://github.com/ClickHouse/ClickHouse/pull/29337) ([Amos Bird](https://github.com/amosbird)). * Backported in [#29538](https://github.com/ClickHouse/ClickHouse/issues/29538): Fix possible `Block structure mismatch` for subqueries with pushed-down `HAVING` predicate. Fixes [#29010](https://github.com/ClickHouse/ClickHouse/issues/29010). [#29475](https://github.com/ClickHouse/ClickHouse/pull/29475) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). diff --git a/docs/changelogs/v21.9.2.17-stable.md b/docs/changelogs/v21.9.2.17-stable.md index 39e3f627f4a..08d208ec97b 100644 --- a/docs/changelogs/v21.9.2.17-stable.md +++ b/docs/changelogs/v21.9.2.17-stable.md @@ -25,7 +25,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * Backported in [#28029](https://github.com/ClickHouse/ClickHouse/issues/28029): Temporarily switched ubuntu apt repository to mirror ru.archive.ubuntu.com as default one(archive.ubuntu.com) is not responding from our CI. [#28016](https://github.com/ClickHouse/ClickHouse/pull/28016) ([Ilya Yatsishin](https://github.com/qoega)). 
-#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#27973](https://github.com/ClickHouse/ClickHouse/issues/27973): Fix handling null value with type of Nullable(String) in function JSONExtract. This fixes [#27929](https://github.com/ClickHouse/ClickHouse/issues/27929) and [#27930](https://github.com/ClickHouse/ClickHouse/issues/27930) . This was introduced in https://github.com/ClickHouse/ClickHouse/pull/25452 . [#27939](https://github.com/ClickHouse/ClickHouse/pull/27939) ([Amos Bird](https://github.com/amosbird)). * Backported in [#28118](https://github.com/ClickHouse/ClickHouse/issues/28118): Fix extremely rare segfaults on shutdown due to incorrect order of context/config reloader shutdown. [#28088](https://github.com/ClickHouse/ClickHouse/pull/28088) ([nvartolomei](https://github.com/nvartolomei)). diff --git a/docs/changelogs/v21.9.3.30-stable.md b/docs/changelogs/v21.9.3.30-stable.md index ee2dd24277d..28375c5588e 100644 --- a/docs/changelogs/v21.9.3.30-stable.md +++ b/docs/changelogs/v21.9.3.30-stable.md @@ -10,7 +10,7 @@ sidebar_label: 2022 #### Improvement * Backported in [#28897](https://github.com/ClickHouse/ClickHouse/issues/28897): Use real tmp file instead of predefined "rows_sources" for vertical merges. This avoids generating garbage directories in tmp disks. [#28299](https://github.com/ClickHouse/ClickHouse/pull/28299) ([Amos Bird](https://github.com/amosbird)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#28815](https://github.com/ClickHouse/ClickHouse/issues/28815): Fix possible crash for `SELECT` with partially created aggregate projection in case of exception. [#28700](https://github.com/ClickHouse/ClickHouse/pull/28700) ([Amos Bird](https://github.com/amosbird)). * Backported in [#28789](https://github.com/ClickHouse/ClickHouse/issues/28789): Fix benign race condition in ReplicatedMergeTreeQueue. Shouldn't be visible for user, but can lead to subtle bugs. [#28734](https://github.com/ClickHouse/ClickHouse/pull/28734) ([alesapin](https://github.com/alesapin)). diff --git a/docs/changelogs/v21.9.4.35-stable.md b/docs/changelogs/v21.9.4.35-stable.md index 5a556df1b3a..0b300574559 100644 --- a/docs/changelogs/v21.9.4.35-stable.md +++ b/docs/changelogs/v21.9.4.35-stable.md @@ -7,7 +7,7 @@ sidebar_label: 2022 ### ClickHouse release v21.9.4.35-stable FIXME as compared to v21.9.3.30-stable -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#29191](https://github.com/ClickHouse/ClickHouse/issues/29191): Fix segfault while inserting into column with type LowCardinality(Nullable) in Avro input format. [#29132](https://github.com/ClickHouse/ClickHouse/pull/29132) ([Kruglov Pavel](https://github.com/Avogar)). diff --git a/docs/changelogs/v21.9.5.16-stable.md b/docs/changelogs/v21.9.5.16-stable.md index e681322eb45..895e882d257 100644 --- a/docs/changelogs/v21.9.5.16-stable.md +++ b/docs/changelogs/v21.9.5.16-stable.md @@ -17,7 +17,7 @@ sidebar_label: 2022 * Backported in [#29972](https://github.com/ClickHouse/ClickHouse/issues/29972): Fix shutdown of `AccessControlManager`. Now there can't be reloading of the configuration after AccessControlManager has been destroyed. 
This PR fixes the flaky test [test_user_directories/test.py::test_relative_path](https://clickhouse-test-reports.s3.yandex.net/0/f0e3122507ed8bea3f177495531c7d56bcb32466/integration_tests_(thread).html). [#29951](https://github.com/ClickHouse/ClickHouse/pull/29951) ([Vitaly Baranov](https://github.com/vitlibar)). * Backported in [#30052](https://github.com/ClickHouse/ClickHouse/issues/30052): Fix releasing query ID and session ID at the end of query processing while handling a gRPC call. This PR fixes flaky test [test_grpc_protocol/test.py::test_session](https://clickhouse-test-reports.s3.yandex.net/0/1ac03811a2df9717fa7c633d1af03def821d24b6/integration_tests_(memory).html). [#29954](https://github.com/ClickHouse/ClickHouse/pull/29954) ([Vitaly Baranov](https://github.com/vitlibar)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#29055](https://github.com/ClickHouse/ClickHouse/issues/29055): Fix invalid constant type conversion when nullable or lowcardinality primary key is used. [#28636](https://github.com/ClickHouse/ClickHouse/pull/28636) ([Amos Bird](https://github.com/amosbird)). * Backported in [#29107](https://github.com/ClickHouse/ClickHouse/issues/29107): Fix waiting for mutation with `mutations_sync=2`. [#28889](https://github.com/ClickHouse/ClickHouse/pull/28889) ([Azat Khuzhin](https://github.com/azat)). diff --git a/docs/changelogs/v21.9.6.24-stable.md b/docs/changelogs/v21.9.6.24-stable.md index 2407c031873..890dc8d5d02 100644 --- a/docs/changelogs/v21.9.6.24-stable.md +++ b/docs/changelogs/v21.9.6.24-stable.md @@ -21,11 +21,11 @@ sidebar_label: 2022 * Backported in [#31371](https://github.com/ClickHouse/ClickHouse/issues/31371): Fix SHOW GRANTS when partial revokes are used. This PR fixes [#31138](https://github.com/ClickHouse/ClickHouse/issues/31138). [#31249](https://github.com/ClickHouse/ClickHouse/pull/31249) ([Vitaly Baranov](https://github.com/vitlibar)). * Backported in [#31576](https://github.com/ClickHouse/ClickHouse/issues/31576): Quota limit was not reached, but the limit was exceeded. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31337](https://github.com/ClickHouse/ClickHouse/pull/31337) ([sunny](https://github.com/sunny19930321)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#30916](https://github.com/ClickHouse/ClickHouse/issues/30916): Fix `ORDER BY ... WITH FILL` with `TO` and `FROM` set and no rows in the result set. [#30888](https://github.com/ClickHouse/ClickHouse/pull/30888) ([Anton Popov](https://github.com/CurtizJ)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#30823](https://github.com/ClickHouse/ClickHouse/issues/30823): Fix "Column is not under aggregate function and not in GROUP BY" with PREWHERE (Fixes: [#28461](https://github.com/ClickHouse/ClickHouse/issues/28461)). [#28502](https://github.com/ClickHouse/ClickHouse/pull/28502) ([Azat Khuzhin](https://github.com/azat)). * Backported in [#30609](https://github.com/ClickHouse/ClickHouse/issues/30609): Fix bad optimizations of ORDER BY if it contains WITH FILL. This closes [#28908](https://github.com/ClickHouse/ClickHouse/issues/28908). This closes [#26049](https://github.com/ClickHouse/ClickHouse/issues/26049).
[#28910](https://github.com/ClickHouse/ClickHouse/pull/28910) ([Alexey Milovidov](https://github.com/alexey-milovidov)). diff --git a/docs/changelogs/v22.1.1.2542-prestable.md b/docs/changelogs/v22.1.1.2542-prestable.md index 8d7bb015db6..cacd13c1e12 100644 --- a/docs/changelogs/v22.1.1.2542-prestable.md +++ b/docs/changelogs/v22.1.1.2542-prestable.md @@ -118,7 +118,7 @@ sidebar_label: 2022 * Remove editing /etc/hosts from Dockerfile. [#33635](https://github.com/ClickHouse/ClickHouse/pull/33635) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Properly separate thrift-cmake from arrow-cmake after https://github.com/ClickHouse/ClickHouse/pull/31104 . cc @taiyang-li. [#33661](https://github.com/ClickHouse/ClickHouse/pull/33661) ([Amos Bird](https://github.com/amosbird)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Fixed CAST from String to IPv4 or IPv6 and back. Fixed error message in case of failed conversion. [#29224](https://github.com/ClickHouse/ClickHouse/pull/29224) ([Dmitry Novik](https://github.com/novikd)). * Fix base64Encode adding trailing bytes on small strings. [#31797](https://github.com/ClickHouse/ClickHouse/pull/31797) ([Kevin Michel](https://github.com/kmichel-aiven)). @@ -185,7 +185,7 @@ sidebar_label: 2022 * Fix segfault in Avro that appears after the second insert into file. [#33566](https://github.com/ClickHouse/ClickHouse/pull/33566) ([Kruglov Pavel](https://github.com/Avogar)). * Fix wrong database for JOIN w/o explicit database in distributed queries (Fixes: [#10471](https://github.com/ClickHouse/ClickHouse/issues/10471)). [#33611](https://github.com/ClickHouse/ClickHouse/pull/33611) ([Azat Khuzhin](https://github.com/azat)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release): +#### Bug Fix (user-visible misbehaviour in official stable release): * Fix possible crash (or incorrect result) in case of `LowCardinality` arguments of window function. Fixes [#31114](https://github.com/ClickHouse/ClickHouse/issues/31114). [#31888](https://github.com/ClickHouse/ClickHouse/pull/31888) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). diff --git a/docs/changelogs/v22.1.3.7-stable.md b/docs/changelogs/v22.1.3.7-stable.md index a92a82be290..fd8787f0e75 100644 --- a/docs/changelogs/v22.1.3.7-stable.md +++ b/docs/changelogs/v22.1.3.7-stable.md @@ -10,6 +10,6 @@ sidebar_label: 2022 #### Improvement * Backported in [#33793](https://github.com/ClickHouse/ClickHouse/issues/33793): Create parent directories in DiskS3::restoreFileOperations method. [#33730](https://github.com/ClickHouse/ClickHouse/pull/33730) ([ianton-ru](https://github.com/ianton-ru)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#33898](https://github.com/ClickHouse/ClickHouse/issues/33898): Fix usage of sparse columns (which can be enabled by experimental setting `ratio_of_defaults_for_sparse_serialization`). [#33849](https://github.com/ClickHouse/ClickHouse/pull/33849) ([Anton Popov](https://github.com/CurtizJ)). diff --git a/docs/changelogs/v22.1.4.30-stable.md b/docs/changelogs/v22.1.4.30-stable.md index c4286d7b64d..b0437382a46 100644 --- a/docs/changelogs/v22.1.4.30-stable.md +++ b/docs/changelogs/v22.1.4.30-stable.md @@ -10,7 +10,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * Backport CI checks to 22.1 release branch. 
[#34897](https://github.com/ClickHouse/ClickHouse/pull/34897) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#34119](https://github.com/ClickHouse/ClickHouse/issues/34119): Fix usage of functions `array` and `tuple` with literal arguments in distributed queries. Previously it could lead to `Not found columns` exception. [#33938](https://github.com/ClickHouse/ClickHouse/pull/33938) ([Anton Popov](https://github.com/CurtizJ)). * Backported in [#34124](https://github.com/ClickHouse/ClickHouse/issues/34124): Fix crash while reading nested tuples. Fixes [#33838](https://github.com/ClickHouse/ClickHouse/issues/33838). [#33956](https://github.com/ClickHouse/ClickHouse/pull/33956) ([Anton Popov](https://github.com/CurtizJ)). diff --git a/docs/changelogs/v22.10.1.1877-stable.md b/docs/changelogs/v22.10.1.1877-stable.md index 5b573a3faa4..23c58447c99 100644 --- a/docs/changelogs/v22.10.1.1877-stable.md +++ b/docs/changelogs/v22.10.1.1877-stable.md @@ -105,7 +105,7 @@ sidebar_label: 2022 * Update tzdata to 2022e to support the new timezone changes. Palestine transitions are now Saturdays at 02:00. Simplify three Ukraine zones into one. Jordan and Syria switch from +02/+03 with DST to year-round +03. (https://data.iana.org/time-zones/tzdb/NEWS). This closes [#42252](https://github.com/ClickHouse/ClickHouse/issues/42252). [#42327](https://github.com/ClickHouse/ClickHouse/pull/42327) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Fix power8 support. [#42462](https://github.com/ClickHouse/ClickHouse/pull/42462) ([Boris Kuschel](https://github.com/bkuschel)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Several fixes for DiskWeb. [#41652](https://github.com/ClickHouse/ClickHouse/pull/41652) ([Kseniia Sumarokova](https://github.com/kssenii)). * Fixes an issue where `docker run` fails if "https_port" is not present in the config. [#41693](https://github.com/ClickHouse/ClickHouse/pull/41693) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). @@ -147,7 +147,7 @@ sidebar_label: 2022 * This closes [#42453](https://github.com/ClickHouse/ClickHouse/issues/42453). [#42573](https://github.com/ClickHouse/ClickHouse/pull/42573) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Fix function `arrayElement` with type `Map` with `Nullable` values and `Nullable` index. [#42623](https://github.com/ClickHouse/ClickHouse/pull/42623) ([Anton Popov](https://github.com/CurtizJ)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Fix unexpected table loading error when partition key contains alias function names during server upgrade. [#36379](https://github.com/ClickHouse/ClickHouse/pull/36379) ([Amos Bird](https://github.com/amosbird)).
diff --git a/docs/changelogs/v22.10.2.11-stable.md b/docs/changelogs/v22.10.2.11-stable.md index 4e3c382f5a7..196d3fbde80 100644 --- a/docs/changelogs/v22.10.2.11-stable.md +++ b/docs/changelogs/v22.10.2.11-stable.md @@ -7,7 +7,7 @@ sidebar_label: 2022 ### ClickHouse release v22.10.2.11-stable (d2bfcaba002) FIXME as compared to v22.10.1.1877-stable (98ab5a3c189) -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#42750](https://github.com/ClickHouse/ClickHouse/issues/42750): A segmentation fault related to DNS & c-ares has been reported. The below error occurred in multiple threads: ``` 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008088 [ 356 ] {} BaseDaemon: ######################################## 2022-09-28 15:41:19.008,"2022.09.28 15:41:19.008147 [ 356 ] {} BaseDaemon: (version 22.8.5.29 (official build), build id: 92504ACA0B8E2267) (from thread 353) (no query) Received signal Segmentation fault (11)" 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008196 [ 356 ] {} BaseDaemon: Address: 0xf Access: write. Address not mapped to object. 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008216 [ 356 ] {} BaseDaemon: Stack trace: 0x188f8212 0x1626851b 0x1626a69e 0x16269b3f 0x16267eab 0x13cf8284 0x13d24afc 0x13c5217e 0x14ec2495 0x15ba440f 0x15b9d13b 0x15bb2699 0x1891ccb3 0x1891e00d 0x18ae0769 0x18ade022 0x7f76aa985609 0x7f76aa8aa133 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008274 [ 356 ] {} BaseDaemon: 2. Poco::Net::IPAddress::family() const @ 0x188f8212 in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008297 [ 356 ] {} BaseDaemon: 3. ? @ 0x1626851b in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008309 [ 356 ] {} BaseDaemon: 4. ? @ 0x1626a69e in /usr/bin/clickhouse ```. [#42234](https://github.com/ClickHouse/ClickHouse/pull/42234) ([Arthur Passos](https://github.com/arthurpassos)). * Backported in [#42793](https://github.com/ClickHouse/ClickHouse/issues/42793): Fix a bug in ParserFunction that could have led to a segmentation fault. [#42724](https://github.com/ClickHouse/ClickHouse/pull/42724) ([Nikolay Degterinsky](https://github.com/evillique)). diff --git a/docs/changelogs/v22.10.3.27-stable.md b/docs/changelogs/v22.10.3.27-stable.md index 6dc9fd7f3b9..6e0188ad619 100644 --- a/docs/changelogs/v22.10.3.27-stable.md +++ b/docs/changelogs/v22.10.3.27-stable.md @@ -14,7 +14,7 @@ sidebar_label: 2022 * Backported in [#42959](https://github.com/ClickHouse/ClickHouse/issues/42959): Before the fix, the user-defined config was preserved by RPM in `$file.rpmsave`. The PR fixes it and won't replace the user's files from packages. [#42936](https://github.com/ClickHouse/ClickHouse/pull/42936) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Backported in [#43042](https://github.com/ClickHouse/ClickHouse/issues/43042): Add a CI step to mark commits as ready for release; soft-forbid launching a release script from any branch but master. [#43017](https://github.com/ClickHouse/ClickHouse/pull/43017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#42864](https://github.com/ClickHouse/ClickHouse/issues/42864): Fix lowerUTF8()/upperUTF8() in case a symbol straddles a 16-byte boundary (a very frequent case if you have strings > 16 bytes long).
[#42812](https://github.com/ClickHouse/ClickHouse/pull/42812) ([Azat Khuzhin](https://github.com/azat)). * Backported in [#43173](https://github.com/ClickHouse/ClickHouse/issues/43173): Fix rare possible hang on query cancellation. [#42874](https://github.com/ClickHouse/ClickHouse/pull/42874) ([Azat Khuzhin](https://github.com/azat)). diff --git a/docs/changelogs/v22.10.4.23-stable.md b/docs/changelogs/v22.10.4.23-stable.md index 04eb8be982f..4438a3470fc 100644 --- a/docs/changelogs/v22.10.4.23-stable.md +++ b/docs/changelogs/v22.10.4.23-stable.md @@ -13,7 +13,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * Backported in [#43053](https://github.com/ClickHouse/ClickHouse/issues/43053): Wait for all files to be in sync before archiving them in integration tests. [#42891](https://github.com/ClickHouse/ClickHouse/pull/42891) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#43715](https://github.com/ClickHouse/ClickHouse/issues/43715): An issue with the following exception has been reported while trying to read a Parquet file from S3 into ClickHouse:. [#43297](https://github.com/ClickHouse/ClickHouse/pull/43297) ([Arthur Passos](https://github.com/arthurpassos)). * Backported in [#43576](https://github.com/ClickHouse/ClickHouse/issues/43576): Fix possible `Cannot create non-empty column with type Nothing` in functions if/multiIf. Closes [#43356](https://github.com/ClickHouse/ClickHouse/issues/43356). [#43368](https://github.com/ClickHouse/ClickHouse/pull/43368) ([Kruglov Pavel](https://github.com/Avogar)). diff --git a/docs/changelogs/v22.10.5.54-stable.md b/docs/changelogs/v22.10.5.54-stable.md index e372fb30618..73c15cf985d 100644 --- a/docs/changelogs/v22.10.5.54-stable.md +++ b/docs/changelogs/v22.10.5.54-stable.md @@ -17,7 +17,7 @@ sidebar_label: 2023 * Backported in [#44379](https://github.com/ClickHouse/ClickHouse/issues/44379): In rare cases, we don't rebuild binaries, because another task with a similar prefix succeeded. E.g. `binary_darwin` didn't restart because `binary_darwin_aarch64`. [#44311](https://github.com/ClickHouse/ClickHouse/pull/44311) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Backported in [#44559](https://github.com/ClickHouse/ClickHouse/issues/44559): Retry the integration tests on compressing errors. [#44529](https://github.com/ClickHouse/ClickHouse/pull/44529) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#44754](https://github.com/ClickHouse/ClickHouse/issues/44754): [#40651](https://github.com/ClickHouse/ClickHouse/issues/40651) [#41404](https://github.com/ClickHouse/ClickHouse/issues/41404). [#42126](https://github.com/ClickHouse/ClickHouse/pull/42126) ([Alexander Gololobov](https://github.com/davenger)). * Backported in [#43527](https://github.com/ClickHouse/ClickHouse/issues/43527): Fix incorrect UserTimeMicroseconds/SystemTimeMicroseconds accounting. [#42791](https://github.com/ClickHouse/ClickHouse/pull/42791) ([Azat Khuzhin](https://github.com/azat)). @@ -41,4 +41,3 @@ sidebar_label: 2023 * Implement a custom central checkout action [#44399](https://github.com/ClickHouse/ClickHouse/pull/44399) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix crash on delete from materialized view [#44705](https://github.com/ClickHouse/ClickHouse/pull/44705) ([Alexander Gololobov](https://github.com/davenger)). * Do not check read result consistency when unwinding [#44956](https://github.com/ClickHouse/ClickHouse/pull/44956) ([Alexander Gololobov](https://github.com/davenger)). - diff --git a/docs/changelogs/v22.10.6.3-stable.md b/docs/changelogs/v22.10.6.3-stable.md index b0e88c92cb0..c09e9a8670b 100644 --- a/docs/changelogs/v22.10.6.3-stable.md +++ b/docs/changelogs/v22.10.6.3-stable.md @@ -7,7 +7,6 @@ sidebar_label: 2023 ### ClickHouse release v22.10.6.3-stable (645a66d221f) FIXME as compared to v22.10.5.54-stable (dbc7984dc3b) -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#45084](https://github.com/ClickHouse/ClickHouse/issues/45084): Fix ALTER TABLE TTL error when a wide part has a lightweight delete mask. [#44959](https://github.com/ClickHouse/ClickHouse/pull/44959) ([Mingliang Pan](https://github.com/liangliangpan)). - diff --git a/docs/changelogs/v22.10.7.13-stable.md b/docs/changelogs/v22.10.7.13-stable.md index c906e00e524..987bf6cf1e1 100644 --- a/docs/changelogs/v22.10.7.13-stable.md +++ b/docs/changelogs/v22.10.7.13-stable.md @@ -7,7 +7,7 @@ sidebar_label: 2023 ### ClickHouse release v22.10.7.13-stable (d261d9036cc) FIXME as compared to v22.10.6.3-stable (645a66d221f) -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#44998](https://github.com/ClickHouse/ClickHouse/issues/44998): Another fix for `Cannot read all data` error which could happen while reading `LowCardinality` dictionary from remote fs. Fixes [#44709](https://github.com/ClickHouse/ClickHouse/issues/44709). [#44875](https://github.com/ClickHouse/ClickHouse/pull/44875) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Backported in [#45551](https://github.com/ClickHouse/ClickHouse/issues/45551): Fix `SELECT ... FROM system.dictionaries` exception when there is a dictionary with a bad structure (e.g. incorrect type in xml config). [#45399](https://github.com/ClickHouse/ClickHouse/pull/45399) ([Aleksei Filatov](https://github.com/aalexfvk)). @@ -18,4 +18,3 @@ sidebar_label: 2023 * Improve release scripts [#45074](https://github.com/ClickHouse/ClickHouse/pull/45074) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Fix wrong approved_at, simplify conditions [#45302](https://github.com/ClickHouse/ClickHouse/pull/45302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Get rid of artifactory in favor of r2 + ch-repos-manager [#45421](https://github.com/ClickHouse/ClickHouse/pull/45421) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). - diff --git a/docs/changelogs/v22.11.1.1360-stable.md b/docs/changelogs/v22.11.1.1360-stable.md index 1da53be02b7..4aa110484f8 100644 --- a/docs/changelogs/v22.11.1.1360-stable.md +++ b/docs/changelogs/v22.11.1.1360-stable.md @@ -81,7 +81,7 @@ sidebar_label: 2022 * Before the fix, the user-defined config was preserved by RPM in `$file.rpmsave`. The PR fixes it and won't replace the user's files from packages. [#42936](https://github.com/ClickHouse/ClickHouse/pull/42936) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Add a CI step to mark commits as ready for release; soft-forbid launching a release script from any branch but master.
[#43017](https://github.com/ClickHouse/ClickHouse/pull/43017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Fix schema inference in s3Cluster and improve in hdfsCluster. [#41979](https://github.com/ClickHouse/ClickHouse/pull/41979) ([Kruglov Pavel](https://github.com/Avogar)). * Fix retries while reading from http table engines / table function. (retriable errors could be retried more times than needed, non-retriable errors resulted in a failed assertion in code). [#42224](https://github.com/ClickHouse/ClickHouse/pull/42224) ([Kseniia Sumarokova](https://github.com/kssenii)). diff --git a/docs/changelogs/v22.11.2.30-stable.md b/docs/changelogs/v22.11.2.30-stable.md index 7b2febe072a..4759aa4a503 100644 --- a/docs/changelogs/v22.11.2.30-stable.md +++ b/docs/changelogs/v22.11.2.30-stable.md @@ -13,7 +13,7 @@ sidebar_label: 2022 #### Improvement * Backported in [#43511](https://github.com/ClickHouse/ClickHouse/issues/43511): Restrict default access to named collections for user defined in config. It must have explicit `show_named_collections=1` to be able to see them. [#43325](https://github.com/ClickHouse/ClickHouse/pull/43325) ([Kseniia Sumarokova](https://github.com/kssenii)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#43716](https://github.com/ClickHouse/ClickHouse/issues/43716): An issue with the following exception has been reported while trying to read a Parquet file from S3 into ClickHouse:. [#43297](https://github.com/ClickHouse/ClickHouse/pull/43297) ([Arthur Passos](https://github.com/arthurpassos)). * Backported in [#43431](https://github.com/ClickHouse/ClickHouse/issues/43431): Fixed queries with `SAMPLE BY` with prewhere optimization on tables using `Merge` engine. [#43315](https://github.com/ClickHouse/ClickHouse/pull/43315) ([Antonio Andelic](https://github.com/antonio2368)). diff --git a/docs/changelogs/v22.11.3.47-stable.md b/docs/changelogs/v22.11.3.47-stable.md index d6451b853f7..a993ff8516f 100644 --- a/docs/changelogs/v22.11.3.47-stable.md +++ b/docs/changelogs/v22.11.3.47-stable.md @@ -13,7 +13,7 @@ sidebar_label: 2023 * Backported in [#44380](https://github.com/ClickHouse/ClickHouse/issues/44380): In rare cases, we don't rebuild binaries, because another task with a similar prefix succeeded. E.g. `binary_darwin` didn't restart because `binary_darwin_aarch64`. [#44311](https://github.com/ClickHouse/ClickHouse/pull/44311) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Backported in [#44560](https://github.com/ClickHouse/ClickHouse/issues/44560): Retry the integration tests on compressing errors. [#44529](https://github.com/ClickHouse/ClickHouse/pull/44529) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#44756](https://github.com/ClickHouse/ClickHouse/issues/44756): [#40651](https://github.com/ClickHouse/ClickHouse/issues/40651) [#41404](https://github.com/ClickHouse/ClickHouse/issues/41404). [#42126](https://github.com/ClickHouse/ClickHouse/pull/42126) ([Alexander Gololobov](https://github.com/davenger)).
* Backported in [#43528](https://github.com/ClickHouse/ClickHouse/issues/43528): Fix incorrect UserTimeMicroseconds/SystemTimeMicroseconds accounting. [#42791](https://github.com/ClickHouse/ClickHouse/pull/42791) ([Azat Khuzhin](https://github.com/azat)). @@ -37,4 +37,3 @@ sidebar_label: 2023 * Implement a custom central checkout action [#44399](https://github.com/ClickHouse/ClickHouse/pull/44399) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Fix crash on delete from materialized view [#44705](https://github.com/ClickHouse/ClickHouse/pull/44705) ([Alexander Gololobov](https://github.com/davenger)). * Do not check read result consistency when unwinding [#44956](https://github.com/ClickHouse/ClickHouse/pull/44956) ([Alexander Gololobov](https://github.com/davenger)). - diff --git a/docs/changelogs/v22.11.4.3-stable.md b/docs/changelogs/v22.11.4.3-stable.md index 33780e848ef..b0e7586277a 100644 --- a/docs/changelogs/v22.11.4.3-stable.md +++ b/docs/changelogs/v22.11.4.3-stable.md @@ -7,7 +7,6 @@ sidebar_label: 2023 ### ClickHouse release v22.11.4.3-stable (7f4cf554f69) FIXME as compared to v22.11.3.47-stable (1c49d124a37) -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#45085](https://github.com/ClickHouse/ClickHouse/issues/45085): Fix ALTER TABLE TTL error when a wide part has a lightweight delete mask. [#44959](https://github.com/ClickHouse/ClickHouse/pull/44959) ([Mingliang Pan](https://github.com/liangliangpan)). - diff --git a/docs/changelogs/v22.11.5.15-stable.md b/docs/changelogs/v22.11.5.15-stable.md index 742a8740514..f11a01c4b1c 100644 --- a/docs/changelogs/v22.11.5.15-stable.md +++ b/docs/changelogs/v22.11.5.15-stable.md @@ -7,7 +7,7 @@ sidebar_label: 2023 ### ClickHouse release v22.11.5.15-stable (d763e5a9239) FIXME as compared to v22.11.4.3-stable (7f4cf554f69) -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#44999](https://github.com/ClickHouse/ClickHouse/issues/44999): Another fix for `Cannot read all data` error which could happen while reading `LowCardinality` dictionary from remote fs. Fixes [#44709](https://github.com/ClickHouse/ClickHouse/issues/44709). [#44875](https://github.com/ClickHouse/ClickHouse/pull/44875) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Backported in [#45552](https://github.com/ClickHouse/ClickHouse/issues/45552): Fix `SELECT ... FROM system.dictionaries` exception when there is a dictionary with a bad structure (e.g. incorrect type in xml config). [#45399](https://github.com/ClickHouse/ClickHouse/pull/45399) ([Aleksei Filatov](https://github.com/aalexfvk)). @@ -19,4 +19,3 @@ sidebar_label: 2023 * Fix wrong approved_at, simplify conditions [#45302](https://github.com/ClickHouse/ClickHouse/pull/45302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Get rid of artifactory in favor of r2 + ch-repos-manager [#45421](https://github.com/ClickHouse/ClickHouse/pull/45421) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Trim refs/tags/ from GITHUB_TAG in release workflow [#45636](https://github.com/ClickHouse/ClickHouse/pull/45636) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
- diff --git a/docs/changelogs/v22.11.6.44-stable.md b/docs/changelogs/v22.11.6.44-stable.md index 6e628b85150..db19e73c666 100644 --- a/docs/changelogs/v22.11.6.44-stable.md +++ b/docs/changelogs/v22.11.6.44-stable.md @@ -17,7 +17,7 @@ sidebar_label: 2023 * Backported in [#46483](https://github.com/ClickHouse/ClickHouse/issues/46483): Get rid of unnecessary build for standalone clickhouse-keeper. [#46367](https://github.com/ClickHouse/ClickHouse/pull/46367) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Backported in [#46507](https://github.com/ClickHouse/ClickHouse/issues/46507): Some time ago the ccache compression was changed to `zst`, but `gz` archives are downloaded by default. This is fixed by prioritizing the zst archive. [#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#45903](https://github.com/ClickHouse/ClickHouse/issues/45903): Fixed bug with non-parsable default value for EPHEMERAL column in table metadata. [#44026](https://github.com/ClickHouse/ClickHouse/pull/44026) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). * Backported in [#46239](https://github.com/ClickHouse/ClickHouse/issues/46239): A couple of seg faults have been reported around `c-ares`. All of the recent stack traces observed fail on inserting into `std::unordered_set<>`. I believe I have found the root cause of this; it seems to be unprocessed queries. Prior to this PR, CH calls `poll` to wait on the file descriptors in the `c-ares` channel. According to the [poll docs](https://man7.org/linux/man-pages/man2/poll.2.html), a negative return value means an error has occurred. Because of this, we would abort the execution and return failure. The problem is that `poll` will also return a negative value if a system interrupt occurs. A system interrupt does not mean the processing has failed or ended, but we would abort it anyways because we were checking for negative values. Once the execution is aborted, the whole stack is destroyed, which includes the `std::unordered_set` passed to the `void *` parameter of the c-ares callback. Once c-ares completed the request, the callback would be invoked and would access an invalid memory address causing a segfault. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)). @@ -34,4 +34,3 @@ sidebar_label: 2023 * Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). - diff --git a/docs/changelogs/v22.12.1.1752-stable.md b/docs/changelogs/v22.12.1.1752-stable.md index 4f4c4b11150..1549af037f2 100644 --- a/docs/changelogs/v22.12.1.1752-stable.md +++ b/docs/changelogs/v22.12.1.1752-stable.md @@ -91,7 +91,7 @@ sidebar_label: 2022 * Bring sha512 sums back to the building step. [#44017](https://github.com/ClickHouse/ClickHouse/pull/44017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Kill stress tests after 2.5h in case of hanging process.
[#44214](https://github.com/ClickHouse/ClickHouse/pull/44214) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Fixed being unable to log in (because of a failure to create the session_log entry) in a rare case of messed-up settings profiles. ... [#42641](https://github.com/ClickHouse/ClickHouse/pull/42641) ([Vasily Nemkov](https://github.com/Enmk)). * Fix incorrect UserTimeMicroseconds/SystemTimeMicroseconds accounting. [#42791](https://github.com/ClickHouse/ClickHouse/pull/42791) ([Azat Khuzhin](https://github.com/azat)). diff --git a/docs/changelogs/v22.12.2.25-stable.md b/docs/changelogs/v22.12.2.25-stable.md index 194d0fe3cbc..968854d0428 100644 --- a/docs/changelogs/v22.12.2.25-stable.md +++ b/docs/changelogs/v22.12.2.25-stable.md @@ -11,7 +11,7 @@ sidebar_label: 2023 * Backported in [#44381](https://github.com/ClickHouse/ClickHouse/issues/44381): In rare cases, we don't rebuild binaries, because another task with a similar prefix succeeded. E.g. `binary_darwin` didn't restart because `binary_darwin_aarch64`. [#44311](https://github.com/ClickHouse/ClickHouse/pull/44311) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Backported in [#44561](https://github.com/ClickHouse/ClickHouse/issues/44561): Retry the integration tests on compressing errors. [#44529](https://github.com/ClickHouse/ClickHouse/pull/44529) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#44739](https://github.com/ClickHouse/ClickHouse/issues/44739): [#40651](https://github.com/ClickHouse/ClickHouse/issues/40651) [#41404](https://github.com/ClickHouse/ClickHouse/issues/41404). [#42126](https://github.com/ClickHouse/ClickHouse/pull/42126) ([Alexander Gololobov](https://github.com/davenger)). * Backported in [#44764](https://github.com/ClickHouse/ClickHouse/issues/44764): Fix parsing of bad version from compatibility setting. [#44224](https://github.com/ClickHouse/ClickHouse/pull/44224) ([Kruglov Pavel](https://github.com/Avogar)). diff --git a/docs/changelogs/v22.12.3.5-stable.md b/docs/changelogs/v22.12.3.5-stable.md index 8cbcbc6a590..5f0cc9cebf3 100644 --- a/docs/changelogs/v22.12.3.5-stable.md +++ b/docs/changelogs/v22.12.3.5-stable.md @@ -7,11 +7,10 @@ sidebar_label: 2023 ### ClickHouse release v22.12.3.5-stable (893de538f02) FIXME as compared to v22.12.2.25-stable (c790cfd4465) -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#45086](https://github.com/ClickHouse/ClickHouse/issues/45086): Fix ALTER TABLE TTL error when a wide part has a lightweight delete mask. [#44959](https://github.com/ClickHouse/ClickHouse/pull/44959) ([Mingliang Pan](https://github.com/liangliangpan)). #### NOT FOR CHANGELOG / INSIGNIFICANT * Do not check read result consistency when unwinding [#44956](https://github.com/ClickHouse/ClickHouse/pull/44956) ([Alexander Gololobov](https://github.com/davenger)).
- diff --git a/docs/changelogs/v22.12.4.76-stable.md b/docs/changelogs/v22.12.4.76-stable.md new file mode 100644 index 00000000000..cdadaae7f7b --- /dev/null +++ b/docs/changelogs/v22.12.4.76-stable.md @@ -0,0 +1,54 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.12.4.76-stable (cb5772db805) FIXME as compared to v22.12.3.5-stable (893de538f02) + +#### Performance Improvement +* Backported in [#45704](https://github.com/ClickHouse/ClickHouse/issues/45704): Fixed performance of short `SELECT` queries that read from tables with a large number of `Array`/`Map`/`Nested` columns. [#45630](https://github.com/ClickHouse/ClickHouse/pull/45630) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#46378](https://github.com/ClickHouse/ClickHouse/issues/46378): Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### Bug Fix +* Backported in [#45672](https://github.com/ClickHouse/ClickHouse/issues/45672): Fix wiping sensitive info in logs. [#45603](https://github.com/ClickHouse/ClickHouse/pull/45603) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Build/Testing/Packaging Improvement +* Backported in [#45200](https://github.com/ClickHouse/ClickHouse/issues/45200): Fix zookeeper downloading, update the version, and optimize the image size. [#44853](https://github.com/ClickHouse/ClickHouse/pull/44853) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#46116](https://github.com/ClickHouse/ClickHouse/issues/46116): Remove the dependency on the `adduser` tool from the packages, because we don't use it. This fixes [#44934](https://github.com/ClickHouse/ClickHouse/issues/44934). [#45011](https://github.com/ClickHouse/ClickHouse/pull/45011) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#46035](https://github.com/ClickHouse/ClickHouse/issues/46035): Add systemd.service file for clickhouse-keeper. Fixes [#44293](https://github.com/ClickHouse/ClickHouse/issues/44293). [#45568](https://github.com/ClickHouse/ClickHouse/pull/45568) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#46484](https://github.com/ClickHouse/ClickHouse/issues/46484): Get rid of unnecessary build for standalone clickhouse-keeper. [#46367](https://github.com/ClickHouse/ClickHouse/pull/46367) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#46509](https://github.com/ClickHouse/ClickHouse/issues/46509): Some time ago the ccache compression was changed to `zst`, but `gz` archives are downloaded by default. This is fixed by prioritizing the zst archive. [#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#47058](https://github.com/ClickHouse/ClickHouse/issues/47058): Fix error during server startup on old distros (e.g. Amazon Linux 2) and on ARM where glibc 2.28 symbols are not found. [#47008](https://github.com/ClickHouse/ClickHouse/pull/47008) ([Robert Schulze](https://github.com/rschu1ze)). + +#### Bug Fix (user-visible misbehavior in official stable release) + +* Backported in [#45904](https://github.com/ClickHouse/ClickHouse/issues/45904): Fixed bug with non-parsable default value for EPHEMERAL column in table metadata.
[#44026](https://github.com/ClickHouse/ClickHouse/pull/44026) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#45321](https://github.com/ClickHouse/ClickHouse/issues/45321): Fixed a bug in normalization of a `DEFAULT` expression in `CREATE TABLE` statement. The second argument of function `in` (or the right argument of operator `IN`) might be replaced with the result of its evaluation during CREATE query execution. Fixes [#44496](https://github.com/ClickHouse/ClickHouse/issues/44496). [#44547](https://github.com/ClickHouse/ClickHouse/pull/44547) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#45000](https://github.com/ClickHouse/ClickHouse/issues/45000): Another fix for `Cannot read all data` error which could happen while reading `LowCardinality` dictionary from remote fs. Fixes [#44709](https://github.com/ClickHouse/ClickHouse/issues/44709). [#44875](https://github.com/ClickHouse/ClickHouse/pull/44875) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#45553](https://github.com/ClickHouse/ClickHouse/issues/45553): Fix `SELECT ... FROM system.dictionaries` exception when there is a dictionary with a bad structure (e.g. incorrect type in xml config). [#45399](https://github.com/ClickHouse/ClickHouse/pull/45399) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Backported in [#46226](https://github.com/ClickHouse/ClickHouse/issues/46226): A couple of seg faults have been reported around `c-ares`. All of the recent stack traces observed fail on inserting into `std::unordered_set<>`. I believe I have found the root cause of this; it seems to be unprocessed queries. Prior to this PR, CH calls `poll` to wait on the file descriptors in the `c-ares` channel. According to the [poll docs](https://man7.org/linux/man-pages/man2/poll.2.html), a negative return value means an error has occurred. Because of this, we would abort the execution and return failure. The problem is that `poll` will also return a negative value if a system interrupt occurs. A system interrupt does not mean the processing has failed or ended, but we would abort it anyways because we were checking for negative values. Once the execution is aborted, the whole stack is destroyed, which includes the `std::unordered_set` passed to the `void *` parameter of the c-ares callback. Once c-ares completed the request, the callback would be invoked and would access an invalid memory address causing a segfault (a minimal `poll`/`EINTR` sketch follows this file's diff). [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)). +* Backported in [#46218](https://github.com/ClickHouse/ClickHouse/issues/46218): Fix reading of non-existing nested columns with multiple levels in compact parts. [#46045](https://github.com/ClickHouse/ClickHouse/pull/46045) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#46446](https://github.com/ClickHouse/ClickHouse/issues/46446): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#46678](https://github.com/ClickHouse/ClickHouse/issues/46678): Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and logical error `Bad cast` in debug build. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Backported in [#46872](https://github.com/ClickHouse/ClickHouse/issues/46872): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#46954](https://github.com/ClickHouse/ClickHouse/issues/46954): Fix result of LIKE predicates which translate to substring searches and contain quoted non-LIKE metacharacters. [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Automatically merge green backport PRs and green approved PRs [#41110](https://github.com/ClickHouse/ClickHouse/pull/41110) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Improve release scripts [#45074](https://github.com/ClickHouse/ClickHouse/pull/45074) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix wrong approved_at, simplify conditions [#45302](https://github.com/ClickHouse/ClickHouse/pull/45302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Get rid of artifactory in favor of r2 + ch-repos-manager [#45421](https://github.com/ClickHouse/ClickHouse/pull/45421) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Another attempt to fix automerge, or at least to have debug footprint [#45476](https://github.com/ClickHouse/ClickHouse/pull/45476) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Trim refs/tags/ from GITHUB_TAG in release workflow [#45636](https://github.com/ClickHouse/ClickHouse/pull/45636) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add check for running workflows to merge_pr.py [#45803](https://github.com/ClickHouse/ClickHouse/pull/45803) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Get rid of progress timestamps in release publishing [#45818](https://github.com/ClickHouse/ClickHouse/pull/45818) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add necessary dependency for sanitizers [#45959](https://github.com/ClickHouse/ClickHouse/pull/45959) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Improve install_check.py [#46458](https://github.com/ClickHouse/ClickHouse/pull/46458) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix dependencies for InstallPackagesTestAarch64 [#46597](https://github.com/ClickHouse/ClickHouse/pull/46597) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
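For context on the `c-ares` fix quoted twice above ([#45629](https://github.com/ClickHouse/ClickHouse/pull/45629)): the entry describes `poll` returning a negative value both on genuine errors and when a signal interrupts the wait, and treating the `EINTR` case as fatal destroyed state that the resolver callback still referenced. Below is a minimal, hedged sketch of an `EINTR`-safe wait; the function name `waitReadable` and the single-descriptor setup are illustrative assumptions, not the actual ClickHouse code.

```cpp
#include <poll.h>
#include <cerrno>

/// Illustrative only: wait until `fd` is readable, retrying when poll()
/// is interrupted by a signal. Treating every negative return as a fatal
/// error is the bug described in the changelog entry above: the wait was
/// torn down on EINTR, destroying the std::unordered_set that the c-ares
/// completion callback later dereferenced.
int waitReadable(int fd, int timeout_ms)
{
    pollfd pfd{};
    pfd.fd = fd;
    pfd.events = POLLIN;

    for (;;)
    {
        int rc = poll(&pfd, 1, timeout_ms);
        if (rc >= 0)
            return rc;      /// 0 = timeout, > 0 = descriptor is ready
        if (errno == EINTR)
            continue;       /// interrupted by a signal: retry, not an error
        return -1;          /// genuine poll() failure
    }
}
```

The upstream patch may differ in detail; the point of the sketch is only that `EINTR` must be distinguished from real `poll` failures before unwinding state shared with in-flight callbacks.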
diff --git a/docs/changelogs/v22.12.5.34-stable.md b/docs/changelogs/v22.12.5.34-stable.md new file mode 100644 index 00000000000..61f099462a0 --- /dev/null +++ b/docs/changelogs/v22.12.5.34-stable.md @@ -0,0 +1,28 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.12.5.34-stable (b82d6401ca1) FIXME as compared to v22.12.4.76-stable (cb5772db805) + +#### Improvement +* Backported in [#46983](https://github.com/ClickHouse/ClickHouse/issues/46983): Apply `ALTER TABLE table_name ON CLUSTER cluster MOVE PARTITION|PART partition_expr TO DISK|VOLUME 'disk_name'` to all replicas, because `ALTER TABLE t MOVE` is not replicated. [#46402](https://github.com/ClickHouse/ClickHouse/pull/46402) ([lizhuoyu5](https://github.com/lzydmxy)). + +#### Bug Fix (user-visible misbehavior in official stable release) + +* Backported in [#45729](https://github.com/ClickHouse/ClickHouse/issues/45729): Fix key description when encountering duplicate primary keys. This can happen in projections. See [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590) for details. [#45686](https://github.com/ClickHouse/ClickHouse/pull/45686) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#46398](https://github.com/ClickHouse/ClickHouse/issues/46398): Fix `SYSTEM UNFREEZE` queries failing with the exception `CANNOT_PARSE_INPUT_ASSERTION_FAILED`. [#46325](https://github.com/ClickHouse/ClickHouse/pull/46325) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Backported in [#46903](https://github.com/ClickHouse/ClickHouse/issues/46903): Fix incorrect alias recursion in QueryNormalizer. [#46609](https://github.com/ClickHouse/ClickHouse/pull/46609) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#47210](https://github.com/ClickHouse/ClickHouse/issues/47210): `INSERT` queries through native TCP protocol and HTTP protocol were not canceled correctly in some cases. It could lead to a partially applied query if a client canceled the query, or if a client died or, in rare cases, on network errors. As a result, it could lead to deduplication not working. Fixes [#27667](https://github.com/ClickHouse/ClickHouse/issues/27667) and [#45377](https://github.com/ClickHouse/ClickHouse/issues/45377). [#46681](https://github.com/ClickHouse/ClickHouse/pull/46681) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#47157](https://github.com/ClickHouse/ClickHouse/issues/47157): Fix arithmetic operations in aggregate optimization with `min` and `max`. [#46705](https://github.com/ClickHouse/ClickHouse/pull/46705) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#46881](https://github.com/ClickHouse/ClickHouse/issues/46881): Fix MSan report in the `maxIntersections` function. This closes [#43126](https://github.com/ClickHouse/ClickHouse/issues/43126). [#46847](https://github.com/ClickHouse/ClickHouse/pull/46847) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#47359](https://github.com/ClickHouse/ClickHouse/issues/47359): Fix possible deadlock on distributed query cancellation. [#47161](https://github.com/ClickHouse/ClickHouse/pull/47161) ([Kruglov Pavel](https://github.com/Avogar)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Use /etc/default/clickhouse in systemd too [#47003](https://github.com/ClickHouse/ClickHouse/pull/47003) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Update typing for a new PyGithub version [#47123](https://github.com/ClickHouse/ClickHouse/pull/47123) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Follow-up to [#46681](https://github.com/ClickHouse/ClickHouse/issues/46681) [#47284](https://github.com/ClickHouse/ClickHouse/pull/47284) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add a manual trigger for release workflow [#47302](https://github.com/ClickHouse/ClickHouse/pull/47302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). diff --git a/docs/changelogs/v22.12.6.22-stable.md b/docs/changelogs/v22.12.6.22-stable.md new file mode 100644 index 00000000000..f0bf7c92340 --- /dev/null +++ b/docs/changelogs/v22.12.6.22-stable.md @@ -0,0 +1,26 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.12.6.22-stable (10d87f90261) FIXME as compared to v22.12.5.34-stable (b82d6401ca1) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix changing an expired role [#46772](https://github.com/ClickHouse/ClickHouse/pull/46772) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix bug in zero-copy replication disk choice during fetch [#47010](https://github.com/ClickHouse/ClickHouse/pull/47010) ([alesapin](https://github.com/alesapin)). +* Fix NOT_IMPLEMENTED error with CROSS JOIN and algorithm = auto [#47068](https://github.com/ClickHouse/ClickHouse/pull/47068) ([Vladimir C](https://github.com/vdimir)). +* Fix query parameters [#47488](https://github.com/ClickHouse/ClickHouse/pull/47488) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Hotfix for too verbose warnings in HTTP [#47903](https://github.com/ClickHouse/ClickHouse/pull/47903) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Better error messages in ReplicatedMergeTreeAttachThread [#47454](https://github.com/ClickHouse/ClickHouse/pull/47454) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add a fuse for backport branches w/o a created PR [#47760](https://github.com/ClickHouse/ClickHouse/pull/47760) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Only valid Reviews.STATES overwrite existing reviews [#47789](https://github.com/ClickHouse/ClickHouse/pull/47789) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Place short return before big block, improve logging [#47822](https://github.com/ClickHouse/ClickHouse/pull/47822) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Artifacts s3 prefix [#47945](https://github.com/ClickHouse/ClickHouse/pull/47945) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix tsan error lock-order-inversion [#47953](https://github.com/ClickHouse/ClickHouse/pull/47953) ([Kruglov Pavel](https://github.com/Avogar)). + diff --git a/docs/changelogs/v22.2.1.2139-prestable.md b/docs/changelogs/v22.2.1.2139-prestable.md index 67db2d9a18d..ca3a84ceaa2 100644 --- a/docs/changelogs/v22.2.1.2139-prestable.md +++ b/docs/changelogs/v22.2.1.2139-prestable.md @@ -141,7 +141,7 @@ sidebar_label: 2022 * - Rework version_helper, make it executable - Reimplement StorageSystemContributors.sh in version_helper - Create a release script. [#34641](https://github.com/ClickHouse/ClickHouse/pull/34641) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * - Fix path in workflows/release.yml - To be backported to branch 22.1. [#34646](https://github.com/ClickHouse/ClickHouse/pull/34646) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
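The `NOT_IMPLEMENTED` fix [#47068] above involves `CROSS JOIN` while `join_algorithm` is set to `auto`; a minimal reproduction shape, hedged since the original report is not quoted in the entry:

```sql
SET join_algorithm = 'auto';
-- Before the fix, a plain CROSS JOIN could fail with NOT_IMPLEMENTED
-- when automatic join-algorithm selection was active.
SELECT count() FROM numbers(10) AS a CROSS JOIN numbers(10) AS b;
```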
-#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Fix lz4 compression for output. Closes [#31421](https://github.com/ClickHouse/ClickHouse/issues/31421). [#31862](https://github.com/ClickHouse/ClickHouse/pull/31862) ([Kruglov Pavel](https://github.com/Avogar)). * Create a function escapeForLDAPFilter and use it to escape characters '(' and ')' in a final_user_dn variable. [#33401](https://github.com/ClickHouse/ClickHouse/pull/33401) ([IlyaTsoi](https://github.com/IlyaTsoi)). diff --git a/docs/changelogs/v22.2.3.5-stable.md b/docs/changelogs/v22.2.3.5-stable.md index c433669049d..a4368e465aa 100644 --- a/docs/changelogs/v22.2.3.5-stable.md +++ b/docs/changelogs/v22.2.3.5-stable.md @@ -7,7 +7,7 @@ sidebar_label: 2022 ### ClickHouse release v22.2.3.5-stable FIXME as compared to v22.2.2.1-stable -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#34848](https://github.com/ClickHouse/ClickHouse/issues/34848): Fix possible failures in S2 functions when queries contain const columns. [#34745](https://github.com/ClickHouse/ClickHouse/pull/34745) ([Bharat Nallan](https://github.com/bharatnc)). diff --git a/docs/changelogs/v22.3.1.1262-prestable.md b/docs/changelogs/v22.3.1.1262-prestable.md index e6203ff18d1..385393cef17 100644 --- a/docs/changelogs/v22.3.1.1262-prestable.md +++ b/docs/changelogs/v22.3.1.1262-prestable.md @@ -95,7 +95,7 @@ sidebar_label: 2022 * Clion has the following problems "The breakpoint will not currently be hit. No executable code is associated with this line". [#35179](https://github.com/ClickHouse/ClickHouse/pull/35179) ([小路](https://github.com/nicelulu)). * Add an ability to build stripped binaries with cmake. [#35196](https://github.com/ClickHouse/ClickHouse/pull/35196) ([alesapin](https://github.com/alesapin)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Fix distributed subquery max_query_size limitation inconsistency. [#34078](https://github.com/ClickHouse/ClickHouse/pull/34078) ([Chao Ma](https://github.com/godliness)). * Fix incorrect trivial count result when part movement feature is used [#34089](https://github.com/ClickHouse/ClickHouse/issues/34089). [#34385](https://github.com/ClickHouse/ClickHouse/pull/34385) ([nvartolomei](https://github.com/nvartolomei)). diff --git a/docs/changelogs/v22.3.10.22-lts.md b/docs/changelogs/v22.3.10.22-lts.md index a43b8301aad..cc033eb707d 100644 --- a/docs/changelogs/v22.3.10.22-lts.md +++ b/docs/changelogs/v22.3.10.22-lts.md @@ -10,7 +10,7 @@ sidebar_label: 2022 #### Bug Fix * Backported in [#39761](https://github.com/ClickHouse/ClickHouse/issues/39761): Fix seeking while reading from encrypted disk. This PR fixes [#38381](https://github.com/ClickHouse/ClickHouse/issues/38381). [#39687](https://github.com/ClickHouse/ClickHouse/pull/39687) ([Vitaly Baranov](https://github.com/vitlibar)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#39206](https://github.com/ClickHouse/ClickHouse/issues/39206): Fix reading of sparse columns from `MergeTree` tables that store their data in S3. [#37978](https://github.com/ClickHouse/ClickHouse/pull/37978) ([Anton Popov](https://github.com/CurtizJ)). 
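The S2 fix backported in [#34848] above is specifically about constant arguments; for reference, an ordinary call with constant coordinates (the coordinates are illustrative):

```sql
-- geoToS2(longitude, latitude) returns the S2 cell index as a UInt64;
-- constant arguments like these could previously trigger failures.
SELECT geoToS2(37.79506683, 55.71290588) AS s2_cell;
```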
* Backported in [#39381](https://github.com/ClickHouse/ClickHouse/issues/39381): Fixed error `Not found column Type in block` in selects with `PREWHERE` and read-in-order optimizations. [#39157](https://github.com/ClickHouse/ClickHouse/pull/39157) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). @@ -18,7 +18,7 @@ sidebar_label: 2022 * Backported in [#39610](https://github.com/ClickHouse/ClickHouse/issues/39610): Fix bug with maxsplit argument for splitByChar, which was not working correctly. [#39552](https://github.com/ClickHouse/ClickHouse/pull/39552) ([filimonov](https://github.com/filimonov)). * Backported in [#39834](https://github.com/ClickHouse/ClickHouse/issues/39834): Fix `CANNOT_READ_ALL_DATA` exception with `local_filesystem_read_method=pread_threadpool`. This bug affected only Linux kernel version 5.9 and 5.10 according to [man](https://manpages.debian.org/testing/manpages-dev/preadv2.2.en.html#BUGS). [#39800](https://github.com/ClickHouse/ClickHouse/pull/39800) ([Anton Popov](https://github.com/CurtizJ)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#39238](https://github.com/ClickHouse/ClickHouse/issues/39238): Fix performance regression of scalar query optimization. [#35986](https://github.com/ClickHouse/ClickHouse/pull/35986) ([Amos Bird](https://github.com/amosbird)). * Backported in [#39531](https://github.com/ClickHouse/ClickHouse/issues/39531): Fix some issues with async reads from remote filesystem which happened when reading low cardinality. [#36763](https://github.com/ClickHouse/ClickHouse/pull/36763) ([Kseniia Sumarokova](https://github.com/kssenii)). diff --git a/docs/changelogs/v22.3.11.12-lts.md b/docs/changelogs/v22.3.11.12-lts.md index e718493d9a0..58df0c0cadb 100644 --- a/docs/changelogs/v22.3.11.12-lts.md +++ b/docs/changelogs/v22.3.11.12-lts.md @@ -10,7 +10,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * Backported in [#39881](https://github.com/ClickHouse/ClickHouse/issues/39881): Former packages used to install systemd.service file to `/etc`. The files there are marked as `conf` and are not cleaned out, and not updated automatically. This PR cleans them out. [#39323](https://github.com/ClickHouse/ClickHouse/pull/39323) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#39336](https://github.com/ClickHouse/ClickHouse/issues/39336): Fix `parallel_view_processing=1` with `optimize_trivial_insert_select=1`. Fix `max_insert_threads` while pushing to views. [#38731](https://github.com/ClickHouse/ClickHouse/pull/38731) ([Azat Khuzhin](https://github.com/azat)). diff --git a/docs/changelogs/v22.3.12.19-lts.md b/docs/changelogs/v22.3.12.19-lts.md index 4f6342419f7..6ae342583a9 100644 --- a/docs/changelogs/v22.3.12.19-lts.md +++ b/docs/changelogs/v22.3.12.19-lts.md @@ -10,7 +10,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * Backported in [#40695](https://github.com/ClickHouse/ClickHouse/issues/40695): Fix TGZ packages. [#40681](https://github.com/ClickHouse/ClickHouse/pull/40681) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
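The `splitByChar` fix [#39552] above targets the `maxsplit` (`max_substrings`) argument; the call shape, with illustrative input:

```sql
-- The optional third argument caps how many substrings are produced;
-- before the fix it was not applied correctly.
SELECT splitByChar(',', 'a,b,c,d', 2) AS parts;
```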
-#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#40160](https://github.com/ClickHouse/ClickHouse/issues/40160): fix HashMethodOneNumber get wrong key value when column is const. [#40020](https://github.com/ClickHouse/ClickHouse/pull/40020) ([Duc Canh Le](https://github.com/canhld94)). * Backported in [#40122](https://github.com/ClickHouse/ClickHouse/issues/40122): Fix bug in collectFilesToSkip() by adding correct file extension(.idx or idx2) for indexes to be recalculated, avoid wrong hard links. Fixed [#39896](https://github.com/ClickHouse/ClickHouse/issues/39896). [#40095](https://github.com/ClickHouse/ClickHouse/pull/40095) ([Jianmei Zhang](https://github.com/zhangjmruc)). diff --git a/docs/changelogs/v22.3.13.80-lts.md b/docs/changelogs/v22.3.13.80-lts.md index bb84da9a96a..3d0a6a77276 100644 --- a/docs/changelogs/v22.3.13.80-lts.md +++ b/docs/changelogs/v22.3.13.80-lts.md @@ -17,7 +17,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * Backported in [#41557](https://github.com/ClickHouse/ClickHouse/issues/41557): Add `source` field to deb packages, update `nfpm`. [#41531](https://github.com/ClickHouse/ClickHouse/pull/41531) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#40745](https://github.com/ClickHouse/ClickHouse/issues/40745): * Fix cast lowcard of nullable in JoinSwitcher, close [#37385](https://github.com/ClickHouse/ClickHouse/issues/37385). [#37453](https://github.com/ClickHouse/ClickHouse/pull/37453) ([Vladimir C](https://github.com/vdimir)). * Backported in [#41812](https://github.com/ClickHouse/ClickHouse/issues/41812): Update `simdjson`. This fixes [#38621](https://github.com/ClickHouse/ClickHouse/issues/38621). [#38838](https://github.com/ClickHouse/ClickHouse/pull/38838) ([Alexey Milovidov](https://github.com/alexey-milovidov)). @@ -36,7 +36,7 @@ sidebar_label: 2022 * Backported in [#41639](https://github.com/ClickHouse/ClickHouse/issues/41639): Fix possible `pipeline stuck` exception for queries with `OFFSET`. The error was found with `enable_optimize_predicate_expression = 0` and always false condition in `WHERE`. Fixes [#41383](https://github.com/ClickHouse/ClickHouse/issues/41383). [#41588](https://github.com/ClickHouse/ClickHouse/pull/41588) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Backported in [#41899](https://github.com/ClickHouse/ClickHouse/issues/41899): Fix possible crash in `SELECT` from `Merge` table with enabled `optimize_monotonous_functions_in_order_by` setting. Fixes [#41269](https://github.com/ClickHouse/ClickHouse/issues/41269). [#41740](https://github.com/ClickHouse/ClickHouse/pull/41740) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#41321](https://github.com/ClickHouse/ClickHouse/issues/41321): Fix bug in function `if` when resulting column type differs with resulting data type that led to logical errors like `Logical error: 'Bad cast from type DB::ColumnVector to DB::ColumnVector'.`. Closes [#35367](https://github.com/ClickHouse/ClickHouse/issues/35367). 
[#35476](https://github.com/ClickHouse/ClickHouse/pull/35476) ([Kruglov Pavel](https://github.com/Avogar)). diff --git a/docs/changelogs/v22.3.14.18-lts.md b/docs/changelogs/v22.3.14.18-lts.md index 88801b268ce..235525bbe81 100644 --- a/docs/changelogs/v22.3.14.18-lts.md +++ b/docs/changelogs/v22.3.14.18-lts.md @@ -14,7 +14,7 @@ sidebar_label: 2022 * Backported in [#42328](https://github.com/ClickHouse/ClickHouse/issues/42328): Update cctz to the latest master, update tzdb to 2020e. [#42273](https://github.com/ClickHouse/ClickHouse/pull/42273) ([Dom Del Nano](https://github.com/ddelnano)). * Backported in [#42358](https://github.com/ClickHouse/ClickHouse/issues/42358): Update tzdata to 2022e to support the new timezone changes. Palestine transitions are now Saturdays at 02:00. Simplify three Ukraine zones into one. Jordan and Syria switch from +02/+03 with DST to year-round +03. (https://data.iana.org/time-zones/tzdb/NEWS). This closes [#42252](https://github.com/ClickHouse/ClickHouse/issues/42252). [#42327](https://github.com/ClickHouse/ClickHouse/pull/42327) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#42298](https://github.com/ClickHouse/ClickHouse/issues/42298): Fix a bug with projections and the `aggregate_functions_null_for_empty` setting. This bug is very rare and appears only if you enable the `aggregate_functions_null_for_empty` setting in the server's config. This closes [#41647](https://github.com/ClickHouse/ClickHouse/issues/41647). [#42198](https://github.com/ClickHouse/ClickHouse/pull/42198) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Backported in [#42592](https://github.com/ClickHouse/ClickHouse/issues/42592): This closes [#42453](https://github.com/ClickHouse/ClickHouse/issues/42453). [#42573](https://github.com/ClickHouse/ClickHouse/pull/42573) ([Alexey Milovidov](https://github.com/alexey-milovidov)). diff --git a/docs/changelogs/v22.3.14.23-lts.md b/docs/changelogs/v22.3.14.23-lts.md index 0a8c645702e..e086e223fb1 100644 --- a/docs/changelogs/v22.3.14.23-lts.md +++ b/docs/changelogs/v22.3.14.23-lts.md @@ -17,7 +17,7 @@ sidebar_label: 2022 * Backported in [#42328](https://github.com/ClickHouse/ClickHouse/issues/42328): Update cctz to the latest master, update tzdb to 2020e. [#42273](https://github.com/ClickHouse/ClickHouse/pull/42273) ([Dom Del Nano](https://github.com/ddelnano)). * Backported in [#42358](https://github.com/ClickHouse/ClickHouse/issues/42358): Update tzdata to 2022e to support the new timezone changes. Palestine transitions are now Saturdays at 02:00. Simplify three Ukraine zones into one. Jordan and Syria switch from +02/+03 with DST to year-round +03. (https://data.iana.org/time-zones/tzdb/NEWS). This closes [#42252](https://github.com/ClickHouse/ClickHouse/issues/42252). [#42327](https://github.com/ClickHouse/ClickHouse/pull/42327) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#42298](https://github.com/ClickHouse/ClickHouse/issues/42298): Fix a bug with projections and the `aggregate_functions_null_for_empty` setting. This bug is very rare and appears only if you enable the `aggregate_functions_null_for_empty` setting in the server's config. 
This closes [#41647](https://github.com/ClickHouse/ClickHouse/issues/41647). [#42198](https://github.com/ClickHouse/ClickHouse/pull/42198) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Backported in [#42592](https://github.com/ClickHouse/ClickHouse/issues/42592): This closes [#42453](https://github.com/ClickHouse/ClickHouse/issues/42453). [#42573](https://github.com/ClickHouse/ClickHouse/pull/42573) ([Alexey Milovidov](https://github.com/alexey-milovidov)). diff --git a/docs/changelogs/v22.3.15.33-lts.md b/docs/changelogs/v22.3.15.33-lts.md index e59bf8bd1cd..3f675bfba1e 100644 --- a/docs/changelogs/v22.3.15.33-lts.md +++ b/docs/changelogs/v22.3.15.33-lts.md @@ -18,7 +18,7 @@ sidebar_label: 2022 * Backported in [#42963](https://github.com/ClickHouse/ClickHouse/issues/42963): Before the fix, the user-defined config was preserved by RPM in `$file.rpmsave`. The PR fixes it and won't replace the user's files from packages. [#42936](https://github.com/ClickHouse/ClickHouse/pull/42936) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Backported in [#43039](https://github.com/ClickHouse/ClickHouse/issues/43039): Add a CI step to mark commits as ready for release; soft-forbid launching a release script from branches but master. [#43017](https://github.com/ClickHouse/ClickHouse/pull/43017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#43427](https://github.com/ClickHouse/ClickHouse/issues/43427): Fixed queries with `SAMPLE BY` with prewhere optimization on tables using `Merge` engine. [#43315](https://github.com/ClickHouse/ClickHouse/pull/43315) ([Antonio Andelic](https://github.com/antonio2368)). * Backported in [#43720](https://github.com/ClickHouse/ClickHouse/issues/43720): Fixed primary key analysis with conditions involving `toString(enum)`. [#43596](https://github.com/ClickHouse/ClickHouse/pull/43596) ([Nikita Taranov](https://github.com/nickitat)). diff --git a/docs/changelogs/v22.3.16.1190-lts.md b/docs/changelogs/v22.3.16.1190-lts.md index a43d34551ca..b65a6484109 100644 --- a/docs/changelogs/v22.3.16.1190-lts.md +++ b/docs/changelogs/v22.3.16.1190-lts.md @@ -18,7 +18,7 @@ sidebar_label: 2023 * Backported in [#44431](https://github.com/ClickHouse/ClickHouse/issues/44431): Kill stress tests after 2.5h in case of hanging process. [#44214](https://github.com/ClickHouse/ClickHouse/pull/44214) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Backported in [#44557](https://github.com/ClickHouse/ClickHouse/issues/44557): Retry the integration tests on compressing errors. [#44529](https://github.com/ClickHouse/ClickHouse/pull/44529) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#43512](https://github.com/ClickHouse/ClickHouse/issues/43512): - Fix several buffer over-reads. [#43159](https://github.com/ClickHouse/ClickHouse/pull/43159) ([Raúl Marín](https://github.com/Algunenano)). * Backported in [#43750](https://github.com/ClickHouse/ClickHouse/issues/43750): An issue with the following exception has been reported while trying to read a Parquet file from S3 into ClickHouse:. [#43297](https://github.com/ClickHouse/ClickHouse/pull/43297) ([Arthur Passos](https://github.com/arthurpassos)). 
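The projection fix [#42198] above hinges on `aggregate_functions_null_for_empty`, which rewrites aggregates to their `-OrNull` variants so that an empty set yields NULL. A minimal illustration of the setting on its own:

```sql
SET aggregate_functions_null_for_empty = 1;
-- With the setting enabled, an aggregate over zero rows returns NULL
-- instead of the usual 0.
SELECT sum(number) FROM numbers(10) WHERE 0;
```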
@@ -30,4 +30,3 @@ sidebar_label: 2023 #### NO CL ENTRY * NO CL ENTRY: 'Fix multipart upload for large S3 object, backport to 22.3'. [#44217](https://github.com/ClickHouse/ClickHouse/pull/44217) ([ianton-ru](https://github.com/ianton-ru)). - diff --git a/docs/changelogs/v22.3.18.37-lts.md b/docs/changelogs/v22.3.18.37-lts.md index ff6378f09ad..5a9004e7f98 100644 --- a/docs/changelogs/v22.3.18.37-lts.md +++ b/docs/changelogs/v22.3.18.37-lts.md @@ -14,7 +14,7 @@ sidebar_label: 2023 #### Build/Testing/Packaging Improvement * Backported in [#45856](https://github.com/ClickHouse/ClickHouse/issues/45856): Fix zookeeper downloading, update the version, and optimize the image size. [#44853](https://github.com/ClickHouse/ClickHouse/pull/44853) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#45620](https://github.com/ClickHouse/ClickHouse/issues/45620): Another fix for `Cannot read all data` error which could happen while reading `LowCardinality` dictionary from remote fs. Fixes [#44709](https://github.com/ClickHouse/ClickHouse/issues/44709). [#44875](https://github.com/ClickHouse/ClickHouse/pull/44875) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Backported in [#45549](https://github.com/ClickHouse/ClickHouse/issues/45549): Fix `SELECT ... FROM system.dictionaries` exception when there is a dictionary with a bad structure (e.g. incorrect type in xml config). [#45399](https://github.com/ClickHouse/ClickHouse/pull/45399) ([Aleksei Filatov](https://github.com/aalexfvk)). @@ -30,4 +30,3 @@ sidebar_label: 2023 * Get rid of progress timestamps in release publishing [#45818](https://github.com/ClickHouse/ClickHouse/pull/45818) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). - diff --git a/docs/changelogs/v22.3.19.6-lts.md b/docs/changelogs/v22.3.19.6-lts.md index d5b45f4ce66..dc34ece918d 100644 --- a/docs/changelogs/v22.3.19.6-lts.md +++ b/docs/changelogs/v22.3.19.6-lts.md @@ -7,11 +7,10 @@ sidebar_label: 2023 ### ClickHouse release v22.3.19.6-lts (467e0a7bd77) FIXME as compared to v22.3.18.37-lts (fe512717551) -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#46440](https://github.com/ClickHouse/ClickHouse/issues/46440): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)). #### NOT FOR CHANGELOG / INSIGNIFICANT * Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
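For the `system.dictionaries` fix backported in [#45549] above, the query shape from the entry; after the fix, a dictionary with a bad structure is reported instead of making the whole SELECT throw (the chosen columns are an assumption, any subset works):

```sql
-- A structurally broken dictionary now surfaces through status and
-- last_exception rather than aborting this query.
SELECT name, status, last_exception FROM system.dictionaries;
```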
- diff --git a/docs/changelogs/v22.3.20.29-lts.md b/docs/changelogs/v22.3.20.29-lts.md new file mode 100644 index 00000000000..a54a320c4c1 --- /dev/null +++ b/docs/changelogs/v22.3.20.29-lts.md @@ -0,0 +1,29 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.3.20.29-lts (297b4dd5e55) FIXME as compared to v22.3.19.6-lts (467e0a7bd77) + +#### Improvement +* Backported in [#46979](https://github.com/ClickHouse/ClickHouse/issues/46979): Apply `ALTER TABLE table_name ON CLUSTER cluster MOVE PARTITION|PART partition_expr TO DISK|VOLUME 'disk_name'` to all replicas. Because `ALTER TABLE t MOVE` is not replicated. [#46402](https://github.com/ClickHouse/ClickHouse/pull/46402) ([lizhuoyu5](https://github.com/lzydmxy)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix incorrect alias recursion in QueryNormalizer [#46609](https://github.com/ClickHouse/ClickHouse/pull/46609) ([Raúl Marín](https://github.com/Algunenano)). +* Fix arithmetic operations in aggregate optimization [#46705](https://github.com/ClickHouse/ClickHouse/pull/46705) ([Duc Canh Le](https://github.com/canhld94)). +* Fix MSan report in `maxIntersections` function [#46847](https://github.com/ClickHouse/ClickHouse/pull/46847) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix wrong results of some LIKE searches when the LIKE pattern contains quoted non-quotable characters [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix possible deadlock in QueryStatus [#47161](https://github.com/ClickHouse/ClickHouse/pull/47161) ([Kruglov Pavel](https://github.com/Avogar)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Update typing for a new PyGithub version [#47123](https://github.com/ClickHouse/ClickHouse/pull/47123) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add a manual trigger for release workflow [#47302](https://github.com/ClickHouse/ClickHouse/pull/47302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add a fuse for backport branches w/o a created PR [#47760](https://github.com/ClickHouse/ClickHouse/pull/47760) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Only valid Reviews.STATES overwrite existing reviews [#47789](https://github.com/ClickHouse/ClickHouse/pull/47789) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Place short return before big block, improve logging [#47822](https://github.com/ClickHouse/ClickHouse/pull/47822) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix tsan error lock-order-inversion [#47953](https://github.com/ClickHouse/ClickHouse/pull/47953) ([Kruglov Pavel](https://github.com/Avogar)). + diff --git a/docs/changelogs/v22.3.3.44-lts.md b/docs/changelogs/v22.3.3.44-lts.md index 3d113d45e68..bf491e46915 100644 --- a/docs/changelogs/v22.3.3.44-lts.md +++ b/docs/changelogs/v22.3.3.44-lts.md @@ -10,7 +10,7 @@ sidebar_label: 2022 #### Bug Fix * Backported in [#35928](https://github.com/ClickHouse/ClickHouse/issues/35928): Added settings `input_format_ipv4_default_on_conversion_error`, `input_format_ipv6_default_on_conversion_error` to allow insert of invalid ip address values as default into tables. Closes [#35726](https://github.com/ClickHouse/ClickHouse/issues/35726). [#35733](https://github.com/ClickHouse/ClickHouse/pull/35733) ([Maksim Kita](https://github.com/kitaisreal)). 
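The settings added in [#35733] above let unparsable IP literals fall back to the type's default value during inserts; a hedged sketch with an illustrative table:

```sql
CREATE TABLE ips (ip IPv4) ENGINE = Memory;
SET input_format_ipv4_default_on_conversion_error = 1;
-- The invalid address is stored as the default value (0.0.0.0)
-- instead of failing the whole INSERT.
INSERT INTO ips VALUES ('not-an-address');
```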
-#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#35415](https://github.com/ClickHouse/ClickHouse/issues/35415): Fix possible deadlock in cache. [#35378](https://github.com/ClickHouse/ClickHouse/pull/35378) ([Kseniia Sumarokova](https://github.com/kssenii)). * Backported in [#35563](https://github.com/ClickHouse/ClickHouse/issues/35563): Fix cast into IPv4, IPv6 address in IN section. Fixes [#35528](https://github.com/ClickHouse/ClickHouse/issues/35528). [#35534](https://github.com/ClickHouse/ClickHouse/pull/35534) ([Maksim Kita](https://github.com/kitaisreal)). diff --git a/docs/changelogs/v22.3.4.20-lts.md b/docs/changelogs/v22.3.4.20-lts.md index 72d7b90c743..070ad961beb 100644 --- a/docs/changelogs/v22.3.4.20-lts.md +++ b/docs/changelogs/v22.3.4.20-lts.md @@ -10,7 +10,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * - Add `_le_` method for ClickHouseVersion - Fix auto_version for existing tag - docker_server now support getting version from tags - Add python unit tests to backport workflow. [#36028](https://github.com/ClickHouse/ClickHouse/pull/36028) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#36244](https://github.com/ClickHouse/ClickHouse/issues/36244): Fix usage of quota with asynchronous inserts. [#35645](https://github.com/ClickHouse/ClickHouse/pull/35645) ([Anton Popov](https://github.com/CurtizJ)). * Backported in [#36240](https://github.com/ClickHouse/ClickHouse/issues/36240): Fix possible loss of subcolumns in type `Object`. [#35682](https://github.com/ClickHouse/ClickHouse/pull/35682) ([Anton Popov](https://github.com/CurtizJ)). diff --git a/docs/changelogs/v22.3.5.5-lts.md b/docs/changelogs/v22.3.5.5-lts.md index b4205d5e1a9..4a5dc318b9f 100644 --- a/docs/changelogs/v22.3.5.5-lts.md +++ b/docs/changelogs/v22.3.5.5-lts.md @@ -7,7 +7,7 @@ sidebar_label: 2022 ### ClickHouse release v22.3.5.5-lts (438b4a81f77) FIXME as compared to v22.3.4.20-lts (ecbaf001f49) -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#36525](https://github.com/ClickHouse/ClickHouse/issues/36525): Queries with aliases inside special operators returned parsing error (was broken in 22.1). Example: `SELECT substring('test' AS t, 1, 1)`. [#36167](https://github.com/ClickHouse/ClickHouse/pull/36167) ([Maksim Kita](https://github.com/kitaisreal)). * Backported in [#36674](https://github.com/ClickHouse/ClickHouse/issues/36674): Fix merges of wide parts with type `Object`. [#36637](https://github.com/ClickHouse/ClickHouse/pull/36637) ([Anton Popov](https://github.com/CurtizJ)). diff --git a/docs/changelogs/v22.3.6.5-lts.md b/docs/changelogs/v22.3.6.5-lts.md index 4183332efb0..90e65c7445f 100644 --- a/docs/changelogs/v22.3.6.5-lts.md +++ b/docs/changelogs/v22.3.6.5-lts.md @@ -7,7 +7,7 @@ sidebar_label: 2022 ### ClickHouse release v22.3.6.5-lts (3e44e824cff) FIXME as compared to v22.3.5.5-lts (438b4a81f77) -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#36795](https://github.com/ClickHouse/ClickHouse/issues/36795): Fix vertical merges in wide parts. 
Previously an exception `There is no column` can be thrown during merge. [#36707](https://github.com/ClickHouse/ClickHouse/pull/36707) ([Anton Popov](https://github.com/CurtizJ)). diff --git a/docs/changelogs/v22.3.7.28-lts.md b/docs/changelogs/v22.3.7.28-lts.md index 7347e8e0705..a6a7885abc3 100644 --- a/docs/changelogs/v22.3.7.28-lts.md +++ b/docs/changelogs/v22.3.7.28-lts.md @@ -7,14 +7,14 @@ sidebar_label: 2022 ### ClickHouse release v22.3.7.28-lts (420bdfa2751) FIXME as compared to v22.3.6.5-lts (3e44e824cff) -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#37715](https://github.com/ClickHouse/ClickHouse/issues/37715): Fix unexpected errors with a clash of constant strings in aggregate function, prewhere and join. Close [#36891](https://github.com/ClickHouse/ClickHouse/issues/36891). [#37336](https://github.com/ClickHouse/ClickHouse/pull/37336) ([Vladimir C](https://github.com/vdimir)). * Backported in [#37511](https://github.com/ClickHouse/ClickHouse/issues/37511): Fix logical error in normalizeUTF8 functions. Closes [#37298](https://github.com/ClickHouse/ClickHouse/issues/37298). [#37443](https://github.com/ClickHouse/ClickHouse/pull/37443) ([Maksim Kita](https://github.com/kitaisreal)). * Backported in [#37843](https://github.com/ClickHouse/ClickHouse/issues/37843): Fix segmentation fault in `show create table` from mysql database when it is configured with named collections. Closes [#37683](https://github.com/ClickHouse/ClickHouse/issues/37683). [#37690](https://github.com/ClickHouse/ClickHouse/pull/37690) ([Kseniia Sumarokova](https://github.com/kssenii)). * Backported in [#37940](https://github.com/ClickHouse/ClickHouse/issues/37940): Fix setting cast_ipv4_ipv6_default_on_conversion_error for internal cast function. Closes [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#37761](https://github.com/ClickHouse/ClickHouse/pull/37761) ([Maksim Kita](https://github.com/kitaisreal)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#37926](https://github.com/ClickHouse/ClickHouse/issues/37926): Fix check asof join key nullability, close [#35565](https://github.com/ClickHouse/ClickHouse/issues/35565). [#35674](https://github.com/ClickHouse/ClickHouse/pull/35674) ([Vladimir C](https://github.com/vdimir)). * Backported in [#37172](https://github.com/ClickHouse/ClickHouse/issues/37172): Fix bug in indexes of not presented columns in -WithNames formats that led to error `INCORRECT_NUMBER_OF_COLUMNS ` when the number of columns is more than 256. Closes [#35793](https://github.com/ClickHouse/ClickHouse/issues/35793). [#35803](https://github.com/ClickHouse/ClickHouse/pull/35803) ([Kruglov Pavel](https://github.com/Avogar)). diff --git a/docs/changelogs/v22.3.8.39-lts.md b/docs/changelogs/v22.3.8.39-lts.md index 8fff7f00a01..7f9363207d3 100644 --- a/docs/changelogs/v22.3.8.39-lts.md +++ b/docs/changelogs/v22.3.8.39-lts.md @@ -10,7 +10,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * Backported in [#38826](https://github.com/ClickHouse/ClickHouse/issues/38826): - Change `all|noarch` packages to architecture-dependent - Fix some documentation for it - Push aarch64|arm64 packages to artifactory and release assets - Fixes [#36443](https://github.com/ClickHouse/ClickHouse/issues/36443). 
[#38580](https://github.com/ClickHouse/ClickHouse/pull/38580) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#38453](https://github.com/ClickHouse/ClickHouse/issues/38453): Fix bug with nested short-circuit functions that led to execution of arguments even if condition is false. Closes [#38040](https://github.com/ClickHouse/ClickHouse/issues/38040). [#38173](https://github.com/ClickHouse/ClickHouse/pull/38173) ([Kruglov Pavel](https://github.com/Avogar)). * Backported in [#38710](https://github.com/ClickHouse/ClickHouse/issues/38710): Fix incorrect result of distributed queries with `DISTINCT` and `LIMIT`. Fixes [#38282](https://github.com/ClickHouse/ClickHouse/issues/38282). [#38371](https://github.com/ClickHouse/ClickHouse/pull/38371) ([Anton Popov](https://github.com/CurtizJ)). @@ -18,7 +18,7 @@ sidebar_label: 2022 * Backported in [#38776](https://github.com/ClickHouse/ClickHouse/issues/38776): `rankCorr` function will work correctly if some arguments are NaNs. This closes [#38396](https://github.com/ClickHouse/ClickHouse/issues/38396). [#38722](https://github.com/ClickHouse/ClickHouse/pull/38722) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Backported in [#38780](https://github.com/ClickHouse/ClickHouse/issues/38780): Fix use-after-free for Map combinator that leads to incorrect result. [#38748](https://github.com/ClickHouse/ClickHouse/pull/38748) ([Azat Khuzhin](https://github.com/azat)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#36818](https://github.com/ClickHouse/ClickHouse/issues/36818): Fix projection analysis which might lead to wrong query result when IN subquery is used. This fixes [#35336](https://github.com/ClickHouse/ClickHouse/issues/35336). [#35631](https://github.com/ClickHouse/ClickHouse/pull/35631) ([Amos Bird](https://github.com/amosbird)). * Backported in [#38467](https://github.com/ClickHouse/ClickHouse/issues/38467): - Fix potential error with literals in `WHERE` for join queries. Close [#36279](https://github.com/ClickHouse/ClickHouse/issues/36279). [#36542](https://github.com/ClickHouse/ClickHouse/pull/36542) ([Vladimir C](https://github.com/vdimir)). diff --git a/docs/changelogs/v22.3.9.19-lts.md b/docs/changelogs/v22.3.9.19-lts.md index 084793f11cc..e0ebcd9b7b8 100644 --- a/docs/changelogs/v22.3.9.19-lts.md +++ b/docs/changelogs/v22.3.9.19-lts.md @@ -7,7 +7,7 @@ sidebar_label: 2022 ### ClickHouse release v22.3.9.19-lts (7976930b82e) FIXME as compared to v22.3.8.39-lts (6bcf982f58b) -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#39097](https://github.com/ClickHouse/ClickHouse/issues/39097): Any allocations inside OvercommitTracker may lead to deadlock. Logging was not very informative so it's easier just to remove logging. Fixes [#37794](https://github.com/ClickHouse/ClickHouse/issues/37794). [#39030](https://github.com/ClickHouse/ClickHouse/pull/39030) ([Dmitry Novik](https://github.com/novikd)). * Backported in [#39080](https://github.com/ClickHouse/ClickHouse/issues/39080): Fix bug in filesystem cache that could happen in some corner case which coincided with cache capacity hitting the limit. 
Closes [#39066](https://github.com/ClickHouse/ClickHouse/issues/39066). [#39070](https://github.com/ClickHouse/ClickHouse/pull/39070) ([Kseniia Sumarokova](https://github.com/kssenii)). diff --git a/docs/changelogs/v22.4.1.2305-prestable.md b/docs/changelogs/v22.4.1.2305-prestable.md index 04ad0bf8955..b277137ca7e 100644 --- a/docs/changelogs/v22.4.1.2305-prestable.md +++ b/docs/changelogs/v22.4.1.2305-prestable.md @@ -9,7 +9,7 @@ sidebar_label: 2022 #### Backward Incompatible Change * Function `yandexConsistentHash` (consistent hashing algorithm by Konstantin "kostik" Oblakov) is renamed to `kostikConsistentHash`. The old name is left as an alias for compatibility. Although this change is backward compatible, we may remove the alias in subsequent releases, that's why it's recommended to update the usages of this function in your apps. [#35553](https://github.com/ClickHouse/ClickHouse/pull/35553) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Do not allow SETTINGS after FORMAT for INSERT queries (there is compatibility setting `parser_settings_after_format_compact` to accept such queries, but it is turned OFF by default). [#35883](https://github.com/ClickHouse/ClickHouse/pull/35883) ([Azat Khuzhin](https://github.com/azat)). +* Do not allow SETTINGS after FORMAT for INSERT queries (there is compatibility setting `allow_settings_after_format_in_insert` to accept such queries, but it is turned OFF by default). [#35883](https://github.com/ClickHouse/ClickHouse/pull/35883) ([Azat Khuzhin](https://github.com/azat)). * Changed hashed path for cache files. [#36079](https://github.com/ClickHouse/ClickHouse/pull/36079) ([Kseniia Sumarokova](https://github.com/kssenii)). #### New Feature @@ -161,7 +161,7 @@ sidebar_label: 2022 * call RemoteQueryExecutor with original_query instead of an rewritten query, elimate the AMBIGUOUS_COLUMN_NAME exception. [#35748](https://github.com/ClickHouse/ClickHouse/pull/35748) ([lgbo](https://github.com/lgbo-ustc)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Disallow ALTER TTL for engines that does not support it, to avoid breaking ATTACH TABLE (closes [#33344](https://github.com/ClickHouse/ClickHouse/issues/33344)). [#33391](https://github.com/ClickHouse/ClickHouse/pull/33391) ([zhongyuankai](https://github.com/zhongyuankai)). * Do not delay final part writing by default (fixes possible `Memory limit exceeded` during `INSERT` by adding `max_insert_delayed_streams_for_parallel_write` with default to 1000 for writes to s3 and disabled as before otherwise). [#34780](https://github.com/ClickHouse/ClickHouse/pull/34780) ([Azat Khuzhin](https://github.com/azat)). diff --git a/docs/changelogs/v22.4.3.3-stable.md b/docs/changelogs/v22.4.3.3-stable.md index 69f95d8cd27..58d0f7b69a1 100644 --- a/docs/changelogs/v22.4.3.3-stable.md +++ b/docs/changelogs/v22.4.3.3-stable.md @@ -7,6 +7,6 @@ sidebar_label: 2022 ### ClickHouse release v22.4.3.3-stable (def956d6299) FIXME as compared to v22.4.2.1-stable (b34ebdc36ae) -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#36582](https://github.com/ClickHouse/ClickHouse/issues/36582): Fix nullptr dereference in JOIN and COLUMNS matcher. This fixes [#36416](https://github.com/ClickHouse/ClickHouse/issues/36416) . This is for https://github.com/ClickHouse/ClickHouse/pull/36417. 
[#36430](https://github.com/ClickHouse/ClickHouse/pull/36430) ([Amos Bird](https://github.com/amosbird)). diff --git a/docs/changelogs/v22.4.4.7-stable.md b/docs/changelogs/v22.4.4.7-stable.md index 1dce0c50e0a..af94ecafcf6 100644 --- a/docs/changelogs/v22.4.4.7-stable.md +++ b/docs/changelogs/v22.4.4.7-stable.md @@ -7,7 +7,7 @@ sidebar_label: 2022 ### ClickHouse release v22.4.4.7-stable (ba44414f9b3) FIXME as compared to v22.4.3.3-stable (def956d6299) -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#36524](https://github.com/ClickHouse/ClickHouse/issues/36524): Queries with aliases inside special operators returned parsing error (was broken in 22.1). Example: `SELECT substring('test' AS t, 1, 1)`. [#36167](https://github.com/ClickHouse/ClickHouse/pull/36167) ([Maksim Kita](https://github.com/kitaisreal)). * Backported in [#36673](https://github.com/ClickHouse/ClickHouse/issues/36673): Fix merges of wide parts with type `Object`. [#36637](https://github.com/ClickHouse/ClickHouse/pull/36637) ([Anton Popov](https://github.com/CurtizJ)). diff --git a/docs/changelogs/v22.4.5.9-stable.md b/docs/changelogs/v22.4.5.9-stable.md index 50cec91b12c..524b309f8ec 100644 --- a/docs/changelogs/v22.4.5.9-stable.md +++ b/docs/changelogs/v22.4.5.9-stable.md @@ -7,7 +7,7 @@ sidebar_label: 2022 ### ClickHouse release v22.4.5.9-stable (059ef6cadcd) FIXME as compared to v22.4.4.7-stable (ba44414f9b3) -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#36635](https://github.com/ClickHouse/ClickHouse/issues/36635): Fix `Missing column` exception which could happen while using `INTERPOLATE` with `ENGINE = MergeTree` table. [#36549](https://github.com/ClickHouse/ClickHouse/pull/36549) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). * Backported in [#36794](https://github.com/ClickHouse/ClickHouse/issues/36794): Fix vertical merges in wide parts. Previously an exception `There is no column` can be thrown during merge. [#36707](https://github.com/ClickHouse/ClickHouse/pull/36707) ([Anton Popov](https://github.com/CurtizJ)). diff --git a/docs/changelogs/v22.4.6.53-stable.md b/docs/changelogs/v22.4.6.53-stable.md index 5dc25697c20..b6380d9c7ba 100644 --- a/docs/changelogs/v22.4.6.53-stable.md +++ b/docs/changelogs/v22.4.6.53-stable.md @@ -13,7 +13,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * Backported in [#38828](https://github.com/ClickHouse/ClickHouse/issues/38828): - Change `all|noarch` packages to architecture-dependent - Fix some documentation for it - Push aarch64|arm64 packages to artifactory and release assets - Fixes [#36443](https://github.com/ClickHouse/ClickHouse/issues/36443). [#38580](https://github.com/ClickHouse/ClickHouse/pull/38580) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#37717](https://github.com/ClickHouse/ClickHouse/issues/37717): Fix unexpected errors with a clash of constant strings in aggregate function, prewhere and join. Close [#36891](https://github.com/ClickHouse/ClickHouse/issues/36891). [#37336](https://github.com/ClickHouse/ClickHouse/pull/37336) ([Vladimir C](https://github.com/vdimir)). 
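The `Missing column` fix backported in [#36635] above involves `INTERPOLATE` combined with `WITH FILL`; a self-contained sketch of the feature:

```sql
-- Rows for n = 3, 4, 5 are generated by WITH FILL; INTERPOLATE derives
-- their x from the previous row's value instead of using the default.
SELECT number AS n, n * 2 AS x
FROM numbers(3)
ORDER BY n WITH FILL FROM 0 TO 6
INTERPOLATE (x AS x + 2);
```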
* Backported in [#37512](https://github.com/ClickHouse/ClickHouse/issues/37512): Fix logical error in normalizeUTF8 functions. Closes [#37298](https://github.com/ClickHouse/ClickHouse/issues/37298). [#37443](https://github.com/ClickHouse/ClickHouse/pull/37443) ([Maksim Kita](https://github.com/kitaisreal)). @@ -27,7 +27,7 @@ sidebar_label: 2022 * Backported in [#38777](https://github.com/ClickHouse/ClickHouse/issues/38777): `rankCorr` function will work correctly if some arguments are NaNs. This closes [#38396](https://github.com/ClickHouse/ClickHouse/issues/38396). [#38722](https://github.com/ClickHouse/ClickHouse/pull/38722) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Backported in [#38781](https://github.com/ClickHouse/ClickHouse/issues/38781): Fix use-after-free for Map combinator that leads to incorrect result. [#38748](https://github.com/ClickHouse/ClickHouse/pull/38748) ([Azat Khuzhin](https://github.com/azat)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Backported in [#37456](https://github.com/ClickHouse/ClickHouse/issues/37456): Server might fail to start if it cannot resolve hostname of external ClickHouse dictionary. It's fixed. Fixes [#36451](https://github.com/ClickHouse/ClickHouse/issues/36451). [#36463](https://github.com/ClickHouse/ClickHouse/pull/36463) ([Alexander Tokmakov](https://github.com/tavplubix)). * Backported in [#38468](https://github.com/ClickHouse/ClickHouse/issues/38468): - Fix potential error with literals in `WHERE` for join queries. Close [#36279](https://github.com/ClickHouse/ClickHouse/issues/36279). [#36542](https://github.com/ClickHouse/ClickHouse/pull/36542) ([Vladimir C](https://github.com/vdimir)). diff --git a/docs/changelogs/v22.5.1.2079-stable.md b/docs/changelogs/v22.5.1.2079-stable.md index fdd6325fd1f..28dfa0825cf 100644 --- a/docs/changelogs/v22.5.1.2079-stable.md +++ b/docs/changelogs/v22.5.1.2079-stable.md @@ -104,7 +104,7 @@ sidebar_label: 2022 * ClickHouse builds for `PowerPC64LE` architecture are now available in universal installation script `curl https://clickhouse.com/ | sh` and by direct link `https://builds.clickhouse.com/master/powerpc64le/clickhouse`. [#37095](https://github.com/ClickHouse/ClickHouse/pull/37095) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * - Make cmake build scripts a bit more robust. [#37169](https://github.com/ClickHouse/ClickHouse/pull/37169) ([Robert Schulze](https://github.com/rschu1ze)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * The ilike() function on FixedString columns could have returned wrong results (i.e. match less than it should). [#37117](https://github.com/ClickHouse/ClickHouse/pull/37117) ([Robert Schulze](https://github.com/rschu1ze)). * Fix implicit cast for optimize_skip_unused_shards_rewrite_in. [#37153](https://github.com/ClickHouse/ClickHouse/pull/37153) ([Azat Khuzhin](https://github.com/azat)). @@ -120,7 +120,7 @@ sidebar_label: 2022 * Fix system.opentelemetry_span_log attribute.values alias to values instead of keys. [#37275](https://github.com/ClickHouse/ClickHouse/pull/37275) ([Aleksandr Razumov](https://github.com/ernado)). * Fix possible deadlock in OvercommitTracker during logging. cc @alesapin @tavplubix Fixes [#37272](https://github.com/ClickHouse/ClickHouse/issues/37272). 
[#37299](https://github.com/ClickHouse/ClickHouse/pull/37299) ([Dmitry Novik](https://github.com/novikd)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * - fix substring function range error length when `offset` and `length` is negative constant and `s` is not constant. [#33861](https://github.com/ClickHouse/ClickHouse/pull/33861) ([RogerYK](https://github.com/RogerYK)). * Accidentally ZSTD support for Arrow was not being built. This fixes [#35283](https://github.com/ClickHouse/ClickHouse/issues/35283). [#35486](https://github.com/ClickHouse/ClickHouse/pull/35486) ([Sean Lafferty](https://github.com/seanlaff)). diff --git a/docs/changelogs/v22.5.2.53-stable.md b/docs/changelogs/v22.5.2.53-stable.md index f2fb50b3e14..8af0c7dac45 100644 --- a/docs/changelogs/v22.5.2.53-stable.md +++ b/docs/changelogs/v22.5.2.53-stable.md @@ -13,7 +13,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * Backported in [#38827](https://github.com/ClickHouse/ClickHouse/issues/38827): - Change `all|noarch` packages to architecture-dependent - Fix some documentation for it - Push aarch64|arm64 packages to artifactory and release assets - Fixes [#36443](https://github.com/ClickHouse/ClickHouse/issues/36443). [#38580](https://github.com/ClickHouse/ClickHouse/pull/38580) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#37716](https://github.com/ClickHouse/ClickHouse/issues/37716): Fix unexpected errors with a clash of constant strings in aggregate function, prewhere and join. Close [#36891](https://github.com/ClickHouse/ClickHouse/issues/36891). [#37336](https://github.com/ClickHouse/ClickHouse/pull/37336) ([Vladimir C](https://github.com/vdimir)). * Backported in [#37408](https://github.com/ClickHouse/ClickHouse/issues/37408): Throw an exception when GROUPING SETS used with ROLLUP or CUBE. [#37367](https://github.com/ClickHouse/ClickHouse/pull/37367) ([Dmitry Novik](https://github.com/novikd)). diff --git a/docs/changelogs/v22.5.3.21-stable.md b/docs/changelogs/v22.5.3.21-stable.md index 994bf79f7a8..4915a41a10c 100644 --- a/docs/changelogs/v22.5.3.21-stable.md +++ b/docs/changelogs/v22.5.3.21-stable.md @@ -7,7 +7,7 @@ sidebar_label: 2022 ### ClickHouse release v22.5.3.21-stable (e03724efec5) FIXME as compared to v22.5.2.53-stable (5fd600fda9e) -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#38241](https://github.com/ClickHouse/ClickHouse/issues/38241): Fix possible crash in `Distributed` async insert in case of removing a replica from config. [#38029](https://github.com/ClickHouse/ClickHouse/pull/38029) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Backported in [#39098](https://github.com/ClickHouse/ClickHouse/issues/39098): Any allocations inside OvercommitTracker may lead to deadlock. Logging was not very informative so it's easier just to remove logging. Fixes [#37794](https://github.com/ClickHouse/ClickHouse/issues/37794). [#39030](https://github.com/ClickHouse/ClickHouse/pull/39030) ([Dmitry Novik](https://github.com/novikd)). 
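The `substring` fix [#33861] above concerns a negative constant offset applied to a non-constant string; a sketch using `materialize` to make the argument non-constant, as in the reported case:

```sql
-- A negative offset counts from the end of the string: the last five
-- characters are 'house', of which three are taken.
SELECT substring(materialize('clickhouse'), -5, 3) AS piece;  -- 'hou'
```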
diff --git a/docs/changelogs/v22.5.4.19-stable.md b/docs/changelogs/v22.5.4.19-stable.md index 24903260904..c90f45c770e 100644 --- a/docs/changelogs/v22.5.4.19-stable.md +++ b/docs/changelogs/v22.5.4.19-stable.md @@ -13,7 +13,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * Backported in [#39882](https://github.com/ClickHouse/ClickHouse/issues/39882): Former packages used to install systemd.service file to `/etc`. The files there are marked as `conf` and are not cleaned out, and not updated automatically. This PR cleans them out. [#39323](https://github.com/ClickHouse/ClickHouse/pull/39323) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#39209](https://github.com/ClickHouse/ClickHouse/issues/39209): Fix reading of sparse columns from `MergeTree` tables that store their data in S3. [#37978](https://github.com/ClickHouse/ClickHouse/pull/37978) ([Anton Popov](https://github.com/CurtizJ)). * Backported in [#39589](https://github.com/ClickHouse/ClickHouse/issues/39589): Fix data race and possible heap-buffer-overflow in Avro format. Closes [#39094](https://github.com/ClickHouse/ClickHouse/issues/39094) Closes [#33652](https://github.com/ClickHouse/ClickHouse/issues/33652). [#39498](https://github.com/ClickHouse/ClickHouse/pull/39498) ([Kruglov Pavel](https://github.com/Avogar)). diff --git a/docs/changelogs/v22.6.1.1985-stable.md b/docs/changelogs/v22.6.1.1985-stable.md index 0437f682789..c915d24fe00 100644 --- a/docs/changelogs/v22.6.1.1985-stable.md +++ b/docs/changelogs/v22.6.1.1985-stable.md @@ -119,7 +119,7 @@ sidebar_label: 2022 * Fix overly aggressive stripping which removed the embedded hash required for checking the consistency of the executable. [#37993](https://github.com/ClickHouse/ClickHouse/pull/37993) ([Robert Schulze](https://github.com/rschu1ze)). * fix MacOS build compressor faild. [#38007](https://github.com/ClickHouse/ClickHouse/pull/38007) ([chen](https://github.com/xiedeyantu)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Fix `GROUP BY` `AggregateFunction` (i.e. you `GROUP BY` by the column that has `AggregateFunction` type). [#37093](https://github.com/ClickHouse/ClickHouse/pull/37093) ([Azat Khuzhin](https://github.com/azat)). * Fix possible heap-use-after-free error when reading system.projection_parts and system.projection_parts_columns . This fixes [#37184](https://github.com/ClickHouse/ClickHouse/issues/37184). [#37185](https://github.com/ClickHouse/ClickHouse/pull/37185) ([Amos Bird](https://github.com/amosbird)). @@ -169,7 +169,7 @@ sidebar_label: 2022 * when open enable_filesystem_query_cache_limit, throw Reserved cache size exceeds the remaining cache size. [#38004](https://github.com/ClickHouse/ClickHouse/pull/38004) ([chen](https://github.com/xiedeyantu)). * Query, containing ORDER BY ... WITH FILL, can generate extra rows when multiple WITH FILL columns are present. [#38074](https://github.com/ClickHouse/ClickHouse/pull/38074) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Fix converting types for UNION queries (may produce LOGICAL_ERROR). 
[#34775](https://github.com/ClickHouse/ClickHouse/pull/34775) ([Azat Khuzhin](https://github.com/azat)). * TTL merge may not be scheduled again if BackgroundExecutor is busy. --merges_with_ttl_counter is increased in selectPartsToMerge() --merge task will be ignored if BackgroundExecutor is busy --merges_with_ttl_counter will not be decrease. [#36387](https://github.com/ClickHouse/ClickHouse/pull/36387) ([lthaooo](https://github.com/lthaooo)). diff --git a/docs/changelogs/v22.6.2.12-stable.md b/docs/changelogs/v22.6.2.12-stable.md index d8c1cd31936..3c0f2116f1d 100644 --- a/docs/changelogs/v22.6.2.12-stable.md +++ b/docs/changelogs/v22.6.2.12-stable.md @@ -10,7 +10,7 @@ sidebar_label: 2022 #### Improvement * Backported in [#38484](https://github.com/ClickHouse/ClickHouse/issues/38484): Improve the stability for hive storage integration test. Move the data prepare step into test.py. [#38260](https://github.com/ClickHouse/ClickHouse/pull/38260) ([lgbo](https://github.com/lgbo-ustc)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#38404](https://github.com/ClickHouse/ClickHouse/issues/38404): Fix bug with nested short-circuit functions that led to execution of arguments even if condition is false. Closes [#38040](https://github.com/ClickHouse/ClickHouse/issues/38040). [#38173](https://github.com/ClickHouse/ClickHouse/pull/38173) ([Kruglov Pavel](https://github.com/Avogar)). diff --git a/docs/changelogs/v22.6.3.35-stable.md b/docs/changelogs/v22.6.3.35-stable.md index 0a86c136d35..86b8282e075 100644 --- a/docs/changelogs/v22.6.3.35-stable.md +++ b/docs/changelogs/v22.6.3.35-stable.md @@ -13,7 +13,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * Backported in [#38883](https://github.com/ClickHouse/ClickHouse/issues/38883): Add `clickhouse-diagnostics` binary to the packages. [#38647](https://github.com/ClickHouse/ClickHouse/pull/38647) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#38690](https://github.com/ClickHouse/ClickHouse/issues/38690): Fix incorrect columns order in subqueries of UNION (in case of duplicated columns in subselects may produce incorrect result). [#37887](https://github.com/ClickHouse/ClickHouse/pull/37887) ([Azat Khuzhin](https://github.com/azat)). * Backported in [#38500](https://github.com/ClickHouse/ClickHouse/issues/38500): Do not allow recursive usage of OvercommitTracker during logging. Fixes [#37794](https://github.com/ClickHouse/ClickHouse/issues/37794) cc @tavplubix @davenger. [#38246](https://github.com/ClickHouse/ClickHouse/pull/38246) ([Dmitry Novik](https://github.com/novikd)). diff --git a/docs/changelogs/v22.6.4.35-stable.md b/docs/changelogs/v22.6.4.35-stable.md index 5c4644f735a..2b5537b5bf9 100644 --- a/docs/changelogs/v22.6.4.35-stable.md +++ b/docs/changelogs/v22.6.4.35-stable.md @@ -10,7 +10,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * Backported in [#38822](https://github.com/ClickHouse/ClickHouse/issues/38822): - Change `all|noarch` packages to architecture-dependent - Fix some documentation for it - Push aarch64|arm64 packages to artifactory and release assets - Fixes [#36443](https://github.com/ClickHouse/ClickHouse/issues/36443). [#38580](https://github.com/ClickHouse/ClickHouse/pull/38580) ([Mikhail f. 
Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#38242](https://github.com/ClickHouse/ClickHouse/issues/38242): Fix possible crash in `Distributed` async insert in case of removing a replica from config. [#38029](https://github.com/ClickHouse/ClickHouse/pull/38029) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Backported in [#38865](https://github.com/ClickHouse/ClickHouse/issues/38865): Fix s3 seekable reads with parallel read buffer. (Affected memory usage during query). Closes [#38258](https://github.com/ClickHouse/ClickHouse/issues/38258). [#38802](https://github.com/ClickHouse/ClickHouse/pull/38802) ([Kseniia Sumarokova](https://github.com/kssenii)). diff --git a/docs/changelogs/v22.6.5.22-stable.md b/docs/changelogs/v22.6.5.22-stable.md index 5965f63df14..edb6bdf7c2d 100644 --- a/docs/changelogs/v22.6.5.22-stable.md +++ b/docs/changelogs/v22.6.5.22-stable.md @@ -13,7 +13,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * Backported in [#39883](https://github.com/ClickHouse/ClickHouse/issues/39883): Former packages used to install systemd.service file to `/etc`. The files there are marked as `conf` and are not cleaned out, and not updated automatically. This PR cleans them out. [#39323](https://github.com/ClickHouse/ClickHouse/pull/39323) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#39207](https://github.com/ClickHouse/ClickHouse/issues/39207): Fix reading of sparse columns from `MergeTree` tables that store their data in S3. [#37978](https://github.com/ClickHouse/ClickHouse/pull/37978) ([Anton Popov](https://github.com/CurtizJ)). * Backported in [#38932](https://github.com/ClickHouse/ClickHouse/issues/38932): Fix `parallel_view_processing=1` with `optimize_trivial_insert_select=1`. Fix `max_insert_threads` while pushing to views. [#38731](https://github.com/ClickHouse/ClickHouse/pull/38731) ([Azat Khuzhin](https://github.com/azat)). diff --git a/docs/changelogs/v22.6.6.16-stable.md b/docs/changelogs/v22.6.6.16-stable.md index 4d44621454b..d4d25771326 100644 --- a/docs/changelogs/v22.6.6.16-stable.md +++ b/docs/changelogs/v22.6.6.16-stable.md @@ -7,7 +7,7 @@ sidebar_label: 2022 ### ClickHouse release v22.6.6.16-stable (d2a33ebc822) FIXME as compared to v22.6.5.22-stable (47ca5f14a34) -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#40162](https://github.com/ClickHouse/ClickHouse/issues/40162): fix HashMethodOneNumber get wrong key value when column is const. [#40020](https://github.com/ClickHouse/ClickHouse/pull/40020) ([Duc Canh Le](https://github.com/canhld94)). * Backported in [#40124](https://github.com/ClickHouse/ClickHouse/issues/40124): Fix bug in collectFilesToSkip() by adding correct file extension(.idx or idx2) for indexes to be recalculated, avoid wrong hard links. Fixed [#39896](https://github.com/ClickHouse/ClickHouse/issues/39896). [#40095](https://github.com/ClickHouse/ClickHouse/pull/40095) ([Jianmei Zhang](https://github.com/zhangjmruc)). 
diff --git a/docs/changelogs/v22.6.7.7-stable.md b/docs/changelogs/v22.6.7.7-stable.md index f5351cc03ed..0b4cc6836f7 100644 --- a/docs/changelogs/v22.6.7.7-stable.md +++ b/docs/changelogs/v22.6.7.7-stable.md @@ -10,7 +10,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * Backported in [#40692](https://github.com/ClickHouse/ClickHouse/issues/40692): Fix TGZ packages. [#40681](https://github.com/ClickHouse/ClickHouse/pull/40681) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#40531](https://github.com/ClickHouse/ClickHouse/issues/40531): Proxy resolver stop on first successful request to endpoint. [#40353](https://github.com/ClickHouse/ClickHouse/pull/40353) ([Maksim Kita](https://github.com/kitaisreal)). * Backported in [#40623](https://github.com/ClickHouse/ClickHouse/issues/40623): Fix potential dataloss due to a bug in AWS SDK (https://github.com/aws/aws-sdk-cpp/issues/658). Bug can be triggered only when clickhouse is used over S3. [#40506](https://github.com/ClickHouse/ClickHouse/pull/40506) ([alesapin](https://github.com/alesapin)). diff --git a/docs/changelogs/v22.6.8.35-stable.md b/docs/changelogs/v22.6.8.35-stable.md index e68384d3d9c..b69cabffd4d 100644 --- a/docs/changelogs/v22.6.8.35-stable.md +++ b/docs/changelogs/v22.6.8.35-stable.md @@ -13,7 +13,7 @@ sidebar_label: 2022 #### Bug Fix * Backported in [#41274](https://github.com/ClickHouse/ClickHouse/issues/41274): Fix memory safety issues with functions `encrypt` and `contingency` if Array of Nullable is used as an argument. This fixes [#41004](https://github.com/ClickHouse/ClickHouse/issues/41004). [#40195](https://github.com/ClickHouse/ClickHouse/pull/40195) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#41282](https://github.com/ClickHouse/ClickHouse/issues/41282): Fix unused unknown columns introduced by WITH statement. This fixes [#37812](https://github.com/ClickHouse/ClickHouse/issues/37812) . [#39131](https://github.com/ClickHouse/ClickHouse/pull/39131) ([Amos Bird](https://github.com/amosbird)). * Backported in [#40905](https://github.com/ClickHouse/ClickHouse/issues/40905): Fix potential deadlock in WriteBufferFromS3 during task scheduling failure. [#40070](https://github.com/ClickHouse/ClickHouse/pull/40070) ([Maksim Kita](https://github.com/kitaisreal)). diff --git a/docs/changelogs/v22.6.9.11-stable.md b/docs/changelogs/v22.6.9.11-stable.md index 5beb9171d9e..42cf0948ecc 100644 --- a/docs/changelogs/v22.6.9.11-stable.md +++ b/docs/changelogs/v22.6.9.11-stable.md @@ -13,7 +13,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * Backported in [#41558](https://github.com/ClickHouse/ClickHouse/issues/41558): Add `source` field to deb packages, update `nfpm`. [#41531](https://github.com/ClickHouse/ClickHouse/pull/41531) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#41504](https://github.com/ClickHouse/ClickHouse/issues/41504): Writing data in Apache `ORC` format might lead to a buffer overrun. 
[#41458](https://github.com/ClickHouse/ClickHouse/pull/41458) ([Alexey Milovidov](https://github.com/alexey-milovidov)). diff --git a/docs/changelogs/v22.7.1.2484-stable.md b/docs/changelogs/v22.7.1.2484-stable.md index 3f90b3691ea..7464b0449ee 100644 --- a/docs/changelogs/v22.7.1.2484-stable.md +++ b/docs/changelogs/v22.7.1.2484-stable.md @@ -128,7 +128,7 @@ sidebar_label: 2022 * Fix LSan by fixing getauxval(). [#39299](https://github.com/ClickHouse/ClickHouse/pull/39299) ([Azat Khuzhin](https://github.com/azat)). * Adapt universal installation script for FreeBSD. [#39302](https://github.com/ClickHouse/ClickHouse/pull/39302) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Fix projection exception when aggregation keys are wrapped inside other functions. This fixes [#37151](https://github.com/ClickHouse/ClickHouse/issues/37151). [#37155](https://github.com/ClickHouse/ClickHouse/pull/37155) ([Amos Bird](https://github.com/amosbird)). * Fix possible logical error `... with argument with type Nothing and default implementation for Nothing is expected to return result with type Nothing, got ...` in some functions. Closes: [#37610](https://github.com/ClickHouse/ClickHouse/issues/37610) Closes: [#37741](https://github.com/ClickHouse/ClickHouse/issues/37741). [#37759](https://github.com/ClickHouse/ClickHouse/pull/37759) ([Kruglov Pavel](https://github.com/Avogar)). @@ -198,7 +198,7 @@ sidebar_label: 2022 * Fix UB (stack-use-after-scope) in extractAll(). [#39397](https://github.com/ClickHouse/ClickHouse/pull/39397) ([Azat Khuzhin](https://github.com/azat)). * Fix incorrect query result when trivial count optimization is in effect with array join. This fixes [#39431](https://github.com/ClickHouse/ClickHouse/issues/39431). [#39444](https://github.com/ClickHouse/ClickHouse/pull/39444) ([Amos Bird](https://github.com/amosbird)). -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) +#### Bug Fix (user-visible misbehaviour in official stable release) * Disable send_logs_level for INSERT into Distributed to avoid a possible hang. [#35075](https://github.com/ClickHouse/ClickHouse/pull/35075) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#39613](https://github.com/ClickHouse/ClickHouse/issues/39613): Fix bug with maxsplit argument for splitByChar, which was not working correctly. [#39552](https://github.com/ClickHouse/ClickHouse/pull/39552) ([filimonov](https://github.com/filimonov)). diff --git a/docs/changelogs/v22.7.3.5-stable.md b/docs/changelogs/v22.7.3.5-stable.md index 62a5dfee611..b55b16509d4 100644 --- a/docs/changelogs/v22.7.3.5-stable.md +++ b/docs/changelogs/v22.7.3.5-stable.md @@ -11,7 +11,7 @@ sidebar_label: 2022 * Backported in [#39884](https://github.com/ClickHouse/ClickHouse/issues/39884): Former packages used to install systemd.service file to `/etc`. The files there are marked as `conf` and are not cleaned out, and not updated automatically. This PR cleans them out. [#39323](https://github.com/ClickHouse/ClickHouse/pull/39323) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Backported in [#39884](https://github.com/ClickHouse/ClickHouse/issues/39884): Former packages used to install systemd.service file to `/etc`. The files there are marked as `conf` and are not cleaned out, and not updated automatically. This PR cleans them out. [#39323](https://github.com/ClickHouse/ClickHouse/pull/39323) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#40045](https://github.com/ClickHouse/ClickHouse/issues/40045): Fix big memory usage during fetches. Fixes [#39915](https://github.com/ClickHouse/ClickHouse/issues/39915). [#39990](https://github.com/ClickHouse/ClickHouse/pull/39990) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Backported in [#40045](https://github.com/ClickHouse/ClickHouse/issues/40045): Fix big memory usage during fetches. Fixes [#39915](https://github.com/ClickHouse/ClickHouse/issues/39915). [#39990](https://github.com/ClickHouse/ClickHouse/pull/39990) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). diff --git a/docs/changelogs/v22.7.4.16-stable.md b/docs/changelogs/v22.7.4.16-stable.md index 52d68283a2f..4847ef8cf64 100644 --- a/docs/changelogs/v22.7.4.16-stable.md +++ b/docs/changelogs/v22.7.4.16-stable.md @@ -7,7 +7,7 @@ sidebar_label: 2022 ### ClickHouse release v22.7.4.16-stable (0b9272f8fdc) FIXME as compared to v22.7.3.5-stable (e140b8b5f3a) -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#40163](https://github.com/ClickHouse/ClickHouse/issues/40163): fix HashMethodOneNumber get wrong key value when column is const. [#40020](https://github.com/ClickHouse/ClickHouse/pull/40020) ([Duc Canh Le](https://github.com/canhld94)). * Backported in [#40125](https://github.com/ClickHouse/ClickHouse/issues/40125): Fix bug in collectFilesToSkip() by adding correct file extension(.idx or idx2) for indexes to be recalculated, avoid wrong hard links. Fixed [#39896](https://github.com/ClickHouse/ClickHouse/issues/39896). [#40095](https://github.com/ClickHouse/ClickHouse/pull/40095) ([Jianmei Zhang](https://github.com/zhangjmruc)). 
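For context on the `splitByChar` fix ([#39552](https://github.com/ClickHouse/ClickHouse/pull/39552)) above, a minimal sketch of the call shape involved — the values are illustrative, and the exact output of the capped form depends on the server version's `max_substrings` semantics:

```
-- The optional third argument caps how many substrings are returned;
-- the bug made this limit misbehave.
SELECT splitByChar(',', 'a,b,c,d');     -- ['a','b','c','d']
SELECT splitByChar(',', 'a,b,c,d', 2);  -- at most 2 elements
```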
diff --git a/docs/changelogs/v22.7.5.13-stable.md b/docs/changelogs/v22.7.5.13-stable.md index 0de9e9a26aa..24dbc5c9e7f 100644 --- a/docs/changelogs/v22.7.5.13-stable.md +++ b/docs/changelogs/v22.7.5.13-stable.md @@ -10,7 +10,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * Backported in [#40693](https://github.com/ClickHouse/ClickHouse/issues/40693): Fix TGZ packages. [#40681](https://github.com/ClickHouse/ClickHouse/pull/40681) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#40542](https://github.com/ClickHouse/ClickHouse/issues/40542): Fix potential deadlock in WriteBufferFromS3 during task scheduling failure. [#40070](https://github.com/ClickHouse/ClickHouse/pull/40070) ([Maksim Kita](https://github.com/kitaisreal)). * Backported in [#40450](https://github.com/ClickHouse/ClickHouse/issues/40450): Fix rare bug with column TTL for MergeTree engines family: In case of repeated vertical merge the error `Cannot unlink file ColumnName.bin ... No such file or directory.` could happen. [#40346](https://github.com/ClickHouse/ClickHouse/pull/40346) ([alesapin](https://github.com/alesapin)). diff --git a/docs/changelogs/v22.7.6.74-stable.md b/docs/changelogs/v22.7.6.74-stable.md index 9060375ce8e..3cf2edfccd1 100644 --- a/docs/changelogs/v22.7.6.74-stable.md +++ b/docs/changelogs/v22.7.6.74-stable.md @@ -16,7 +16,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * Backported in [#41559](https://github.com/ClickHouse/ClickHouse/issues/41559): Add `source` field to deb packages, update `nfpm`. [#41531](https://github.com/ClickHouse/ClickHouse/pull/41531) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#41283](https://github.com/ClickHouse/ClickHouse/issues/41283): Fix unused unknown columns introduced by WITH statement. This fixes [#37812](https://github.com/ClickHouse/ClickHouse/issues/37812) . [#39131](https://github.com/ClickHouse/ClickHouse/pull/39131) ([Amos Bird](https://github.com/amosbird)). * Backported in [#40865](https://github.com/ClickHouse/ClickHouse/issues/40865): - Fix crash while parsing values of type `Object` that contains arrays of variadic dimension. [#40483](https://github.com/ClickHouse/ClickHouse/pull/40483) ([Duc Canh Le](https://github.com/canhld94)). diff --git a/docs/changelogs/v22.7.7.24-stable.md b/docs/changelogs/v22.7.7.24-stable.md index cc3a83c5d4c..16e56156ff6 100644 --- a/docs/changelogs/v22.7.7.24-stable.md +++ b/docs/changelogs/v22.7.7.24-stable.md @@ -14,7 +14,7 @@ sidebar_label: 2022 * Backported in [#42329](https://github.com/ClickHouse/ClickHouse/issues/42329): Update cctz to the latest master, update tzdb to 2020e. [#42273](https://github.com/ClickHouse/ClickHouse/pull/42273) ([Dom Del Nano](https://github.com/ddelnano)). * Backported in [#42359](https://github.com/ClickHouse/ClickHouse/issues/42359): Update tzdata to 2022e to support the new timezone changes. Palestine transitions are now Saturdays at 02:00. Simplify three Ukraine zones into one. Jordan and Syria switch from +02/+03 with DST to year-round +03. (https://data.iana.org/time-zones/tzdb/NEWS). This closes [#42252](https://github.com/ClickHouse/ClickHouse/issues/42252). 
[#42327](https://github.com/ClickHouse/ClickHouse/pull/42327) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#42268](https://github.com/ClickHouse/ClickHouse/issues/42268): Fix reusing of files > 4GB from base backup. [#42146](https://github.com/ClickHouse/ClickHouse/pull/42146) ([Azat Khuzhin](https://github.com/azat)). * Backported in [#42299](https://github.com/ClickHouse/ClickHouse/issues/42299): Fix a bug with projections and the `aggregate_functions_null_for_empty` setting. This bug is very rare and appears only if you enable the `aggregate_functions_null_for_empty` setting in the server's config. This closes [#41647](https://github.com/ClickHouse/ClickHouse/issues/41647). [#42198](https://github.com/ClickHouse/ClickHouse/pull/42198) ([Alexey Milovidov](https://github.com/alexey-milovidov)). diff --git a/docs/changelogs/v22.8.1.2097-lts.md b/docs/changelogs/v22.8.1.2097-lts.md index ef454a7e283..b6b634f4826 100644 --- a/docs/changelogs/v22.8.1.2097-lts.md +++ b/docs/changelogs/v22.8.1.2097-lts.md @@ -106,7 +106,7 @@ sidebar_label: 2022 * Support build with `clang-16` (trunk). This closes [#39949](https://github.com/ClickHouse/ClickHouse/issues/39949). [#40181](https://github.com/ClickHouse/ClickHouse/pull/40181) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Prepare RISC-V 64 build to run in CI. This is for [#40141](https://github.com/ClickHouse/ClickHouse/issues/40141). [#40197](https://github.com/ClickHouse/ClickHouse/pull/40197) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Fixed query hanging for SELECT with ORDER BY WITH FILL with different date/time types. [#37849](https://github.com/ClickHouse/ClickHouse/pull/37849) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). * Fix ORDER BY that matches projections ORDER BY (before it simply returns unsorted result). [#38725](https://github.com/ClickHouse/ClickHouse/pull/38725) ([Azat Khuzhin](https://github.com/azat)). diff --git a/docs/changelogs/v22.8.10.29-lts.md b/docs/changelogs/v22.8.10.29-lts.md index ac41d71650c..33ae27b6da8 100644 --- a/docs/changelogs/v22.8.10.29-lts.md +++ b/docs/changelogs/v22.8.10.29-lts.md @@ -13,7 +13,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * Backported in [#43051](https://github.com/ClickHouse/ClickHouse/issues/43051): Wait for all files are in sync before archiving them in integration tests. [#42891](https://github.com/ClickHouse/ClickHouse/pull/42891) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#43513](https://github.com/ClickHouse/ClickHouse/issues/43513): - Fix several buffer over-reads. [#43159](https://github.com/ClickHouse/ClickHouse/pull/43159) ([Raúl Marín](https://github.com/Algunenano)). * Backported in [#43428](https://github.com/ClickHouse/ClickHouse/issues/43428): Fixed queries with `SAMPLE BY` with prewhere optimization on tables using `Merge` engine. [#43315](https://github.com/ClickHouse/ClickHouse/pull/43315) ([Antonio Andelic](https://github.com/antonio2368)). 
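To illustrate what the `aggregate_functions_null_for_empty` entry above (v22.7.7.24) refers to — a sketch, not taken from the PR; the setting is enabled per-session here, while the bug required it in the server's config:

```
-- With the setting on, aggregate functions are rewritten to their -OrNull
-- variants, so an empty set yields NULL instead of a default value.
SET aggregate_functions_null_for_empty = 1;
SELECT sum(number) FROM numbers(0);  -- NULL rather than 0
```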
diff --git a/docs/changelogs/v22.8.11.15-lts.md b/docs/changelogs/v22.8.11.15-lts.md index 337eeba9187..dbe765ca4a2 100644 --- a/docs/changelogs/v22.8.11.15-lts.md +++ b/docs/changelogs/v22.8.11.15-lts.md @@ -10,7 +10,7 @@ sidebar_label: 2022 #### Bug Fix * Backported in [#43098](https://github.com/ClickHouse/ClickHouse/issues/43098): Updated normaliser to clone the alias AST. Resolves [#42452](https://github.com/ClickHouse/ClickHouse/issues/42452). Implementation: * Updated QueryNormalizer to clone the alias AST when it is replaced. Previously, just assigning the same AST led to an exception in LogicalExpressionsOptimizer as it would be the same parent being inserted again. * This bug is not seen with the new analyser (allow_experimental_analyzer), so no changes for it. I added a test for the same. [#42827](https://github.com/ClickHouse/ClickHouse/pull/42827) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#43751](https://github.com/ClickHouse/ClickHouse/issues/43751): Fix an issue with an exception reported while trying to read a Parquet file from S3 into ClickHouse. [#43297](https://github.com/ClickHouse/ClickHouse/pull/43297) ([Arthur Passos](https://github.com/arthurpassos)). * Backported in [#43617](https://github.com/ClickHouse/ClickHouse/issues/43617): Fix sumMap() for Nullable(Decimal()). [#43414](https://github.com/ClickHouse/ClickHouse/pull/43414) ([Azat Khuzhin](https://github.com/azat)). diff --git a/docs/changelogs/v22.8.12.45-lts.md b/docs/changelogs/v22.8.12.45-lts.md index 7412784419c..9fab9daeb95 100644 --- a/docs/changelogs/v22.8.12.45-lts.md +++ b/docs/changelogs/v22.8.12.45-lts.md @@ -16,7 +16,7 @@ sidebar_label: 2023 * Backported in [#44378](https://github.com/ClickHouse/ClickHouse/issues/44378): In rare cases, we don't rebuild binaries, because another task with a similar prefix succeeded. E.g. `binary_darwin` didn't restart because of `binary_darwin_aarch64`. [#44311](https://github.com/ClickHouse/ClickHouse/pull/44311) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Backported in [#44558](https://github.com/ClickHouse/ClickHouse/issues/44558): Retry the integration tests on compressing errors. [#44529](https://github.com/ClickHouse/ClickHouse/pull/44529) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#44751](https://github.com/ClickHouse/ClickHouse/issues/44751): [#40651](https://github.com/ClickHouse/ClickHouse/issues/40651) [#41404](https://github.com/ClickHouse/ClickHouse/issues/41404). [#42126](https://github.com/ClickHouse/ClickHouse/pull/42126) ([Alexander Gololobov](https://github.com/davenger)). * Backported in [#43525](https://github.com/ClickHouse/ClickHouse/issues/43525): Fix incorrect UserTimeMicroseconds/SystemTimeMicroseconds accounting. [#42791](https://github.com/ClickHouse/ClickHouse/pull/42791) ([Azat Khuzhin](https://github.com/azat)). @@ -39,4 +39,3 @@ sidebar_label: 2023 * Add check for submodules sanity [#44386](https://github.com/ClickHouse/ClickHouse/pull/44386) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Implement a custom central checkout action [#44399](https://github.com/ClickHouse/ClickHouse/pull/44399) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Do not check read result consistency when unwinding [#44956](https://github.com/ClickHouse/ClickHouse/pull/44956) ([Alexander Gololobov](https://github.com/davenger)). - diff --git a/docs/changelogs/v22.8.13.20-lts.md b/docs/changelogs/v22.8.13.20-lts.md index d8dd1bd2b1c..0734f40bf3e 100644 --- a/docs/changelogs/v22.8.13.20-lts.md +++ b/docs/changelogs/v22.8.13.20-lts.md @@ -7,7 +7,7 @@ sidebar_label: 2023 ### ClickHouse release v22.8.13.20-lts (e4817946d18) FIXME as compared to v22.8.12.45-lts (86b0ecd5d51) -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#45565](https://github.com/ClickHouse/ClickHouse/issues/45565): Fix positional arguments exception Positional argument out of bounds. Closes [#40634](https://github.com/ClickHouse/ClickHouse/issues/40634). [#41189](https://github.com/ClickHouse/ClickHouse/pull/41189) ([Kseniia Sumarokova](https://github.com/kssenii)). * Backported in [#44997](https://github.com/ClickHouse/ClickHouse/issues/44997): Another fix for `Cannot read all data` error which could happen while reading `LowCardinality` dictionary from remote fs. Fixes [#44709](https://github.com/ClickHouse/ClickHouse/issues/44709). [#44875](https://github.com/ClickHouse/ClickHouse/pull/44875) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). @@ -21,4 +21,3 @@ sidebar_label: 2023 * Get rid of artifactory in favor of r2 + ch-repos-manager [#45421](https://github.com/ClickHouse/ClickHouse/pull/45421) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Trim refs/tags/ from GITHUB_TAG in release workflow [#45636](https://github.com/ClickHouse/ClickHouse/pull/45636) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Merge pull request [#38262](https://github.com/ClickHouse/ClickHouse/issues/38262) from PolyProgrammist/fix-ordinary-system-un… [#45650](https://github.com/ClickHouse/ClickHouse/pull/45650) ([alesapin](https://github.com/alesapin)). - diff --git a/docs/changelogs/v22.8.14.53-lts.md b/docs/changelogs/v22.8.14.53-lts.md new file mode 100644 index 00000000000..3cceb3475b6 --- /dev/null +++ b/docs/changelogs/v22.8.14.53-lts.md @@ -0,0 +1,39 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.8.14.53-lts (4ea67c40077) FIXME as compared to v22.8.13.20-lts (e4817946d18) + +#### Performance Improvement +* Backported in [#45845](https://github.com/ClickHouse/ClickHouse/issues/45845): Fixed performance of short `SELECT` queries that read from tables with large number of`Array`/`Map`/`Nested` columns. [#45630](https://github.com/ClickHouse/ClickHouse/pull/45630) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#46374](https://github.com/ClickHouse/ClickHouse/issues/46374): Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#46358](https://github.com/ClickHouse/ClickHouse/issues/46358): Allow using Vertical merge algorithm with parts in Compact format. This will allow ClickHouse server to use much less memory for background operations. This closes [#46084](https://github.com/ClickHouse/ClickHouse/issues/46084). [#46282](https://github.com/ClickHouse/ClickHouse/pull/46282) ([Anton Popov](https://github.com/CurtizJ)). 
+ +#### Build/Testing/Packaging Improvement +* Backported in [#46112](https://github.com/ClickHouse/ClickHouse/issues/46112): Remove the dependency on the `adduser` tool from the packages, because we don't use it. This fixes [#44934](https://github.com/ClickHouse/ClickHouse/issues/44934). [#45011](https://github.com/ClickHouse/ClickHouse/pull/45011) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#46482](https://github.com/ClickHouse/ClickHouse/issues/46482): Get rid of unnecessary build for standalone clickhouse-keeper. [#46367](https://github.com/ClickHouse/ClickHouse/pull/46367) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#46505](https://github.com/ClickHouse/ClickHouse/issues/46505): Some time ago the ccache compression was changed to `zst`, but `gz` archives are downloaded by default. This fixes it by prioritizing the zst archive. [#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in official stable release) + +* Backported in [#45908](https://github.com/ClickHouse/ClickHouse/issues/45908): Fixed bug with non-parsable default value for EPHEMERAL column in table metadata. [#44026](https://github.com/ClickHouse/ClickHouse/pull/44026) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#46238](https://github.com/ClickHouse/ClickHouse/issues/46238): A couple of seg faults have been reported around `c-ares`. All of the recent stack traces observed fail on inserting into `std::unordered_set<>`. I believe I have found the root cause of this; it seems to be unprocessed queries. Prior to this PR, CH calls `poll` to wait on the file descriptors in the `c-ares` channel. According to the [poll docs](https://man7.org/linux/man-pages/man2/poll.2.html), a negative return value means an error has occurred. Because of this, we would abort the execution and return failure. The problem is that `poll` will also return a negative value if a system interrupt occurs. A system interrupt does not mean the processing has failed or ended, but we would abort it anyway because we were checking for negative values. Once the execution is aborted, the whole stack is destroyed, which includes the `std::unordered_set` passed to the `void *` parameter of the c-ares callback. Once c-ares completed the request, the callback would be invoked and would access an invalid memory address causing a segfault. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)). +* Backported in [#45727](https://github.com/ClickHouse/ClickHouse/issues/45727): Fix key description when encountering duplicate primary keys. This can happen in projections. See [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590) for details. [#45686](https://github.com/ClickHouse/ClickHouse/pull/45686) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#46394](https://github.com/ClickHouse/ClickHouse/issues/46394): Fix `SYSTEM UNFREEZE` queries failing with the exception `CANNOT_PARSE_INPUT_ASSERTION_FAILED`. [#46325](https://github.com/ClickHouse/ClickHouse/pull/46325) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Backported in [#46442](https://github.com/ClickHouse/ClickHouse/issues/46442): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)).
+* Backported in [#46674](https://github.com/ClickHouse/ClickHouse/issues/46674): Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and a logical error `Bad cast` in a debug build. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#46879](https://github.com/ClickHouse/ClickHouse/issues/46879): Fix MSan report in the `maxIntersections` function. This closes [#43126](https://github.com/ClickHouse/ClickHouse/issues/43126). [#46847](https://github.com/ClickHouse/ClickHouse/pull/46847) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#46871](https://github.com/ClickHouse/ClickHouse/issues/46871): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Another attempt to fix automerge, or at least to have debug footprint [#45476](https://github.com/ClickHouse/ClickHouse/pull/45476) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add check for running workflows to merge_pr.py [#45803](https://github.com/ClickHouse/ClickHouse/pull/45803) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Get rid of progress timestamps in release publishing [#45818](https://github.com/ClickHouse/ClickHouse/pull/45818) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add necessary dependency for sanitizers [#45959](https://github.com/ClickHouse/ClickHouse/pull/45959) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). diff --git a/docs/changelogs/v22.8.15.23-lts.md b/docs/changelogs/v22.8.15.23-lts.md new file mode 100644 index 00000000000..5f49dfb1757 --- /dev/null +++ b/docs/changelogs/v22.8.15.23-lts.md @@ -0,0 +1,27 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.8.15.23-lts (d36fa168bbf) FIXME as compared to v22.8.14.53-lts (4ea67c40077) + +#### Improvement +* Backported in [#46981](https://github.com/ClickHouse/ClickHouse/issues/46981): - Apply `ALTER TABLE table_name ON CLUSTER cluster MOVE PARTITION|PART partition_expr TO DISK|VOLUME 'disk_name'` to all replicas, because `ALTER TABLE t MOVE` is not replicated. [#46402](https://github.com/ClickHouse/ClickHouse/pull/46402) ([lizhuoyu5](https://github.com/lzydmxy)). + +#### Bug Fix +* Backported in [#47336](https://github.com/ClickHouse/ClickHouse/issues/47336): Sometimes a change to a role was not reflected in the access rights of a user who uses that role. This PR fixes that. [#46772](https://github.com/ClickHouse/ClickHouse/pull/46772) ([Vitaly Baranov](https://github.com/vitlibar)).
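A hypothetical illustration of the role fix ([#46772](https://github.com/ClickHouse/ClickHouse/pull/46772)) that closes the section above — the names are invented, and this is only the shape of the scenario, not the PR's test:

```
-- Changing a role's grants must be reflected for users holding the role.
CREATE ROLE accountant;
GRANT SELECT ON db.* TO accountant;
GRANT accountant TO mira;
REVOKE SELECT ON db.* FROM accountant;  -- before the fix, the user could
                                        -- sometimes keep the old rights
```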
+ +#### Bug Fix (user-visible misbehavior in official stable release) + +* Backported in [#46901](https://github.com/ClickHouse/ClickHouse/issues/46901): - Fix incorrect alias recursion in QueryNormalizer. [#46609](https://github.com/ClickHouse/ClickHouse/pull/46609) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#47156](https://github.com/ClickHouse/ClickHouse/issues/47156): - Fix arithmetic operations in aggregate optimization with `min` and `max`. [#46705](https://github.com/ClickHouse/ClickHouse/pull/46705) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#46987](https://github.com/ClickHouse/ClickHouse/issues/46987): Fix result of LIKE predicates which translate to substring searches and contain quoted non-LIKE metacharacters. [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#47357](https://github.com/ClickHouse/ClickHouse/issues/47357): Fix possible deadlock on distributed query cancellation. [#47161](https://github.com/ClickHouse/ClickHouse/pull/47161) ([Kruglov Pavel](https://github.com/Avogar)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update typing for a new PyGithub version [#47123](https://github.com/ClickHouse/ClickHouse/pull/47123) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add a manual trigger for release workflow [#47302](https://github.com/ClickHouse/ClickHouse/pull/47302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). diff --git a/docs/changelogs/v22.8.16.32-lts.md b/docs/changelogs/v22.8.16.32-lts.md new file mode 100644 index 00000000000..27ec1f1f145 --- /dev/null +++ b/docs/changelogs/v22.8.16.32-lts.md @@ -0,0 +1,29 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.8.16.32-lts (7c4be737bd0) FIXME as compared to v22.8.15.23-lts (d36fa168bbf) + +#### Build/Testing/Packaging Improvement +* Backported in [#48344](https://github.com/ClickHouse/ClickHouse/issues/48344): Use sccache as a replacement for ccache and using S3 as cache backend. [#46240](https://github.com/ClickHouse/ClickHouse/pull/46240) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#48250](https://github.com/ClickHouse/ClickHouse/issues/48250): The `clickhouse/clickhouse-keeper` image used to be pushed only with tags `-alpine`, e.g. `latest-alpine`. As it was suggested in https://github.com/ClickHouse/examples/pull/2, now it will be pushed as suffixless too. [#48236](https://github.com/ClickHouse/ClickHouse/pull/48236) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix bug in zero-copy replication disk choice during fetch [#47010](https://github.com/ClickHouse/ClickHouse/pull/47010) ([alesapin](https://github.com/alesapin)). +* Fix query parameters [#47488](https://github.com/ClickHouse/ClickHouse/pull/47488) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix wait for zero copy lock during move [#47631](https://github.com/ClickHouse/ClickHouse/pull/47631) ([alesapin](https://github.com/alesapin)). +* Fix crash in polygonsSymDifferenceCartesian [#47702](https://github.com/ClickHouse/ClickHouse/pull/47702) ([pufit](https://github.com/pufit)). 
+* Backport to 22.8: Fix moving broken parts to the detached for the object storage disk on startup [#48273](https://github.com/ClickHouse/ClickHouse/pull/48273) ([Aleksei Filatov](https://github.com/aalexfvk)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Add a fuse for backport branches w/o a created PR [#47760](https://github.com/ClickHouse/ClickHouse/pull/47760) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Only valid Reviews.STATES overwrite existing reviews [#47789](https://github.com/ClickHouse/ClickHouse/pull/47789) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Place short return before big block, improve logging [#47822](https://github.com/ClickHouse/ClickHouse/pull/47822) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Artifacts s3 prefix [#47945](https://github.com/ClickHouse/ClickHouse/pull/47945) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix tsan error lock-order-inversion [#47953](https://github.com/ClickHouse/ClickHouse/pull/47953) ([Kruglov Pavel](https://github.com/Avogar)). + diff --git a/docs/changelogs/v22.8.17.17-lts.md b/docs/changelogs/v22.8.17.17-lts.md new file mode 100644 index 00000000000..9c8c3e1839b --- /dev/null +++ b/docs/changelogs/v22.8.17.17-lts.md @@ -0,0 +1,22 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.8.17.17-lts (df7f2ef0b41) FIXME as compared to v22.8.16.32-lts (7c4be737bd0) + +#### Improvement +* Backported in [#48157](https://github.com/ClickHouse/ClickHouse/issues/48157): Fixed `UNKNOWN_TABLE` exception when attaching to a materialized view that has dependent tables that are not available. This might be useful when trying to restore state from a backup. [#47975](https://github.com/ClickHouse/ClickHouse/pull/47975) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). + +#### Build/Testing/Packaging Improvement +* Backported in [#48957](https://github.com/ClickHouse/ClickHouse/issues/48957): After the recent update, the `dockerd` requires `--tlsverify=false` together with the http port explicitly. [#48924](https://github.com/ClickHouse/ClickHouse/pull/48924) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix explain graph with projection [#47473](https://github.com/ClickHouse/ClickHouse/pull/47473) ([flynn](https://github.com/ucasfl)). +* Remove a feature [#48195](https://github.com/ClickHouse/ClickHouse/pull/48195) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix possible segfault in cache [#48469](https://github.com/ClickHouse/ClickHouse/pull/48469) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix bug in Keeper when a node is not created with scheme `auth` in ACL sometimes. [#48595](https://github.com/ClickHouse/ClickHouse/pull/48595) ([Aleksei Filatov](https://github.com/aalexfvk)). + diff --git a/docs/changelogs/v22.8.3.13-lts.md b/docs/changelogs/v22.8.3.13-lts.md index 903f5b7a600..5f08bc9ee67 100644 --- a/docs/changelogs/v22.8.3.13-lts.md +++ b/docs/changelogs/v22.8.3.13-lts.md @@ -13,7 +13,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * Backported in [#40694](https://github.com/ClickHouse/ClickHouse/issues/40694): Fix TGZ packages. [#40681](https://github.com/ClickHouse/ClickHouse/pull/40681) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
-#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#40451](https://github.com/ClickHouse/ClickHouse/issues/40451): Fix rare bug with column TTL for MergeTree engines family: In case of repeated vertical merge the error `Cannot unlink file ColumnName.bin ... No such file or directory.` could happen. [#40346](https://github.com/ClickHouse/ClickHouse/pull/40346) ([alesapin](https://github.com/alesapin)). * Backported in [#40533](https://github.com/ClickHouse/ClickHouse/issues/40533): Proxy resolver stop on first successful request to endpoint. [#40353](https://github.com/ClickHouse/ClickHouse/pull/40353) ([Maksim Kita](https://github.com/kitaisreal)). diff --git a/docs/changelogs/v22.8.4.7-lts.md b/docs/changelogs/v22.8.4.7-lts.md index 93d9aa2bf1a..a852120ab8a 100644 --- a/docs/changelogs/v22.8.4.7-lts.md +++ b/docs/changelogs/v22.8.4.7-lts.md @@ -7,7 +7,7 @@ sidebar_label: 2022 ### ClickHouse release v22.8.4.7-lts (baad27bcd2f) FIXME as compared to v22.8.3.13-lts (6a15b73faea) -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#40760](https://github.com/ClickHouse/ClickHouse/issues/40760): Fix possible error 'Decimal math overflow' while parsing DateTime64. [#40546](https://github.com/ClickHouse/ClickHouse/pull/40546) ([Kruglov Pavel](https://github.com/Avogar)). * Backported in [#40811](https://github.com/ClickHouse/ClickHouse/issues/40811): In [#40595](https://github.com/ClickHouse/ClickHouse/issues/40595) it was reported that the `host_regexp` functionality was not working properly with a name to address resolution in `/etc/hosts`. It's fixed. [#40769](https://github.com/ClickHouse/ClickHouse/pull/40769) ([Arthur Passos](https://github.com/arthurpassos)). diff --git a/docs/changelogs/v22.8.5.29-lts.md b/docs/changelogs/v22.8.5.29-lts.md index b7ad3c11a46..1b59501cc28 100644 --- a/docs/changelogs/v22.8.5.29-lts.md +++ b/docs/changelogs/v22.8.5.29-lts.md @@ -17,7 +17,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * Backported in [#41157](https://github.com/ClickHouse/ClickHouse/issues/41157): Add macOS binaries to GH release assets, it fixes [#37718](https://github.com/ClickHouse/ClickHouse/issues/37718). [#41088](https://github.com/ClickHouse/ClickHouse/pull/41088) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#40866](https://github.com/ClickHouse/ClickHouse/issues/40866): - Fix crash while parsing values of type `Object` that contains arrays of variadic dimension. [#40483](https://github.com/ClickHouse/ClickHouse/pull/40483) ([Duc Canh Le](https://github.com/canhld94)). * Backported in [#40805](https://github.com/ClickHouse/ClickHouse/issues/40805): During insertion of a new query to the `ProcessList` allocations happen. If we reach the memory limit during these allocations we can not use `OvercommitTracker`, because `ProcessList::mutex` is already acquired. Fixes [#40611](https://github.com/ClickHouse/ClickHouse/issues/40611). [#40677](https://github.com/ClickHouse/ClickHouse/pull/40677) ([Dmitry Novik](https://github.com/novikd)). 
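For the DateTime64 entry above ([#40546](https://github.com/ClickHouse/ClickHouse/pull/40546)), the kind of parse it concerns — illustrative values only:

```
-- Parsing a string into DateTime64 involves decimal scaling math, where
-- the possible 'Decimal math overflow' was fixed.
SELECT toDateTime64('2022-08-22 01:02:03.123', 3);
```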
diff --git a/docs/changelogs/v22.8.6.71-lts.md b/docs/changelogs/v22.8.6.71-lts.md index 0337c5ba2e2..2ac6ef05bc4 100644 --- a/docs/changelogs/v22.8.6.71-lts.md +++ b/docs/changelogs/v22.8.6.71-lts.md @@ -16,7 +16,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * Backported in [#41560](https://github.com/ClickHouse/ClickHouse/issues/41560): Add `source` field to deb packages, update `nfpm`. [#41531](https://github.com/ClickHouse/ClickHouse/pull/41531) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#41284](https://github.com/ClickHouse/ClickHouse/issues/41284): Fix unused unknown columns introduced by WITH statement. This fixes [#37812](https://github.com/ClickHouse/ClickHouse/issues/37812) . [#39131](https://github.com/ClickHouse/ClickHouse/pull/39131) ([Amos Bird](https://github.com/amosbird)). * Backported in [#41837](https://github.com/ClickHouse/ClickHouse/issues/41837): Fix vertical merge of parts with lightweight deleted rows. [#40559](https://github.com/ClickHouse/ClickHouse/pull/40559) ([Alexander Gololobov](https://github.com/davenger)). diff --git a/docs/changelogs/v22.8.7.34-lts.md b/docs/changelogs/v22.8.7.34-lts.md index ee55f5d9f1f..56f03ecdf3b 100644 --- a/docs/changelogs/v22.8.7.34-lts.md +++ b/docs/changelogs/v22.8.7.34-lts.md @@ -17,7 +17,7 @@ sidebar_label: 2022 * Backported in [#42296](https://github.com/ClickHouse/ClickHouse/issues/42296): Update cctz to the latest master, update tzdb to 2020e. [#42273](https://github.com/ClickHouse/ClickHouse/pull/42273) ([Dom Del Nano](https://github.com/ddelnano)). * Backported in [#42360](https://github.com/ClickHouse/ClickHouse/issues/42360): Update tzdata to 2022e to support the new timezone changes. Palestine transitions are now Saturdays at 02:00. Simplify three Ukraine zones into one. Jordan and Syria switch from +02/+03 with DST to year-round +03. (https://data.iana.org/time-zones/tzdb/NEWS). This closes [#42252](https://github.com/ClickHouse/ClickHouse/issues/42252). [#42327](https://github.com/ClickHouse/ClickHouse/pull/42327) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#42489](https://github.com/ClickHouse/ClickHouse/issues/42489): Removed skipping of mutations in unaffected partitions of `MergeTree` tables, because this feature never worked correctly and might cause resurrection of finished mutations. [#40589](https://github.com/ClickHouse/ClickHouse/pull/40589) ([Alexander Tokmakov](https://github.com/tavplubix)). * Backported in [#42121](https://github.com/ClickHouse/ClickHouse/issues/42121): Fixed "Part ... intersects part ..." error that might happen in extremely rare cases if replica was restarted just after detaching some part as broken. [#41741](https://github.com/ClickHouse/ClickHouse/pull/41741) ([Alexander Tokmakov](https://github.com/tavplubix)). 
diff --git a/docs/changelogs/v22.8.8.3-lts.md b/docs/changelogs/v22.8.8.3-lts.md index b4673eb955a..d42f333ce3f 100644 --- a/docs/changelogs/v22.8.8.3-lts.md +++ b/docs/changelogs/v22.8.8.3-lts.md @@ -7,6 +7,6 @@ sidebar_label: 2022 ### ClickHouse release v22.8.8.3-lts (ac5a6cababc) FIXME as compared to v22.8.7.34-lts (3c38e5e8ab9) -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#42677](https://github.com/ClickHouse/ClickHouse/issues/42677): keeper-fix: fix race in accessing logs while snapshot is being installed. [#40627](https://github.com/ClickHouse/ClickHouse/pull/40627) ([Antonio Andelic](https://github.com/antonio2368)). diff --git a/docs/changelogs/v22.8.9.24-lts.md b/docs/changelogs/v22.8.9.24-lts.md index 585327b83a1..15935c4cf4e 100644 --- a/docs/changelogs/v22.8.9.24-lts.md +++ b/docs/changelogs/v22.8.9.24-lts.md @@ -17,7 +17,7 @@ sidebar_label: 2022 * Backported in [#42964](https://github.com/ClickHouse/ClickHouse/issues/42964): Before the fix, the user-defined config was preserved by RPM in `$file.rpmsave`. The PR fixes it and won't replace the user's files from packages. [#42936](https://github.com/ClickHouse/ClickHouse/pull/42936) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Backported in [#43040](https://github.com/ClickHouse/ClickHouse/issues/43040): Add a CI step to mark commits as ready for release; soft-forbid launching a release script from branches but master. [#43017](https://github.com/ClickHouse/ClickHouse/pull/43017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#42720](https://github.com/ClickHouse/ClickHouse/issues/42720): Fixed `Unknown identifier (aggregate-function)` exception which appears when a user tries to calculate WINDOW ORDER BY/PARTITION BY expressions over aggregate functions: ``` CREATE TABLE default.tenk1 ( `unique1` Int32, `unique2` Int32, `ten` Int32 ) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192; SELECT ten, sum(unique1) + sum(unique2) AS res, rank() OVER (ORDER BY sum(unique1) + sum(unique2) ASC) AS rank FROM _complex GROUP BY ten ORDER BY ten ASC; ``` which gives: ``` Code: 47. DB::Exception: Received from localhost:9000. DB::Exception: Unknown identifier: sum(unique1); there are columns: unique1, unique2, ten: While processing sum(unique1) + sum(unique2) ASC. (UNKNOWN_IDENTIFIER) ```. [#39762](https://github.com/ClickHouse/ClickHouse/pull/39762) ([Vladimir Chebotaryov](https://github.com/quickhouse)). * Backported in [#42748](https://github.com/ClickHouse/ClickHouse/issues/42748): A segmentation fault related to DNS & c-ares has been reported. The below error ocurred in multiple threads: ``` 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008088 [ 356 ] {} BaseDaemon: ######################################## 2022-09-28 15:41:19.008,"2022.09.28 15:41:19.008147 [ 356 ] {} BaseDaemon: (version 22.8.5.29 (official build), build id: 92504ACA0B8E2267) (from thread 353) (no query) Received signal Segmentation fault (11)" 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008196 [ 356 ] {} BaseDaemon: Address: 0xf Access: write. Address not mapped to object. 
2022-09-28 15:41:19.008,2022.09.28 15:41:19.008216 [ 356 ] {} BaseDaemon: Stack trace: 0x188f8212 0x1626851b 0x1626a69e 0x16269b3f 0x16267eab 0x13cf8284 0x13d24afc 0x13c5217e 0x14ec2495 0x15ba440f 0x15b9d13b 0x15bb2699 0x1891ccb3 0x1891e00d 0x18ae0769 0x18ade022 0x7f76aa985609 0x7f76aa8aa133 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008274 [ 356 ] {} BaseDaemon: 2. Poco::Net::IPAddress::family() const @ 0x188f8212 in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008297 [ 356 ] {} BaseDaemon: 3. ? @ 0x1626851b in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008309 [ 356 ] {} BaseDaemon: 4. ? @ 0x1626a69e in /usr/bin/clickhouse ```. [#42234](https://github.com/ClickHouse/ClickHouse/pull/42234) ([Arthur Passos](https://github.com/arthurpassos)). diff --git a/docs/changelogs/v22.9.2.7-stable.md b/docs/changelogs/v22.9.2.7-stable.md index 5c4db4bfa96..bbd0a6cce32 100644 --- a/docs/changelogs/v22.9.2.7-stable.md +++ b/docs/changelogs/v22.9.2.7-stable.md @@ -10,7 +10,7 @@ sidebar_label: 2022 #### Improvement * Backported in [#41709](https://github.com/ClickHouse/ClickHouse/issues/41709): Check file path for path traversal attacks in errors logger for input formats. [#41694](https://github.com/ClickHouse/ClickHouse/pull/41694) ([Kruglov Pavel](https://github.com/Avogar)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#41696](https://github.com/ClickHouse/ClickHouse/issues/41696): Fixes issue when docker run will fail if "https_port" is not present in config. [#41693](https://github.com/ClickHouse/ClickHouse/pull/41693) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). diff --git a/docs/changelogs/v22.9.3.18-stable.md b/docs/changelogs/v22.9.3.18-stable.md index 656cb1dfc22..a46dba6718c 100644 --- a/docs/changelogs/v22.9.3.18-stable.md +++ b/docs/changelogs/v22.9.3.18-stable.md @@ -7,7 +7,7 @@ sidebar_label: 2022 ### ClickHouse release v22.9.3.18-stable (0cb4b15d2fa) FIXME as compared to v22.9.2.7-stable (362e2cefcef) -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#41902](https://github.com/ClickHouse/ClickHouse/issues/41902): Fix possible crash in `SELECT` from `Merge` table with enabled `optimize_monotonous_functions_in_order_by` setting. Fixes [#41269](https://github.com/ClickHouse/ClickHouse/issues/41269). [#41740](https://github.com/ClickHouse/ClickHouse/pull/41740) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Backported in [#41863](https://github.com/ClickHouse/ClickHouse/issues/41863): 22.9 might fail to startup `ReplicatedMergeTree` table if that table was created by 20.3 or older version and was never altered, it's fixed. Fixes [#41742](https://github.com/ClickHouse/ClickHouse/issues/41742). [#41796](https://github.com/ClickHouse/ClickHouse/pull/41796) ([Alexander Tokmakov](https://github.com/tavplubix)). diff --git a/docs/changelogs/v22.9.4.32-stable.md b/docs/changelogs/v22.9.4.32-stable.md index 658d39af079..92bcc01e408 100644 --- a/docs/changelogs/v22.9.4.32-stable.md +++ b/docs/changelogs/v22.9.4.32-stable.md @@ -14,7 +14,7 @@ sidebar_label: 2022 * Backported in [#42297](https://github.com/ClickHouse/ClickHouse/issues/42297): Update cctz to the latest master, update tzdb to 2020e. [#42273](https://github.com/ClickHouse/ClickHouse/pull/42273) ([Dom Del Nano](https://github.com/ddelnano)). 
* Backported in [#42361](https://github.com/ClickHouse/ClickHouse/issues/42361): Update tzdata to 2022e to support the new timezone changes. Palestine transitions are now Saturdays at 02:00. Simplify three Ukraine zones into one. Jordan and Syria switch from +02/+03 with DST to year-round +03. (https://data.iana.org/time-zones/tzdb/NEWS). This closes [#42252](https://github.com/ClickHouse/ClickHouse/issues/42252). [#42327](https://github.com/ClickHouse/ClickHouse/pull/42327) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#42122](https://github.com/ClickHouse/ClickHouse/issues/42122): Fixed "Part ... intersects part ..." error that might happen in extremely rare cases if replica was restarted just after detaching some part as broken. [#41741](https://github.com/ClickHouse/ClickHouse/pull/41741) ([Alexander Tokmakov](https://github.com/tavplubix)). * Backported in [#41938](https://github.com/ClickHouse/ClickHouse/issues/41938): Don't allow to create or alter merge tree tables with virtual column name _row_exists, which is reserved for lightweight delete. Fixed [#41716](https://github.com/ClickHouse/ClickHouse/issues/41716). [#41763](https://github.com/ClickHouse/ClickHouse/pull/41763) ([Jianmei Zhang](https://github.com/zhangjmruc)). diff --git a/docs/changelogs/v22.9.5.25-stable.md b/docs/changelogs/v22.9.5.25-stable.md index eb46fb893e7..90150726ace 100644 --- a/docs/changelogs/v22.9.5.25-stable.md +++ b/docs/changelogs/v22.9.5.25-stable.md @@ -14,7 +14,7 @@ sidebar_label: 2022 * Backported in [#42965](https://github.com/ClickHouse/ClickHouse/issues/42965): Before the fix, the user-defined config was preserved by RPM in `$file.rpmsave`. The PR fixes it and won't replace the user's files from packages. [#42936](https://github.com/ClickHouse/ClickHouse/pull/42936) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Backported in [#43041](https://github.com/ClickHouse/ClickHouse/issues/43041): Add a CI step to mark commits as ready for release; soft-forbid launching a release script from branches but master. [#43017](https://github.com/ClickHouse/ClickHouse/pull/43017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#42749](https://github.com/ClickHouse/ClickHouse/issues/42749): A segmentation fault related to DNS & c-ares has been reported. The below error ocurred in multiple threads: ``` 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008088 [ 356 ] {} BaseDaemon: ######################################## 2022-09-28 15:41:19.008,"2022.09.28 15:41:19.008147 [ 356 ] {} BaseDaemon: (version 22.8.5.29 (official build), build id: 92504ACA0B8E2267) (from thread 353) (no query) Received signal Segmentation fault (11)" 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008196 [ 356 ] {} BaseDaemon: Address: 0xf Access: write. Address not mapped to object. 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008216 [ 356 ] {} BaseDaemon: Stack trace: 0x188f8212 0x1626851b 0x1626a69e 0x16269b3f 0x16267eab 0x13cf8284 0x13d24afc 0x13c5217e 0x14ec2495 0x15ba440f 0x15b9d13b 0x15bb2699 0x1891ccb3 0x1891e00d 0x18ae0769 0x18ade022 0x7f76aa985609 0x7f76aa8aa133 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008274 [ 356 ] {} BaseDaemon: 2. 
Poco::Net::IPAddress::family() const @ 0x188f8212 in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008297 [ 356 ] {} BaseDaemon: 3. ? @ 0x1626851b in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008309 [ 356 ] {} BaseDaemon: 4. ? @ 0x1626a69e in /usr/bin/clickhouse ```. [#42234](https://github.com/ClickHouse/ClickHouse/pull/42234) ([Arthur Passos](https://github.com/arthurpassos)). * Backported in [#42863](https://github.com/ClickHouse/ClickHouse/issues/42863): Fix lowerUTF8()/upperUTF8() when a symbol straddles a 16-byte boundary (a very frequent case if you have strings > 16 bytes long); see the sketch below. [#42812](https://github.com/ClickHouse/ClickHouse/pull/42812) ([Azat Khuzhin](https://github.com/azat)). diff --git a/docs/changelogs/v22.9.6.20-stable.md b/docs/changelogs/v22.9.6.20-stable.md index d450f285848..7abc4adc32e 100644 --- a/docs/changelogs/v22.9.6.20-stable.md +++ b/docs/changelogs/v22.9.6.20-stable.md @@ -13,7 +13,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * Backported in [#43052](https://github.com/ClickHouse/ClickHouse/issues/43052): Wait for all files to be in sync before archiving them in integration tests. [#42891](https://github.com/ClickHouse/ClickHouse/pull/42891) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#43505](https://github.com/ClickHouse/ClickHouse/issues/43505): Fix a bug when row level filter uses default value of column. [#43387](https://github.com/ClickHouse/ClickHouse/pull/43387) ([Alexander Gololobov](https://github.com/davenger)). * Backported in [#43722](https://github.com/ClickHouse/ClickHouse/issues/43722): Fixed primary key analysis with conditions involving `toString(enum)`. [#43596](https://github.com/ClickHouse/ClickHouse/pull/43596) ([Nikita Taranov](https://github.com/nickitat)). diff --git a/docs/changelogs/v22.9.7.34-stable.md b/docs/changelogs/v22.9.7.34-stable.md index 83be449f70d..ed8173eaf50 100644 --- a/docs/changelogs/v22.9.7.34-stable.md +++ b/docs/changelogs/v22.9.7.34-stable.md @@ -13,7 +13,7 @@ sidebar_label: 2022 #### Build/Testing/Packaging Improvement * Backported in [#44111](https://github.com/ClickHouse/ClickHouse/issues/44111): Bring sha512 sums back to the building step. [#44017](https://github.com/ClickHouse/ClickHouse/pull/44017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#43612](https://github.com/ClickHouse/ClickHouse/issues/43612): Fix bad inefficiency of `remote_filesystem_read_method=read` with filesystem cache. Closes [#42125](https://github.com/ClickHouse/ClickHouse/issues/42125). [#42129](https://github.com/ClickHouse/ClickHouse/pull/42129) ([Kseniia Sumarokova](https://github.com/kssenii)). * Backported in [#43526](https://github.com/ClickHouse/ClickHouse/issues/43526): Fix incorrect UserTimeMicroseconds/SystemTimeMicroseconds accounting. [#42791](https://github.com/ClickHouse/ClickHouse/pull/42791) ([Azat Khuzhin](https://github.com/azat)).
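To make the `lowerUTF8()`/`upperUTF8()` fix above concrete, here is a minimal sketch of the affected input class, assuming the SIMD fast path processes the string in 16-byte chunks (my illustration, not taken from the PR):

```sql
-- 15 one-byte characters followed by the two-byte code point 'Ä' (0xC3 0x84),
-- so the symbol straddles the first 16-byte chunk:
SELECT lowerUTF8(concat(repeat('a', 15), 'ÄBCD'));
-- expected: 'aaaaaaaaaaaaaaaäbcd'
```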
diff --git a/docs/changelogs/v23.1.1.3077-stable.md b/docs/changelogs/v23.1.1.3077-stable.md index e218be62f09..53ca9e1831c 100644 --- a/docs/changelogs/v23.1.1.3077-stable.md +++ b/docs/changelogs/v23.1.1.3077-stable.md @@ -125,7 +125,7 @@ sidebar_label: 2023 * SQLite library is updated to the latest. It is used for the SQLite database and table integration engines. Also, fixed a false-positive TSan report. This closes [#45027](https://github.com/ClickHouse/ClickHouse/issues/45027). [#45031](https://github.com/ClickHouse/ClickHouse/pull/45031) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Fix report sending in the case when FastTest failed. [#45588](https://github.com/ClickHouse/ClickHouse/pull/45588) ([Dmitry Novik](https://github.com/novikd)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * #40651 [#41404](https://github.com/ClickHouse/ClickHouse/issues/41404). [#42126](https://github.com/ClickHouse/ClickHouse/pull/42126) ([Alexander Gololobov](https://github.com/davenger)). * Fix possible use-of-uninitialized value after executing expressions after sorting. Closes [#43386](https://github.com/ClickHouse/ClickHouse/issues/43386) CC: @nickitat. [#43635](https://github.com/ClickHouse/ClickHouse/pull/43635) ([Kruglov Pavel](https://github.com/Avogar)). @@ -589,4 +589,3 @@ sidebar_label: 2023 * Resubmit "Fix possible in-use table after DETACH" [#45566](https://github.com/ClickHouse/ClickHouse/pull/45566) ([Alexander Tokmakov](https://github.com/tavplubix)). * Typo: "Granulesis" --> "Granules" [#45598](https://github.com/ClickHouse/ClickHouse/pull/45598) ([Robert Schulze](https://github.com/rschu1ze)). * Fix version in autogenerated_versions.txt [#45624](https://github.com/ClickHouse/ClickHouse/pull/45624) ([Dmitry Novik](https://github.com/novikd)). - diff --git a/docs/changelogs/v23.1.2.9-stable.md b/docs/changelogs/v23.1.2.9-stable.md index 272a2b95a86..7d34a6b9ec2 100644 --- a/docs/changelogs/v23.1.2.9-stable.md +++ b/docs/changelogs/v23.1.2.9-stable.md @@ -13,11 +13,10 @@ sidebar_label: 2023 #### Bug Fix * Backported in [#45673](https://github.com/ClickHouse/ClickHouse/issues/45673): Fix wiping sensitive info in logs. [#45603](https://github.com/ClickHouse/ClickHouse/pull/45603) ([Vitaly Baranov](https://github.com/vitlibar)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#45730](https://github.com/ClickHouse/ClickHouse/issues/45730): Fix key description when encountering duplicate primary keys. This can happen in projections. See [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590) for details. [#45686](https://github.com/ClickHouse/ClickHouse/pull/45686) ([Amos Bird](https://github.com/amosbird)). #### NOT FOR CHANGELOG / INSIGNIFICANT * Trim refs/tags/ from GITHUB_TAG in release workflow [#45636](https://github.com/ClickHouse/ClickHouse/pull/45636) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
- diff --git a/docs/changelogs/v23.1.3.5-stable.md b/docs/changelogs/v23.1.3.5-stable.md index d4f39894bec..9f8ef928138 100644 --- a/docs/changelogs/v23.1.3.5-stable.md +++ b/docs/changelogs/v23.1.3.5-stable.md @@ -7,11 +7,10 @@ sidebar_label: 2023 ### ClickHouse release v23.1.3.5-stable (548b494bcce) FIXME as compared to v23.1.2.9-stable (8dfb1700858) -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Backported in [#45896](https://github.com/ClickHouse/ClickHouse/issues/45896): Bugfix: IPv6 parser for mixed IPv4 addresses with a missing first octet (like `::.1.2.3`). [#45871](https://github.com/ClickHouse/ClickHouse/pull/45871) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). #### NOT FOR CHANGELOG / INSIGNIFICANT * Get rid of progress timestamps in release publishing [#45818](https://github.com/ClickHouse/ClickHouse/pull/45818) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). - diff --git a/docs/changelogs/v23.1.4.58-stable.md b/docs/changelogs/v23.1.4.58-stable.md new file mode 100644 index 00000000000..9081d700308 --- /dev/null +++ b/docs/changelogs/v23.1.4.58-stable.md @@ -0,0 +1,46 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.1.4.58-stable (9ed562163a5) FIXME as compared to v23.1.3.5-stable (548b494bcce) + +#### Performance Improvement +* Backported in [#46380](https://github.com/ClickHouse/ClickHouse/issues/46380): Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### Improvement +* Backported in [#46985](https://github.com/ClickHouse/ClickHouse/issues/46985): - Apply `ALTER TABLE table_name ON CLUSTER cluster MOVE PARTITION|PART partition_expr TO DISK|VOLUME 'disk_name'` to all replicas. Because `ALTER TABLE t MOVE` is not replicated. [#46402](https://github.com/ClickHouse/ClickHouse/pull/46402) ([lizhuoyu5](https://github.com/lzydmxy)). +* Backported in [#46778](https://github.com/ClickHouse/ClickHouse/issues/46778): Backward compatibility for T64 codec support for IPv4. [#46747](https://github.com/ClickHouse/ClickHouse/pull/46747) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#47020](https://github.com/ClickHouse/ClickHouse/issues/47020): Allow IPv4 in range(). [#46995](https://github.com/ClickHouse/ClickHouse/pull/46995) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). + +#### Build/Testing/Packaging Improvement +* Backported in [#46031](https://github.com/ClickHouse/ClickHouse/issues/46031): Add systemd.service file for clickhouse-keeper. Fixes [#44293](https://github.com/ClickHouse/ClickHouse/issues/44293). [#45568](https://github.com/ClickHouse/ClickHouse/pull/45568) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#46477](https://github.com/ClickHouse/ClickHouse/issues/46477): Get rid of unnecessary build for standalone clickhouse-keeper. [#46367](https://github.com/ClickHouse/ClickHouse/pull/46367) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#46511](https://github.com/ClickHouse/ClickHouse/issues/46511): Some time ago the ccache compression was changed to `zst`, but `gz` archives are downloaded by default. It fixes it by prioritizing zst archive.
[#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in official stable release) + +* Backported in [#46228](https://github.com/ClickHouse/ClickHouse/issues/46228): A couple of seg faults have been reported around `c-ares`. All of the recent stack traces observed fail on inserting into `std::unordered_set<>`. I believe I have found the root cause of this; it seems to be unprocessed queries. Prior to this PR, CH calls `poll` to wait on the file descriptors in the `c-ares` channel. According to the [poll docs](https://man7.org/linux/man-pages/man2/poll.2.html), a negative return value means an error has occurred. Because of this, we would abort the execution and return failure. The problem is that `poll` will also return a negative value if a system interrupt occurs. A system interrupt does not mean the processing has failed or ended, but we would abort it anyways because we were checking for negative values. Once the execution is aborted, the whole stack is destroyed, which includes the `std::unordered_set` passed to the `void *` parameter of the c-ares callback. Once c-ares completed the request, the callback would be invoked and would access an invalid memory address causing a segfault. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)). +* Backported in [#46967](https://github.com/ClickHouse/ClickHouse/issues/46967): Backward compatibility - allow implicit narrowing conversion from UInt64 to IPv4 - required for "INSERT ... VALUES ..." expression. [#45865](https://github.com/ClickHouse/ClickHouse/pull/45865) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#46220](https://github.com/ClickHouse/ClickHouse/issues/46220): Fix reading of non-existing nested columns with multiple levels in compact parts. [#46045](https://github.com/ClickHouse/ClickHouse/pull/46045) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#46751](https://github.com/ClickHouse/ClickHouse/issues/46751): Follow-up fix for Replace domain IP types (IPv4, IPv6) with native https://github.com/ClickHouse/ClickHouse/pull/43221. [#46087](https://github.com/ClickHouse/ClickHouse/pull/46087) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#46448](https://github.com/ClickHouse/ClickHouse/issues/46448): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#46680](https://github.com/ClickHouse/ClickHouse/issues/46680): Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and logical error `Bad cast` in debug build. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#46873](https://github.com/ClickHouse/ClickHouse/issues/46873): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#46956](https://github.com/ClickHouse/ClickHouse/issues/46956): Fix result of LIKE predicates which translate to substring searches and contain quoted non-LIKE metacharacters.
[#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Another attempt to fix automerge, or at least to have debug footprint [#45476](https://github.com/ClickHouse/ClickHouse/pull/45476) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Support DELETE ON CLUSTER [#45786](https://github.com/ClickHouse/ClickHouse/pull/45786) ([Alexander Gololobov](https://github.com/davenger)). +* Add check for running workflows to merge_pr.py [#45803](https://github.com/ClickHouse/ClickHouse/pull/45803) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add necessary dependency for sanitizers [#45959](https://github.com/ClickHouse/ClickHouse/pull/45959) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add helping logging to auto-merge script [#46080](https://github.com/ClickHouse/ClickHouse/pull/46080) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix write buffer destruction order for vertical merge. [#46205](https://github.com/ClickHouse/ClickHouse/pull/46205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Wait for background tasks in ~UploadHelper [#46334](https://github.com/ClickHouse/ClickHouse/pull/46334) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Improve install_check.py [#46458](https://github.com/ClickHouse/ClickHouse/pull/46458) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix dependencies for InstallPackagesTestAarch64 [#46597](https://github.com/ClickHouse/ClickHouse/pull/46597) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). diff --git a/docs/changelogs/v23.1.5.24-stable.md b/docs/changelogs/v23.1.5.24-stable.md new file mode 100644 index 00000000000..934e97312c0 --- /dev/null +++ b/docs/changelogs/v23.1.5.24-stable.md @@ -0,0 +1,27 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.1.5.24-stable (0e51b53ba99) FIXME as compared to v23.1.4.58-stable (9ed562163a5) + +#### Build/Testing/Packaging Improvement +* Backported in [#47060](https://github.com/ClickHouse/ClickHouse/issues/47060): Fix error during server startup on old distros (e.g. Amazon Linux 2) and on ARM that glibc 2.28 symbols are not found. [#47008](https://github.com/ClickHouse/ClickHouse/pull/47008) ([Robert Schulze](https://github.com/rschu1ze)). + +#### Bug Fix (user-visible misbehavior in official stable release) + +* Backported in [#46401](https://github.com/ClickHouse/ClickHouse/issues/46401): Fix `SYSTEM UNFREEZE` queries failing with the exception `CANNOT_PARSE_INPUT_ASSERTION_FAILED`. [#46325](https://github.com/ClickHouse/ClickHouse/pull/46325) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Backported in [#46905](https://github.com/ClickHouse/ClickHouse/issues/46905): - Fix incorrect alias recursion in QueryNormalizer. [#46609](https://github.com/ClickHouse/ClickHouse/pull/46609) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#47211](https://github.com/ClickHouse/ClickHouse/issues/47211): `INSERT` queries through native TCP protocol and HTTP protocol were not canceled correctly in some cases. 
It could lead to a partially applied query if a client canceled the query, or if a client died or, in rare cases, on network errors. As a result, it could lead to not working deduplication. Fixes [#27667](https://github.com/ClickHouse/ClickHouse/issues/27667) and [#45377](https://github.com/ClickHouse/ClickHouse/issues/45377). [#46681](https://github.com/ClickHouse/ClickHouse/pull/46681) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#47118](https://github.com/ClickHouse/ClickHouse/issues/47118): - Fix arithmetic operations in aggregate optimization with `min` and `max`. [#46705](https://github.com/ClickHouse/ClickHouse/pull/46705) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#46883](https://github.com/ClickHouse/ClickHouse/issues/46883): Fix MSan report in the `maxIntersections` function. This closes [#43126](https://github.com/ClickHouse/ClickHouse/issues/43126). [#46847](https://github.com/ClickHouse/ClickHouse/pull/46847) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#47361](https://github.com/ClickHouse/ClickHouse/issues/47361): Fix possible deadlock on distributed query cancellation. [#47161](https://github.com/ClickHouse/ClickHouse/pull/47161) ([Kruglov Pavel](https://github.com/Avogar)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Use /etc/default/clickhouse in systemd too [#47003](https://github.com/ClickHouse/ClickHouse/pull/47003) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update typing for a new PyGithub version [#47123](https://github.com/ClickHouse/ClickHouse/pull/47123) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Follow-up to [#46681](https://github.com/ClickHouse/ClickHouse/issues/46681) [#47284](https://github.com/ClickHouse/ClickHouse/pull/47284) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add a manual trigger for release workflow [#47302](https://github.com/ClickHouse/ClickHouse/pull/47302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). diff --git a/docs/changelogs/v23.1.6.42-stable.md b/docs/changelogs/v23.1.6.42-stable.md new file mode 100644 index 00000000000..21fb9220443 --- /dev/null +++ b/docs/changelogs/v23.1.6.42-stable.md @@ -0,0 +1,34 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.1.6.42-stable (783ddf67991) FIXME as compared to v23.1.5.24-stable (0e51b53ba99) + +#### Build/Testing/Packaging Improvement +* Backported in [#48215](https://github.com/ClickHouse/ClickHouse/issues/48215): Use sccache as a replacement for ccache and using S3 as cache backend. [#46240](https://github.com/ClickHouse/ClickHouse/pull/46240) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#48254](https://github.com/ClickHouse/ClickHouse/issues/48254): The `clickhouse/clickhouse-keeper` image used to be pushed only with tags `-alpine`, e.g. `latest-alpine`. As it was suggested in https://github.com/ClickHouse/examples/pull/2, now it will be pushed as suffixless too. [#48236](https://github.com/ClickHouse/ClickHouse/pull/48236) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix changing an expired role [#46772](https://github.com/ClickHouse/ClickHouse/pull/46772) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix bug in zero-copy replication disk choice during fetch [#47010](https://github.com/ClickHouse/ClickHouse/pull/47010) ([alesapin](https://github.com/alesapin)). 
+* Fix NOT_IMPLEMENTED error with CROSS JOIN and algorithm = auto [#47068](https://github.com/ClickHouse/ClickHouse/pull/47068) ([Vladimir C](https://github.com/vdimir)). +* Disable logical expression optimizer for expression with aliases. [#47451](https://github.com/ClickHouse/ClickHouse/pull/47451) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix query parameters [#47488](https://github.com/ClickHouse/ClickHouse/pull/47488) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Parameterized view bug fix 47287 47247 [#47495](https://github.com/ClickHouse/ClickHouse/pull/47495) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix wait for zero copy lock during move [#47631](https://github.com/ClickHouse/ClickHouse/pull/47631) ([alesapin](https://github.com/alesapin)). +* Hotfix for too verbose warnings in HTTP [#47903](https://github.com/ClickHouse/ClickHouse/pull/47903) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Better error messages in ReplicatedMergeTreeAttachThread [#47454](https://github.com/ClickHouse/ClickHouse/pull/47454) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `00933_test_fix_extra_seek_on_compressed_cache` in releases. [#47490](https://github.com/ClickHouse/ClickHouse/pull/47490) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a fuse for backport branches w/o a created PR [#47760](https://github.com/ClickHouse/ClickHouse/pull/47760) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Only valid Reviews.STATES overwrite existing reviews [#47789](https://github.com/ClickHouse/ClickHouse/pull/47789) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Place short return before big block, improve logging [#47822](https://github.com/ClickHouse/ClickHouse/pull/47822) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Artifacts s3 prefix [#47945](https://github.com/ClickHouse/ClickHouse/pull/47945) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix tsan error lock-order-inversion [#47953](https://github.com/ClickHouse/ClickHouse/pull/47953) ([Kruglov Pavel](https://github.com/Avogar)). + diff --git a/docs/changelogs/v23.1.7.30-stable.md b/docs/changelogs/v23.1.7.30-stable.md new file mode 100644 index 00000000000..80f683f6ac8 --- /dev/null +++ b/docs/changelogs/v23.1.7.30-stable.md @@ -0,0 +1,28 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.1.7.30-stable (c94dba6e023) FIXME as compared to v23.1.6.42-stable (783ddf67991) + +#### Improvement +* Backported in [#48161](https://github.com/ClickHouse/ClickHouse/issues/48161): Fixed `UNKNOWN_TABLE` exception when attaching to a materialized view that has dependent tables that are not available. This might be useful when trying to restore state from a backup. [#47975](https://github.com/ClickHouse/ClickHouse/pull/47975) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). + +#### Build/Testing/Packaging Improvement +* Backported in [#48585](https://github.com/ClickHouse/ClickHouse/issues/48585): Update time zones. 
The following were updated: Africa/Cairo, Africa/Casablanca, Africa/El_Aaiun, America/Bogota, America/Cambridge_Bay, America/Ciudad_Juarez, America/Godthab, America/Inuvik, America/Iqaluit, America/Nuuk, America/Ojinaga, America/Pangnirtung, America/Rankin_Inlet, America/Resolute, America/Whitehorse, America/Yellowknife, Asia/Gaza, Asia/Hebron, Asia/Kuala_Lumpur, Asia/Singapore, Canada/Yukon, Egypt, Europe/Kirov, Europe/Volgograd, Singapore. [#48572](https://github.com/ClickHouse/ClickHouse/pull/48572) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#48958](https://github.com/ClickHouse/ClickHouse/issues/48958): After the recent update, the `dockerd` requires `--tlsverify=false` together with the http port explicitly. [#48924](https://github.com/ClickHouse/ClickHouse/pull/48924) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix race in grace hash join with limit [#47153](https://github.com/ClickHouse/ClickHouse/pull/47153) ([Vladimir C](https://github.com/vdimir)). +* Fix explain graph with projection [#47473](https://github.com/ClickHouse/ClickHouse/pull/47473) ([flynn](https://github.com/ucasfl)). +* Fix crash in polygonsSymDifferenceCartesian [#47702](https://github.com/ClickHouse/ClickHouse/pull/47702) ([pufit](https://github.com/pufit)). +* Remove a feature [#48195](https://github.com/ClickHouse/ClickHouse/pull/48195) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* ClickHouse startup error when loading a distributed table that depends on a dictionary [#48419](https://github.com/ClickHouse/ClickHouse/pull/48419) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Fix possible segfault in cache [#48469](https://github.com/ClickHouse/ClickHouse/pull/48469) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix nested map for keys of IP and UUID types [#48556](https://github.com/ClickHouse/ClickHouse/pull/48556) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix bug in Keeper when a node is not created with scheme `auth` in ACL sometimes. [#48595](https://github.com/ClickHouse/ClickHouse/pull/48595) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Fix IPv4 comparable with UInt [#48611](https://github.com/ClickHouse/ClickHouse/pull/48611) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). + diff --git a/docs/changelogs/v23.2.1.2537-stable.md b/docs/changelogs/v23.2.1.2537-stable.md index 3fdcf6d6571..9da81c039ea 100644 --- a/docs/changelogs/v23.2.1.2537-stable.md +++ b/docs/changelogs/v23.2.1.2537-stable.md @@ -161,7 +161,7 @@ sidebar_label: 2023 * Some time ago the ccache compression was changed to `zst`, but `gz` archives are downloaded by default. It fixes it by prioritizing zst archive. [#46490](https://github.com/ClickHouse/ClickHouse/pull/46490) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Raised the minimum Clang version needed to build ClickHouse from 12 to 15. [#46710](https://github.com/ClickHouse/ClickHouse/pull/46710) ([Robert Schulze](https://github.com/rschu1ze)). -#### Bug Fix (user-visible misbehavior in official stable or prestable release) +#### Bug Fix (user-visible misbehavior in official stable release) * Flush data exactly by `rabbitmq_flush_interval_ms` or by `rabbitmq_max_block_size` in `StorageRabbitMQ`. Closes [#42389](https://github.com/ClickHouse/ClickHouse/issues/42389). Closes [#45160](https://github.com/ClickHouse/ClickHouse/issues/45160). 
[#44404](https://github.com/ClickHouse/ClickHouse/pull/44404) ([Kseniia Sumarokova](https://github.com/kssenii)). * - Use PODArray to render in sparkBar function, so we can control the memory usage. Close [#44467](https://github.com/ClickHouse/ClickHouse/issues/44467). [#44489](https://github.com/ClickHouse/ClickHouse/pull/44489) ([Duc Canh Le](https://github.com/canhld94)). @@ -470,4 +470,3 @@ sidebar_label: 2023 #### Testing Improvement * Fixed functional test 00304_http_external_data for s390x. [#45807](https://github.com/ClickHouse/ClickHouse/pull/45807) ([Harry Lee](https://github.com/HarryLeeIBM)). - diff --git a/docs/changelogs/v23.2.2.20-stable.md b/docs/changelogs/v23.2.2.20-stable.md new file mode 100644 index 00000000000..b92fbdebe33 --- /dev/null +++ b/docs/changelogs/v23.2.2.20-stable.md @@ -0,0 +1,29 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.2.2.20-stable (f6c269c8df2) FIXME as compared to v23.2.1.2537-stable (52bf836e03a) + +#### Improvement +* Backported in [#46914](https://github.com/ClickHouse/ClickHouse/issues/46914): Allow PREWHERE for Merge with different DEFAULT expression for column. [#46831](https://github.com/ClickHouse/ClickHouse/pull/46831) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#47022](https://github.com/ClickHouse/ClickHouse/issues/47022): Allow IPv4 in range(). [#46995](https://github.com/ClickHouse/ClickHouse/pull/46995) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). + +#### Bug Fix +* Backported in [#46828](https://github.com/ClickHouse/ClickHouse/issues/46828): Combined PREWHERE column accumulated from multiple PREWHERE in some cases didn't contain 0's from previous steps. The fix is to apply final filter if we know that it wasn't applied from more than 1 last step. [#46785](https://github.com/ClickHouse/ClickHouse/pull/46785) ([Alexander Gololobov](https://github.com/davenger)). + +#### Build/Testing/Packaging Improvement +* Backported in [#47062](https://github.com/ClickHouse/ClickHouse/issues/47062): Fix error during server startup on old distros (e.g. Amazon Linux 2) and on ARM that glibc 2.28 symbols are not found. [#47008](https://github.com/ClickHouse/ClickHouse/pull/47008) ([Robert Schulze](https://github.com/rschu1ze)). + +#### Bug Fix (user-visible misbehavior in official stable release) + +* Backported in [#46895](https://github.com/ClickHouse/ClickHouse/issues/46895): Fixed a bug in automatic retries of `DROP TABLE` query with `ReplicatedMergeTree` tables and `Atomic` databases. In rare cases it could lead to `Can't get data for node /zk_path/log_pointer` and `The specified key does not exist` errors if ZooKeeper session expired during DROP and a new replicated table with the same path in ZooKeeper was created in parallel. [#46384](https://github.com/ClickHouse/ClickHouse/pull/46384) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#46865](https://github.com/ClickHouse/ClickHouse/issues/46865): Fix a bug in the `Map` data type. This closes [#46855](https://github.com/ClickHouse/ClickHouse/issues/46855). [#46856](https://github.com/ClickHouse/ClickHouse/pull/46856) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#46958](https://github.com/ClickHouse/ClickHouse/issues/46958): Fix result of LIKE predicates which translate to substring searches and contain quoted non-LIKE metacharacters. [#46875](https://github.com/ClickHouse/ClickHouse/pull/46875) ([Robert Schulze](https://github.com/rschu1ze)). 
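To illustrate the LIKE fix above (a sketch of mine, not taken from the PR): an escaped metacharacter such as `\_` must match a literal underscore even though the pattern as a whole still translates to a substring search.

```sql
-- In a ClickHouse string literal, '\\_' denotes the two characters '\_',
-- i.e. an escaped '_' at the LIKE level:
SELECT 'hello_world' LIKE '%hello\\_world%';  -- expected: 1
SELECT 'helloXworld' LIKE '%hello\\_world%';  -- expected: 0, the '_' is literal
```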
+ +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* More concise logging at trace level for PREWHERE steps [#46771](https://github.com/ClickHouse/ClickHouse/pull/46771) ([Alexander Gololobov](https://github.com/davenger)). +* Reduce updates of Mergeable Check [#46781](https://github.com/ClickHouse/ClickHouse/pull/46781) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). diff --git a/docs/changelogs/v23.2.3.17-stable.md b/docs/changelogs/v23.2.3.17-stable.md new file mode 100644 index 00000000000..75b7f8b2b20 --- /dev/null +++ b/docs/changelogs/v23.2.3.17-stable.md @@ -0,0 +1,22 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.2.3.17-stable (dec18bf7281) FIXME as compared to v23.2.2.20-stable (f6c269c8df2) + +#### Bug Fix (user-visible misbehavior in official stable release) + +* Backported in [#46907](https://github.com/ClickHouse/ClickHouse/issues/46907): - Fix incorrect alias recursion in QueryNormalizer. [#46609](https://github.com/ClickHouse/ClickHouse/pull/46609) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#47091](https://github.com/ClickHouse/ClickHouse/issues/47091): - Fix arithmetic operations in aggregate optimization with `min` and `max`. [#46705](https://github.com/ClickHouse/ClickHouse/pull/46705) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#46885](https://github.com/ClickHouse/ClickHouse/issues/46885): Fix MSan report in the `maxIntersections` function. This closes [#43126](https://github.com/ClickHouse/ClickHouse/issues/43126). [#46847](https://github.com/ClickHouse/ClickHouse/pull/46847) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#47067](https://github.com/ClickHouse/ClickHouse/issues/47067): Fix typo in systemd service, which causes the systemd service start to fail. [#47051](https://github.com/ClickHouse/ClickHouse/pull/47051) ([Palash Goel](https://github.com/palash-goel)). +* Backported in [#47259](https://github.com/ClickHouse/ClickHouse/issues/47259): Fix concrete columns PREWHERE support. [#47154](https://github.com/ClickHouse/ClickHouse/pull/47154) ([Azat Khuzhin](https://github.com/azat)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Use /etc/default/clickhouse in systemd too [#47003](https://github.com/ClickHouse/ClickHouse/pull/47003) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* do flushUntrackedMemory when context switches [#47102](https://github.com/ClickHouse/ClickHouse/pull/47102) ([Sema Checherinda](https://github.com/CheSema)). +* Update typing for a new PyGithub version [#47123](https://github.com/ClickHouse/ClickHouse/pull/47123) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). diff --git a/docs/changelogs/v23.2.4.12-stable.md b/docs/changelogs/v23.2.4.12-stable.md new file mode 100644 index 00000000000..1542e3257ce --- /dev/null +++ b/docs/changelogs/v23.2.4.12-stable.md @@ -0,0 +1,19 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.2.4.12-stable (8fe866cb035) FIXME as compared to v23.2.3.17-stable (dec18bf7281) + +#### Bug Fix (user-visible misbehavior in official stable release) + +* Backported in [#47277](https://github.com/ClickHouse/ClickHouse/issues/47277): Fix IPv4/IPv6 serialization/deserialization in binary formats that was broken in https://github.com/ClickHouse/ClickHouse/pull/43221. Closes [#46522](https://github.com/ClickHouse/ClickHouse/issues/46522). 
[#46616](https://github.com/ClickHouse/ClickHouse/pull/46616) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#47212](https://github.com/ClickHouse/ClickHouse/issues/47212): `INSERT` queries through native TCP protocol and HTTP protocol were not canceled correctly in some cases. It could lead to a partially applied query if a client canceled the query, or if a client died or, in rare cases, on network errors. As a result, it could lead to not working deduplication. Fixes [#27667](https://github.com/ClickHouse/ClickHouse/issues/27667) and [#45377](https://github.com/ClickHouse/ClickHouse/issues/45377). [#46681](https://github.com/ClickHouse/ClickHouse/pull/46681) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#47363](https://github.com/ClickHouse/ClickHouse/issues/47363): Fix possible deadlock on distributed query cancellation. [#47161](https://github.com/ClickHouse/ClickHouse/pull/47161) ([Kruglov Pavel](https://github.com/Avogar)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Follow-up to [#46681](https://github.com/ClickHouse/ClickHouse/issues/46681) [#47284](https://github.com/ClickHouse/ClickHouse/pull/47284) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add a manual trigger for release workflow [#47302](https://github.com/ClickHouse/ClickHouse/pull/47302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). diff --git a/docs/changelogs/v23.2.5.46-stable.md b/docs/changelogs/v23.2.5.46-stable.md new file mode 100644 index 00000000000..b3ce585848b --- /dev/null +++ b/docs/changelogs/v23.2.5.46-stable.md @@ -0,0 +1,40 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.2.5.46-stable (b50faecbb12) FIXME as compared to v23.2.4.12-stable (8fe866cb035) + +#### Improvement +* Backported in [#48164](https://github.com/ClickHouse/ClickHouse/issues/48164): Fixed `UNKNOWN_TABLE` exception when attaching to a materialized view that has dependent tables that are not available. This might be useful when trying to restore state from a backup. [#47975](https://github.com/ClickHouse/ClickHouse/pull/47975) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). + +#### Build/Testing/Packaging Improvement +* Backported in [#48216](https://github.com/ClickHouse/ClickHouse/issues/48216): Use sccache as a replacement for ccache and using S3 as cache backend. [#46240](https://github.com/ClickHouse/ClickHouse/pull/46240) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#48256](https://github.com/ClickHouse/ClickHouse/issues/48256): The `clickhouse/clickhouse-keeper` image used to be pushed only with tags `-alpine`, e.g. `latest-alpine`. As it was suggested in https://github.com/ClickHouse/examples/pull/2, now it will be pushed as suffixless too. [#48236](https://github.com/ClickHouse/ClickHouse/pull/48236) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix changing an expired role [#46772](https://github.com/ClickHouse/ClickHouse/pull/46772) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix bug in zero-copy replication disk choice during fetch [#47010](https://github.com/ClickHouse/ClickHouse/pull/47010) ([alesapin](https://github.com/alesapin)). +* Fix NOT_IMPLEMENTED error with CROSS JOIN and algorithm = auto [#47068](https://github.com/ClickHouse/ClickHouse/pull/47068) ([Vladimir C](https://github.com/vdimir)). +* Disable logical expression optimizer for expression with aliases. 
[#47451](https://github.com/ClickHouse/ClickHouse/pull/47451) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix query parameters [#47488](https://github.com/ClickHouse/ClickHouse/pull/47488) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Parameterized view bug fix 47287 47247 [#47495](https://github.com/ClickHouse/ClickHouse/pull/47495) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Proper fix for bug in parquet, revert reverted [#45878](https://github.com/ClickHouse/ClickHouse/issues/45878) [#47538](https://github.com/ClickHouse/ClickHouse/pull/47538) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix wait for zero copy lock during move [#47631](https://github.com/ClickHouse/ClickHouse/pull/47631) ([alesapin](https://github.com/alesapin)). +* Hotfix for too verbose warnings in HTTP [#47903](https://github.com/ClickHouse/ClickHouse/pull/47903) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* fix: keeper systemd service file include invalid inline comment [#47105](https://github.com/ClickHouse/ClickHouse/pull/47105) ([SuperDJY](https://github.com/cmsxbc)). +* Better error messages in ReplicatedMergeTreeAttachThread [#47454](https://github.com/ClickHouse/ClickHouse/pull/47454) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `00933_test_fix_extra_seek_on_compressed_cache` in releases. [#47490](https://github.com/ClickHouse/ClickHouse/pull/47490) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix startup on older systemd versions [#47689](https://github.com/ClickHouse/ClickHouse/pull/47689) ([Thomas Casteleyn](https://github.com/Hipska)). +* Add a fuse for backport branches w/o a created PR [#47760](https://github.com/ClickHouse/ClickHouse/pull/47760) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Only valid Reviews.STATES overwrite existing reviews [#47789](https://github.com/ClickHouse/ClickHouse/pull/47789) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Place short return before big block, improve logging [#47822](https://github.com/ClickHouse/ClickHouse/pull/47822) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Artifacts s3 prefix [#47945](https://github.com/ClickHouse/ClickHouse/pull/47945) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix tsan error lock-order-inversion [#47953](https://github.com/ClickHouse/ClickHouse/pull/47953) ([Kruglov Pavel](https://github.com/Avogar)). + diff --git a/docs/changelogs/v23.2.6.34-stable.md b/docs/changelogs/v23.2.6.34-stable.md new file mode 100644 index 00000000000..c6f73da843d --- /dev/null +++ b/docs/changelogs/v23.2.6.34-stable.md @@ -0,0 +1,29 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.2.6.34-stable (570190045b0) FIXME as compared to v23.2.5.46-stable (b50faecbb12) + +#### Improvement +* Backported in [#48709](https://github.com/ClickHouse/ClickHouse/issues/48709): Formatter '%M' in function formatDateTime() now prints the month name instead of the minutes. This makes the behavior consistent with MySQL. The previous behavior can be restored using setting "formatdatetime_parsedatetime_m_is_month_name = 0". [#47246](https://github.com/ClickHouse/ClickHouse/pull/47246) ([Robert Schulze](https://github.com/rschu1ze)). + +#### Build/Testing/Packaging Improvement +* Backported in [#48587](https://github.com/ClickHouse/ClickHouse/issues/48587): Update time zones. 
The following were updated: Africa/Cairo, Africa/Casablanca, Africa/El_Aaiun, America/Bogota, America/Cambridge_Bay, America/Ciudad_Juarez, America/Godthab, America/Inuvik, America/Iqaluit, America/Nuuk, America/Ojinaga, America/Pangnirtung, America/Rankin_Inlet, America/Resolute, America/Whitehorse, America/Yellowknife, Asia/Gaza, Asia/Hebron, Asia/Kuala_Lumpur, Asia/Singapore, Canada/Yukon, Egypt, Europe/Kirov, Europe/Volgograd, Singapore. [#48572](https://github.com/ClickHouse/ClickHouse/pull/48572) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#48959](https://github.com/ClickHouse/ClickHouse/issues/48959): After the recent update, the `dockerd` requires `--tlsverify=false` together with the http port explicitly. [#48924](https://github.com/ClickHouse/ClickHouse/pull/48924) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix race in grace hash join with limit [#47153](https://github.com/ClickHouse/ClickHouse/pull/47153) ([Vladimir C](https://github.com/vdimir)). +* Fix explain graph with projection [#47473](https://github.com/ClickHouse/ClickHouse/pull/47473) ([flynn](https://github.com/ucasfl)). +* Fix crash in polygonsSymDifferenceCartesian [#47702](https://github.com/ClickHouse/ClickHouse/pull/47702) ([pufit](https://github.com/pufit)). +* Remove a feature [#48195](https://github.com/ClickHouse/ClickHouse/pull/48195) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix cpu usage in rabbitmq (was worsened in 23.2 after [#44404](https://github.com/ClickHouse/ClickHouse/issues/44404)) [#48311](https://github.com/ClickHouse/ClickHouse/pull/48311) ([Kseniia Sumarokova](https://github.com/kssenii)). +* ClickHouse startup error when loading a distributed table that depends on a dictionary [#48419](https://github.com/ClickHouse/ClickHouse/pull/48419) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Fix possible segfault in cache [#48469](https://github.com/ClickHouse/ClickHouse/pull/48469) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix nested map for keys of IP and UUID types [#48556](https://github.com/ClickHouse/ClickHouse/pull/48556) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix bug in Keeper when a node is not created with scheme `auth` in ACL sometimes. [#48595](https://github.com/ClickHouse/ClickHouse/pull/48595) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Fix IPv4 comparable with UInt [#48611](https://github.com/ClickHouse/ClickHouse/pull/48611) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). + diff --git a/docs/changelogs/v23.3.1.2823-lts.md b/docs/changelogs/v23.3.1.2823-lts.md new file mode 100644 index 00000000000..0c9be3601da --- /dev/null +++ b/docs/changelogs/v23.3.1.2823-lts.md @@ -0,0 +1,545 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.3.1.2823-lts (46e85357ce2) FIXME as compared to v23.2.1.2537-stable (52bf836e03a) + +#### Backward Incompatible Change +* Relax symbols that are allowed in URL authority in *domain*RFC()/netloc(). [#46841](https://github.com/ClickHouse/ClickHouse/pull/46841) ([Azat Khuzhin](https://github.com/azat)). +* Prohibit create tables based on KafkaEngine with DEFAULT/EPHEMERAL/ALIAS/MATERIALIZED statements for columns. [#47138](https://github.com/ClickHouse/ClickHouse/pull/47138) ([Aleksandr Musorin](https://github.com/AVMusorin)). +* An "asynchronous connection drain" feature is removed. 
Related settings and metrics are removed as well. It was an internal feature, so the removal should not affect users who had never heard about that feature. [#47486](https://github.com/ClickHouse/ClickHouse/pull/47486) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Support 256-bit Decimal data type (more than 38 digits) in `arraySum`/`Min`/`Max`/`Avg`/`Product`, `arrayCumSum`/`CumSumNonNegative`, `arrayDifference`, array construction, IN operator, query parameters, `groupArrayMovingSum`, statistical functions, `min`/`max`/`any`/`argMin`/`argMax`, PostgreSQL wire protocol, MySQL table engine and function, `sumMap`, `mapAdd`, `mapSubtract`, `arrayIntersect`. Add support for big integers in `arrayIntersect`. Statistical aggregate functions involving moments (such as `corr` or various `TTest`s) will use `Float64` as their internal representation (they were using `Decimal128` before this change, but it was pointless), and these functions can return `nan` instead of `inf` in case of infinite variance. Some functions were allowed on `Decimal256` data types but returned `Decimal128` in previous versions - now it is fixed. This closes [#47569](https://github.com/ClickHouse/ClickHouse/issues/47569). This closes [#44864](https://github.com/ClickHouse/ClickHouse/issues/44864). This closes [#28335](https://github.com/ClickHouse/ClickHouse/issues/28335). [#47594](https://github.com/ClickHouse/ClickHouse/pull/47594) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Make backup_threads/restore_threads server settings. [#47881](https://github.com/ClickHouse/ClickHouse/pull/47881) ([Azat Khuzhin](https://github.com/azat)). +* Fix the isIPv6String function which could have outputted a false positive result in the case of an incorrect IPv6 address. For example `1234::1234:` was considered a valid IPv6 address. [#47895](https://github.com/ClickHouse/ClickHouse/pull/47895) ([Nikolay Degterinsky](https://github.com/evillique)). + +#### New Feature +* Add new mode for splitting the work on replicas using settings `parallel_replicas_custom_key` and `parallel_replicas_custom_key_filter_type`. If the cluster consists of a single shard with multiple replicas, up to `max_parallel_replicas` will be randomly picked and turned into shards. For each shard, a corresponding filter is added to the query on the initiator before being sent to the shard. If the cluster consists of multiple shards, it will behave the same as `sample_key` but with the possibility to define an arbitrary key. [#45108](https://github.com/ClickHouse/ClickHouse/pull/45108) ([Antonio Andelic](https://github.com/antonio2368)). +* Added query setting `partial_result_on_first_cancel` allowing the canceled query (e.g. due to Ctrl-C) to return a partial result. [#45689](https://github.com/ClickHouse/ClickHouse/pull/45689) ([Alexey Perevyshin](https://github.com/alexX512)). +* Added support of arbitrary tables engines for temporary tables except for Replicated and KeeperMap engines. Partially close [#31497](https://github.com/ClickHouse/ClickHouse/issues/31497). [#46071](https://github.com/ClickHouse/ClickHouse/pull/46071) ([Roman Vasin](https://github.com/rvasin)). +* Add replication of user-defined SQL functions using ZooKeeper. [#46085](https://github.com/ClickHouse/ClickHouse/pull/46085) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Implement `system.server_settings` (similar to `system.settings`), which will contain server configurations. 
[#46550](https://github.com/ClickHouse/ClickHouse/pull/46550) ([pufit](https://github.com/pufit)). +* Introduce a function `WIDTH_BUCKET`. [#42974](https://github.com/ClickHouse/ClickHouse/issues/42974). [#46790](https://github.com/ClickHouse/ClickHouse/pull/46790) ([avoiderboi](https://github.com/avoiderboi)). +* Add new functions parseDateTime/parseDateTimeInJodaSyntax that parse a string into a DateTime according to a specified format string: parseDateTime uses MySQL syntax, parseDateTimeInJodaSyntax uses Joda syntax (see the sketches after this list). [#46815](https://github.com/ClickHouse/ClickHouse/pull/46815) ([李扬](https://github.com/taiyang-li)). +* Use `dummy UInt8` for default structure of table function `null`. Closes [#46930](https://github.com/ClickHouse/ClickHouse/issues/46930). [#47006](https://github.com/ClickHouse/ClickHouse/pull/47006) ([flynn](https://github.com/ucasfl)). +* Support for the `Dec 15, 2021` date format in the parseDateTimeBestEffort function. Closes [#46816](https://github.com/ClickHouse/ClickHouse/issues/46816). [#47071](https://github.com/ClickHouse/ClickHouse/pull/47071) ([chen](https://github.com/xiedeyantu)). +* Add function ULIDStringToDateTime(). Closes [#46945](https://github.com/ClickHouse/ClickHouse/issues/46945). [#47087](https://github.com/ClickHouse/ClickHouse/pull/47087) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add settings `http_wait_end_of_query` and `http_response_buffer_size` that correspond to URL params `wait_end_of_query` and `buffer_size` for HTTP interface. [#47108](https://github.com/ClickHouse/ClickHouse/pull/47108) ([Vladimir C](https://github.com/vdimir)). +* Support for `UNDROP TABLE` query. Closes [#46811](https://github.com/ClickHouse/ClickHouse/issues/46811). [#47241](https://github.com/ClickHouse/ClickHouse/pull/47241) ([chen](https://github.com/xiedeyantu)). +* Add `system.marked_dropped_tables` table that shows tables that were dropped from `Atomic` databases but were not completely removed yet. [#47364](https://github.com/ClickHouse/ClickHouse/pull/47364) ([chen](https://github.com/xiedeyantu)). +* Add `INSTR` as alias of `positionCaseInsensitive` for MySQL compatibility. Closes [#47529](https://github.com/ClickHouse/ClickHouse/issues/47529). [#47535](https://github.com/ClickHouse/ClickHouse/pull/47535) ([flynn](https://github.com/ucasfl)). +* Added `toDecimalString` function allowing to convert numbers to string with fixed precision. [#47838](https://github.com/ClickHouse/ClickHouse/pull/47838) ([Andrey Zvonov](https://github.com/zvonand)). +* Added operator "REGEXP" (similar to operators "LIKE", "IN", "MOD" etc.) for better compatibility with MySQL. [#47869](https://github.com/ClickHouse/ClickHouse/pull/47869) ([Robert Schulze](https://github.com/rschu1ze)). +* Allow executing reading pipeline for DIRECT dictionary with CLICKHOUSE source in multiple threads. To enable set `dictionary_use_async_executor=1` in `SETTINGS` section for source in `CREATE DICTIONARY` statement. [#47986](https://github.com/ClickHouse/ClickHouse/pull/47986) ([Vladimir C](https://github.com/vdimir)). +* Add merge tree setting `max_number_of_mutations_for_replica`. It limits the number of part mutations per replica to the specified amount. Zero means no limit on the number of mutations per replica (the execution can still be constrained by other settings). [#48047](https://github.com/ClickHouse/ClickHouse/pull/48047) ([Vladimir C](https://github.com/vdimir)).
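A few usage sketches for the new functions and settings above (my own illustrations, not taken from the linked PRs; exact output may differ):

```sql
-- parseDateTime: MySQL-style format strings (%i = minutes)
SELECT parseDateTime('2023-03-04 12:22:33', '%Y-%m-%d %H:%i:%s');

-- parseDateTimeInJodaSyntax: Joda-style format strings
SELECT parseDateTimeInJodaSyntax('2023-03-04 12:22:33', 'yyyy-MM-dd HH:mm:ss');

-- WIDTH_BUCKET(operand, low, high, count): 10.15 falls into the second
-- of ten equal-width buckets over [0, 100)
SELECT WIDTH_BUCKET(10.15, 0, 100, 10);  -- expected: 2

-- REGEXP operator, roughly equivalent to match(haystack, pattern)
SELECT 'clickhouse' REGEXP 'click.+';  -- expected: 1

-- toDecimalString: format a number with a fixed number of fractional digits
SELECT toDecimalString(3.14159, 2);  -- expected: '3.14'

-- parallel_replicas_custom_key: split work among replicas by an arbitrary key
SET max_parallel_replicas = 3,
    parallel_replicas_custom_key = 'user_id',
    parallel_replicas_custom_key_filter_type = 'default';
```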
+ +#### Performance Improvement +* Optimize aggregation performance for one nullable key. [#45772](https://github.com/ClickHouse/ClickHouse/pull/45772) ([LiuNeng](https://github.com/liuneng1994)). +* Implemented lowercase tokenbf_v1 index utilization for hasTokenOrNull, hasTokenCaseInsensitive and hasTokenCaseInsensitiveOrNull. [#46252](https://github.com/ClickHouse/ClickHouse/pull/46252) ([ltrk2](https://github.com/ltrk2)). +* Optimize the generic SIMD StringSearcher by searching first two chars. [#46289](https://github.com/ClickHouse/ClickHouse/pull/46289) ([Jiebin Sun](https://github.com/jiebinn)). +* `system.detached_parts` could be significantly large. Added several sources that respect the block size limitation; in each block, an IO thread pool is used to calculate part sizes, i.e. to make syscalls in parallel. [#46624](https://github.com/ClickHouse/ClickHouse/pull/46624) ([Sema Checherinda](https://github.com/CheSema)). +* Increase the default value of `max_replicated_merges_in_queue` for ReplicatedMergeTree tables from 16 to 1000. It allows faster background merge operation on clusters with a very large number of replicas, such as clusters with shared storage in ClickHouse Cloud. [#47050](https://github.com/ClickHouse/ClickHouse/pull/47050) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backups for large numbers of files were unbelievably slow in previous versions. [#47251](https://github.com/ClickHouse/ClickHouse/pull/47251) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support filter push down to left table for JOIN with StorageJoin, StorageDictionary, StorageEmbeddedRocksDB. [#47280](https://github.com/ClickHouse/ClickHouse/pull/47280) ([Maksim Kita](https://github.com/kitaisreal)). +* Marks in memory are now compressed, using 3-6x less memory. [#47290](https://github.com/ClickHouse/ClickHouse/pull/47290) ([Michael Kolupaev](https://github.com/al13n321)). +* Updated copier to use GROUP BY instead of DISTINCT to get the list of partitions. For large tables this reduced the select time from over 500s to under 1s. [#47386](https://github.com/ClickHouse/ClickHouse/pull/47386) ([Clayton McClure](https://github.com/cmcclure-twilio)). +* Address https://github.com/clickhouse/clickhouse/issues/46453. Bisect marked https://github.com/clickhouse/clickhouse/pull/35525 as the bad change; this PR reverses the changes from that PR. [#47544](https://github.com/ClickHouse/ClickHouse/pull/47544) ([Ongkong](https://github.com/ongkong)). +* Fixed excessive reading in queries with `FINAL`. [#47801](https://github.com/ClickHouse/ClickHouse/pull/47801) ([Nikita Taranov](https://github.com/nickitat)). +* Setting `max_final_threads` would be set to number of cores at server startup (by the same algorithm as we use for `max_threads`). This improves concurrency of `final` execution on servers with high number of CPUs. [#47915](https://github.com/ClickHouse/ClickHouse/pull/47915) ([Nikita Taranov](https://github.com/nickitat)). +* Avoid breaking batches on read requests to improve performance. [#47978](https://github.com/ClickHouse/ClickHouse/pull/47978) ([Antonio Andelic](https://github.com/antonio2368)). + +#### Improvement +* Add map-related functions: mapFromArrays, which allows us to create a map from a pair of arrays (sketch at the end of this section). [#31125](https://github.com/ClickHouse/ClickHouse/pull/31125) ([李扬](https://github.com/taiyang-li)). +* Rewrite distributed sends to avoid using filesystem as a queue, use in-memory queue instead. [#45491](https://github.com/ClickHouse/ClickHouse/pull/45491) ([Azat Khuzhin](https://github.com/azat)). +* Allow separate grants for named collections (e.g.
to be able to give `SHOW/CREATE/ALTER/DROP named collection` access only to certain collections, instead of all at once). Closes [#40894](https://github.com/ClickHouse/ClickHouse/issues/40894). Add new access type `NAMED_COLLECTION_CONTROL` which is not given to default user unless explicitly added to user config (is required to be able to do `GRANT ALL`), also `show_named_collections` is no longer obligatory to be manually specified for default user to be able to have full access rights as was in 23.2. [#46241](https://github.com/ClickHouse/ClickHouse/pull/46241) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Now `X-ClickHouse-Query-Id` and `X-ClickHouse-Timezone` headers are added to response in all queries via http protocol. Previously it was done only for `SELECT` queries. [#46364](https://github.com/ClickHouse/ClickHouse/pull/46364) ([Anton Popov](https://github.com/CurtizJ)). +* Support for connection to a replica set via a URI with a host:port enum and support for the readPreference option in MongoDB dictionaries. Example URI: mongodb://db0.example.com:27017,db1.example.com:27017,db2.example.com:27017/?replicaSet=myRepl&readPreference=primary. [#46524](https://github.com/ClickHouse/ClickHouse/pull/46524) ([artem-yadr](https://github.com/artem-yadr)). +* Re-implement projection analysis on top of query plan. Added setting `query_plan_optimize_projection=1` to switch between old and new version. Fixes [#44963](https://github.com/ClickHouse/ClickHouse/issues/44963). [#46537](https://github.com/ClickHouse/ClickHouse/pull/46537) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Use parquet format v2 instead of v1 in output format by default. Add setting `output_format_parquet_version` to control parquet version, possible values `v1_0`, `v2_4`, `v2_6`, `v2_latest` (default). [#46617](https://github.com/ClickHouse/ClickHouse/pull/46617) ([Kruglov Pavel](https://github.com/Avogar)). +* Not for changelog - part of [#42648](https://github.com/ClickHouse/ClickHouse/issues/42648). [#46632](https://github.com/ClickHouse/ClickHouse/pull/46632) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Allow to ignore errors while pushing to MATERIALIZED VIEW (add new setting `materialized_views_ignore_errors`, by default to `false`, but it is set to `true` for flushing logs to `system.*_log` tables unconditionally). [#46658](https://github.com/ClickHouse/ClickHouse/pull/46658) ([Azat Khuzhin](https://github.com/azat)). +* Enable input_format_json_ignore_unknown_keys_in_named_tuple by default. [#46742](https://github.com/ClickHouse/ClickHouse/pull/46742) ([Kruglov Pavel](https://github.com/Avogar)). +* It is now possible, using the new configuration syntax, to configure Kafka topics with periods in their names. [#46752](https://github.com/ClickHouse/ClickHouse/pull/46752) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix heuristics that check hyperscan patterns for problematic repeats. [#46819](https://github.com/ClickHouse/ClickHouse/pull/46819) ([Robert Schulze](https://github.com/rschu1ze)). +* Don't report ZK node exists to system.errors when a block was created concurrently by a different replica. [#46820](https://github.com/ClickHouse/ClickHouse/pull/46820) ([Raúl Marín](https://github.com/Algunenano)). +* Allow PREWHERE for Merge with different DEFAULT expression for column. [#46831](https://github.com/ClickHouse/ClickHouse/pull/46831) ([Azat Khuzhin](https://github.com/azat)). +* Increase the limit for opened files in `clickhouse-local`.
It will be able to read from `web` tables on servers with a huge number of CPU cores. Do not back off reading from the URL table engine in case of too many opened files. This closes [#46852](https://github.com/ClickHouse/ClickHouse/issues/46852). [#46853](https://github.com/ClickHouse/ClickHouse/pull/46853) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Exceptions thrown when numbers cannot be parsed now have an easier-to-read exception message. [#46917](https://github.com/ClickHouse/ClickHouse/pull/46917) ([Robert Schulze](https://github.com/rschu1ze)). +* `system.backups` is now updated after every processed task. [#46989](https://github.com/ClickHouse/ClickHouse/pull/46989) ([Aleksandr Musorin](https://github.com/AVMusorin)). +* Allow type conversion in the Native input format. Add setting `input_format_native_allow_types_conversion` that controls it (enabled by default). [#46990](https://github.com/ClickHouse/ClickHouse/pull/46990) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow IPv4 in the `range` function to generate IP ranges. [#46995](https://github.com/ClickHouse/ClickHouse/pull/46995) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Role changes were sometimes not propagated before [#46772](https://github.com/ClickHouse/ClickHouse/pull/46772); this PR just adds tests. [#47002](https://github.com/ClickHouse/ClickHouse/pull/47002) ([Ilya Golshtein](https://github.com/ilejn)). +* Improve the exception message when it's impossible to move a part from one volume/disk to another. [#47032](https://github.com/ClickHouse/ClickHouse/pull/47032) ([alesapin](https://github.com/alesapin)). +* Support `Bool` type in `JSONType` function. Previously `Null` type was mistakenly returned for bool values. [#47046](https://github.com/ClickHouse/ClickHouse/pull/47046) ([Anton Popov](https://github.com/CurtizJ)). +* Use the `_request_body` parameter to configure predefined HTTP queries. [#47086](https://github.com/ClickHouse/ClickHouse/pull/47086) ([Constantine Peresypkin](https://github.com/pkit)). +* Remove logging of custom disk structure. [#47103](https://github.com/ClickHouse/ClickHouse/pull/47103) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow nested custom disks. Previously custom disks supported only a flat disk structure. [#47106](https://github.com/ClickHouse/ClickHouse/pull/47106) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Automatic indentation in the built-in UI SQL editor when Enter is pressed. [#47113](https://github.com/ClickHouse/ClickHouse/pull/47113) ([Alexey Korepanov](https://github.com/alexkorep)). +* Allow controlling compression in Parquet/ORC/Arrow output formats; support more compression methods for input formats. This closes [#13541](https://github.com/ClickHouse/ClickHouse/issues/13541). [#47114](https://github.com/ClickHouse/ClickHouse/pull/47114) ([Kruglov Pavel](https://github.com/Avogar)). +* Self-extraction with 'sudo' will attempt to set the uid and gid of extracted files to the running user. [#47116](https://github.com/ClickHouse/ClickHouse/pull/47116) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Previously, the second argument of the `repeat` function had to be an unsigned integer type, so it could not accept a negative value like -1. This differs from the equivalent Spark function, so `repeat` now accepts signed integers to match Spark's behavior; see the sketch below. [#47134](https://github.com/ClickHouse/ClickHouse/pull/47134) ([KevinyhZou](https://github.com/KevinyhZou)). +* Remove `::__1` part from stacktraces.
Display `std::basic_string 1 trailing % [#46869](https://github.com/ClickHouse/ClickHouse/pull/46869) ([Robert Schulze](https://github.com/rschu1ze)). +* Add new metrics to system.asynchronous_metrics [#46886](https://github.com/ClickHouse/ClickHouse/pull/46886) ([Azat Khuzhin](https://github.com/azat)). +* Fix flaky `test_concurrent_queries_restriction_by_query_kind` [#46887](https://github.com/ClickHouse/ClickHouse/pull/46887) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test test_async_backups_to_same_destination. [#46888](https://github.com/ClickHouse/ClickHouse/pull/46888) ([Vitaly Baranov](https://github.com/vitlibar)). +* Make ASTSelectQuery::formatImpl() more robust [#46889](https://github.com/ClickHouse/ClickHouse/pull/46889) ([Robert Schulze](https://github.com/rschu1ze)). +* tests: fix 02116_interactive_hello for "official build" [#46911](https://github.com/ClickHouse/ClickHouse/pull/46911) ([Azat Khuzhin](https://github.com/azat)). +* Fix some expect tests leftovers and enable them in fasttest [#46915](https://github.com/ClickHouse/ClickHouse/pull/46915) ([Azat Khuzhin](https://github.com/azat)). +* Increase ddl timeout for DROP statement in backup restore tests [#46920](https://github.com/ClickHouse/ClickHouse/pull/46920) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* A better alternative to [#46344](https://github.com/ClickHouse/ClickHouse/issues/46344) [#46921](https://github.com/ClickHouse/ClickHouse/pull/46921) ([Robert Schulze](https://github.com/rschu1ze)). +* Code review from @tavplubix [#46922](https://github.com/ClickHouse/ClickHouse/pull/46922) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Planner: trivial count optimization [#46923](https://github.com/ClickHouse/ClickHouse/pull/46923) ([Igor Nikonov](https://github.com/devcrafter)). +* Typo: SIZES_OF_ARRAYS_DOESNT_MATCH --> SIZES_OF_ARRAYS_DONT_MATCH [#46940](https://github.com/ClickHouse/ClickHouse/pull/46940) ([Robert Schulze](https://github.com/rschu1ze)). +* Another fix for clone() for ASTColumnMatchers [#46947](https://github.com/ClickHouse/ClickHouse/pull/46947) ([Nikolay Degterinsky](https://github.com/evillique)). +* Un-inline likePatternToRegexp() [#46950](https://github.com/ClickHouse/ClickHouse/pull/46950) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix missing format_description [#46959](https://github.com/ClickHouse/ClickHouse/pull/46959) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* ARM: Activate LDAPR with -march flag instead via -XClang [#46960](https://github.com/ClickHouse/ClickHouse/pull/46960) ([Robert Schulze](https://github.com/rschu1ze)). +* Preset description on the tweak reset [#46963](https://github.com/ClickHouse/ClickHouse/pull/46963) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update version_date.tsv and changelogs after v22.3.19.6-lts [#46964](https://github.com/ClickHouse/ClickHouse/pull/46964) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v22.8.14.53-lts [#46969](https://github.com/ClickHouse/ClickHouse/pull/46969) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Better exception messages when schema_inference_hints is ill-formatted [#46971](https://github.com/ClickHouse/ClickHouse/pull/46971) ([Kruglov Pavel](https://github.com/Avogar)). +* Decrease log level in "disks" [#46976](https://github.com/ClickHouse/ClickHouse/pull/46976) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
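
Picking up the `repeat` entry above, a minimal SQL sketch of the new behavior. The exact result for a negative count is an assumption here (Spark returns an empty string for non-positive counts, and the entry only says the function now matches Spark):

```sql
SELECT repeat('ab', 3);   -- 'ababab'
SELECT repeat('ab', -1);  -- assumed to return '' as in Spark; previously the argument had to be unsigned
```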
+* Change the cherry-pick PR body [#46977](https://github.com/ClickHouse/ClickHouse/pull/46977) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Rename recent stateless tests to fix order [#46991](https://github.com/ClickHouse/ClickHouse/pull/46991) ([Kruglov Pavel](https://github.com/Avogar)). +* Pass headers from StorageURL to WriteBufferFromHTTP [#46996](https://github.com/ClickHouse/ClickHouse/pull/46996) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Change level log in executeQuery [#46997](https://github.com/ClickHouse/ClickHouse/pull/46997) ([Andrey Bystrov](https://github.com/AndyBys)). +* Add thevar1able to trusted contributors [#46998](https://github.com/ClickHouse/ClickHouse/pull/46998) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Use /etc/default/clickhouse in systemd too [#47003](https://github.com/ClickHouse/ClickHouse/pull/47003) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix tmp_path_template in HTTPHandler::processQuery [#47007](https://github.com/ClickHouse/ClickHouse/pull/47007) ([Vladimir C](https://github.com/vdimir)). +* Fix flaky azure test [#47011](https://github.com/ClickHouse/ClickHouse/pull/47011) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Temporary enable force_sync for keeper in CI [#47024](https://github.com/ClickHouse/ClickHouse/pull/47024) ([alesapin](https://github.com/alesapin)). +* ActionsDAG: do not change result of and() during optimization - part 2 [#47028](https://github.com/ClickHouse/ClickHouse/pull/47028) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Add upgrade check to stateful dependent field [#47031](https://github.com/ClickHouse/ClickHouse/pull/47031) ([Kruglov Pavel](https://github.com/Avogar)). +* Disable path check in SQLite storage for clickhouse-local [#47052](https://github.com/ClickHouse/ClickHouse/pull/47052) ([Nikolay Degterinsky](https://github.com/evillique)). +* Terminate long-running offline non-busy runners in EC2 [#47064](https://github.com/ClickHouse/ClickHouse/pull/47064) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix Keeper with `force_sync = false` [#47065](https://github.com/ClickHouse/ClickHouse/pull/47065) ([Antonio Andelic](https://github.com/antonio2368)). +* Update version_date.tsv and changelogs after v23.2.2.20-stable [#47069](https://github.com/ClickHouse/ClickHouse/pull/47069) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.1.4.58-stable [#47070](https://github.com/ClickHouse/ClickHouse/pull/47070) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v22.12.4.76-stable [#47074](https://github.com/ClickHouse/ClickHouse/pull/47074) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix empty result when selection from only one side of join in analyzer [#47093](https://github.com/ClickHouse/ClickHouse/pull/47093) ([Vladimir C](https://github.com/vdimir)). +* Suppress "Cannot flush" for Distributed tables in upgrade check [#47095](https://github.com/ClickHouse/ClickHouse/pull/47095) ([Azat Khuzhin](https://github.com/azat)). +* Make stacktraces in hung check more readable [#47096](https://github.com/ClickHouse/ClickHouse/pull/47096) ([Alexander Tokmakov](https://github.com/tavplubix)). +* release lambda resources before detaching thread group [#47098](https://github.com/ClickHouse/ClickHouse/pull/47098) ([Sema Checherinda](https://github.com/CheSema)). 
+* Analyzer Planner fixes before enable by default [#47101](https://github.com/ClickHouse/ClickHouse/pull/47101) ([Maksim Kita](https://github.com/kitaisreal)). +* do flushUntrackedMemory when context switches [#47102](https://github.com/ClickHouse/ClickHouse/pull/47102) ([Sema Checherinda](https://github.com/CheSema)). +* fix: keeper systemd service file include invalid inline comment [#47105](https://github.com/ClickHouse/ClickHouse/pull/47105) ([SuperDJY](https://github.com/cmsxbc)). +* Add code for autoscaling lambda [#47107](https://github.com/ClickHouse/ClickHouse/pull/47107) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Enable lightweight delete support by default [#47109](https://github.com/ClickHouse/ClickHouse/pull/47109) ([Alexander Gololobov](https://github.com/davenger)). +* Update typing for a new PyGithub version [#47123](https://github.com/ClickHouse/ClickHouse/pull/47123) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Shorten some code with CTAD [#47139](https://github.com/ClickHouse/ClickHouse/pull/47139) ([Robert Schulze](https://github.com/rschu1ze)). +* Make 01710_projections more stable. [#47145](https://github.com/ClickHouse/ClickHouse/pull/47145) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* fix_JSON_searchField [#47147](https://github.com/ClickHouse/ClickHouse/pull/47147) ([Aleksei Tikhomirov](https://github.com/aletik256)). +* Mark 01771_bloom_filter_not_has as no-parallel and long [#47148](https://github.com/ClickHouse/ClickHouse/pull/47148) ([Azat Khuzhin](https://github.com/azat)). +* Use unique names and paths in `test_replicated_database` [#47152](https://github.com/ClickHouse/ClickHouse/pull/47152) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add stupid retries in clickhouse-test health check. [#47158](https://github.com/ClickHouse/ClickHouse/pull/47158) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* 02346_full_text_search.sql: Add result separators to simplify analysis [#47166](https://github.com/ClickHouse/ClickHouse/pull/47166) ([Robert Schulze](https://github.com/rschu1ze)). +* More correct handling of fatal errors [#47175](https://github.com/ClickHouse/ClickHouse/pull/47175) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update read in StorageMemory [#47180](https://github.com/ClickHouse/ClickHouse/pull/47180) ([Konstantin Morozov](https://github.com/k-morozov)). +* Doc update for mapFromArrays() [#47183](https://github.com/ClickHouse/ClickHouse/pull/47183) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix failure context for Upgrade check [#47191](https://github.com/ClickHouse/ClickHouse/pull/47191) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add support for different expected errors [#47196](https://github.com/ClickHouse/ClickHouse/pull/47196) ([Raúl Marín](https://github.com/Algunenano)). +* Fix ip coding on s390x [#47208](https://github.com/ClickHouse/ClickHouse/pull/47208) ([Suzy Wang](https://github.com/SuzyWangIBMer)). +* Add real client (initiator server) address into the logs for interserver mode [#47214](https://github.com/ClickHouse/ClickHouse/pull/47214) ([Azat Khuzhin](https://github.com/azat)). +* Fix 01019_alter_materialized_view_consistent [#47215](https://github.com/ClickHouse/ClickHouse/pull/47215) ([Vladimir C](https://github.com/vdimir)). +* Fix RewriteArrayExistsToHasPass [#47225](https://github.com/ClickHouse/ClickHouse/pull/47225) ([Maksim Kita](https://github.com/kitaisreal)). 
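
As a hedged illustration of what `RewriteArrayExistsToHasPass` presumably covers: rewriting an `arrayExists` call with an equality lambda into the cheaper `has` call. The equivalence below is plain ClickHouse SQL; treating it as this pass's exact rewrite is an assumption:

```sql
-- Both expressions return 1; the pass is assumed to rewrite the former into the latter.
SELECT arrayExists(x -> x = 5, [1, 5, 7]) AS via_lambda,
       has([1, 5, 7], 5) AS via_has;
```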
+* Release shared ptrs after finishing a transaction [#47245](https://github.com/ClickHouse/ClickHouse/pull/47245) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add default constructor for `MultiReadResponse` [#47254](https://github.com/ClickHouse/ClickHouse/pull/47254) ([Antonio Andelic](https://github.com/antonio2368)). +* Join threads if exception happened in `ZooKeeperImpl` constructor [#47261](https://github.com/ClickHouse/ClickHouse/pull/47261) ([Antonio Andelic](https://github.com/antonio2368)). +* use std::lerp, constexpr hex.h [#47268](https://github.com/ClickHouse/ClickHouse/pull/47268) ([Mike Kot](https://github.com/myrrc)). +* Update version_date.tsv and changelogs after v23.2.3.17-stable [#47269](https://github.com/ClickHouse/ClickHouse/pull/47269) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix bug in zero copy replica which can lead to dataloss [#47274](https://github.com/ClickHouse/ClickHouse/pull/47274) ([alesapin](https://github.com/alesapin)). +* Fix typo [#47282](https://github.com/ClickHouse/ClickHouse/pull/47282) ([Nikolay Degterinsky](https://github.com/evillique)). +* Follow-up to [#46681](https://github.com/ClickHouse/ClickHouse/issues/46681) [#47284](https://github.com/ClickHouse/ClickHouse/pull/47284) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix test 02566_ipv4_ipv6_binary_formats [#47295](https://github.com/ClickHouse/ClickHouse/pull/47295) ([Kruglov Pavel](https://github.com/Avogar)). +* Set fixed index_granularity for test 00636 [#47298](https://github.com/ClickHouse/ClickHouse/pull/47298) ([Sema Checherinda](https://github.com/CheSema)). +* Add a manual trigger for release workflow [#47302](https://github.com/ClickHouse/ClickHouse/pull/47302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix 02570_fallback_from_async_insert [#47308](https://github.com/ClickHouse/ClickHouse/pull/47308) ([Vladimir C](https://github.com/vdimir)). +* Catch exceptions in LiveViewPeriodicRefreshTask [#47309](https://github.com/ClickHouse/ClickHouse/pull/47309) ([Vladimir C](https://github.com/vdimir)). +* Fix MergeTreeTransaction::isReadOnly [#47310](https://github.com/ClickHouse/ClickHouse/pull/47310) ([Vladimir C](https://github.com/vdimir)). +* Fix an assertion with implicit transactions in interserver mode [#47312](https://github.com/ClickHouse/ClickHouse/pull/47312) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `File exists` error in Upgrade check [#47314](https://github.com/ClickHouse/ClickHouse/pull/47314) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Support transformQueryForExternalDatabase for analyzer [#47316](https://github.com/ClickHouse/ClickHouse/pull/47316) ([Vladimir C](https://github.com/vdimir)). +* Disable parallel format in health check [#47318](https://github.com/ClickHouse/ClickHouse/pull/47318) ([Ilya Yatsishin](https://github.com/qoega)). +* Analyzer - fix combine logic for limit expression and limit setting [#47324](https://github.com/ClickHouse/ClickHouse/pull/47324) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Suppress expected errors from test 01111 in Upgrade check [#47365](https://github.com/ClickHouse/ClickHouse/pull/47365) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix GROUPING function initialization for grouping sets [#47370](https://github.com/ClickHouse/ClickHouse/pull/47370) ([Dmitry Novik](https://github.com/novikd)). 
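
For context on the `GROUPING` fix above, a minimal sketch of the kind of query it concerns (standard ClickHouse syntax; that this exact shape triggered the bug is an assumption):

```sql
-- GROUPING(x) reports whether x is aggregated away in the current grouping set.
SELECT a, b, GROUPING(a) AS ga, GROUPING(b) AS gb, count()
FROM (SELECT number % 2 AS a, number % 3 AS b FROM numbers(6))
GROUP BY GROUPING SETS ((a), (b))
ORDER BY a, b;
```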
+* Add join_algorithm='grace_hash' to stress tests [#47372](https://github.com/ClickHouse/ClickHouse/pull/47372) ([Pradeep Chhetri](https://github.com/chhetripradeep)). +* Fix 02343_group_by_use_nulls test in new analyzer [#47373](https://github.com/ClickHouse/ClickHouse/pull/47373) ([Dmitry Novik](https://github.com/novikd)). +* Disable 02368_cancel_write_into_hdfs in stress tests [#47382](https://github.com/ClickHouse/ClickHouse/pull/47382) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Analyzer planner fixes before enable by default [#47383](https://github.com/ClickHouse/ClickHouse/pull/47383) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix `ALTER CLEAR COLUMN` with sparse columns [#47384](https://github.com/ClickHouse/ClickHouse/pull/47384) ([Anton Popov](https://github.com/CurtizJ)). +* Fix: apply reading in order for distinct [#47385](https://github.com/ClickHouse/ClickHouse/pull/47385) ([Igor Nikonov](https://github.com/devcrafter)). +* add checks for ptr [#47398](https://github.com/ClickHouse/ClickHouse/pull/47398) ([Sema Checherinda](https://github.com/CheSema)). +* Remove distinct on top of MergingAggregatedStep [#47399](https://github.com/ClickHouse/ClickHouse/pull/47399) ([Igor Nikonov](https://github.com/devcrafter)). +* Update LRUFileCachePriority.cpp [#47411](https://github.com/ClickHouse/ClickHouse/pull/47411) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Make test 02473_optimize_old_parts less flaky [#47416](https://github.com/ClickHouse/ClickHouse/pull/47416) ([Michael Kolupaev](https://github.com/al13n321)). +* Add test to prevent regressions when using bitmapHasAny [#47419](https://github.com/ClickHouse/ClickHouse/pull/47419) ([Jordi Villar](https://github.com/jrdi)). +* Update README.md [#47421](https://github.com/ClickHouse/ClickHouse/pull/47421) ([Tyler Hannan](https://github.com/tylerhannan)). +* Refactor query cache (make use of CacheBase) [#47428](https://github.com/ClickHouse/ClickHouse/pull/47428) ([Robert Schulze](https://github.com/rschu1ze)). +* Suppress Hung Check with UBsan [#47429](https://github.com/ClickHouse/ClickHouse/pull/47429) ([Alexander Tokmakov](https://github.com/tavplubix)). +* [docs] Document add async_insert_max_query_number [#47431](https://github.com/ClickHouse/ClickHouse/pull/47431) ([Antonio Bonuccelli](https://github.com/nellicus)). +* Apply settings for EXPLAIN earlier (in the same way we do for SELECT). [#47433](https://github.com/ClickHouse/ClickHouse/pull/47433) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Update version_date.tsv and changelogs after v23.2.4.12-stable [#47448](https://github.com/ClickHouse/ClickHouse/pull/47448) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix aggregation-in-order with aliases. [#47449](https://github.com/ClickHouse/ClickHouse/pull/47449) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix 01429_join_on_error_messages [#47450](https://github.com/ClickHouse/ClickHouse/pull/47450) ([Vladimir C](https://github.com/vdimir)). +* Update version_date.tsv and changelogs after v23.1.5.24-stable [#47452](https://github.com/ClickHouse/ClickHouse/pull/47452) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v22.12.5.34-stable [#47453](https://github.com/ClickHouse/ClickHouse/pull/47453) ([robot-clickhouse](https://github.com/robot-clickhouse)). 
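
Regarding "Apply settings for EXPLAIN earlier" above, a small sketch of the affected pattern, assuming the usual trailing SETTINGS clause:

```sql
-- Settings attached to the explained query are now applied before planning,
-- the same way they are for a plain SELECT.
EXPLAIN PLAN
SELECT number FROM numbers(10)
SETTINGS max_threads = 1;
```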
+* Better error messages in ReplicatedMergeTreeAttachThread [#47454](https://github.com/ClickHouse/ClickHouse/pull/47454) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update version_date.tsv and changelogs after v22.8.15.23-lts [#47455](https://github.com/ClickHouse/ClickHouse/pull/47455) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Disable grace hash join in upgrade check [#47474](https://github.com/ClickHouse/ClickHouse/pull/47474) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Revert [#46622](https://github.com/ClickHouse/ClickHouse/issues/46622) (test_async_insert_memory) [#47476](https://github.com/ClickHouse/ClickHouse/pull/47476) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `00933_test_fix_extra_seek_on_compressed_cache` in releases. [#47490](https://github.com/ClickHouse/ClickHouse/pull/47490) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix long test `02371_select_projection_normal_agg.sql` [#47491](https://github.com/ClickHouse/ClickHouse/pull/47491) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Revert [#45878](https://github.com/ClickHouse/ClickHouse/issues/45878) and add a test [#47492](https://github.com/ClickHouse/ClickHouse/pull/47492) ([Kruglov Pavel](https://github.com/Avogar)). +* Planner JOIN TREE build fix [#47498](https://github.com/ClickHouse/ClickHouse/pull/47498) ([Maksim Kita](https://github.com/kitaisreal)). +* Better support of identifiers from compound expressions in analyzer [#47506](https://github.com/ClickHouse/ClickHouse/pull/47506) ([Anton Popov](https://github.com/CurtizJ)). +* Adapt some tests to pass with and without the analyzer [#47525](https://github.com/ClickHouse/ClickHouse/pull/47525) ([Raúl Marín](https://github.com/Algunenano)). +* Small enhancements [#47534](https://github.com/ClickHouse/ClickHouse/pull/47534) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Support constants in INTERPOLATE clause (new analyzer) [#47539](https://github.com/ClickHouse/ClickHouse/pull/47539) ([Dmitry Novik](https://github.com/novikd)). +* Remove TOTALS handling in FillingTransform [#47542](https://github.com/ClickHouse/ClickHouse/pull/47542) ([Igor Nikonov](https://github.com/devcrafter)). +* Hide too noisy log messages, fix some tests [#47547](https://github.com/ClickHouse/ClickHouse/pull/47547) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix some flaky tests [#47553](https://github.com/ClickHouse/ClickHouse/pull/47553) ([Azat Khuzhin](https://github.com/azat)). +* remove counters for threads, fix negative counters [#47564](https://github.com/ClickHouse/ClickHouse/pull/47564) ([Sema Checherinda](https://github.com/CheSema)). +* Fix typo [#47565](https://github.com/ClickHouse/ClickHouse/pull/47565) ([hq1](https://github.com/aerosol)). +* Fixes for upgrade check [#47570](https://github.com/ClickHouse/ClickHouse/pull/47570) ([Azat Khuzhin](https://github.com/azat)). +* Change error code in case of columns definitions was empty in ODBC [#47573](https://github.com/ClickHouse/ClickHouse/pull/47573) ([Azat Khuzhin](https://github.com/azat)). +* Add missing SYSTEM FLUSH LOGS for log messages statistics [#47575](https://github.com/ClickHouse/ClickHouse/pull/47575) ([Azat Khuzhin](https://github.com/azat)). +* Fix performance regression in aggregation [#47582](https://github.com/ClickHouse/ClickHouse/pull/47582) ([Anton Popov](https://github.com/CurtizJ)). 
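
A brief sketch for "Support constants in INTERPOLATE clause (new analyzer)" above, assuming the standard `WITH FILL ... INTERPOLATE` syntax and that a constant expression is exactly what the change enables:

```sql
SELECT n, s
FROM (SELECT number * 2 AS n, toString(number) AS s FROM numbers(3))
ORDER BY n WITH FILL
INTERPOLATE (s AS 'filled');  -- constant expression in INTERPOLATE, per the entry above
```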
+* ReadFromMergeTree explain prewhere and row policy actions [#47583](https://github.com/ClickHouse/ClickHouse/pull/47583) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix possible failures of 01300_client_save_history_when_terminated_long [#47606](https://github.com/ClickHouse/ClickHouse/pull/47606) ([Azat Khuzhin](https://github.com/azat)). +* checksum: do not check inverted index files [#47607](https://github.com/ClickHouse/ClickHouse/pull/47607) ([save-my-heart](https://github.com/save-my-heart)). +* Add sanity checks for writing number in variable length format [#47608](https://github.com/ClickHouse/ClickHouse/pull/47608) ([Azat Khuzhin](https://github.com/azat)). +* Analyzer planner fixes before enable by default [#47622](https://github.com/ClickHouse/ClickHouse/pull/47622) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix exception message in clickhouse-test [#47625](https://github.com/ClickHouse/ClickHouse/pull/47625) ([Nikolay Degterinsky](https://github.com/evillique)). +* FillingTransform: remove unnecessary indirection when accessing columns [#47632](https://github.com/ClickHouse/ClickHouse/pull/47632) ([Igor Nikonov](https://github.com/devcrafter)). +* fix typo in HashJoin insertion that enables debug code in release build [#46726](https://github.com/ClickHouse/ClickHouse/issues/46726) [#47647](https://github.com/ClickHouse/ClickHouse/pull/47647) ([jorisgio](https://github.com/jorisgio)). +* clang-tidy >= 15: write CheckOptions in dictionary format [#47648](https://github.com/ClickHouse/ClickHouse/pull/47648) ([Robert Schulze](https://github.com/rschu1ze)). +* CMake: Build ClickHouse w/o GNU extensions [#47651](https://github.com/ClickHouse/ClickHouse/pull/47651) ([Robert Schulze](https://github.com/rschu1ze)). +* Faster fasttest [#47654](https://github.com/ClickHouse/ClickHouse/pull/47654) ([Robert Schulze](https://github.com/rschu1ze)). +* Add background pools size metrics [#47656](https://github.com/ClickHouse/ClickHouse/pull/47656) ([Sergei Trifonov](https://github.com/serxa)). +* Improve ThreadPool [#47657](https://github.com/ClickHouse/ClickHouse/pull/47657) ([Vitaly Baranov](https://github.com/vitlibar)). +* cmake: remove support for gold linker [#47660](https://github.com/ClickHouse/ClickHouse/pull/47660) ([Robert Schulze](https://github.com/rschu1ze)). +* Updated events and recordings [#47668](https://github.com/ClickHouse/ClickHouse/pull/47668) ([clickhouse-adrianfraguela](https://github.com/clickhouse-adrianfraguela)). +* Follow-up to [#47660](https://github.com/ClickHouse/ClickHouse/issues/47660): Further removal of gold linker support [#47669](https://github.com/ClickHouse/ClickHouse/pull/47669) ([Robert Schulze](https://github.com/rschu1ze)). +* Enable parallel execution for two tests [#47670](https://github.com/ClickHouse/ClickHouse/pull/47670) ([Robert Schulze](https://github.com/rschu1ze)). +* Restore native macos build [#47673](https://github.com/ClickHouse/ClickHouse/pull/47673) ([Robert Schulze](https://github.com/rschu1ze)). +* CMake: Remove further cruft from build [#47680](https://github.com/ClickHouse/ClickHouse/pull/47680) ([Robert Schulze](https://github.com/rschu1ze)). +* fix test / remove hardcoded database [#47682](https://github.com/ClickHouse/ClickHouse/pull/47682) ([Denny Crane](https://github.com/den-crane)). +* Apply log_queries_cut_to_length in MergeTreeWhereOptimizer [#47684](https://github.com/ClickHouse/ClickHouse/pull/47684) ([Vladimir C](https://github.com/vdimir)). 
+* Fix logical error in evaluate constant expression [#47685](https://github.com/ClickHouse/ClickHouse/pull/47685) ([Vladimir C](https://github.com/vdimir)). +* Try making `test_keeper_mntr_data_size` less flaky [#47687](https://github.com/ClickHouse/ClickHouse/pull/47687) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix limit offset [#47688](https://github.com/ClickHouse/ClickHouse/pull/47688) ([flynn](https://github.com/ucasfl)). +* Fix startup on older systemd versions [#47689](https://github.com/ClickHouse/ClickHouse/pull/47689) ([Thomas Casteleyn](https://github.com/Hipska)). +* More random query id in tests [#47700](https://github.com/ClickHouse/ClickHouse/pull/47700) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add a style check for unsafe code [#47703](https://github.com/ClickHouse/ClickHouse/pull/47703) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Make the code in Join less disgusting [#47712](https://github.com/ClickHouse/ClickHouse/pull/47712) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixup git reference to LLVM [#47719](https://github.com/ClickHouse/ClickHouse/pull/47719) ([Robert Schulze](https://github.com/rschu1ze)). +* Preparation for libcxx(abi), llvm, clang-tidy 16 [#47722](https://github.com/ClickHouse/ClickHouse/pull/47722) ([Robert Schulze](https://github.com/rschu1ze)). +* Rename cfg parameter query_cache.size to query_cache.max_size [#47724](https://github.com/ClickHouse/ClickHouse/pull/47724) ([Robert Schulze](https://github.com/rschu1ze)). +* Add optimization for MemoryStorageStep [#47726](https://github.com/ClickHouse/ClickHouse/pull/47726) ([Konstantin Morozov](https://github.com/k-morozov)). +* Fix aggregation with constant key in planner [#47727](https://github.com/ClickHouse/ClickHouse/pull/47727) ([Dmitry Novik](https://github.com/novikd)). +* Disable setting in 02343_group_by_use_nulls_distributed (for new analyzer) [#47728](https://github.com/ClickHouse/ClickHouse/pull/47728) ([Dmitry Novik](https://github.com/novikd)). +* Add a test for [#21469](https://github.com/ClickHouse/ClickHouse/issues/21469) [#47736](https://github.com/ClickHouse/ClickHouse/pull/47736) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#23804](https://github.com/ClickHouse/ClickHouse/issues/23804) [#47737](https://github.com/ClickHouse/ClickHouse/pull/47737) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#18937](https://github.com/ClickHouse/ClickHouse/issues/18937) [#47738](https://github.com/ClickHouse/ClickHouse/pull/47738) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#17756](https://github.com/ClickHouse/ClickHouse/issues/17756) [#47739](https://github.com/ClickHouse/ClickHouse/pull/47739) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#23162](https://github.com/ClickHouse/ClickHouse/issues/23162) [#47740](https://github.com/ClickHouse/ClickHouse/pull/47740) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* remove unused code [#47743](https://github.com/ClickHouse/ClickHouse/pull/47743) ([flynn](https://github.com/ucasfl)). +* Fix broken cross-compiled macos builds [#47744](https://github.com/ClickHouse/ClickHouse/pull/47744) ([Robert Schulze](https://github.com/rschu1ze)). +* Randomize query cache settings [#47749](https://github.com/ClickHouse/ClickHouse/pull/47749) ([Robert Schulze](https://github.com/rschu1ze)). 
+* Clarify steps for reopened cherry-pick PRs [#47755](https://github.com/ClickHouse/ClickHouse/pull/47755) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix ZK exception error message [#47757](https://github.com/ClickHouse/ClickHouse/pull/47757) ([Raúl Marín](https://github.com/Algunenano)). +* Add ComparisonTupleEliminationVisitor [#47758](https://github.com/ClickHouse/ClickHouse/pull/47758) ([Vladimir C](https://github.com/vdimir)). +* Add a fuse for backport branches w/o a created PR [#47760](https://github.com/ClickHouse/ClickHouse/pull/47760) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix partition ID byte order for s390x [#47769](https://github.com/ClickHouse/ClickHouse/pull/47769) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Stop `wait for quorum` retries on shutdown [#47770](https://github.com/ClickHouse/ClickHouse/pull/47770) ([Igor Nikonov](https://github.com/devcrafter)). +* More preparation for upgrade to libcxx(abi), llvm, clang-tidy 16 [#47771](https://github.com/ClickHouse/ClickHouse/pull/47771) ([Robert Schulze](https://github.com/rschu1ze)). +* Only valid Reviews.STATES overwrite existing reviews [#47789](https://github.com/ClickHouse/ClickHouse/pull/47789) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Apply black formatter to all python scripts [#47790](https://github.com/ClickHouse/ClickHouse/pull/47790) ([Anton Popov](https://github.com/CurtizJ)). +* Try fix window view test [#47791](https://github.com/ClickHouse/ClickHouse/pull/47791) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update test for nested lambdas [#47795](https://github.com/ClickHouse/ClickHouse/pull/47795) ([Dmitry Novik](https://github.com/novikd)). +* Decrease scale_down ratio for faster deflation [#47798](https://github.com/ClickHouse/ClickHouse/pull/47798) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix 993 and two other tests [#47802](https://github.com/ClickHouse/ClickHouse/pull/47802) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix flaky test 02417_opentelemetry_insert_on_distributed_table [#47811](https://github.com/ClickHouse/ClickHouse/pull/47811) ([Azat Khuzhin](https://github.com/azat)). +* Make 01086_odbc_roundtrip less flaky [#47820](https://github.com/ClickHouse/ClickHouse/pull/47820) ([Antonio Andelic](https://github.com/antonio2368)). +* Place short return before big block, improve logging [#47822](https://github.com/ClickHouse/ClickHouse/pull/47822) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* [FixTests] Remove wrong chassert() in UserDefinedSQLObjectsLoaderFromZooKeeper.cpp [#47839](https://github.com/ClickHouse/ClickHouse/pull/47839) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix test test_replicated_merge_tree_encryption_codec [#47851](https://github.com/ClickHouse/ClickHouse/pull/47851) ([Vitaly Baranov](https://github.com/vitlibar)). +* Allow injecting timeout errors on Keeper [#47856](https://github.com/ClickHouse/ClickHouse/pull/47856) ([Raúl Marín](https://github.com/Algunenano)). +* Comment stale cherry-pick PRs once a day to remind for resolving conflicts [#47857](https://github.com/ClickHouse/ClickHouse/pull/47857) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Followup to [#47802](https://github.com/ClickHouse/ClickHouse/issues/47802) [#47864](https://github.com/ClickHouse/ClickHouse/pull/47864) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Slightly better error message [#47868](https://github.com/ClickHouse/ClickHouse/pull/47868) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Make test_server_reload non-parallel [#47871](https://github.com/ClickHouse/ClickHouse/pull/47871) ([Alexander Tokmakov](https://github.com/tavplubix)). +* aspell-dict.txt: keep sorted things sorted [#47878](https://github.com/ClickHouse/ClickHouse/pull/47878) ([Robert Schulze](https://github.com/rschu1ze)). +* throw exception when all retries exhausted [#47902](https://github.com/ClickHouse/ClickHouse/pull/47902) ([Sema Checherinda](https://github.com/CheSema)). +* Fix GRANT query formatting [#47908](https://github.com/ClickHouse/ClickHouse/pull/47908) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix exception type in arrayElement function [#47909](https://github.com/ClickHouse/ClickHouse/pull/47909) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix logical error in DistributedSink [#47916](https://github.com/ClickHouse/ClickHouse/pull/47916) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix terminate in parts check thread [#47917](https://github.com/ClickHouse/ClickHouse/pull/47917) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Limit keeper request batching by size in bytes [#47918](https://github.com/ClickHouse/ClickHouse/pull/47918) ([Alexander Gololobov](https://github.com/davenger)). +* Improve replicated user defined functions [#47919](https://github.com/ClickHouse/ClickHouse/pull/47919) ([Vitaly Baranov](https://github.com/vitlibar)). +* Update 01072_window_view_multiple_columns_groupby.sh [#47928](https://github.com/ClickHouse/ClickHouse/pull/47928) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Added test. Closes [#12264](https://github.com/ClickHouse/ClickHouse/issues/12264) [#47931](https://github.com/ClickHouse/ClickHouse/pull/47931) ([Ilya Yatsishin](https://github.com/qoega)). +* Disallow concurrent backup restore test - removed SYSTEM SYNC [#47944](https://github.com/ClickHouse/ClickHouse/pull/47944) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Artifacts s3 prefix [#47945](https://github.com/ClickHouse/ClickHouse/pull/47945) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Set content-length for empty POST requests [#47950](https://github.com/ClickHouse/ClickHouse/pull/47950) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix test `02050_client_profile_events` [#47951](https://github.com/ClickHouse/ClickHouse/pull/47951) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix tsan error lock-order-inversion [#47953](https://github.com/ClickHouse/ClickHouse/pull/47953) ([Kruglov Pavel](https://github.com/Avogar)). +* Update docs for parseDateTime() (follow-up to [#46815](https://github.com/ClickHouse/ClickHouse/issues/46815)) [#47959](https://github.com/ClickHouse/ClickHouse/pull/47959) ([Robert Schulze](https://github.com/rschu1ze)). +* Docs: Update secondary index example [#47961](https://github.com/ClickHouse/ClickHouse/pull/47961) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix compilation on MacOS [#47967](https://github.com/ClickHouse/ClickHouse/pull/47967) ([Jordi Villar](https://github.com/jrdi)). +* [Refactoring] Move information about current hosts and list of all hosts to BackupCoordination [#47971](https://github.com/ClickHouse/ClickHouse/pull/47971) ([Vitaly Baranov](https://github.com/vitlibar)). 
+* Stabilize tests for new function parseDateTimeInJodaSyntax [#47974](https://github.com/ClickHouse/ClickHouse/pull/47974) ([Robert Schulze](https://github.com/rschu1ze)). +* Docs: Fix links [#47976](https://github.com/ClickHouse/ClickHouse/pull/47976) ([Robert Schulze](https://github.com/rschu1ze)). +* Try fix rabbitmq test [#47987](https://github.com/ClickHouse/ClickHouse/pull/47987) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Better type check in arrayElement function [#47989](https://github.com/ClickHouse/ClickHouse/pull/47989) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix incorrect code indentation [#48011](https://github.com/ClickHouse/ClickHouse/pull/48011) ([exmy](https://github.com/exmy)). +* CMake: Remove configuration of CMAKE_SHARED_LINKER_FLAGS [#48018](https://github.com/ClickHouse/ClickHouse/pull/48018) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove the old changelog script [#48042](https://github.com/ClickHouse/ClickHouse/pull/48042) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix automatic indentation in the built-in UI SQL editor [#48045](https://github.com/ClickHouse/ClickHouse/pull/48045) ([Nikolay Degterinsky](https://github.com/evillique)). +* Rename `system.marked_dropped_tables` to `dropped_tables` [#48048](https://github.com/ClickHouse/ClickHouse/pull/48048) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Automatically correct some mistakes in the changelog [#48052](https://github.com/ClickHouse/ClickHouse/pull/48052) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Docs: Document [FULL] keyword in SHOW TABLES [#48061](https://github.com/ClickHouse/ClickHouse/pull/48061) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix stateless tests numbers [#48063](https://github.com/ClickHouse/ClickHouse/pull/48063) ([Raúl Marín](https://github.com/Algunenano)). +* Docs: Update syntax of some SHOW queries [#48064](https://github.com/ClickHouse/ClickHouse/pull/48064) ([Robert Schulze](https://github.com/rschu1ze)). +* Simplify backup coordination for file infos [#48095](https://github.com/ClickHouse/ClickHouse/pull/48095) ([Vitaly Baranov](https://github.com/vitlibar)). +* materialized pg small fix [#48098](https://github.com/ClickHouse/ClickHouse/pull/48098) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update SQLite to 3.41.2 [#48101](https://github.com/ClickHouse/ClickHouse/pull/48101) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix test numbers again and enforce it with style [#48106](https://github.com/ClickHouse/ClickHouse/pull/48106) ([Raúl Marín](https://github.com/Algunenano)). +* s390x reinterpret as float64 [#48112](https://github.com/ClickHouse/ClickHouse/pull/48112) ([Suzy Wang](https://github.com/SuzyWangIBMer)). +* Remove slow outdated test [#48114](https://github.com/ClickHouse/ClickHouse/pull/48114) ([alesapin](https://github.com/alesapin)). +* Cosmetic follow-up to [#46252](https://github.com/ClickHouse/ClickHouse/issues/46252) [#48128](https://github.com/ClickHouse/ClickHouse/pull/48128) ([Robert Schulze](https://github.com/rschu1ze)). +* Merging "Support undrop table" [#48130](https://github.com/ClickHouse/ClickHouse/pull/48130) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix double whitespace in exception message [#48132](https://github.com/ClickHouse/ClickHouse/pull/48132) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
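
A usage sketch for the `system.marked_dropped_tables` to `system.dropped_tables` rename and the "Support undrop table" work mentioned above. The selected column names and the `UNDROP TABLE` statement shape are assumptions, not confirmed by these entries:

```sql
-- Inspect tables that were dropped but not yet fully removed
-- (renamed from system.marked_dropped_tables):
SELECT database, table, uuid, engine FROM system.dropped_tables;

-- Assumed statement from the "Support undrop table" work; names are hypothetical:
UNDROP TABLE my_db.my_table;
```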
+* Improve script for updating clickhouse-docs [#48135](https://github.com/ClickHouse/ClickHouse/pull/48135) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix stdlib compatibility issues [#48150](https://github.com/ClickHouse/ClickHouse/pull/48150) ([DimasKovas](https://github.com/DimasKovas)). +* Make test test_disallow_concurrency less flaky [#48152](https://github.com/ClickHouse/ClickHouse/pull/48152) ([Vitaly Baranov](https://github.com/vitlibar)). +* Remove unused mockSystemDatabase from gtest_transform_query_for_exter… [#48162](https://github.com/ClickHouse/ClickHouse/pull/48162) ([Vladimir C](https://github.com/vdimir)). +* Update environmental-sensors.md [#48166](https://github.com/ClickHouse/ClickHouse/pull/48166) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Correctly handle NULL constants in logical optimizer for new analyzer [#48168](https://github.com/ClickHouse/ClickHouse/pull/48168) ([Antonio Andelic](https://github.com/antonio2368)). +* Try making KeeperMap test more stable [#48170](https://github.com/ClickHouse/ClickHouse/pull/48170) ([Antonio Andelic](https://github.com/antonio2368)). +* Deprecate EXPLAIN QUERY TREE with disabled analyzer. [#48177](https://github.com/ClickHouse/ClickHouse/pull/48177) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Use uniq file names in 02149_* tests to avoid SIGBUS in stress tests [#48187](https://github.com/ClickHouse/ClickHouse/pull/48187) ([Kruglov Pavel](https://github.com/Avogar)). +* Update style in ParserKQLSort.cpp [#48199](https://github.com/ClickHouse/ClickHouse/pull/48199) ([Ilya Yatsishin](https://github.com/qoega)). +* Remove support for std::unary/binary_function (removed in C++17) [#48204](https://github.com/ClickHouse/ClickHouse/pull/48204) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove unused setting [#48208](https://github.com/ClickHouse/ClickHouse/pull/48208) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove wrong assert from LogicalExpressionOptimizerPass [#48214](https://github.com/ClickHouse/ClickHouse/pull/48214) ([Antonio Andelic](https://github.com/antonio2368)). +* MySQL compatibility: Make str_to_date alias case-insensitive [#48220](https://github.com/ClickHouse/ClickHouse/pull/48220) ([Robert Schulze](https://github.com/rschu1ze)). +* Disable AST optimizations for projection analysis. [#48221](https://github.com/ClickHouse/ClickHouse/pull/48221) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix Too big of a difference between test numbers [#48224](https://github.com/ClickHouse/ClickHouse/pull/48224) ([Vladimir C](https://github.com/vdimir)). +* Stabilize 02477_age [#48225](https://github.com/ClickHouse/ClickHouse/pull/48225) ([Robert Schulze](https://github.com/rschu1ze)). +* Rename setting stop_reading_on_first_cancel [#48226](https://github.com/ClickHouse/ClickHouse/pull/48226) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Address flaky 02346_full_text_search [#48227](https://github.com/ClickHouse/ClickHouse/pull/48227) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix incorrect ThreadPool usage after ThreadPool introspection [#48244](https://github.com/ClickHouse/ClickHouse/pull/48244) ([Azat Khuzhin](https://github.com/azat)). +* fix test numbers again [#48264](https://github.com/ClickHouse/ClickHouse/pull/48264) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### Testing Improvement + +* Fixed functional test 02534_keyed_siphash and 02552_siphash128_reference for s390x. 
[#47615](https://github.com/ClickHouse/ClickHouse/pull/47615) ([Harry Lee](https://github.com/HarryLeeIBM)). + diff --git a/docs/changelogs/v23.3.2.37-lts.md b/docs/changelogs/v23.3.2.37-lts.md new file mode 100644 index 00000000000..69602b573c5 --- /dev/null +++ b/docs/changelogs/v23.3.2.37-lts.md @@ -0,0 +1,35 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.3.2.37-lts (1b144bcd101) FIXME as compared to v23.3.1.2823-lts (46e85357ce2) + +#### Improvement +* Backported in [#48459](https://github.com/ClickHouse/ClickHouse/issues/48459): Formatter '%M' in function formatDateTime() now prints the month name instead of the minutes. This makes the behavior consistent with MySQL. The previous behavior can be restored using setting "formatdatetime_parsedatetime_m_is_month_name = 0". [#47246](https://github.com/ClickHouse/ClickHouse/pull/47246) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#48842](https://github.com/ClickHouse/ClickHouse/issues/48842): Fix some mysql related settings not being handled with mysql dictionary source + named collection. Closes [#48402](https://github.com/ClickHouse/ClickHouse/issues/48402). [#48759](https://github.com/ClickHouse/ClickHouse/pull/48759) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#49035](https://github.com/ClickHouse/ClickHouse/issues/49035): Add fallback to password authentication when authentication with SSL user certificate has failed. Closes [#48974](https://github.com/ClickHouse/ClickHouse/issues/48974). [#48989](https://github.com/ClickHouse/ClickHouse/pull/48989) ([Nikolay Degterinsky](https://github.com/evillique)). + +#### Build/Testing/Packaging Improvement +* Backported in [#48589](https://github.com/ClickHouse/ClickHouse/issues/48589): Update time zones. The following were updated: Africa/Cairo, Africa/Casablanca, Africa/El_Aaiun, America/Bogota, America/Cambridge_Bay, America/Ciudad_Juarez, America/Godthab, America/Inuvik, America/Iqaluit, America/Nuuk, America/Ojinaga, America/Pangnirtung, America/Rankin_Inlet, America/Resolute, America/Whitehorse, America/Yellowknife, Asia/Gaza, Asia/Hebron, Asia/Kuala_Lumpur, Asia/Singapore, Canada/Yukon, Egypt, Europe/Kirov, Europe/Volgograd, Singapore. [#48572](https://github.com/ClickHouse/ClickHouse/pull/48572) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#48960](https://github.com/ClickHouse/ClickHouse/issues/48960): After the recent update, the `dockerd` requires `--tlsverify=false` together with the http port explicitly. [#48924](https://github.com/ClickHouse/ClickHouse/pull/48924) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Remove a feature [#48195](https://github.com/ClickHouse/ClickHouse/pull/48195) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix cpu usage in rabbitmq (was worsened in 23.2 after [#44404](https://github.com/ClickHouse/ClickHouse/issues/44404)) [#48311](https://github.com/ClickHouse/ClickHouse/pull/48311) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix ThreadPool for DistributedSink and use StrongTypedef for CurrentMetrics/ProfileEvents/StatusInfo to avoid further errors [#48314](https://github.com/ClickHouse/ClickHouse/pull/48314) ([Azat Khuzhin](https://github.com/azat)). 
+* Reset downloader for cache file segment in TemporaryFileStream [#48386](https://github.com/ClickHouse/ClickHouse/pull/48386) ([Vladimir C](https://github.com/vdimir)). +* ClickHouse startup error when loading a distributed table that depends on a dictionary [#48419](https://github.com/ClickHouse/ClickHouse/pull/48419) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Fix possible segfault in cache [#48469](https://github.com/ClickHouse/ClickHouse/pull/48469) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix nested map for keys of IP and UUID types [#48556](https://github.com/ClickHouse/ClickHouse/pull/48556) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix a bug in Keeper when a node was sometimes not created with scheme `auth` in the ACL. [#48595](https://github.com/ClickHouse/ClickHouse/pull/48595) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Fix IPv4 comparable with UInt [#48611](https://github.com/ClickHouse/ClickHouse/pull/48611) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Batch fix for projections analysis with analyzer. [#48357](https://github.com/ClickHouse/ClickHouse/pull/48357) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix a confusing warning about interserver mode [#48793](https://github.com/ClickHouse/ClickHouse/pull/48793) ([Alexander Tokmakov](https://github.com/tavplubix)). + diff --git a/docs/changelogs/v23.4.1.1943-stable.md b/docs/changelogs/v23.4.1.1943-stable.md new file mode 100644 index 00000000000..ea16f5856be --- /dev/null +++ b/docs/changelogs/v23.4.1.1943-stable.md @@ -0,0 +1,375 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.4.1.1943-stable (3920eb987f7) FIXME as compared to v23.3.1.2823-lts (46e85357ce2) + +#### Backward Incompatible Change +* If `path` in cache configuration is not empty and is not an absolute path, then it will be put in `/caches/`. [#48784](https://github.com/ClickHouse/ClickHouse/pull/48784) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Added compatibility setting `parallelize_output_from_storages` to restore the behavior from before [#48727](https://github.com/ClickHouse/ClickHouse/issues/48727). [#49101](https://github.com/ClickHouse/ClickHouse/pull/49101) ([Igor Nikonov](https://github.com/devcrafter)). + +#### New Feature +* Add `extractKeyValuePairs` function to extract key-value pairs from strings. Input strings might contain noise (e.g. log files) and do not need to be 100% in key-value-pair format; the algorithm will look for key-value pairs matching the arguments passed to the function. As of now, the function accepts the following arguments: `data_column` (mandatory), `key_value_pair_delimiter` (defaults to `:`), `pair_delimiters` (defaults to `\space \, \;`) and `quoting_character` (defaults to double quotes). [#43606](https://github.com/ClickHouse/ClickHouse/pull/43606) ([Arthur Passos](https://github.com/arthurpassos)). +* Add MemoryTracker for the background tasks (merges and mutations). Introduces `merges_mutations_memory_usage_soft_limit` and `merges_mutations_memory_usage_to_ram_ratio` settings that represent the soft memory limit for merges and mutations. If this limit is reached, ClickHouse won't schedule new merge or mutation tasks. Also the `MergesMutationsMemoryTracking` metric is introduced to allow observing the current memory usage of background tasks. Closes [#45710](https://github.com/ClickHouse/ClickHouse/issues/45710).
[#46089](https://github.com/ClickHouse/ClickHouse/pull/46089) ([Dmitry Novik](https://github.com/novikd)). +* Support new aggregate function quantileGK/quantilesGK, like [approx_percentile](https://spark.apache.org/docs/latest/api/sql/index.html#approx_percentile) in Spark. For the Greenwald-Khanna algorithm, refer to http://infolab.stanford.edu/~datar/courses/cs361a/papers/quantiles.pdf. [#46428](https://github.com/ClickHouse/ClickHouse/pull/46428) ([李扬](https://github.com/taiyang-li)). +* Add statement `SHOW COLUMNS` which shows distilled information from `system.columns`. [#48017](https://github.com/ClickHouse/ClickHouse/pull/48017) ([Robert Schulze](https://github.com/rschu1ze)). +* Added `LIGHTWEIGHT` and `PULL` modifiers for `SYSTEM SYNC REPLICA` query. `LIGHTWEIGHT` version waits for fetches and drop-ranges only (merges and mutations are ignored). `PULL` version pulls new entries from ZooKeeper and does not wait for them. Fixes [#47794](https://github.com/ClickHouse/ClickHouse/issues/47794). [#48085](https://github.com/ClickHouse/ClickHouse/pull/48085) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add kafkaMurmurHash function for compatibility with Kafka DefaultPartitioner. Closes [#47834](https://github.com/ClickHouse/ClickHouse/issues/47834). [#48185](https://github.com/ClickHouse/ClickHouse/pull/48185) ([Nikolay Degterinsky](https://github.com/evillique)). +* Allow easily creating a user with the same grants as the current user by using `GRANT CURRENT GRANTS`. [#48262](https://github.com/ClickHouse/ClickHouse/pull/48262) ([pufit](https://github.com/pufit)). +* Add statistical aggregate function `kolmogorovSmirnovTest`. Closes [#48228](https://github.com/ClickHouse/ClickHouse/issues/48228). [#48325](https://github.com/ClickHouse/ClickHouse/pull/48325) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). +* Added a `lost_part_count` column to the `system.replicas` table. The column value shows the total number of lost parts in the corresponding table. The value is stored in ZooKeeper and can be used instead of the non-persistent `ReplicatedDataLoss` profile event for monitoring. [#48526](https://github.com/ClickHouse/ClickHouse/pull/48526) ([Sergei Trifonov](https://github.com/serxa)). +* Add `soundex` function. Closes [#39880](https://github.com/ClickHouse/ClickHouse/issues/39880). [#48567](https://github.com/ClickHouse/ClickHouse/pull/48567) ([FriendLey](https://github.com/FriendLey)). +* Support Map type for JSONExtract. [#48629](https://github.com/ClickHouse/ClickHouse/pull/48629) ([李扬](https://github.com/taiyang-li)). +* Add PrettyJSONEachRow format to output pretty JSON with newline delimiters and 4-space indents. [#48898](https://github.com/ClickHouse/ClickHouse/pull/48898) ([Kruglov Pavel](https://github.com/Avogar)). +* Add ParquetMetadata input format to read Parquet file metadata. [#48911](https://github.com/ClickHouse/ClickHouse/pull/48911) ([Kruglov Pavel](https://github.com/Avogar)). + +#### Performance Improvement +* Reading files in Parquet format is now much faster. IO and decoding are parallelized (controlled by the `max_threads` setting), and only required data ranges are read. [#47964](https://github.com/ClickHouse/ClickHouse/pull/47964) ([Michael Kolupaev](https://github.com/al13n321)). +* Only check dependencies if necessary when applying `ALTER TABLE` queries. [#48062](https://github.com/ClickHouse/ClickHouse/pull/48062) ([Raúl Marín](https://github.com/Algunenano)). +* Optimize function `mapUpdate`.
[#48118](https://github.com/ClickHouse/ClickHouse/pull/48118) ([Anton Popov](https://github.com/CurtizJ)). +* Now an internal query to the local replica is sent explicitly, and data from it is received through the loopback interface. Setting `prefer_localhost_replica` is not respected for parallel replicas. This is needed for better scheduling and makes the code cleaner: the initiator is only responsible for coordinating the reading process and merging results, continuously answering requests while all the secondary queries read the data. Note: using the loopback interface is not as performant; otherwise, some replicas could starve for tasks, which could lead to even slower query execution and not utilizing all possible resources. The initialization of the coordinator is now even lazier. All incoming requests contain information about the reading algorithm; we initialize the coordinator with it when the first request comes. If any replica decides to read with a different algorithm, an exception will be thrown and the query will be aborted. [#48246](https://github.com/ClickHouse/ClickHouse/pull/48246) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Do not build a set for the right side of an `IN` clause with a subquery when it is used only for analysis of skip indexes and they are disabled by the setting `use_skip_indexes=0`. Previously it might affect the performance of queries. [#48299](https://github.com/ClickHouse/ClickHouse/pull/48299) ([Anton Popov](https://github.com/CurtizJ)). +* Query processing is parallelized right after reading `FROM file(...)`. Related to [#38755](https://github.com/ClickHouse/ClickHouse/issues/38755). [#48525](https://github.com/ClickHouse/ClickHouse/pull/48525) ([Igor Nikonov](https://github.com/devcrafter)). +* Query processing is parallelized right after reading from a data source. Affected data sources are mostly simple or external storages like table functions `url`, `file`. [#48727](https://github.com/ClickHouse/ClickHouse/pull/48727) ([Igor Nikonov](https://github.com/devcrafter)). +* Use the correct memory order for the counter in `numbers_mt()`. [#48729](https://github.com/ClickHouse/ClickHouse/pull/48729) ([Igor Nikonov](https://github.com/devcrafter)). +* Lowered contention of the ThreadPool mutex (may increase performance for a huge amount of small jobs). [#48750](https://github.com/ClickHouse/ClickHouse/pull/48750) ([Sergei Trifonov](https://github.com/serxa)). +* Simplify accounting of the approximate size of a granule in the prefetched read pool. [#49051](https://github.com/ClickHouse/ClickHouse/pull/49051) ([Nikita Taranov](https://github.com/nickitat)). + +#### Improvement
+* Support config sections `keeper`/`keeper_server` as an alternative to `zookeeper`. Closes [#34766](https://github.com/ClickHouse/ClickHouse/issues/34766), [#34767](https://github.com/ClickHouse/ClickHouse/issues/34767). [#35113](https://github.com/ClickHouse/ClickHouse/pull/35113) ([李扬](https://github.com/taiyang-li)). +* Many issues in ClickHouse applications' help were fixed. Help is now written to stdout from all tools. The status code for `clickhouse help` invocation is now 0. Updated help for `clickhouse-local`, `clickhouse-benchmark`, `clickhouse-client`, `clickhouse hash`, `clickhouse su`, `clickhouse-install`. [#45819](https://github.com/ClickHouse/ClickHouse/pull/45819) ([Ilya Yatsishin](https://github.com/qoega)). +* Entries in the query cache are now squashed to max_block_size and compressed.
+
+#### Improvement
+* Support the config sections `keeper`/`keeper_server` as an alternative to `zookeeper`. Closes [#34766](https://github.com/ClickHouse/ClickHouse/issues/34766), [#34767](https://github.com/ClickHouse/ClickHouse/issues/34767). [#35113](https://github.com/ClickHouse/ClickHouse/pull/35113) ([李扬](https://github.com/taiyang-li)).
+* Many issues in the help of ClickHouse applications were fixed. Help is now written to stdout from all tools. The status code for a `clickhouse help` invocation is now 0. Updated help for `clickhouse-local`, `clickhouse-benchmark`, `clickhouse-client`, `clickhouse hash`, `clickhouse su`, `clickhouse-install`. [#45819](https://github.com/ClickHouse/ClickHouse/pull/45819) ([Ilya Yatsishin](https://github.com/qoega)).
+* Entries in the query cache are now squashed to max_block_size and compressed. [#45912](https://github.com/ClickHouse/ClickHouse/pull/45912) ([Robert Schulze](https://github.com/rschu1ze)).
+* It is possible to set the _secure_ flag in named_collections for a dictionary with a ClickHouse table source. Addresses [#38450](https://github.com/ClickHouse/ClickHouse/issues/38450). [#46323](https://github.com/ClickHouse/ClickHouse/pull/46323) ([Ilya Golshtein](https://github.com/ilejn)).
+* The functions replaceOne(), replaceAll(), replaceRegexpOne() and replaceRegexpAll() can now be called with non-const pattern and replacement arguments. [#46589](https://github.com/ClickHouse/ClickHouse/pull/46589) ([Robert Schulze](https://github.com/rschu1ze)).
+* Bump internal ZSTD from 1.5.4 to 1.5.5. [#46797](https://github.com/ClickHouse/ClickHouse/pull/46797) ([Robert Schulze](https://github.com/rschu1ze)).
+* If we run a mutation with IN (subquery), such as `ALTER TABLE t UPDATE col='new value' WHERE id IN (SELECT id FROM huge_table)`, and the table `t` has multiple parts, then for each part a set for the subquery `SELECT id FROM huge_table` is built in memory. If there are many parts, this might consume a lot of memory (and lead to an OOM) and CPU. The solution is to introduce a short-lived cache of sets that are currently being built by mutation tasks. If another task of the same mutation is executed concurrently, it can look up the set in the cache, wait for it to be built, and reuse it. [#46835](https://github.com/ClickHouse/ClickHouse/pull/46835) ([Alexander Gololobov](https://github.com/davenger)).
+* Added configurable retries for all operations with [Zoo]Keeper for Backup queries. [#47224](https://github.com/ClickHouse/ClickHouse/pull/47224) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Add async connection to socket and async writing to socket. Make creating connections and sending queries/external tables async across shards. Refactor code with fibers. Closes [#46931](https://github.com/ClickHouse/ClickHouse/issues/46931). We will be able to increase `connect_timeout_with_failover_ms` by default after this PR (https://github.com/ClickHouse/ClickHouse/issues/5188). [#47229](https://github.com/ClickHouse/ClickHouse/pull/47229) ([Kruglov Pavel](https://github.com/Avogar)).
+* The formatter '%M' in the function formatDateTime() now prints the month name instead of the minutes. This makes the behavior consistent with MySQL. The previous behavior can be restored using the setting "formatdatetime_parsedatetime_m_is_month_name = 0". [#47246](https://github.com/ClickHouse/ClickHouse/pull/47246) ([Robert Schulze](https://github.com/rschu1ze)).
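+
+An illustrative sketch of the '%M' change above:
+
+```sql
+SELECT formatDateTime(toDateTime('2023-04-01 10:20:30'), '%M');
+-- now returns the month name ('April'); previously it returned the minutes ('20')
+
+SET formatdatetime_parsedatetime_m_is_month_name = 0;  -- restores the old behavior
+```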
+* Several improvements around data lakes: - Make StorageIceberg work with non-partitioned data. - Support Iceberg format version V2 (previously only V1 was supported). - Support reading partitioned data for DeltaLake/Hudi. - Faster reading of DeltaLake metadata by using Delta's checkpoint files. - Fixed incorrect Hudi reads: previously it incorrectly chose which data to read and therefore could correctly read only small tables. - Made these engines pick up updates of changed data (previously the state was set at table creation). - Added proper testing for Iceberg/DeltaLake/Hudi using Spark. [#47307](https://github.com/ClickHouse/ClickHouse/pull/47307) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Enable `use_environment_credentials` for S3 by default, so the entire provider chain is constructed by default. [#47397](https://github.com/ClickHouse/ClickHouse/pull/47397) ([Antonio Andelic](https://github.com/antonio2368)).
+* Currently, the JSON_VALUE function is similar to Spark's get_json_object function, which supports getting a value from a JSON string by a path like '$.key'. There are still some differences: 1. Spark's get_json_object returns null when the path does not exist, while JSON_VALUE returns an empty string; 2. Spark's get_json_object returns a complex value, such as a JSON object/array value, while JSON_VALUE returns an empty string. [#47494](https://github.com/ClickHouse/ClickHouse/pull/47494) ([KevinyhZou](https://github.com/KevinyhZou)).
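+
+A sketch of the behavioral differences described above:
+
+```sql
+SELECT JSON_VALUE('{"key": 1}', '$.key');      -- '1'
+SELECT JSON_VALUE('{"key": 1}', '$.missing');  -- '' (Spark's get_json_object returns NULL here)
+SELECT JSON_VALUE('{"key": [1, 2]}', '$.key'); -- '' (complex value; Spark returns the JSON array)
+```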
+* Add the CNF/constraint optimizer in the new analyzer. [#47617](https://github.com/ClickHouse/ClickHouse/pull/47617) ([Antonio Andelic](https://github.com/antonio2368)).
+* For use_structure_from_insertion_table_in_table_functions, more flexible propagation of the insertion table structure to the table function. Fixed a bug with name mapping and with using virtual columns. There is no more need for the 'auto' setting. [#47962](https://github.com/ClickHouse/ClickHouse/pull/47962) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Do not continue retrying to connect to ZK if the query is killed or over limits. [#47985](https://github.com/ClickHouse/ClickHouse/pull/47985) ([Raúl Marín](https://github.com/Algunenano)).
+* Added functions to work with columns of type `Map`: `mapConcat`, `mapSort`, `mapExists`. [#48071](https://github.com/ClickHouse/ClickHouse/pull/48071) ([Anton Popov](https://github.com/CurtizJ)).
+* Support Enum output/input in BSONEachRow, allow all map key types and avoid extra calculations on output. [#48122](https://github.com/ClickHouse/ClickHouse/pull/48122) ([Kruglov Pavel](https://github.com/Avogar)).
+* Support more ClickHouse types in ORC/Arrow/Parquet formats: Enum(8|16), (U)Int(128|256), Decimal256 (for ORC), allow reading IPv4 from Int32 values (ORC outputs IPv4 as Int32 and we couldn't read it back), fix reading Nullable(IPv6) from binary data for ORC. [#48126](https://github.com/ClickHouse/ClickHouse/pull/48126) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add the columns `perform_ttl_move_on_insert` and `load_balancing` to the table `system.storage_policies`; modify the type of the column `volume_type` to `enum8`. [#48167](https://github.com/ClickHouse/ClickHouse/pull/48167) ([lizhuoyu5](https://github.com/lzydmxy)).
+* Added support for the `BACKUP ALL` command, which backs up all tables and databases, including temporary and system ones. [#48189](https://github.com/ClickHouse/ClickHouse/pull/48189) ([Vitaly Baranov](https://github.com/vitlibar)).
+* The function mapFromArrays now supports the Map type as input. [#48207](https://github.com/ClickHouse/ClickHouse/pull/48207) ([李扬](https://github.com/taiyang-li)).
+* The output of some SHOW PROCESSLIST queries is now sorted. [#48241](https://github.com/ClickHouse/ClickHouse/pull/48241) ([Robert Schulze](https://github.com/rschu1ze)).
+* Per-query/per-server throttling for remote IO/local IO/BACKUPs (server settings: `max_remote_read_network_bandwidth_for_server`, `max_remote_write_network_bandwidth_for_server`, `max_local_read_bandwidth_for_server`, `max_local_write_bandwidth_for_server`, `max_backup_bandwidth_for_server`; settings: `max_remote_read_network_bandwidth`, `max_remote_write_network_bandwidth`, `max_local_read_bandwidth`, `max_local_write_bandwidth`, `max_backup_bandwidth`). [#48242](https://github.com/ClickHouse/ClickHouse/pull/48242) ([Azat Khuzhin](https://github.com/azat)).
+* Support more types in the CapnProto format: Map, (U)Int(128|256), Decimal(128|256). Allow integer conversions during input/output. [#48257](https://github.com/ClickHouse/ClickHouse/pull/48257) ([Kruglov Pavel](https://github.com/Avogar)).
+* It is now possible to define per-user quotas in the query cache. [#48284](https://github.com/ClickHouse/ClickHouse/pull/48284) ([Robert Schulze](https://github.com/rschu1ze)).
+* Don't throw CURRENT_WRITE_BUFFER_IS_EXHAUSTED for normal behaviour. [#48288](https://github.com/ClickHouse/ClickHouse/pull/48288) ([Raúl Marín](https://github.com/Algunenano)).
+* Add a new setting `keeper_map_strict_mode`, which enforces extra guarantees on operations made on top of `KeeperMap` tables. [#48293](https://github.com/ClickHouse/ClickHouse/pull/48293) ([Antonio Andelic](https://github.com/antonio2368)).
+* Check that the primary key type for a simple dictionary is a native unsigned integer type. Add the setting `check_dictionary_primary_key` for compatibility (set `check_dictionary_primary_key = false` to disable the check). [#48335](https://github.com/ClickHouse/ClickHouse/pull/48335) ([lizhuoyu5](https://github.com/lzydmxy)).
+* Don't replicate mutations for `KeeperMap` because it's unnecessary. [#48354](https://github.com/ClickHouse/ClickHouse/pull/48354) ([Antonio Andelic](https://github.com/antonio2368)).
+* Allow writing/reading an unnamed tuple as a nested Message in the Protobuf format. Tuple elements and Message fields are matched by position. [#48390](https://github.com/ClickHouse/ClickHouse/pull/48390) ([Kruglov Pavel](https://github.com/Avogar)).
+* Support the `additional_table_filters` and `additional_result_filter` settings in the new planner. Also, add a documentation entry for `additional_result_filter`. [#48405](https://github.com/ClickHouse/ClickHouse/pull/48405) ([Dmitry Novik](https://github.com/novikd)).
+* parseDateTime now understands the format string '%f' (fractional seconds). [#48420](https://github.com/ClickHouse/ClickHouse/pull/48420) ([Robert Schulze](https://github.com/rschu1ze)).
+* The format string "%f" in formatDateTime() now prints "000000" if the formatted value has no fractional seconds; the previous behavior (a single zero) can be restored using the setting "formatdatetime_f_prints_single_zero = 1". [#48422](https://github.com/ClickHouse/ClickHouse/pull/48422) ([Robert Schulze](https://github.com/rschu1ze)).
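+
+An illustrative sketch of the '%f' handling above (expected output shown as comments; it may vary by version and settings):
+
+```sql
+SELECT formatDateTime(toDateTime64('2023-04-01 10:20:30.123', 3), '%f');  -- '123000'
+SELECT formatDateTime(toDateTime('2023-04-01 10:20:30'), '%f');           -- '000000' (previously '0')
+```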
+* Don't replicate DELETE and TRUNCATE for KeeperMap. [#48434](https://github.com/ClickHouse/ClickHouse/pull/48434) ([Antonio Andelic](https://github.com/antonio2368)).
+* Generate valid Decimals and Bools in the generateRandom function. [#48436](https://github.com/ClickHouse/ClickHouse/pull/48436) ([Kruglov Pavel](https://github.com/Avogar)).
+* Allow trailing commas in the expression list of a SELECT query, for example `SELECT a, b, c, FROM table`. Closes [#37802](https://github.com/ClickHouse/ClickHouse/issues/37802). [#48438](https://github.com/ClickHouse/ClickHouse/pull/48438) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Override the `CLICKHOUSE_USER` and `CLICKHOUSE_PASSWORD` environment variables with the `--user` and `--password` client parameters. Closes [#38909](https://github.com/ClickHouse/ClickHouse/issues/38909). [#48440](https://github.com/ClickHouse/ClickHouse/pull/48440) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Added retries to the loading of data parts in `MergeTree` tables in case of retryable errors. [#48442](https://github.com/ClickHouse/ClickHouse/pull/48442) ([Anton Popov](https://github.com/CurtizJ)).
+* Add support for the `Date`, `Date32`, `DateTime`, `DateTime64` data types to the `arrayMin`, `arrayMax`, `arrayDifference` functions. Closes [#21645](https://github.com/ClickHouse/ClickHouse/issues/21645). [#48445](https://github.com/ClickHouse/ClickHouse/pull/48445) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Reduce memory usage for multiple `ALTER DELETE` mutations. [#48522](https://github.com/ClickHouse/ClickHouse/pull/48522) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Primary/secondary indices and sorting keys with identical expressions are now rejected. This behavior can be disabled using the setting `allow_suspicious_indices`. [#48536](https://github.com/ClickHouse/ClickHouse/pull/48536) ([凌涛](https://github.com/lingtaolf)).
+* Fix a small typo in a comment around the `lockForAlter` method in `IStorage.h`. [#48559](https://github.com/ClickHouse/ClickHouse/pull/48559) ([artem-pershin](https://github.com/artem-pershin)).
+* Add support for the `{server_uuid}` macro. It is useful for identifying replicas in autoscaled clusters when new replicas are constantly added and removed at runtime. This closes [#48554](https://github.com/ClickHouse/ClickHouse/issues/48554). [#48563](https://github.com/ClickHouse/ClickHouse/pull/48563) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* The installation script will create a hard link instead of copying if it is possible. [#48578](https://github.com/ClickHouse/ClickHouse/pull/48578) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Support the `SHOW TABLE` syntax, meaning the same as `SHOW CREATE TABLE`. Closes [#48580](https://github.com/ClickHouse/ClickHouse/issues/48580). [#48591](https://github.com/ClickHouse/ClickHouse/pull/48591) ([flynn](https://github.com/ucasfl)).
+* HTTP temporary buffers now support working with the filesystem cache. [#48664](https://github.com/ClickHouse/ClickHouse/pull/48664) ([Vladimir C](https://github.com/vdimir)).
+* Make schema inference work for `CREATE AS SELECT`. Closes [#47599](https://github.com/ClickHouse/ClickHouse/issues/47599). [#48679](https://github.com/ClickHouse/ClickHouse/pull/48679) ([flynn](https://github.com/ucasfl)).
+* Added a `replicated_max_mutations_in_one_entry` setting for `ReplicatedMergeTree` that allows limiting the number of mutation commands per one `MUTATE_PART` entry (the default is 10000). [#48731](https://github.com/ClickHouse/ClickHouse/pull/48731) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* In AggregateFunction types, don't count unused arena bytes as `read_bytes`. [#48745](https://github.com/ClickHouse/ClickHouse/pull/48745) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix some MySQL-related settings not being handled with a MySQL dictionary source + named collection. Closes [#48402](https://github.com/ClickHouse/ClickHouse/issues/48402). [#48759](https://github.com/ClickHouse/ClickHouse/pull/48759) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix squashing in the query cache. [#48763](https://github.com/ClickHouse/ClickHouse/pull/48763) ([Robert Schulze](https://github.com/rschu1ze)).
+* Support the following new JSONPath formats: '$.1key' (a path element that begins with a number) and '$[key]', '$[“key”]', '$[\\\'key\\\']', '$["key 123"]' (a path element enclosed in []). [#48768](https://github.com/ClickHouse/ClickHouse/pull/48768) ([lgbo](https://github.com/lgbo-ustc)).
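+
+A sketch of the new JSONPath forms above:
+
+```sql
+SELECT JSON_VALUE('{"1key": 10}', '$.1key');            -- path element starting with a digit
+SELECT JSON_VALUE('{"key 123": "v"}', '$["key 123"]');  -- bracketed path element
+```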
+* If a user sets `max_single_part_upload_size` to a very large value, it can lead to a crash due to a bug in the AWS S3 SDK. This fixes [#47679](https://github.com/ClickHouse/ClickHouse/issues/47679). [#48816](https://github.com/ClickHouse/ClickHouse/pull/48816) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Not for changelog. [#48824](https://github.com/ClickHouse/ClickHouse/pull/48824) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Fix a data race in `StorageRabbitMQ` ([report](https://pastila.nl/?004f7100/de1505289ab5bb355e67ebe6c7cc8707)), refactor the code. [#48845](https://github.com/ClickHouse/ClickHouse/pull/48845) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add aliases `name` and `part_name` for `system.parts` and `system.part_log`. Closes [#48718](https://github.com/ClickHouse/ClickHouse/issues/48718). [#48850](https://github.com/ClickHouse/ClickHouse/pull/48850) ([sichenzhao](https://github.com/sichenzhao)).
+* The functions "arrayDifference()", "arrayCumSum()" and "arrayCumSumNonNegative()" now support input arrays of wide integer types (U)Int128/256. [#48866](https://github.com/ClickHouse/ClickHouse/pull/48866) ([cluster](https://github.com/infdahai)).
+* Multi-line history in clickhouse-client is now no longer padded. This makes pasting more natural. [#48870](https://github.com/ClickHouse/ClickHouse/pull/48870) ([Joanna Hulboj](https://github.com/jh0x)).
+* Not for changelog. [#48873](https://github.com/ClickHouse/ClickHouse/pull/48873) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Implement a slight improvement for the rare case when ClickHouse is run inside LXC and LXCFS is used. LXCFS has an issue: sometimes it returns the error "Transport endpoint is not connected" on reading from a file inside `/proc`. This error was correctly logged into ClickHouse's server log. We additionally work around this issue by reopening the file. This is a minuscule change. [#48922](https://github.com/ClickHouse/ClickHouse/pull/48922) ([Real](https://github.com/RunningXie)).
+* Improve memory accounting for prefetches. Randomize prefetch settings in CI. [#48973](https://github.com/ClickHouse/ClickHouse/pull/48973) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Correctly set headers for native copy operations on GCS. [#48981](https://github.com/ClickHouse/ClickHouse/pull/48981) ([Antonio Andelic](https://github.com/antonio2368)).
+* Add support for specifying setting names on the command line with dashes instead of underscores, for example, `--max-threads` instead of `--max_threads`. Additionally, support Unicode dash characters like `—` instead of `--`; this is useful when you communicate with a team in another company, and a manager from that team copy-pasted code from MS Word. [#48985](https://github.com/ClickHouse/ClickHouse/pull/48985) ([alekseygolub](https://github.com/alekseygolub)).
+* Add a fallback to password authentication when authentication with an SSL user certificate has failed. Closes [#48974](https://github.com/ClickHouse/ClickHouse/issues/48974). [#48989](https://github.com/ClickHouse/ClickHouse/pull/48989) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Increase the default value of `connect_timeout_with_failover_ms` to 1000 ms (because of the addition of async connections in https://github.com/ClickHouse/ClickHouse/pull/47229). Closes [#5188](https://github.com/ClickHouse/ClickHouse/issues/5188). [#49009](https://github.com/ClickHouse/ClickHouse/pull/49009) ([Kruglov Pavel](https://github.com/Avogar)).
+* Improve the embedded dashboard. Closes [#46671](https://github.com/ClickHouse/ClickHouse/issues/46671). [#49036](https://github.com/ClickHouse/ClickHouse/pull/49036) ([Kevin Zhang](https://github.com/Kinzeng)).
+* Add profile events for log messages, so you can easily see the count of log messages by severity. [#49042](https://github.com/ClickHouse/ClickHouse/pull/49042) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* The `bitCount` function now supports the `FixedString` and `String` data types. [#49044](https://github.com/ClickHouse/ClickHouse/pull/49044) ([flynn](https://github.com/ucasfl)).
+* In previous versions, the `LineAsString` format worked inconsistently depending on whether parallel parsing was enabled, in the presence of DOS or MacOS Classic line breaks. This closes [#49039](https://github.com/ClickHouse/ClickHouse/issues/49039). [#49052](https://github.com/ClickHouse/ClickHouse/pull/49052) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* The exception message about an unparsed query parameter will also tell the name of the parameter. Reimplements [#48878](https://github.com/ClickHouse/ClickHouse/issues/48878). Closes [#48772](https://github.com/ClickHouse/ClickHouse/issues/48772). [#49061](https://github.com/ClickHouse/ClickHouse/pull/49061) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Added a field `rows` with the number of rows parsed from an asynchronous insert to `system.asynchronous_insert_log`. [#49120](https://github.com/ClickHouse/ClickHouse/pull/49120) ([Anton Popov](https://github.com/CurtizJ)).
+* 1. Bump Intel QPL from v1.0.0 to v1.1.0 (fixes build issue [#47877](https://github.com/ClickHouse/ClickHouse/issues/47877)). 2. The DEFLATE_QPL codec now respects the maximum hardware jobs returned by libaccel_config. [#49126](https://github.com/ClickHouse/ClickHouse/pull/49126) ([jasperzhu](https://github.com/jinjunzh)).
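+
+A sketch of the extended `bitCount` above:
+
+```sql
+SELECT bitCount(toFixedString('a', 1));  -- 3, since 'a' is 0x61 = 0b01100001
+SELECT bitCount('abc');                  -- set bits across all bytes of the string
+```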
+
+#### Build/Testing/Packaging Improvement
+* Reduce the number of dependencies in the header files to speed up the build. [#47984](https://github.com/ClickHouse/ClickHouse/pull/47984) ([Dmitry Novik](https://github.com/novikd)).
+* Randomize compression of marks and indices in tests. [#48286](https://github.com/ClickHouse/ClickHouse/pull/48286) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Randomize vertical merges from compact to wide parts in tests. [#48287](https://github.com/ClickHouse/ClickHouse/pull/48287) ([Raúl Marín](https://github.com/Algunenano)).
+* With the current approach, all ports are calculated at the beginning and could overlap or even be hijacked; see [the report](https://s3.amazonaws.com/clickhouse-test-reports/46793/02928ae50c52f31ce8e5bfa99eb1b5db046f4a4f/integration_tests__release__[1/2]/integration_run_parallel8_0.log) for `port is already allocated`. It is possibly the reason for [#45368](https://github.com/ClickHouse/ClickHouse/issues/45368). [#48393](https://github.com/ClickHouse/ClickHouse/pull/48393) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Update time zones. The following were updated: Africa/Cairo, Africa/Casablanca, Africa/El_Aaiun, America/Bogota, America/Cambridge_Bay, America/Ciudad_Juarez, America/Godthab, America/Inuvik, America/Iqaluit, America/Nuuk, America/Ojinaga, America/Pangnirtung, America/Rankin_Inlet, America/Resolute, America/Whitehorse, America/Yellowknife, Asia/Gaza, Asia/Hebron, Asia/Kuala_Lumpur, Asia/Singapore, Canada/Yukon, Egypt, Europe/Kirov, Europe/Volgograd, Singapore. [#48572](https://github.com/ClickHouse/ClickHouse/pull/48572) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Support for the CRC32 checksum in HDFS. Fix performance issues. [#48614](https://github.com/ClickHouse/ClickHouse/pull/48614) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Remove the remainders of GCC support. [#48671](https://github.com/ClickHouse/ClickHouse/pull/48671) ([Robert Schulze](https://github.com/rschu1ze)).
+* Add a CI run with the new analyzer infrastructure enabled. [#48719](https://github.com/ClickHouse/ClickHouse/pull/48719) ([Dmitry Novik](https://github.com/novikd)).
+* Not for changelog. [#48879](https://github.com/ClickHouse/ClickHouse/pull/48879) ([larryluogit](https://github.com/larryluogit)).
+* After the recent update, `dockerd` requires `--tlsverify=false` together with the http port explicitly. [#48924](https://github.com/ClickHouse/ClickHouse/pull/48924) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Run more functional tests concurrently. [#48970](https://github.com/ClickHouse/ClickHouse/pull/48970) ([alesapin](https://github.com/alesapin)).
+* Fix the glibc compatibility check: replace `preadv` from musl. [#49144](https://github.com/ClickHouse/ClickHouse/pull/49144) ([alesapin](https://github.com/alesapin)).
+* Use position-independent encoding/code for sanitizer builds (at least msan :D) to avoid issues with the maximum relocation size. [#49145](https://github.com/ClickHouse/ClickHouse/pull/49145) ([alesapin](https://github.com/alesapin)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+
+* Fix system.query_views_log for MVs that are pushed from background threads [#46668](https://github.com/ClickHouse/ClickHouse/pull/46668) ([Azat Khuzhin](https://github.com/azat)).
+* Fix several `RENAME COLUMN` bugs [#46946](https://github.com/ClickHouse/ClickHouse/pull/46946) ([alesapin](https://github.com/alesapin)).
+* Fix minor highlighting issues in clickhouse-format [#47610](https://github.com/ClickHouse/ClickHouse/pull/47610) ([Natasha Murashkina](https://github.com/murfel)).
+* Fix a crash when uploading parts whose size is greater than INT_MAX to S3 [#47693](https://github.com/ClickHouse/ClickHouse/pull/47693) ([Azat Khuzhin](https://github.com/azat)).
+* Fix overflow in the sparkbar function [#48121](https://github.com/ClickHouse/ClickHouse/pull/48121) ([Vladimir C](https://github.com/vdimir)).
+* Fix a race in StorageS3 [#48190](https://github.com/ClickHouse/ClickHouse/pull/48190) ([Anton Popov](https://github.com/CurtizJ)).
+* Remove a feature [#48195](https://github.com/ClickHouse/ClickHouse/pull/48195) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix alter formatting (minor) [#48289](https://github.com/ClickHouse/ClickHouse/pull/48289) ([Natasha Murashkina](https://github.com/murfel)).
+* Fix CPU usage in RabbitMQ (was worsened in 23.2 after [#44404](https://github.com/ClickHouse/ClickHouse/issues/44404)) [#48311](https://github.com/ClickHouse/ClickHouse/pull/48311) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix ThreadPool for DistributedSink and use StrongTypedef for CurrentMetrics/ProfileEvents/StatusInfo to avoid further errors [#48314](https://github.com/ClickHouse/ClickHouse/pull/48314) ([Azat Khuzhin](https://github.com/azat)).
+* Fix a crash in EXPLAIN PIPELINE for Merge over Distributed [#48320](https://github.com/ClickHouse/ClickHouse/pull/48320) ([Azat Khuzhin](https://github.com/azat)).
+* Check node for Backup/Restore concurrency [#48342](https://github.com/ClickHouse/ClickHouse/pull/48342) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Close client [#48347](https://github.com/ClickHouse/ClickHouse/pull/48347) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Fix serializing LowCardinality as an Arrow dictionary [#48361](https://github.com/ClickHouse/ClickHouse/pull/48361) ([Kruglov Pavel](https://github.com/Avogar)).
+* Reset the downloader for a cache file segment in TemporaryFileStream [#48386](https://github.com/ClickHouse/ClickHouse/pull/48386) ([Vladimir C](https://github.com/vdimir)).
+* Fix possible SYSTEM SYNC REPLICA stuck in case of DROP/REPLACE PARTITION [#48391](https://github.com/ClickHouse/ClickHouse/pull/48391) ([Azat Khuzhin](https://github.com/azat)).
+* Fix a ClickHouse startup error when loading a distributed table that depends on a dictionary [#48419](https://github.com/ClickHouse/ClickHouse/pull/48419) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
+* Don't check dependencies when renaming system tables automatically [#48431](https://github.com/ClickHouse/ClickHouse/pull/48431) ([Raúl Marín](https://github.com/Algunenano)).
+* Some fixes for parallel replicas [#48433](https://github.com/ClickHouse/ClickHouse/pull/48433) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Update only affected rows in KV storage [#48435](https://github.com/ClickHouse/ClickHouse/pull/48435) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix a possible segfault in the cache [#48469](https://github.com/ClickHouse/ClickHouse/pull/48469) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* The toTimeZone function now throws an error when no constant string is provided [#48471](https://github.com/ClickHouse/ClickHouse/pull/48471) ([Jordi Villar](https://github.com/jrdi)).
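+
+A sketch of the `toTimeZone` argument check above:
+
+```sql
+SELECT toTimeZone(now(), 'UTC');              -- OK: the time zone is a constant string
+SELECT toTimeZone(now(), materialize('UTC')); -- now throws instead of misbehaving
+```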
+* Fix a logical error with IPv4 in Protobuf, add support for Date32 [#48486](https://github.com/ClickHouse/ClickHouse/pull/48486) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix the "changed" flag in system.settings being calculated incorrectly for settings with multiple values [#48516](https://github.com/ClickHouse/ClickHouse/pull/48516) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
+* Fix storage `Memory` with enabled compression [#48517](https://github.com/ClickHouse/ClickHouse/pull/48517) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix bracketed-paste mode messing up password input in client reconnect [#48528](https://github.com/ClickHouse/ClickHouse/pull/48528) ([Michael Kolupaev](https://github.com/al13n321)).
+* Avoid sending `nullptr` to `memcpy` in `copyStringInArena` [#48532](https://github.com/ClickHouse/ClickHouse/pull/48532) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix nested map for keys of IP and UUID types [#48556](https://github.com/ClickHouse/ClickHouse/pull/48556) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Fix an uncaught exception in case of the parallel loader for hashed dictionaries [#48571](https://github.com/ClickHouse/ClickHouse/pull/48571) ([Azat Khuzhin](https://github.com/azat)).
+* The return value of `groupArray` cannot be nullable [#48593](https://github.com/ClickHouse/ClickHouse/pull/48593) ([lgbo](https://github.com/lgbo-ustc)).
+* Fix a bug in Keeper where a node was sometimes not created with scheme `auth` in the ACL. [#48595](https://github.com/ClickHouse/ClickHouse/pull/48595) ([Aleksei Filatov](https://github.com/aalexfvk)).
+* Fix IPv4 being comparable with UInt [#48611](https://github.com/ClickHouse/ClickHouse/pull/48611) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Fix a possible error from the cache [#48636](https://github.com/ClickHouse/ClickHouse/pull/48636) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix async inserts with empty data [#48663](https://github.com/ClickHouse/ClickHouse/pull/48663) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix table dependencies in case of a failed RENAME TABLE [#48683](https://github.com/ClickHouse/ClickHouse/pull/48683) ([Azat Khuzhin](https://github.com/azat)).
+* Fix zero-copy replication on encrypted disks. [#48741](https://github.com/ClickHouse/ClickHouse/pull/48741) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix skip_unavailable_shards in case of unavailable hosts [#48771](https://github.com/ClickHouse/ClickHouse/pull/48771) ([Azat Khuzhin](https://github.com/azat)).
+* Fix the key condition on duplicate primary keys [#48838](https://github.com/ClickHouse/ClickHouse/pull/48838) ([Amos Bird](https://github.com/amosbird)).
+* Fix a race in ZooKeeper when joining send_thread/receive_thread [#48849](https://github.com/ClickHouse/ClickHouse/pull/48849) ([Alexander Gololobov](https://github.com/davenger)).
+* Fix an unexpected part name error when trying to drop an ignored detached part with zero-copy replication [#48862](https://github.com/ClickHouse/ClickHouse/pull/48862) ([Michael Lex](https://github.com/mlex)).
+* Fix reading a Date32 Parquet/Arrow column into a non-Date32 column [#48864](https://github.com/ClickHouse/ClickHouse/pull/48864) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix an UNKNOWN_IDENTIFIER error while selecting from a table with a row policy and a column with dots [#48976](https://github.com/ClickHouse/ClickHouse/pull/48976) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix an aggregate empty string error [#48999](https://github.com/ClickHouse/ClickHouse/pull/48999) ([LiuNeng](https://github.com/liuneng1994)).
+* Fix a PostgreSQL database setting [#49100](https://github.com/ClickHouse/ClickHouse/pull/49100) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix the flaky test_cache_with_full_disk_space [#49110](https://github.com/ClickHouse/ClickHouse/pull/49110) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix "prepared statement insert already exists" [#49154](https://github.com/ClickHouse/ClickHouse/pull/49154) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix replace[Regexp]{One,All}() with const haystacks [#49220](https://github.com/ClickHouse/ClickHouse/pull/49220) ([Robert Schulze](https://github.com/rschu1ze)).
+
+#### Build Improvement
+
+* Fixed a hashing issue in creating partition IDs for s390x. [#48134](https://github.com/ClickHouse/ClickHouse/pull/48134) ([Harry Lee](https://github.com/HarryLeeIBM)).
+
+#### NO CL ENTRY
+
+* NO CL ENTRY: 'Revert "Randomize JIT settings in tests"'. [#48277](https://github.com/ClickHouse/ClickHouse/pull/48277) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* NO CL ENTRY: 'Fix test "02494_query_cache_drop.sql"'. [#48358](https://github.com/ClickHouse/ClickHouse/pull/48358) ([Anton Popov](https://github.com/CurtizJ)).
+* NO CL ENTRY: 'Revert "Check simple dictionary key is native unsigned integer"'. [#48732](https://github.com/ClickHouse/ClickHouse/pull/48732) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* NO CL ENTRY: 'Revert "Make Schema inference works for CREATE AS SELECT"'. [#48758](https://github.com/ClickHouse/ClickHouse/pull/48758) ([pufit](https://github.com/pufit)).
+* NO CL ENTRY: 'Revert "Add MemoryTracker for the background tasks"'. [#48760](https://github.com/ClickHouse/ClickHouse/pull/48760) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* NO CL ENTRY: 'Revert "Added tests for ClickHouse apps help and fixed help issues"'. [#48991](https://github.com/ClickHouse/ClickHouse/pull/48991) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* NO CL ENTRY: 'Revert "Adapt marks count for prefetch read pool"'. [#49068](https://github.com/ClickHouse/ClickHouse/pull/49068) ([Nikita Taranov](https://github.com/nickitat)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Merges and mutations make a thread group to set memory trackers right [#47104](https://github.com/ClickHouse/ClickHouse/pull/47104) ([Sema Checherinda](https://github.com/CheSema)).
+* Query plan: update sort description [#47319](https://github.com/ClickHouse/ClickHouse/pull/47319) ([Igor Nikonov](https://github.com/devcrafter)).
+* Sqllogic [#47784](https://github.com/ClickHouse/ClickHouse/pull/47784) ([Sema Checherinda](https://github.com/CheSema)).
+* Fix a race between DROP MatView and RESTART REPLICAS [#47863](https://github.com/ClickHouse/ClickHouse/pull/47863) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Merge [#35113](https://github.com/ClickHouse/ClickHouse/issues/35113) [#47934](https://github.com/ClickHouse/ClickHouse/pull/47934) ([Antonio Andelic](https://github.com/antonio2368)).
+* Add a test for ClientInfo initial_query_start_time in inter-server mode [#48036](https://github.com/ClickHouse/ClickHouse/pull/48036) ([Azat Khuzhin](https://github.com/azat)).
+* Make the custom key for parallel replicas work in the new analyzer [#48054](https://github.com/ClickHouse/ClickHouse/pull/48054) ([Antonio Andelic](https://github.com/antonio2368)).
+* Throw an exception when non-parametric functions are given parameters [#48115](https://github.com/ClickHouse/ClickHouse/pull/48115) ([save-my-heart](https://github.com/save-my-heart)).
+* Move the FunctionsJSON implementation to a header file [#48142](https://github.com/ClickHouse/ClickHouse/pull/48142) ([DimasKovas](https://github.com/DimasKovas)).
+* Use ThreadPool in PipelineExecutor [#48146](https://github.com/ClickHouse/ClickHouse/pull/48146) ([Azat Khuzhin](https://github.com/azat)).
+* Add sanity checks for writing numbers in variable-length format (resubmit) [#48154](https://github.com/ClickHouse/ClickHouse/pull/48154) ([Azat Khuzhin](https://github.com/azat)).
+* Try to fix the 02151_hash_table_sizes_stats.sh test [#48178](https://github.com/ClickHouse/ClickHouse/pull/48178) ([Nikita Taranov](https://github.com/nickitat)).
+* Add scripts for sparse checkout of some contribs [#48183](https://github.com/ClickHouse/ClickHouse/pull/48183) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Do not take a lock for the shared context in setTempDataOnDisk [#48219](https://github.com/ClickHouse/ClickHouse/pull/48219) ([Vladimir C](https://github.com/vdimir)).
+* parseDateTime[InJodaSyntax](): require the format argument [#48222](https://github.com/ClickHouse/ClickHouse/pull/48222) ([Robert Schulze](https://github.com/rschu1ze)).
+* Do not partially cancel processors added from an expand pipeline. [#48231](https://github.com/ClickHouse/ClickHouse/pull/48231) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix some tests [#48267](https://github.com/ClickHouse/ClickHouse/pull/48267) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix compiling examples without Hive [#48269](https://github.com/ClickHouse/ClickHouse/pull/48269) ([Azat Khuzhin](https://github.com/azat)).
+* In messages, put values into quotes [#48271](https://github.com/ClickHouse/ClickHouse/pull/48271) ([Vadim Chekan](https://github.com/vchekan)).
+* Fix 01710_projection_optimize_materialize flakiness [#48276](https://github.com/ClickHouse/ClickHouse/pull/48276) ([Azat Khuzhin](https://github.com/azat)).
+* Fix UB (signed integer overflow) in StorageMergeTree::backupData() [#48278](https://github.com/ClickHouse/ClickHouse/pull/48278) ([Azat Khuzhin](https://github.com/azat)).
+* Update the version after release [#48279](https://github.com/ClickHouse/ClickHouse/pull/48279) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Update version_date.tsv and changelogs after v23.3.1.2823-lts [#48281](https://github.com/ClickHouse/ClickHouse/pull/48281) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Small follow-up to [#48017](https://github.com/ClickHouse/ClickHouse/issues/48017) [#48292](https://github.com/ClickHouse/ClickHouse/pull/48292) ([Robert Schulze](https://github.com/rschu1ze)).
+* Try to update the arrow library to release 11.0.0 [#48294](https://github.com/ClickHouse/ClickHouse/pull/48294) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix test numbers again 2 [#48295](https://github.com/ClickHouse/ClickHouse/pull/48295) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix: copy forgotten show_secrets in the FormatSettings semi-copy-ctor [#48297](https://github.com/ClickHouse/ClickHouse/pull/48297) ([Natasha Murashkina](https://github.com/murfel)).
+* Do not remove inputs from a maybe-compiled DAG. [#48303](https://github.com/ClickHouse/ClickHouse/pull/48303) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Update version_date.tsv and changelogs after v22.3.20.29-lts [#48304](https://github.com/ClickHouse/ClickHouse/pull/48304) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Update version_date.tsv and changelogs after v22.12.6.22-stable, v22.3.20.29-lts [#48305](https://github.com/ClickHouse/ClickHouse/pull/48305) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Merging [#46323](https://github.com/ClickHouse/ClickHouse/issues/46323) [#48312](https://github.com/ClickHouse/ClickHouse/pull/48312) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Follow-up to [#47863](https://github.com/ClickHouse/ClickHouse/issues/47863) [#48315](https://github.com/ClickHouse/ClickHouse/pull/48315) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Test: some complex query (it fails with the analyzer enabled) [#48324](https://github.com/ClickHouse/ClickHouse/pull/48324) ([Denny Crane](https://github.com/den-crane)).
+* Fix constraints after merge [#48328](https://github.com/ClickHouse/ClickHouse/pull/48328) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add logging for concurrency checks for backups [#48337](https://github.com/ClickHouse/ClickHouse/pull/48337) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Update version_date.tsv and changelogs after v23.1.6.42-stable [#48345](https://github.com/ClickHouse/ClickHouse/pull/48345) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Update version_date.tsv and changelogs after v23.2.5.46-stable [#48346](https://github.com/ClickHouse/ClickHouse/pull/48346) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Fix lambda type resolution [#48355](https://github.com/ClickHouse/ClickHouse/pull/48355) ([Dmitry Novik](https://github.com/novikd)).
+* Avoid an abort in the protobuf library in debug builds [#48356](https://github.com/ClickHouse/ClickHouse/pull/48356) ([Kruglov Pavel](https://github.com/Avogar)).
+* Batch fix for projections analysis with the analyzer. [#48357](https://github.com/ClickHouse/ClickHouse/pull/48357) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix tests with explain and the analyzer where names changed. [#48360](https://github.com/ClickHouse/ClickHouse/pull/48360) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Small follow-up to [#45912](https://github.com/ClickHouse/ClickHouse/issues/45912) [#48373](https://github.com/ClickHouse/ClickHouse/pull/48373) ([Robert Schulze](https://github.com/rschu1ze)).
+* Update version_date.tsv and changelogs after v22.8.16.32-lts [#48376](https://github.com/ClickHouse/ClickHouse/pull/48376) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Add a script for a Slack bot that reports broken tests [#48382](https://github.com/ClickHouse/ClickHouse/pull/48382) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix the flaky `test_keeper_mntr_data_size` [#48384](https://github.com/ClickHouse/ClickHouse/pull/48384) ([Antonio Andelic](https://github.com/antonio2368)).
+* WITH FILL clarification and cleanup [#48395](https://github.com/ClickHouse/ClickHouse/pull/48395) ([Igor Nikonov](https://github.com/devcrafter)).
+* Clean up the mess in .clang-tidy [#48396](https://github.com/ClickHouse/ClickHouse/pull/48396) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix test_backup_all [#48400](https://github.com/ClickHouse/ClickHouse/pull/48400) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Find big allocations without memory limit checks [#48401](https://github.com/ClickHouse/ClickHouse/pull/48401) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix an issue with krb5 and building with OpenSSL [#48407](https://github.com/ClickHouse/ClickHouse/pull/48407) ([Boris Kuschel](https://github.com/bkuschel)).
+* Make the CI Slack bot less noisy [#48409](https://github.com/ClickHouse/ClickHouse/pull/48409) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* AST fuzzer: fix an assertion in TopK serialization [#48412](https://github.com/ClickHouse/ClickHouse/pull/48412) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix possible flakiness of lightweight delete tests (due to index granularity randomization) [#48413](https://github.com/ClickHouse/ClickHouse/pull/48413) ([Azat Khuzhin](https://github.com/azat)).
+* Fix the flaky `test_keeper_snapshots` [#48417](https://github.com/ClickHouse/ClickHouse/pull/48417) ([Antonio Andelic](https://github.com/antonio2368)).
+* Update sort desc: more efficient original node search in ActionsDAG [#48427](https://github.com/ClickHouse/ClickHouse/pull/48427) ([Igor Nikonov](https://github.com/devcrafter)).
+* Test for [#16399](https://github.com/ClickHouse/ClickHouse/issues/16399) [#48439](https://github.com/ClickHouse/ClickHouse/pull/48439) ([Denny Crane](https://github.com/den-crane)).
+* Better exception messages from the Keeper client [#48444](https://github.com/ClickHouse/ClickHouse/pull/48444) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Small documentation follow-up to [#47246](https://github.com/ClickHouse/ClickHouse/issues/47246) [#48463](https://github.com/ClickHouse/ClickHouse/pull/48463) ([Robert Schulze](https://github.com/rschu1ze)).
+* Update 00002_log_and_exception_messages_formatting.sql [#48467](https://github.com/ClickHouse/ClickHouse/pull/48467) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Avoid operating on uninitialised data in readDateTimeTextImpl [#48472](https://github.com/ClickHouse/ClickHouse/pull/48472) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add a reading step for system.zookeeper. Analyze the path from the filter DAG. [#48485](https://github.com/ClickHouse/ClickHouse/pull/48485) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix a deadlock due to debug tracking of memory allocations [#48487](https://github.com/ClickHouse/ClickHouse/pull/48487) ([Azat Khuzhin](https://github.com/azat)).
+* Register the datediff and trim aliases in system.functions [#48489](https://github.com/ClickHouse/ClickHouse/pull/48489) ([Robert Schulze](https://github.com/rschu1ze)).
+* Change an error code [#48490](https://github.com/ClickHouse/ClickHouse/pull/48490) ([Anton Popov](https://github.com/CurtizJ)).
+* Update 00002_log_and_exception_messages_formatting.sql [#48499](https://github.com/ClickHouse/ClickHouse/pull/48499) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix the query cache with sparse columns [#48500](https://github.com/ClickHouse/ClickHouse/pull/48500) ([Anton Popov](https://github.com/CurtizJ)).
+* Use std::string_view to get rid of strlen [#48509](https://github.com/ClickHouse/ClickHouse/pull/48509) ([ltrk2](https://github.com/ltrk2)).
+* Fix bytesSize() of the ZooKeeper SetRequest [#48512](https://github.com/ClickHouse/ClickHouse/pull/48512) ([Sergei Trifonov](https://github.com/serxa)).
+* Remove dead code and unused dependencies [#48518](https://github.com/ClickHouse/ClickHouse/pull/48518) ([ltrk2](https://github.com/ltrk2)).
+* Use a forward declaration of ThreadPool [#48519](https://github.com/ClickHouse/ClickHouse/pull/48519) ([Azat Khuzhin](https://github.com/azat)).
+* Use std::string_view instead of strlen [#48520](https://github.com/ClickHouse/ClickHouse/pull/48520) ([ltrk2](https://github.com/ltrk2)).
+* Use std::string::starts_with instead of a roll-your-own variant [#48521](https://github.com/ClickHouse/ClickHouse/pull/48521) ([ltrk2](https://github.com/ltrk2)).
+* Fix the flaky `test_alternative_keeper_config` [#48533](https://github.com/ClickHouse/ClickHouse/pull/48533) ([Antonio Andelic](https://github.com/antonio2368)).
+* Use one ThreadGroup while pushing to materialized views (and some refactoring for ThreadGroup) [#48543](https://github.com/ClickHouse/ClickHouse/pull/48543) ([Azat Khuzhin](https://github.com/azat)).
+* Fix some tests [#48550](https://github.com/ClickHouse/ClickHouse/pull/48550) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix 02477_projection_materialize_and_zero_copy flakiness (due to index granularity randomization) [#48551](https://github.com/ClickHouse/ClickHouse/pull/48551) ([Azat Khuzhin](https://github.com/azat)).
+* Better exception message for ZSTD [#48552](https://github.com/ClickHouse/ClickHouse/pull/48552) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Remove a misleading comment and block [#48562](https://github.com/ClickHouse/ClickHouse/pull/48562) ([Sergei Trifonov](https://github.com/serxa)).
+* Update 02207_allow_plaintext_and_no_password.sh [#48566](https://github.com/ClickHouse/ClickHouse/pull/48566) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Bugfix: compare Bits and sizeof(Arithmetic) * 8 [#48569](https://github.com/ClickHouse/ClickHouse/pull/48569) ([caipengxiang](https://github.com/awfeequdng)).
+* Remove superfluous includes of logger_useful.h from headers [#48570](https://github.com/ClickHouse/ClickHouse/pull/48570) ([Azat Khuzhin](https://github.com/azat)).
+* Remove a slow test from debug builds [#48574](https://github.com/ClickHouse/ClickHouse/pull/48574) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Don't use type conversion with String query parameters [#48577](https://github.com/ClickHouse/ClickHouse/pull/48577) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Fix a TSan report in Kerberos [#48579](https://github.com/ClickHouse/ClickHouse/pull/48579) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add second_deadlock_stack=1 for TSan on CI and fix some lock-order-inversion problems [#48596](https://github.com/ClickHouse/ClickHouse/pull/48596) ([Azat Khuzhin](https://github.com/azat)).
+* Fix a LOGICAL_ERROR in the executable table function [#48605](https://github.com/ClickHouse/ClickHouse/pull/48605) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Fix flakiness of test_store_cleanup in case of an image rebuild [#48610](https://github.com/ClickHouse/ClickHouse/pull/48610) ([Azat Khuzhin](https://github.com/azat)).
+* Remove strange code [#48612](https://github.com/ClickHouse/ClickHouse/pull/48612) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Minor refactoring of formatDateTime() [#48627](https://github.com/ClickHouse/ClickHouse/pull/48627) ([Robert Schulze](https://github.com/rschu1ze)).
+* Better handling of values too large for VarInt encoding [#48628](https://github.com/ClickHouse/ClickHouse/pull/48628) ([Robert Schulze](https://github.com/rschu1ze)).
+* Refine some exception messages in the regexp tree [#48632](https://github.com/ClickHouse/ClickHouse/pull/48632) ([Han Fei](https://github.com/hanfei1991)).
+* Partially revert e0252db8d and fix pr-bugfix labeling [#48637](https://github.com/ClickHouse/ClickHouse/pull/48637) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Fix the build of src/Interpreters/InterpreterInsertQuery.h [#48638](https://github.com/ClickHouse/ClickHouse/pull/48638) ([Vladimir C](https://github.com/vdimir)).
+* Fix the build of ThreadGroupPtr [#48641](https://github.com/ClickHouse/ClickHouse/pull/48641) ([Vladimir C](https://github.com/vdimir)).
+* Fix the flaky test test_drop_replica_and_achieve_quorum [#48642](https://github.com/ClickHouse/ClickHouse/pull/48642) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix 02504_regexp_dictionary_table_source [#48662](https://github.com/ClickHouse/ClickHouse/pull/48662) ([Han Fei](https://github.com/hanfei1991)).
+* Remove strange code from MutateTask [#48666](https://github.com/ClickHouse/ClickHouse/pull/48666) ([alesapin](https://github.com/alesapin)).
+* SonarCloud: C++ Reporting Standards [#48668](https://github.com/ClickHouse/ClickHouse/pull/48668) ([Julio Jimenez](https://github.com/juliojimenez)).
+* Remove the lock for duplicated part UUIDs (allow_experimental_query_deduplication=1) [#48670](https://github.com/ClickHouse/ClickHouse/pull/48670) ([Azat Khuzhin](https://github.com/azat)).
+* Show the result of minio listings for the test test_attach_detach_partition [#48674](https://github.com/ClickHouse/ClickHouse/pull/48674) ([Sema Checherinda](https://github.com/CheSema)).
+* Fix tests for the analyzer [#48675](https://github.com/ClickHouse/ClickHouse/pull/48675) ([Igor Nikonov](https://github.com/devcrafter)).
+* Call IProcessor::onCancel() once [#48687](https://github.com/ClickHouse/ClickHouse/pull/48687) ([Igor Nikonov](https://github.com/devcrafter)).
+* Update the MergeTree syntax for the optional index granularity argument [#48692](https://github.com/ClickHouse/ClickHouse/pull/48692) ([Robert Schulze](https://github.com/rschu1ze)).
+* Add a test for the old bug [#7826](https://github.com/ClickHouse/ClickHouse/issues/7826) [#48697](https://github.com/ClickHouse/ClickHouse/pull/48697) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix the flaky `test_keeper_session` [#48699](https://github.com/ClickHouse/ClickHouse/pull/48699) ([Antonio Andelic](https://github.com/antonio2368)).
+* Better message formatting in the CI Slack bot [#48712](https://github.com/ClickHouse/ClickHouse/pull/48712) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Add trusted contributors [#48715](https://github.com/ClickHouse/ClickHouse/pull/48715) ([Aleksei Filatov](https://github.com/aalexfvk)).
+* Do not remove broken detached parts on startup [#48730](https://github.com/ClickHouse/ClickHouse/pull/48730) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Remove the `-Wshadow` suppression which leaked into the global namespace [#48737](https://github.com/ClickHouse/ClickHouse/pull/48737) ([Robert Schulze](https://github.com/rschu1ze)).
+* VarInt coding: always perform the sanity check [#48740](https://github.com/ClickHouse/ClickHouse/pull/48740) ([Robert Schulze](https://github.com/rschu1ze)).
+* Try to fix the flaky 02455_one_row_from_csv_memory_usage [#48756](https://github.com/ClickHouse/ClickHouse/pull/48756) ([Dmitry Novik](https://github.com/novikd)).
+* Insert the UInt32 hash value in reverse order on big-endian machines [#48764](https://github.com/ClickHouse/ClickHouse/pull/48764) ([Suzy Wang](https://github.com/SuzyWangIBMer)).
+* Limit the size of messages from the CI Slack bot [#48766](https://github.com/ClickHouse/ClickHouse/pull/48766) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Update README.md [#48776](https://github.com/ClickHouse/ClickHouse/pull/48776) ([Tyler Hannan](https://github.com/tylerhannan)).
+* Remove the duplicate definition of SingleEndpointHTTPSessionPool [#48779](https://github.com/ClickHouse/ClickHouse/pull/48779) ([JaySon](https://github.com/JaySon-Huang)).
+* Fix the flaky test_version_update_after_mutation/test.py::test_upgrade_while_mutation [#48783](https://github.com/ClickHouse/ClickHouse/pull/48783) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix the flaky test test_backup_all [#48789](https://github.com/ClickHouse/ClickHouse/pull/48789) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix a confusing warning about interserver mode [#48793](https://github.com/ClickHouse/ClickHouse/pull/48793) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Store clusters from ClusterDiscovery in a separate map [#48795](https://github.com/ClickHouse/ClickHouse/pull/48795) ([Vladimir C](https://github.com/vdimir)).
+* Reimplement [#48790](https://github.com/ClickHouse/ClickHouse/issues/48790) [#48797](https://github.com/ClickHouse/ClickHouse/pull/48797) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Allow running integration tests without Spark [#48803](https://github.com/ClickHouse/ClickHouse/pull/48803) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Forbid gwpsan in debug mode to rescue stress tests [#48804](https://github.com/ClickHouse/ClickHouse/pull/48804) ([Han Fei](https://github.com/hanfei1991)).
+* Simplify FileCacheFactory [#48805](https://github.com/ClickHouse/ClickHouse/pull/48805) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix a double whitespace in an exception message [#48815](https://github.com/ClickHouse/ClickHouse/pull/48815) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add a test for [#38128](https://github.com/ClickHouse/ClickHouse/issues/38128) [#48817](https://github.com/ClickHouse/ClickHouse/pull/48817) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Remove excessive logging [#48826](https://github.com/ClickHouse/ClickHouse/pull/48826) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Remove the duplicate IndentWidth in clang-format [#48834](https://github.com/ClickHouse/ClickHouse/pull/48834) ([cluster](https://github.com/infdahai)).
+* Try to fix the flaky test_concurrent_alter_move_and_drop [#48843](https://github.com/ClickHouse/ClickHouse/pull/48843) ([Sergei Trifonov](https://github.com/serxa)).
+* Fix the race when waiting for loading parts [#48844](https://github.com/ClickHouse/ClickHouse/pull/48844) ([Sema Checherinda](https://github.com/CheSema)).
+* Suppress the progress assert for test_system_replicated_fetches [#48856](https://github.com/ClickHouse/ClickHouse/pull/48856) ([Han Fei](https://github.com/hanfei1991)).
+* Fix: do not run test_store_cleanup_disk_s3 in parallel [#48863](https://github.com/ClickHouse/ClickHouse/pull/48863) ([Igor Nikonov](https://github.com/devcrafter)).
+* Update README.md [#48883](https://github.com/ClickHouse/ClickHouse/pull/48883) ([Tyler Hannan](https://github.com/tylerhannan)).
+* Fix test reference files for join using a nullable column [#48893](https://github.com/ClickHouse/ClickHouse/pull/48893) ([Vladimir C](https://github.com/vdimir)).
+* bitNot marked as NO_SANITIZE_UNDEFINED [#48899](https://github.com/ClickHouse/ClickHouse/pull/48899) ([Vladimir C](https://github.com/vdimir)).
+* Fix order by in test_storage_delta [#48903](https://github.com/ClickHouse/ClickHouse/pull/48903) ([Vladimir C](https://github.com/vdimir)).
+* Fix a segfault when the set is not built yet [#48904](https://github.com/ClickHouse/ClickHouse/pull/48904) ([Alexander Gololobov](https://github.com/davenger)).
+* A non-significant change (does not affect anything): add support for signed integers in the maskBits function [#48920](https://github.com/ClickHouse/ClickHouse/pull/48920) ([caipengxiang](https://github.com/awfeequdng)).
+* Follow-up to [#48866](https://github.com/ClickHouse/ClickHouse/issues/48866) [#48929](https://github.com/ClickHouse/ClickHouse/pull/48929) ([Robert Schulze](https://github.com/rschu1ze)).
+* Un-flake 01079_new_range_reader_segfault [#48934](https://github.com/ClickHouse/ClickHouse/pull/48934) ([Robert Schulze](https://github.com/rschu1ze)).
+* Add a building stage to the fasttests report, respect the existing status on rerun [#48935](https://github.com/ClickHouse/ClickHouse/pull/48935) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Update Settings.h [#48948](https://github.com/ClickHouse/ClickHouse/pull/48948) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Update cluster.py [#48949](https://github.com/ClickHouse/ClickHouse/pull/48949) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Docs: replace annoying three spaces in enumerations by a single space [#48951](https://github.com/ClickHouse/ClickHouse/pull/48951) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix the flaky 02706_arrow_different_dictionaries [#48952](https://github.com/ClickHouse/ClickHouse/pull/48952) ([Kruglov Pavel](https://github.com/Avogar)).
+* Use the default `{replica}`, `{shard}` arguments in the Replicated engine [#48961](https://github.com/ClickHouse/ClickHouse/pull/48961) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Rename quantileApprox -> quantileGK [#48969](https://github.com/ClickHouse/ClickHouse/pull/48969) ([Vladimir C](https://github.com/vdimir)).
+* Don't throw a logical error when a column is not found in the Parquet/Arrow schema [#48987](https://github.com/ClickHouse/ClickHouse/pull/48987) ([Kruglov Pavel](https://github.com/Avogar)).
+* Reimplement [#48986](https://github.com/ClickHouse/ClickHouse/issues/48986) [#49005](https://github.com/ClickHouse/ClickHouse/pull/49005) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Don't allow bad changelogs [#49006](https://github.com/ClickHouse/ClickHouse/pull/49006) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Update README.md [#49007](https://github.com/ClickHouse/ClickHouse/pull/49007) ([Nick-71](https://github.com/Nick-71)).
+* Remove an outdated test [#49014](https://github.com/ClickHouse/ClickHouse/pull/49014) ([alesapin](https://github.com/alesapin)).
+* Fix a typo [#49027](https://github.com/ClickHouse/ClickHouse/pull/49027) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix an assertion after [#48636](https://github.com/ClickHouse/ClickHouse/issues/48636) [#49029](https://github.com/ClickHouse/ClickHouse/pull/49029) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix a build error for big-endian platforms [#49037](https://github.com/ClickHouse/ClickHouse/pull/49037) ([ltrk2](https://github.com/ltrk2)).
+* Update version_date.tsv and changelogs after v22.8.17.17-lts [#49046](https://github.com/ClickHouse/ClickHouse/pull/49046) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Update version_date.tsv and changelogs after v23.1.7.30-stable [#49047](https://github.com/ClickHouse/ClickHouse/pull/49047) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Update version_date.tsv and changelogs after v23.3.2.37-lts [#49048](https://github.com/ClickHouse/ClickHouse/pull/49048) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Remove some code [#49054](https://github.com/ClickHouse/ClickHouse/pull/49054) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Remove some dead code in poco [#49075](https://github.com/ClickHouse/ClickHouse/pull/49075) ([Robert Schulze](https://github.com/rschu1ze)).
+* Prevent a false-positive report by a static analyzer [#49078](https://github.com/ClickHouse/ClickHouse/pull/49078) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Update version_date.tsv and changelogs after v23.2.6.34-stable [#49080](https://github.com/ClickHouse/ClickHouse/pull/49080) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Enforce a documentation change for a new-feature PR [#49090](https://github.com/ClickHouse/ClickHouse/pull/49090) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Update clickhouse-test [#49094](https://github.com/ClickHouse/ClickHouse/pull/49094) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Disable long 02581 in debug, enable with sanitizers [#49105](https://github.com/ClickHouse/ClickHouse/pull/49105) ([Alexander Gololobov](https://github.com/davenger)).
+* Fix the flaky integration test test_async_query_sending [#49107](https://github.com/ClickHouse/ClickHouse/pull/49107) ([Kruglov Pavel](https://github.com/Avogar)).
+* Correct a functional test to reflect interoperability [#49108](https://github.com/ClickHouse/ClickHouse/pull/49108) ([ltrk2](https://github.com/ltrk2)).
+* Clean up the build guide [#49119](https://github.com/ClickHouse/ClickHouse/pull/49119) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix building iceberg without avro [#49125](https://github.com/ClickHouse/ClickHouse/pull/49125) ([Azat Khuzhin](https://github.com/azat)). +* Add slash for close tag of user_defined_zookeeper_path [#49131](https://github.com/ClickHouse/ClickHouse/pull/49131) ([Hollin](https://github.com/Hooollin)). +* Improve some lambdas [#49133](https://github.com/ClickHouse/ClickHouse/pull/49133) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Do not randomize prefetch settings for debug build [#49134](https://github.com/ClickHouse/ClickHouse/pull/49134) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Don't throw LOGICAL_ERROR when reading from remote if there is no local replica [#49136](https://github.com/ClickHouse/ClickHouse/pull/49136) ([Raúl Marín](https://github.com/Algunenano)). +* Docs: Make caption of processors_profile_log page consistent with other pages [#49138](https://github.com/ClickHouse/ClickHouse/pull/49138) ([Robert Schulze](https://github.com/rschu1ze)). +* Improve test reports [#49151](https://github.com/ClickHouse/ClickHouse/pull/49151) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add a note regarding private/public repo to logs [#49152](https://github.com/ClickHouse/ClickHouse/pull/49152) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* suppress two timeout tests [#49175](https://github.com/ClickHouse/ClickHouse/pull/49175) ([Han Fei](https://github.com/hanfei1991)). +* Document makeDateTime() and its variants [#49183](https://github.com/ClickHouse/ClickHouse/pull/49183) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix after [#49110](https://github.com/ClickHouse/ClickHouse/issues/49110) [#49206](https://github.com/ClickHouse/ClickHouse/pull/49206) ([Kseniia Sumarokova](https://github.com/kssenii)). + diff --git a/docs/changelogs/v23.4.2.11-stable.md b/docs/changelogs/v23.4.2.11-stable.md new file mode 100644 index 00000000000..3c572b9c1cb --- /dev/null +++ b/docs/changelogs/v23.4.2.11-stable.md @@ -0,0 +1,20 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.4.2.11-stable (b6442320f9d) FIXME as compared to v23.4.1.1943-stable (3920eb987f7) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Revert "Fix GCS native copy ([#48981](https://github.com/ClickHouse/ClickHouse/issues/48981))" [#49194](https://github.com/ClickHouse/ClickHouse/pull/49194) ([Raúl Marín](https://github.com/Algunenano)). +* Fix race on Outdated parts loading [#49223](https://github.com/ClickHouse/ClickHouse/pull/49223) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Implement status comment [#48468](https://github.com/ClickHouse/ClickHouse/pull/48468) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update curl to 8.0.1 (for CVEs) [#48765](https://github.com/ClickHouse/ClickHouse/pull/48765) ([Boris Kuschel](https://github.com/bkuschel)). +* Fallback auth gh api [#49314](https://github.com/ClickHouse/ClickHouse/pull/49314) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+ diff --git a/docs/en/development/architecture.md b/docs/en/development/architecture.md index 50b338844df..ba81b31b8ef 100644 --- a/docs/en/development/architecture.md +++ b/docs/en/development/architecture.md @@ -79,8 +79,8 @@ In most cases, the read method is only responsible for reading the specified col But there are notable exceptions: -- The AST query is passed to the `read` method, and the table engine can use it to derive index usage and to read fewer data from a table. -- Sometimes the table engine can process data itself to a specific stage. For example, `StorageDistributed` can send a query to remote servers, ask them to process data to a stage where data from different remote servers can be merged, and return that preprocessed data. The query interpreter then finishes processing the data. +- The AST query is passed to the `read` method, and the table engine can use it to derive index usage and to read less data from a table. +- Sometimes the table engine can process data itself to a specific stage. For example, `StorageDistributed` can send a query to remote servers, ask them to process data to a stage where data from different remote servers can be merged, and return that preprocessed data. The query interpreter then finishes processing the data. The table’s `read` method can return multiple `IBlockInputStream` objects to allow parallel data processing. These multiple block input streams can read from a table in parallel. Then you can wrap these streams with various transformations (such as expression evaluation or filtering) that can be calculated independently and create a `UnionBlockInputStream` on top of them, to read from multiple streams in parallel. @@ -98,7 +98,7 @@ A hand-written recursive descent parser parses a query. For example, `ParserSele ## Interpreters {#interpreters} -Interpreters are responsible for creating the query execution pipeline from an `AST`. There are simple interpreters, such as `InterpreterExistsQuery` and `InterpreterDropQuery`, or the more sophisticated `InterpreterSelectQuery`. The query execution pipeline is a combination of block input or output streams. For example, the result of interpreting the `SELECT` query is the `IBlockInputStream` to read the result set from; the result of the INSERT query is the `IBlockOutputStream` to write data for insertion to, and the result of interpreting the `INSERT SELECT` query is the `IBlockInputStream` that returns an empty result set on the first read, but that copies data from `SELECT` to `INSERT` at the same time. +Interpreters are responsible for creating the query execution pipeline from an `AST`. There are simple interpreters, such as `InterpreterExistsQuery` and `InterpreterDropQuery`, or the more sophisticated `InterpreterSelectQuery`. The query execution pipeline is a combination of block input or output streams. For example, the result of interpreting the `SELECT` query is the `IBlockInputStream` to read the result set from; the result of the `INSERT` query is the `IBlockOutputStream` to write data for insertion to, and the result of interpreting the `INSERT SELECT` query is the `IBlockInputStream` that returns an empty result set on the first read, but that copies data from `SELECT` to `INSERT` at the same time. `InterpreterSelectQuery` uses `ExpressionAnalyzer` and `ExpressionActions` machinery for query analysis and transformations. This is where most rule-based query optimizations are done.
`ExpressionAnalyzer` is quite messy and should be rewritten: various query transformations and optimizations should be extracted to separate classes to allow modular transformations of query. @@ -132,9 +132,9 @@ Aggregation states can be serialized and deserialized to pass over the network d The server implements several different interfaces: -- An HTTP interface for any foreign clients. -- A TCP interface for the native ClickHouse client and for cross-server communication during distributed query execution. -- An interface for transferring data for replication. +- An HTTP interface for any foreign clients. +- A TCP interface for the native ClickHouse client and for cross-server communication during distributed query execution. +- An interface for transferring data for replication. Internally, it is just a primitive multithread server without coroutines or fibers. Since the server is not designed to process a high rate of simple queries but to process a relatively low rate of complex queries, each of them can process a vast amount of data for analytics. @@ -172,7 +172,7 @@ Global thread pool is `GlobalThreadPool` singleton class. To allocate thread fro Global pool is universal and all pools described below are implemented on top of it. This can be thought of as a hierarchy of pools. Any specialized pool takes its threads from the global pool using `ThreadPool` class. So the main purpose of any specialized pool is to apply limit on the number of simultaneous jobs and do job scheduling. If there are more jobs scheduled than threads in a pool, `ThreadPool` accumulates jobs in a queue with priorities. Each job has an integer priority. Default priority is zero. All jobs with higher priority values are started before any job with lower priority value. But there is no difference between already executing jobs, thus priority matters only when the pool in overloaded. -IO thread pool is implemented as a plain `ThreadPool` accessible via `IOThreadPool::get()` method. It is configured in the same way as global pool with `max_io_thread_pool_size`, `max_io_thread_pool_free_size` and `io_thread_pool_queue_size` settings. The main purpose of IO thread pool is to avoid exhaustion of the global pool with IO jobs, which could prevent queries from fully utilizing CPU. +IO thread pool is implemented as a plain `ThreadPool` accessible via `IOThreadPool::get()` method. It is configured in the same way as global pool with `max_io_thread_pool_size`, `max_io_thread_pool_free_size` and `io_thread_pool_queue_size` settings. The main purpose of IO thread pool is to avoid exhaustion of the global pool with IO jobs, which could prevent queries from fully utilizing CPU. Backup to S3 performs a significant amount of IO operations, and to avoid impact on interactive queries there is a separate `BackupsIOThreadPool` configured with `max_backups_io_thread_pool_size`, `max_backups_io_thread_pool_free_size` and `backups_io_thread_pool_queue_size` settings. For periodic task execution there is `BackgroundSchedulePool` class. You can register tasks using `BackgroundSchedulePool::TaskHolder` objects and the pool ensures that no task runs two jobs at the same time. It also allows you to postpone task execution to a specific instant in the future or temporarily deactivate task. Global `Context` provides a few instances of this class for different purposes. For general purpose tasks `Context::getSchedulePool()` is used.
diff --git a/docs/en/development/build-cross-osx.md b/docs/en/development/build-cross-osx.md index 1df88dbb235..ce8d1b77526 100644 --- a/docs/en/development/build-cross-osx.md +++ b/docs/en/development/build-cross-osx.md @@ -11,14 +11,14 @@ This is intended for continuous integration checks that run on Linux servers. If The cross-build for macOS is based on the [Build instructions](../development/build.md), follow them first. -## Install Clang-14 +## Install Clang-16 Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup. For example the commands for Bionic are like: ``` bash -sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-14 main" >> /etc/apt/sources.list -sudo apt-get install clang-14 +echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-16 main" | sudo tee -a /etc/apt/sources.list +sudo apt-get install clang-16 ``` ## Install Cross-Compilation Toolset {#install-cross-compilation-toolset} @@ -55,7 +55,7 @@ curl -L 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX1 cd ClickHouse mkdir build-darwin cd build-darwin -CC=clang-14 CXX=clang++-14 cmake -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar -DCMAKE_INSTALL_NAME_TOOL=${CCTOOLS}/bin/x86_64-apple-darwin-install_name_tool -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib -DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld -DCMAKE_TOOLCHAIN_FILE=cmake/darwin/toolchain-x86_64.cmake .. +CC=clang-16 CXX=clang++-16 cmake -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar -DCMAKE_INSTALL_NAME_TOOL=${CCTOOLS}/bin/x86_64-apple-darwin-install_name_tool -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib -DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld -DCMAKE_TOOLCHAIN_FILE=cmake/darwin/toolchain-x86_64.cmake .. ninja ``` diff --git a/docs/en/development/build-cross-riscv.md b/docs/en/development/build-cross-riscv.md index a20913e7a32..e3550a046c7 100644 --- a/docs/en/development/build-cross-riscv.md +++ b/docs/en/development/build-cross-riscv.md @@ -11,7 +11,7 @@ This is for the case when you have Linux machine and want to use it to build `cl The cross-build for RISC-V 64 is based on the [Build instructions](../development/build.md), follow them first. -## Install Clang-13 +## Install Clang-16 Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup or do ``` sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ``` @@ -23,7 +23,7 @@ ``` bash cd ClickHouse mkdir build-riscv64 -CC=clang-14 CXX=clang++-14 cmake . -Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DUSE_UNWIND=OFF -DENABLE_GRPC=OFF -DENABLE_HDFS=OFF -DENABLE_MYSQL=OFF +CC=clang-16 CXX=clang++-16 cmake .
-Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DUSE_UNWIND=OFF -DENABLE_GRPC=OFF -DENABLE_HDFS=OFF -DENABLE_MYSQL=OFF ninja -C build-riscv64 ``` diff --git a/docs/en/development/build-cross-s390x.md b/docs/en/development/build-cross-s390x.md new file mode 100644 index 00000000000..be2c37f5f41 --- /dev/null +++ b/docs/en/development/build-cross-s390x.md @@ -0,0 +1,123 @@ +--- +slug: /en/development/build-cross-s390x +sidebar_position: 69 +title: How to Build, Run and Debug ClickHouse on Linux for s390x (zLinux) +sidebar_label: Build on Linux for s390x (zLinux) +--- + +As of this writing (2023/3/10), building for s390x is considered experimental: not all features can be enabled, some features are broken, and the port is currently under active development. + + +## Building + +As s390x does not support BoringSSL, it uses OpenSSL and has two related build options. +- By default, the s390x build will dynamically link to OpenSSL libraries. It will build OpenSSL shared objects, so it's not necessary to install OpenSSL beforehand. (This option is recommended in all cases.) +- Another option is to build OpenSSL in-tree. In this case, two build flags need to be supplied to CMake: ```bash +-DENABLE_OPENSSL_DYNAMIC=0 -DENABLE_OPENSSL=1 +``` + +These instructions assume that the host machine is x86_64 and has all the tooling required to build natively based on the [build instructions](../development/build.md). It also assumes that the host is Ubuntu 22.04, but the following instructions should also work on Ubuntu 20.04. + +In addition to installing the tooling used to build natively, the following additional packages need to be installed: + +```bash +apt-get install binutils-s390x-linux-gnu libc6-dev-s390x-cross gcc-s390x-linux-gnu binfmt-support qemu-user-static +``` + +If you wish to cross-compile Rust code, install the Rust cross-compilation target for s390x: +```bash +rustup target add s390x-unknown-linux-gnu +``` + +To build for s390x: +```bash +cmake -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-s390x.cmake .. +ninja +``` + +## Running + +Once built, the binary can be run with, e.g.: + +```bash +qemu-s390x-static -L /usr/s390x-linux-gnu ./clickhouse +``` + +## Debugging + +Install LLDB: + +```bash +apt-get install lldb-15 +``` + +To debug an s390x executable, run ClickHouse under QEMU in debug mode: + +```bash +qemu-s390x-static -g 31338 -L /usr/s390x-linux-gnu ./clickhouse +``` + +In another shell, run LLDB and attach; replace the placeholder paths in the session below with the values corresponding to your environment. ```bash +lldb-15 +(lldb) target create ./clickhouse +Current executable set to '//ClickHouse//programs/clickhouse' (s390x). +(lldb) settings set target.source-map //ClickHouse +(lldb) gdb-remote 31338 +Process 1 stopped +* thread #1, stop reason = signal SIGTRAP + frame #0: 0x0000004020e74cd0 +-> 0x4020e74cd0: lgr %r2, %r15 + 0x4020e74cd4: aghi %r15, -160 + 0x4020e74cd8: xc 0(8,%r15), 0(%r15) + 0x4020e74cde: brasl %r14, 275429939040 +(lldb) b main +Breakpoint 1: 9 locations.
+(lldb) c +Process 1 resuming +Process 1 stopped +* thread #1, stop reason = breakpoint 1.1 + frame #0: 0x0000004005cd9fc0 clickhouse`main(argc_=1, argv_=0x0000004020e594a8) at main.cpp:450:17 + 447 #if !defined(FUZZING_MODE) + 448 int main(int argc_, char ** argv_) + 449 { +-> 450 inside_main = true; + 451 SCOPE_EXIT({ inside_main = false; }); + 452 + 453 /// PHDR cache is required for query profiler to work reliably +``` + +## Visual Studio Code integration + +- The [CodeLLDB extension](https://github.com/vadimcn/vscode-lldb) is required for visual debugging; the [Command Variable](https://github.com/rioj7/command-variable) extension can help with dynamic launches if using [cmake variants](https://github.com/microsoft/vscode-cmake-tools/blob/main/docs/variants.md). +- Make sure to set the backend to your LLVM installation, e.g. `"lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"` +- Launcher: +```json +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Debug", + "type": "lldb", + "request": "custom", + "targetCreateCommands": ["target create ${command:cmake.launchTargetDirectory}/clickhouse"], + "processCreateCommands": ["settings set target.source-map ${input:targetdir} ${workspaceFolder}", "gdb-remote 31338"], + "sourceMap": { "${input:targetdir}": "${workspaceFolder}" }, + } + ], + "inputs": [ + { + "id": "targetdir", + "type": "command", + "command": "extension.commandvariable.transform", + "args": { + "text": "${command:cmake.launchTargetDirectory}", + "find": ".*/([^/]+)/[^/]+$", + "replace": "$1" + } + } + ] +} +``` +- Make sure to run the ClickHouse executable in debug mode prior to launch. (It is also possible to create a `preLaunchTask` that automates this.) \ No newline at end of file diff --git a/docs/en/development/build-osx.md b/docs/en/development/build-osx.md index 63d1905bb61..e65de4a37e0 100644 --- a/docs/en/development/build-osx.md +++ b/docs/en/development/build-osx.md @@ -13,7 +13,7 @@ You can install pre-built ClickHouse as described in [Quick Start](https://click The build works on x86_64 (Intel) and arm64 (Apple Silicon) based on macOS 10.15 (Catalina) or higher with Homebrew's vanilla Clang. :::note -It is also possible to compile with Apple's XCode `apple-clang` or Homebrew's `gcc`, but it's strongly discouraged. +It is also possible to compile with Apple's Xcode `apple-clang`, but it's strongly discouraged. ::: ## Install Homebrew {#install-homebrew} @@ -75,20 +75,6 @@ cmake --open . # The resulting binary will be created at: ./programs/Debug/clickhouse ``` -To build using Homebrew's vanilla GCC compiler (this option is only for development experiments, and is **absolutely not recommended** unless you really know what you are doing): - -``` bash -cd ClickHouse -mkdir build -export PATH=$(brew --prefix binutils)/bin:$PATH -export PATH=$(brew --prefix gcc)/bin:$PATH -export CC=$(brew --prefix gcc)/bin/gcc-11 -export CXX=$(brew --prefix gcc)/bin/g++-11 -cmake -G Ninja -DCMAKE_BUILD_TYPE=RelWithDebInfo -S . -B build -cmake --build build -# The resulting binary will be created at: build/programs/clickhouse -``` - ## Caveats {#caveats} If you intend to run `clickhouse-server`, make sure to increase the system’s `maxfiles` variable.
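For a quick, session-local sanity check you can inspect and raise the soft limit on open file descriptors in the current shell (a minimal sketch; the value below is only an example, and a permanent setup still requires the system-level `maxfiles` configuration):

``` bash
# Show the current soft limit on open file descriptors for this shell
ulimit -n

# Raise it for this shell session only (example value, adjust to your workload)
ulimit -n 10240
```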
diff --git a/docs/en/development/build.md b/docs/en/development/build.md index d52b018a5a7..c35e077a166 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -9,27 +9,27 @@ description: How to build ClickHouse on Linux Supported platforms: -- x86_64 -- AArch64 -- Power9 (experimental) +- x86_64 +- AArch64 +- Power9 (experimental) -## Normal Build for Development on Ubuntu +## Building on Ubuntu -The following tutorial is based on the Ubuntu Linux system. With appropriate changes, it should also work on any other Linux distribution. +The following tutorial is based on Ubuntu Linux. +With appropriate changes, it should also work on any other Linux distribution. +The minimum recommended Ubuntu version for development is 22.04 LTS. ### Install Prerequisites {#install-prerequisites} ``` bash -sudo apt-get install git cmake ccache python3 ninja-build yasm gawk +sudo apt-get install git cmake ccache python3 ninja-build nasm yasm gawk lsb-release wget software-properties-common gnupg ``` -Or cmake3 instead of cmake on older systems. ### Install and Use the Clang compiler -### Install the latest clang (recommended) On Ubuntu/Debian, you can use LLVM's automatic installation script; see [here](https://apt.llvm.org/). -On Ubuntu/Debian you can use the automatic installation script (check [official webpage](https://apt.llvm.org/)) -```bash +``` bash sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ``` @@ -40,19 +40,22 @@ sudo apt-get install software-properties-common sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test ``` -For other Linux distribution - check the availability of the [prebuild packages](https://releases.llvm.org/download.html) or build clang [from sources](https://clang.llvm.org/get_started.html). +For other Linux distributions, check the availability of LLVM's [prebuilt packages](https://releases.llvm.org/download.html). -#### Use the latest clang for Builds +As of April 2023, any version of Clang >= 15 will work. +GCC as a compiler is not supported. +To build with a specific Clang version: + +:::tip +This is optional: if you are following along and have just installed Clang, check +which version you installed before setting this environment variable. +::: ``` bash -export CC=clang-15 -export CXX=clang++-15 +export CC=clang-16 +export CXX=clang++-16 ``` -In this example we use version 15 that is the latest as of Sept 2022. - -Gcc cannot be used. - ### Checkout ClickHouse Sources {#checkout-clickhouse-sources} ``` bash @@ -70,79 +73,46 @@ git clone --recursive --shallow-submodules https://github.com/ClickHouse/ClickHo ``` bash cd ClickHouse mkdir build -cd build -cmake .. -ninja +cmake -S . -B build +cmake --build build # or: `cd build; ninja` ``` -To create an executable, run `ninja clickhouse`. -This will create the `programs/clickhouse` executable, which can be used with `client` or `server` arguments. +To create an executable, run `cmake --build build --target clickhouse` (or: `cd build; ninja clickhouse`). +This will create the executable `build/programs/clickhouse`, which can be used with the `client` or `server` arguments.
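As a quick smoke test of the result (a minimal sketch assuming the default build layout produced by the commands above), you can start the freshly built binary and query it:

``` bash
# Start a server using the just-built binary (runs in the foreground)
./build/programs/clickhouse server

# In another terminal, connect with the client and run a sanity query
./build/programs/clickhouse client --query "SELECT version()"
```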
-## How to Build ClickHouse on Any Linux {#how-to-build-clickhouse-on-any-linux} +## Building on Any Linux {#how-to-build-clickhouse-on-any-linux} The build requires the following components: -- Git (is used only to checkout the sources, it’s not needed for the build) -- CMake 3.15 or newer -- Ninja -- C++ compiler: clang-14 or newer -- Linker: lld -- Yasm -- Gawk +- Git (used to check out the sources, not needed for the build) +- CMake 3.20 or newer +- Compiler: Clang 15 or newer +- Linker: lld 15 or newer +- Ninja +- Yasm +- Gawk If all the components are installed, you may build in the same way as the steps above. -Example for Ubuntu Eoan: -``` bash -sudo apt update -sudo apt install git cmake ninja-build clang++ python yasm gawk -git clone --recursive https://github.com/ClickHouse/ClickHouse.git -mkdir build && cd build -cmake ../ClickHouse -ninja -``` - Example for OpenSUSE Tumbleweed: + ``` bash -sudo zypper install git cmake ninja clang-c++ python lld yasm gawk +sudo zypper install git cmake ninja clang-c++ python lld nasm yasm gawk git clone --recursive https://github.com/ClickHouse/ClickHouse.git -mkdir build && cd build -cmake ../ClickHouse -ninja +cd ClickHouse +mkdir build +cmake -S . -B build +cmake --build build ``` Example for Fedora Rawhide: + ``` bash sudo yum update -sudo yum --nogpg install git cmake make clang python3 ccache yasm gawk +sudo yum --nogpg install git cmake make clang python3 ccache nasm yasm gawk git clone --recursive https://github.com/ClickHouse/ClickHouse.git -mkdir build && cd build -cmake ../ClickHouse -make -j $(nproc) -``` - -Here is an example of how to build `clang` and all the llvm infrastructure from sources: - -``` -git clone git@github.com:llvm/llvm-project.git -mkdir llvm-build && cd llvm-build -cmake -DCMAKE_BUILD_TYPE:STRING=Release -DLLVM_ENABLE_PROJECTS=all ../llvm-project/llvm/ -make -j16 -sudo make install -hash clang -clang --version -``` - -You can install the older clang like clang-11 from packages and then use it to build the new clang from sources. - -Here is an example of how to install the new `cmake` from the official website: - -``` -wget https://github.com/Kitware/CMake/releases/download/v3.22.2/cmake-3.22.2-linux-x86_64.sh -chmod +x cmake-3.22.2-linux-x86_64.sh -./cmake-3.22.2-linux-x86_64.sh -export PATH=/home/milovidov/work/cmake-3.22.2-linux-x86_64/bin/:${PATH} -hash cmake +cd ClickHouse +mkdir build +cmake -S . -B build +cmake --build build ``` ## You Don’t Have to Build ClickHouse {#you-dont-have-to-build-clickhouse} @@ -159,4 +129,3 @@ The CI checks build the binaries on each commit to [ClickHouse](https://github.c 1. Find the type of package for your operating system that you need and download the files.
![build artifact check](images/find-build-artifact.png) - diff --git a/docs/en/development/building_and_benchmarking_deflate_qpl.md b/docs/en/development/building_and_benchmarking_deflate_qpl.md new file mode 100644 index 00000000000..0501c1cbdcb --- /dev/null +++ b/docs/en/development/building_and_benchmarking_deflate_qpl.md @@ -0,0 +1,331 @@ +--- +slug: /en/development/building_and_benchmarking_deflate_qpl +sidebar_position: 73 +sidebar_label: Building and Benchmarking DEFLATE_QPL +description: How to build ClickHouse and run a benchmark with the DEFLATE_QPL codec +--- + +# Build ClickHouse with DEFLATE_QPL + +- Make sure your target machine meets the QPL required [prerequisites](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#prerequisites) +- Pass the following flag to CMake when building ClickHouse: + +``` bash +cmake -DENABLE_QPL=1 .. +``` + +- For generic requirements, refer to the generic ClickHouse [build instructions](/docs/en/development/build.md) + +# Run Benchmark with DEFLATE_QPL + +## Files list + +The folder `benchmark_sample` under [qpl-cmake](https://github.com/ClickHouse/ClickHouse/tree/master/contrib/qpl-cmake) gives an example of running the benchmark with Python scripts: + +`client_scripts` contains Python scripts for running a typical benchmark, for example: +- `client_stressing_test.py`: The Python script for query stress testing with [1~4] server instances. +- `queries_ssb.sql`: The file listing all queries for the [Star Schema Benchmark](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema/) +- `allin1_ssb.sh`: This shell script executes the whole benchmark workflow automatically. + +`database_files` stores the database files for the lz4/deflate/zstd codecs. + +## Run the benchmark automatically for Star Schema: + +``` bash +$ cd ./benchmark_sample/client_scripts +$ sh run_ssb.sh +``` + +After completion, check all the results in this folder: `./output/` + +If you run into failures, run the benchmark manually as described in the sections below. + +## Definition + +[CLICKHOUSE_EXE] means the path of the ClickHouse executable. + +## Environment + +- CPU: Sapphire Rapids +- For OS requirements, refer to [System Requirements for QPL](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#system-requirements) +- For IAA setup, refer to [Accelerator Configuration](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#accelerator-configuration) +- Install the required Python modules: + +``` bash +pip3 install clickhouse_driver numpy +``` + +[Self-check for IAA] + +``` bash +$ accel-config list | grep -P 'iax|state' +``` + +The expected output looks like this: +``` bash + "dev":"iax1", + "state":"enabled", + "state":"enabled", +``` + +If there is no output, IAA is not ready to work. Check the IAA setup again. + +## Generate raw data + +``` bash +$ cd ./benchmark_sample +$ mkdir rawdata_dir && cd rawdata_dir +``` + +Use [`dbgen`](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema) to generate 100 million rows of data with the following parameter: +-s 20 + +Files like `*.tbl` are expected to be written under `./benchmark_sample/rawdata_dir/ssb-dbgen`. + +## Database setup + +Set up the database with the LZ4 codec: + +``` bash +$ cd ./database_dir/lz4 +$ [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null& +$ [CLICKHOUSE_EXE] client +``` + +You should see the message `Connected to ClickHouse server` on the console, which means the client successfully set up a connection to the server.
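As an optional sanity check before loading data (a sketch assuming the server started above listens on the default port; `[CLICKHOUSE_EXE]` is the executable path defined earlier):

``` bash
# Confirm the server answers queries before creating tables and inserting data
[CLICKHOUSE_EXE] client --query "SELECT version()"
```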
+ +Complete the three steps below, as described in the [Star Schema Benchmark](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema): +- Creating tables in ClickHouse +- Inserting data. Use `./benchmark_sample/rawdata_dir/ssb-dbgen/*.tbl` as the input data. +- Converting the “star schema” to the de-normalized “flat schema” + +Set up the database with the IAA Deflate codec: + +``` bash +$ cd ./database_dir/deflate +$ [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null& +$ [CLICKHOUSE_EXE] client +``` +Complete the same three steps as for LZ4 above. + +Set up the database with the ZSTD codec: + +``` bash +$ cd ./database_dir/zstd +$ [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null& +$ [CLICKHOUSE_EXE] client +``` +Complete the same three steps as for LZ4 above. + +[Self-check] +For each codec (lz4/zstd/deflate), execute the query below to make sure the databases were created successfully: +```sql +select count() from lineorder_flat +``` +The expected output is: +```sql +┌───count()─┐ +│ 119994608 │ +└───────────┘ +``` +[Self-check for IAA Deflate codec] + +The first time you execute an insertion or query from the client, the ClickHouse server console is expected to print this log: +```text +Hardware-assisted DeflateQpl codec is ready! +``` +If you never see this, but instead see the log below: +```text +Initialization of hardware-assisted DeflateQpl codec failed +``` +That means the IAA devices are not ready, and you need to check the IAA setup again. + +## Benchmark with single instance + +- Before starting the benchmark, disable C6 and set the CPU frequency governor to `performance`: + +``` bash +$ cpupower idle-set -d 3 +$ cpupower frequency-set -g performance +``` + +- To eliminate the impact of cross-socket memory access, we use `numactl` to bind the server to one socket and the client to another socket. +- Single instance means a single server connected to a single client. + +Now run the benchmark for LZ4/Deflate/ZSTD respectively: + +LZ4: + +``` bash +$ cd ./database_dir/lz4 +$ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null& +$ cd ./client_scripts +$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 1 > lz4.log +``` + +IAA deflate: + +``` bash +$ cd ./database_dir/deflate +$ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null& +$ cd ./client_scripts +$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 1 > deflate.log +``` + +ZSTD: + +``` bash +$ cd ./database_dir/zstd +$ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null& +$ cd ./client_scripts +$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 1 > zstd.log
```

Now three logs should be output as expected:
```text +lz4.log +deflate.log +zstd.log +``` + +How to check performance metrics: + +We focus on QPS; search for the keyword `QPS_Final` and collect the statistics. + +## Benchmark with multi-instances + +- To reduce the impact of memory-bandwidth limits with too many threads, we recommend running the benchmark with multiple instances. +- Multi-instance means multiple (2 or 4) servers, each connected to its own client. +- The cores of one socket need to be divided equally and assigned to the servers. +- For multiple instances, you must create a new folder for each codec and insert the dataset following steps similar to the single-instance case. + +There are 2 differences: +- On the client side, you need to launch ClickHouse with the assigned port during table creation and data insertion.
+- On the server side, you need to launch ClickHouse with the specific XML config file in which the port has been assigned. All customized XML config files for multiple instances are provided under `./server_config`. + +Here we assume there are 60 cores per socket and take 2 instances as an example. Launch the server for the first instance. LZ4: + +``` bash +$ cd ./database_dir/lz4 +$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null& +``` + +ZSTD: + +``` bash +$ cd ./database_dir/zstd +$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null& +``` + +IAA Deflate: + +``` bash +$ cd ./database_dir/deflate +$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null& +``` + +[Launch the server for the second instance] + +LZ4: + +``` bash +$ cd ./database_dir && mkdir lz4_s2 && cd lz4_s2 +$ cp ../../server_config/config_lz4_s2.xml ./ +$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_lz4_s2.xml >&/dev/null& +``` + +ZSTD: + +``` bash +$ cd ./database_dir && mkdir zstd_s2 && cd zstd_s2 +$ cp ../../server_config/config_zstd_s2.xml ./ +$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_zstd_s2.xml >&/dev/null& +``` + +IAA Deflate: + +``` bash +$ cd ./database_dir && mkdir deflate_s2 && cd deflate_s2 +$ cp ../../server_config/config_deflate_s2.xml ./ +$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_deflate_s2.xml >&/dev/null& +``` + +Create tables and insert data for the second instance. + +Creating tables: + +``` bash +$ [CLICKHOUSE_EXE] client -m --port=9001 +``` + +Inserting data: + +``` bash +$ [CLICKHOUSE_EXE] client --query "INSERT INTO [TBL_FILE_NAME] FORMAT CSV" < [TBL_FILE_NAME].tbl --port=9001 +``` + +- [TBL_FILE_NAME] represents the name of a file matching the pattern `*.tbl` under `./benchmark_sample/rawdata_dir/ssb-dbgen`. +- `--port=9001` stands for the assigned port of the server instance, which is also defined in config_lz4_s2.xml/config_zstd_s2.xml/config_deflate_s2.xml. For even more instances, replace it with the values 9002/9003, which stand for the s3/s4 instances respectively. If you don't assign it, the port is 9000 by default, which is already used by the first instance. + +Benchmarking with 2 instances + +LZ4: + +``` bash +$ cd ./database_dir/lz4 +$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null& +$ cd ./database_dir/lz4_s2 +$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_lz4_s2.xml >&/dev/null& +$ cd ./client_scripts +$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > lz4_2insts.log +``` + +ZSTD: + +``` bash +$ cd ./database_dir/zstd +$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null& +$ cd ./database_dir/zstd_s2 +$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_zstd_s2.xml >&/dev/null& +$ cd ./client_scripts +$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > zstd_2insts.log +``` + +IAA deflate: + +``` bash +$ cd ./database_dir/deflate +$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null& +$ cd ./database_dir/deflate_s2 +$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_deflate_s2.xml >&/dev/null& +$ cd ./client_scripts +$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > deflate_2insts.log +``` + +Here the last argument `2` of client_stressing_test.py stands for the number of instances. For more instances, replace it with the value 3 or 4.
This script supports up to 4 instances. + +Now three logs should be output as expected: + +``` text +lz4_2insts.log +deflate_2insts.log +zstd_2insts.log +``` +How to check performance metrics: + +We focus on QPS; search for the keyword `QPS_Final` and collect the statistics. + +The benchmark setup for 4 instances is similar to the 2-instance setup above. +We recommend using the 2-instance benchmark data as the final report for review. + +## Tips + +Before launching a new ClickHouse server, make sure no background ClickHouse process is running; check for and kill any old one: + +``` bash +$ ps -aux| grep clickhouse +$ kill -9 [PID] +``` +By comparing the query list in `./client_scripts/queries_ssb.sql` with the official [Star Schema Benchmark](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema), you will find that 3 queries are not included: Q1.2/Q1.3/Q3.4. This is because CPU utilization is very low (<10%) for these queries, which means they cannot demonstrate performance differences. diff --git a/docs/en/development/continuous-integration.md b/docs/en/development/continuous-integration.md index 232eee5b3cf..738c5458cc3 100644 --- a/docs/en/development/continuous-integration.md +++ b/docs/en/development/continuous-integration.md @@ -102,7 +102,7 @@ Builds ClickHouse in various configurations for use in further steps. You have t ### Report Details -- **Compiler**: `clang-15`, optionally with the name of a target platform +- **Compiler**: `clang-16`, optionally with the name of a target platform - **Build type**: `Debug` or `RelWithDebInfo` (cmake). - **Sanitizer**: `none` (without sanitizers), `address` (ASan), `memory` (MSan), `undefined` (UBSan), or `thread` (TSan). - **Status**: `success` or `fail` diff --git a/docs/en/development/contrib.md b/docs/en/development/contrib.md index 04158a0c3f7..4b296c43db4 100644 --- a/docs/en/development/contrib.md +++ b/docs/en/development/contrib.md @@ -1,6 +1,6 @@ --- slug: /en/development/contrib -sidebar_position: 71 +sidebar_position: 72 sidebar_label: Third-Party Libraries description: A list of third-party libraries used --- diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index b46cc10f99d..1f3ab1aae2c 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -39,9 +39,15 @@ Next, you need to download the source files onto your working machine. This is c In the command line terminal run: - git clone --recursive --shallow-submodules git@github.com:your_github_username/ClickHouse.git + git clone --shallow-submodules git@github.com:your_github_username/ClickHouse.git cd ClickHouse +Or (if you'd like to use sparse checkout for submodules and avoid checking out unneeded files): + + git clone git@github.com:your_github_username/ClickHouse.git + cd ClickHouse + ./contrib/update-submodules.sh + Note: please, substitute *your_github_username* with what is appropriate! This command will create a directory `ClickHouse` containing the working copy of the project. @@ -67,7 +73,7 @@ It generally means that the SSH keys for connecting to GitHub are missing. These You can also clone the repository via https protocol: - git clone --recursive--shallow-submodules https://github.com/ClickHouse/ClickHouse.git + git clone --recursive --shallow-submodules https://github.com/ClickHouse/ClickHouse.git This, however, will not let you send your changes to the server.
You can still use it temporarily and add the SSH keys later replacing the remote address of the repository with `git remote` command. @@ -146,7 +152,7 @@ While inside the `build` directory, configure your build by running CMake. Befor export CC=clang CXX=clang++ cmake .. -If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-15 CXX=clang++-15`. The clang version will be in the script output. +If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-16 CXX=clang++-16`. The clang version will be in the script output. The `CC` variable specifies the compiler for C (short for C Compiler), and `CXX` variable instructs which C++ compiler is to be used for building. diff --git a/docs/en/development/style.md b/docs/en/development/style.md index 35683aa7822..5b03468623d 100644 --- a/docs/en/development/style.md +++ b/docs/en/development/style.md @@ -1,6 +1,6 @@ --- slug: /en/development/style -sidebar_position: 69 +sidebar_position: 70 sidebar_label: C++ Guide description: A list of recommendations regarding coding style, naming convention, formatting and more --- @@ -370,8 +370,8 @@ If the file contains a single function, name the file the same way as the functi **11.** If the name contains an abbreviation, then: -- For variable names, the abbreviation should use lowercase letters `mysql_connection` (not `mySQL_connection`). -- For names of classes and functions, keep the uppercase letters in the abbreviation`MySQLConnection` (not `MySqlConnection`). +- For variable names, the abbreviation should use lowercase letters `mysql_connection` (not `mySQL_connection`). +- For names of classes and functions, keep the uppercase letters in the abbreviation `MySQLConnection` (not `MySqlConnection`). **12.** Constructor arguments that are used just to initialize the class members should be named the same way as the class members, but with an underscore at the end. @@ -434,9 +434,9 @@ In application code, memory must be freed by the object that owns it. Examples: -- The easiest way is to place an object on the stack, or make it a member of another class. -- For a large number of small objects, use containers. -- For automatic deallocation of a small number of objects that reside in the heap, use `shared_ptr/unique_ptr`. +- The easiest way is to place an object on the stack, or make it a member of another class. +- For a large number of small objects, use containers. +- For automatic deallocation of a small number of objects that reside in the heap, use `shared_ptr/unique_ptr`. **2.** Resource management. @@ -504,10 +504,10 @@ This is not recommended, but it is allowed. Use the following options: -- Create a function (`done()` or `finalize()`) that will do all the work in advance that might lead to an exception. If that function was called, there should be no exceptions in the destructor later. -- Tasks that are too complex (such as sending messages over the network) can be put in separate method that the class user will have to call before destruction. -- If there is an exception in the destructor, it’s better to log it than to hide it (if the logger is available). -- In simple applications, it is acceptable to rely on `std::terminate` (for cases of `noexcept` by default in C++11) to handle exceptions.
+- Create a function (`done()` or `finalize()`) that will do all the work in advance that might lead to an exception. If that function was called, there should be no exceptions in the destructor later. +- Tasks that are too complex (such as sending messages over the network) can be put in a separate method that the class user will have to call before destruction. +- If there is an exception in the destructor, it’s better to log it than to hide it (if the logger is available). +- In simple applications, it is acceptable to rely on `std::terminate` (for cases of `noexcept` by default in C++11) to handle exceptions. **6.** Anonymous code blocks. @@ -529,11 +529,11 @@ ready_any.set(); In offline data processing programs: -- Try to get the best possible performance on a single CPU core. You can then parallelize your code if necessary. +- Try to get the best possible performance on a single CPU core. You can then parallelize your code if necessary. In server applications: -- Use the thread pool to process requests. At this point, we haven’t had any tasks that required userspace context switching. +- Use the thread pool to process requests. At this point, we haven’t had any tasks that required userspace context switching. Fork is not used for parallelization. diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index 9ae49e8f707..1d3e7d4964e 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -1,6 +1,6 @@ --- slug: /en/development/tests -sidebar_position: 70 +sidebar_position: 71 sidebar_label: Testing title: ClickHouse Testing description: Most of ClickHouse features can be tested with functional tests and they are mandatory to use for every change in ClickHouse code that can be tested that way. @@ -31,6 +31,9 @@ folder and run the following command: PATH=$PATH: tests/clickhouse-test 01428_hash_set_nan_key ``` +Test results (`stderr` and `stdout`) are written to files `01428_hash_set_nan_key.[stderr|stdout]` which +are located next to the test file itself (so for `queries/0_stateless/foo.sql` the output will be in `queries/0_stateless/foo.stdout`). + For more options, see `tests/clickhouse-test --help`. You can simply run all tests or run subset of tests filtered by substring in test name: `./clickhouse-test substring`. There are also options to run tests in parallel or in randomized order. ### Adding a New Test @@ -68,7 +71,7 @@ SELECT 1 | `global` | Same as `shard`. Prefer `shard` || | `zookeeper` | Test requires Zookeeper or ClickHouse Keeper to run | Test uses `ReplicatedMergeTree` | | `replica` | Same as `zookeeper`. Prefer `zookeeper` || -| `no-fasttest`| Test is not run under [Fast test](continuous-integration#fast-test) | Test uses `MySQL` table engine which is disabled in Fast test| +| `no-fasttest`| Test is not run under [Fast test](continuous-integration.md#fast-test) | Test uses `MySQL` table engine which is disabled in Fast test| | `no-[asan, tsan, msan, ubsan]` | Disables tests in build with [sanitizers](#sanitizers) | Test is run under QEMU which doesn't work with sanitizers | | `no-replicated-database` ||| | `no-ordinary-database` ||| @@ -191,11 +194,11 @@ If the system clickhouse-server is already running and you do not want to stop i Build tests allow to check that build is not broken on various alternative configurations and on some foreign systems. These tests are automated as well.
Examples: -- cross-compile for Darwin x86_64 (macOS) -- cross-compile for FreeBSD x86_64 -- cross-compile for Linux AArch64 -- build on Ubuntu with libraries from system packages (discouraged) -- build with shared linking of libraries (discouraged) +- cross-compile for Darwin x86_64 (macOS) +- cross-compile for FreeBSD x86_64 +- cross-compile for Linux AArch64 +- build on Ubuntu with libraries from system packages (discouraged) +- build with shared linking of libraries (discouraged) For example, build with system packages is bad practice, because we cannot guarantee what exact version of packages a system will have. But this is really needed by Debian maintainers. For this reason we at least have to support this variant of build. Another example: shared linking is a common source of trouble, but it is needed for some enthusiasts. diff --git a/docs/en/engines/_category_.yml b/docs/en/engines/_category_.yml index 8c6ba12c6f1..2aa5df72955 100644 --- a/docs/en/engines/_category_.yml +++ b/docs/en/engines/_category_.yml @@ -4,5 +4,4 @@ collapsible: true collapsed: true link: type: generated-index - title: Database & Table Engines slug: /en/engines diff --git a/docs/en/engines/database-engines/atomic.md b/docs/en/engines/database-engines/atomic.md index 3ea5008c80a..0224e1aba21 100644 --- a/docs/en/engines/database-engines/atomic.md +++ b/docs/en/engines/database-engines/atomic.md @@ -59,4 +59,4 @@ For [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table ## See Also -- [system.databases](../../operations/system-tables/databases.md) system table +- [system.databases](../../operations/system-tables/databases.md) system table diff --git a/docs/en/engines/database-engines/index.md b/docs/en/engines/database-engines/index.md index 835383f503f..233cbbb4247 100644 --- a/docs/en/engines/database-engines/index.md +++ b/docs/en/engines/database-engines/index.md @@ -11,18 +11,18 @@ Database engines allow you to work with tables. By default, ClickHouse uses the Here is a complete list of available database engines. Follow the links for more details: -- [Atomic](../../engines/database-engines/atomic.md) +- [Atomic](../../engines/database-engines/atomic.md) -- [MySQL](../../engines/database-engines/mysql.md) +- [MySQL](../../engines/database-engines/mysql.md) -- [MaterializedMySQL](../../engines/database-engines/materialized-mysql.md) +- [MaterializedMySQL](../../engines/database-engines/materialized-mysql.md) -- [Lazy](../../engines/database-engines/lazy.md) +- [Lazy](../../engines/database-engines/lazy.md) -- [PostgreSQL](../../engines/database-engines/postgresql.md) +- [PostgreSQL](../../engines/database-engines/postgresql.md) -- [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) +- [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) -- [Replicated](../../engines/database-engines/replicated.md) +- [Replicated](../../engines/database-engines/replicated.md) -- [SQLite](../../engines/database-engines/sqlite.md) +- [SQLite](../../engines/database-engines/sqlite.md) diff --git a/docs/en/engines/database-engines/materialized-mysql.md b/docs/en/engines/database-engines/materialized-mysql.md index 899c8d024f1..1f1c996d4bf 100644 --- a/docs/en/engines/database-engines/materialized-mysql.md +++ b/docs/en/engines/database-engines/materialized-mysql.md @@ -6,7 +6,7 @@ sidebar_position: 70 # [experimental] MaterializedMySQL -:::warning +:::note This is an experimental feature that should not be used in production. 
::: @@ -22,10 +22,10 @@ ENGINE = MaterializedMySQL('host:port', ['database' | database], 'user', 'passwo **Engine Parameters** -- `host:port` — MySQL server endpoint. -- `database` — MySQL database name. -- `user` — MySQL user. -- `password` — User password. +- `host:port` — MySQL server endpoint. +- `database` — MySQL database name. +- `user` — MySQL user. +- `password` — User password. ## Engine Settings @@ -245,7 +245,7 @@ extra care needs to be taken. You may specify overrides for tables that do not exist yet. -:::warning +:::important It is easy to break replication with table overrides if not used with care. For example: * If an ALIAS column is added with a table override, and a column with the same name is later added to the source diff --git a/docs/en/engines/database-engines/materialized-postgresql.md b/docs/en/engines/database-engines/materialized-postgresql.md index b43f71a7576..08e9f998626 100644 --- a/docs/en/engines/database-engines/materialized-postgresql.md +++ b/docs/en/engines/database-engines/materialized-postgresql.md @@ -26,10 +26,10 @@ ENGINE = MaterializedPostgreSQL('host:port', 'database', 'user', 'password') [SE **Engine Parameters** -- `host:port` — PostgreSQL server endpoint. -- `database` — PostgreSQL database name. -- `user` — PostgreSQL user. -- `password` — User password. +- `host:port` — PostgreSQL server endpoint. +- `database` — PostgreSQL database name. +- `user` — PostgreSQL user. +- `password` — User password. ## Example of Use {#example-of-use} @@ -120,9 +120,9 @@ Warning: for this case dots in table name are not allowed. 2. Each replicated table must have one of the following [replica identity](https://www.postgresql.org/docs/10/sql-altertable.html#SQL-CREATETABLE-REPLICA-IDENTITY): -- primary key (by default) +- primary key (by default) -- index +- index ``` bash postgres# CREATE TABLE postgres_table (a Integer NOT NULL, b Integer, c Integer NOT NULL, d Integer, e Integer NOT NULL); @@ -145,7 +145,7 @@ FROM pg_class WHERE oid = 'postgres_table'::regclass; ``` -:::warning +:::note Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.html) values is not supported. The default value for the data type will be used. ::: @@ -171,7 +171,7 @@ Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.htm Possible values: - - Positive integer. + - Positive integer. Default value: `65536`. diff --git a/docs/en/engines/database-engines/mysql.md b/docs/en/engines/database-engines/mysql.md index e4ff734d55f..20434ad124e 100644 --- a/docs/en/engines/database-engines/mysql.md +++ b/docs/en/engines/database-engines/mysql.md @@ -12,9 +12,9 @@ The `MySQL` database engine translate queries to the MySQL server so you can per You cannot perform the following queries: -- `RENAME` -- `CREATE TABLE` -- `ALTER` +- `RENAME` +- `CREATE TABLE` +- `ALTER` ## Creating a Database {#creating-a-database} @@ -25,10 +25,10 @@ ENGINE = MySQL('host:port', ['database' | database], 'user', 'password') **Engine Parameters** -- `host:port` — MySQL server address. -- `database` — Remote database name. -- `user` — MySQL user. -- `password` — User password. +- `host:port` — MySQL server address. +- `database` — Remote database name. +- `user` — MySQL user. +- `password` — User password. ## Data Types Support {#data_types-support} @@ -60,7 +60,7 @@ These variables are supported: - `version` - `max_allowed_packet` -:::warning +:::note By now these variables are stubs and don't correspond to anything. 
::: diff --git a/docs/en/engines/database-engines/postgresql.md b/docs/en/engines/database-engines/postgresql.md index 939995a61c5..294d1202bdd 100644 --- a/docs/en/engines/database-engines/postgresql.md +++ b/docs/en/engines/database-engines/postgresql.md @@ -21,12 +21,12 @@ ENGINE = PostgreSQL('host:port', 'database', 'user', 'password'[, `schema`, `use **Engine Parameters** -- `host:port` — PostgreSQL server address. -- `database` — Remote database name. -- `user` — PostgreSQL user. -- `password` — User password. -- `schema` — PostgreSQL schema. -- `use_table_cache` — Defines if the database table structure is cached or not. Optional. Default value: `0`. +- `host:port` — PostgreSQL server address. +- `database` — Remote database name. +- `user` — PostgreSQL user. +- `password` — User password. +- `schema` — PostgreSQL schema. +- `use_table_cache` — Defines if the database table structure is cached or not. Optional. Default value: `0`. ## Data Types Support {#data_types-support} @@ -140,3 +140,4 @@ DESCRIBE TABLE test_database.test_table; ## Related content - Blog: [ClickHouse and PostgreSQL - a match made in data heaven - part 1](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres) +- Blog: [ClickHouse and PostgreSQL - a Match Made in Data Heaven - part 2](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres-part-2) diff --git a/docs/en/engines/database-engines/replicated.md b/docs/en/engines/database-engines/replicated.md index 43d1ce5ec3f..5672633c4a2 100644 --- a/docs/en/engines/database-engines/replicated.md +++ b/docs/en/engines/database-engines/replicated.md @@ -17,9 +17,9 @@ CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_na **Engine Parameters** -- `zoo_path` — ZooKeeper path. The same ZooKeeper path corresponds to the same database. -- `shard_name` — Shard name. Database replicas are grouped into shards by `shard_name`. -- `replica_name` — Replica name. Replica names must be different for all replicas of the same shard. +- `zoo_path` — ZooKeeper path. The same ZooKeeper path corresponds to the same database. +- `shard_name` — Shard name. Database replicas are grouped into shards by `shard_name`. +- `replica_name` — Replica name. Replica names must be different for all replicas of the same shard. For [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication) tables if no arguments provided, then default arguments are used: `/clickhouse/tables/{uuid}/{shard}` and `{replica}`. These can be changed in the server settings [default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path) and [default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name). Macro `{uuid}` is unfolded to table's uuid, `{shard}` and `{replica}` are unfolded to values from server config, not from database engine arguments. But in the future, it will be possible to use `shard_name` and `replica_name` of Replicated database. diff --git a/docs/en/engines/database-engines/sqlite.md b/docs/en/engines/database-engines/sqlite.md index eef0bb84088..fc2a6525a68 100644 --- a/docs/en/engines/database-engines/sqlite.md +++ b/docs/en/engines/database-engines/sqlite.md @@ -17,7 +17,7 @@ Allows to connect to [SQLite](https://www.sqlite.org/index.html) database and pe **Engine Parameters** -- `db_path` — Path to a file with SQLite database. +- `db_path` — Path to a file with SQLite database. 
## Data Types Support {#data_types-support} diff --git a/docs/en/engines/table-engines/index.md b/docs/en/engines/table-engines/index.md index 31563e2e727..d7c582164de 100644 --- a/docs/en/engines/table-engines/index.md +++ b/docs/en/engines/table-engines/index.md @@ -9,12 +9,12 @@ toc_title: Introduction The table engine (type of table) determines: -- How and where data is stored, where to write it to, and where to read it from. -- Which queries are supported, and how. -- Concurrent data access. -- Use of indexes, if present. -- Whether multithread request execution is possible. -- Data replication parameters. +- How and where data is stored, where to write it to, and where to read it from. +- Which queries are supported, and how. +- Concurrent data access. +- Use of indexes, if present. +- Whether multithread request execution is possible. +- Data replication parameters. ## Engine Families {#engine-families} @@ -24,13 +24,13 @@ The most universal and functional table engines for high-load tasks. The propert Engines in the family: -- [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#mergetree) -- [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md#replacingmergetree) -- [SummingMergeTree](../../engines/table-engines/mergetree-family/summingmergetree.md#summingmergetree) -- [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md#aggregatingmergetree) -- [CollapsingMergeTree](../../engines/table-engines/mergetree-family/collapsingmergetree.md#table_engine-collapsingmergetree) -- [VersionedCollapsingMergeTree](../../engines/table-engines/mergetree-family/versionedcollapsingmergetree.md#versionedcollapsingmergetree) -- [GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md#graphitemergetree) +- [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#mergetree) +- [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md#replacingmergetree) +- [SummingMergeTree](../../engines/table-engines/mergetree-family/summingmergetree.md#summingmergetree) +- [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md#aggregatingmergetree) +- [CollapsingMergeTree](../../engines/table-engines/mergetree-family/collapsingmergetree.md#table_engine-collapsingmergetree) +- [VersionedCollapsingMergeTree](../../engines/table-engines/mergetree-family/versionedcollapsingmergetree.md#versionedcollapsingmergetree) +- [GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md#graphitemergetree) ### Log {#log} @@ -38,9 +38,9 @@ Lightweight [engines](../../engines/table-engines/log-family/index.md) with mini Engines in the family: -- [TinyLog](../../engines/table-engines/log-family/tinylog.md#tinylog) -- [StripeLog](../../engines/table-engines/log-family/stripelog.md#stripelog) -- [Log](../../engines/table-engines/log-family/log.md#log) +- [TinyLog](../../engines/table-engines/log-family/tinylog.md#tinylog) +- [StripeLog](../../engines/table-engines/log-family/stripelog.md#stripelog) +- [Log](../../engines/table-engines/log-family/log.md#log) ### Integration Engines {#integration-engines} @@ -49,34 +49,34 @@ Engines for communicating with other data storage and processing systems. 
Engines in the family: -- [ODBC](../../engines/table-engines/integrations/odbc.md) -- [JDBC](../../engines/table-engines/integrations/jdbc.md) -- [MySQL](../../engines/table-engines/integrations/mysql.md) -- [MongoDB](../../engines/table-engines/integrations/mongodb.md) -- [HDFS](../../engines/table-engines/integrations/hdfs.md) -- [S3](../../engines/table-engines/integrations/s3.md) -- [Kafka](../../engines/table-engines/integrations/kafka.md) -- [EmbeddedRocksDB](../../engines/table-engines/integrations/embedded-rocksdb.md) -- [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md) -- [PostgreSQL](../../engines/table-engines/integrations/postgresql.md) +- [ODBC](../../engines/table-engines/integrations/odbc.md) +- [JDBC](../../engines/table-engines/integrations/jdbc.md) +- [MySQL](../../engines/table-engines/integrations/mysql.md) +- [MongoDB](../../engines/table-engines/integrations/mongodb.md) +- [HDFS](../../engines/table-engines/integrations/hdfs.md) +- [S3](../../engines/table-engines/integrations/s3.md) +- [Kafka](../../engines/table-engines/integrations/kafka.md) +- [EmbeddedRocksDB](../../engines/table-engines/integrations/embedded-rocksdb.md) +- [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md) +- [PostgreSQL](../../engines/table-engines/integrations/postgresql.md) ### Special Engines {#special-engines} Engines in the family: -- [Distributed](../../engines/table-engines/special/distributed.md#distributed) -- [MaterializedView](../../engines/table-engines/special/materializedview.md#materializedview) -- [Dictionary](../../engines/table-engines/special/dictionary.md#dictionary) -- [Merge](../../engines/table-engines/special/merge.md#merge) -- [File](../../engines/table-engines/special/file.md#file) -- [Null](../../engines/table-engines/special/null.md#null) -- [Set](../../engines/table-engines/special/set.md#set) -- [Join](../../engines/table-engines/special/join.md#join) -- [URL](../../engines/table-engines/special/url.md#table_engines-url) -- [View](../../engines/table-engines/special/view.md#table_engines-view) -- [Memory](../../engines/table-engines/special/memory.md#memory) -- [Buffer](../../engines/table-engines/special/buffer.md#buffer) -- [KeeperMap](../../engines/table-engines/special/keepermap.md) +- [Distributed](../../engines/table-engines/special/distributed.md#distributed) +- [MaterializedView](../../engines/table-engines/special/materializedview.md#materializedview) +- [Dictionary](../../engines/table-engines/special/dictionary.md#dictionary) +- [Merge](../../engines/table-engines/special/merge.md#merge) +- [File](../../engines/table-engines/special/file.md#file) +- [Null](../../engines/table-engines/special/null.md#null) +- [Set](../../engines/table-engines/special/set.md#set) +- [Join](../../engines/table-engines/special/join.md#join) +- [URL](../../engines/table-engines/special/url.md#table_engines-url) +- [View](../../engines/table-engines/special/view.md#table_engines-view) +- [Memory](../../engines/table-engines/special/memory.md#memory) +- [Buffer](../../engines/table-engines/special/buffer.md#buffer) +- [KeeperMap](../../engines/table-engines/special/keepermap.md) ## Virtual Columns {#table_engines-virtual_columns} diff --git a/docs/en/engines/table-engines/integrations/ExternalDistributed.md b/docs/en/engines/table-engines/integrations/ExternalDistributed.md index 4e0f66ebd4f..3fb3fe88b55 100644 --- a/docs/en/engines/table-engines/integrations/ExternalDistributed.md +++ 
b/docs/en/engines/table-engines/integrations/ExternalDistributed.md @@ -22,17 +22,17 @@ See a detailed description of the [CREATE TABLE](../../../sql-reference/statemen The table structure can differ from the original table structure: -- Column names should be the same as in the original table, but you can use just some of these columns and in any order. -- Column types may differ from those in the original table. ClickHouse tries to [cast](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types. +- Column names should be the same as in the original table, but you can use just some of these columns and in any order. +- Column types may differ from those in the original table. ClickHouse tries to [cast](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types. **Engine Parameters** -- `engine` — The table engine `MySQL` or `PostgreSQL`. -- `host:port` — MySQL or PostgreSQL server address. -- `database` — Remote database name. -- `table` — Remote table name. -- `user` — User name. -- `password` — User password. +- `engine` — The table engine `MySQL` or `PostgreSQL`. +- `host:port` — MySQL or PostgreSQL server address. +- `database` — Remote database name. +- `table` — Remote table name. +- `user` — User name. +- `password` — User password. ## Implementation Details {#implementation-details} @@ -48,6 +48,6 @@ You can specify any number of shards and any number of replicas for each shard. **See Also** -- [MySQL table engine](../../../engines/table-engines/integrations/mysql.md) -- [PostgreSQL table engine](../../../engines/table-engines/integrations/postgresql.md) -- [Distributed table engine](../../../engines/table-engines/special/distributed.md) +- [MySQL table engine](../../../engines/table-engines/integrations/mysql.md) +- [PostgreSQL table engine](../../../engines/table-engines/integrations/postgresql.md) +- [Distributed table engine](../../../engines/table-engines/special/distributed.md) diff --git a/docs/en/engines/table-engines/integrations/deltalake.md b/docs/en/engines/table-engines/integrations/deltalake.md index 99183ac7308..3e2e177e28f 100644 --- a/docs/en/engines/table-engines/integrations/deltalake.md +++ b/docs/en/engines/table-engines/integrations/deltalake.md @@ -18,8 +18,8 @@ CREATE TABLE deltalake **Engine parameters** -- `url` — Bucket url with path to the existing Delta Lake table. -- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. +- `url` — Bucket url with path to the existing Delta Lake table. +- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. 
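For orientation, a minimal sketch of these parameters in use (the bucket URL and credentials below are placeholders, not working values):

``` sql
CREATE TABLE deltalake
    ENGINE = DeltaLake('http://mybucket.s3.amazonaws.com/clickhouse/deltalake', 'ABC123', 'Abc+123')
```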
Engine parameters can be specified using [Named Collections](../../../operations/named-collections.md) @@ -49,4 +49,4 @@ CREATE TABLE deltalake ENGINE=DeltaLake(deltalake_conf, filename = 'test_table') ## See also -- [deltaLake table function](../../../sql-reference/table-functions/deltalake.md) +- [deltaLake table function](../../../sql-reference/table-functions/deltalake.md) diff --git a/docs/en/engines/table-engines/integrations/hdfs.md b/docs/en/engines/table-engines/integrations/hdfs.md index 7c04a6594a6..b9db0fae68f 100644 --- a/docs/en/engines/table-engines/integrations/hdfs.md +++ b/docs/en/engines/table-engines/integrations/hdfs.md @@ -17,7 +17,7 @@ ENGINE = HDFS(URI, format) **Engine Parameters** - `URI` - whole file URI in HDFS. The path part of `URI` may contain globs. In this case the table would be readonly. -- `format` - specifies one of the available file formats. To perform +- `format` - specifies one of the available file formats. To perform `SELECT` queries, the format must be supported for input, and to perform `INSERT` queries – for output. The available formats are listed in the [Formats](../../../interfaces/formats.md#formats) section. @@ -58,13 +58,13 @@ SELECT * FROM hdfs_engine_table LIMIT 2 ## Implementation Details {#implementation-details} -- Reads and writes can be parallel. -- Not supported: - - `ALTER` and `SELECT...SAMPLE` operations. - - Indexes. - - [Zero-copy](../../../operations/storing-data.md#zero-copy) replication is possible, but not recommended. +- Reads and writes can be parallel. +- Not supported: + - `ALTER` and `SELECT...SAMPLE` operations. + - Indexes. + - [Zero-copy](../../../operations/storing-data.md#zero-copy) replication is possible, but not recommended. - :::warning Zero-copy replication is not ready for production + :::note Zero-copy replication is not ready for production Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. ::: @@ -72,10 +72,10 @@ SELECT * FROM hdfs_engine_table LIMIT 2 Multiple path components can have globs. To be processed, a file should exist and match the whole path pattern. The listing of files is determined during `SELECT` (not at `CREATE` time). -- `*` — Substitutes any number of any characters except `/` including empty string. -- `?` — Substitutes any single character. -- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. -- `{N..M}` — Substitutes any number in range from N to M including both borders. +- `*` — Substitutes any number of any characters except `/` including empty string. +- `?` — Substitutes any single character. +- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. +- `{N..M}` — Substitutes any number in range from N to M including both borders. Constructions with `{}` are similar to the [remote](../../../sql-reference/table-functions/remote.md) table function. @@ -83,12 +83,12 @@ Constructions with `{}` are similar to the [remote](../../../sql-reference/table 1.
Suppose we have several files in TSV format with the following URIs on HDFS: - - 'hdfs://hdfs1:9000/some_dir/some_file_1' - - 'hdfs://hdfs1:9000/some_dir/some_file_2' - - 'hdfs://hdfs1:9000/some_dir/some_file_3' - - 'hdfs://hdfs1:9000/another_dir/some_file_1' - - 'hdfs://hdfs1:9000/another_dir/some_file_2' - - 'hdfs://hdfs1:9000/another_dir/some_file_3' + - 'hdfs://hdfs1:9000/some_dir/some_file_1' + - 'hdfs://hdfs1:9000/some_dir/some_file_2' + - 'hdfs://hdfs1:9000/some_dir/some_file_3' + - 'hdfs://hdfs1:9000/another_dir/some_file_1' + - 'hdfs://hdfs1:9000/another_dir/some_file_2' + - 'hdfs://hdfs1:9000/another_dir/some_file_3' 1. There are several ways to make a table consisting of all six files: @@ -110,7 +110,7 @@ Table consists of all the files in both directories (all files should satisfy fo CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV') ``` -:::warning +:::note If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. ::: @@ -145,7 +145,7 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us | **parameter** | **default value** | -| - | - | +| - | - | | rpc\_client\_connect\_tcpnodelay | true | | dfs\_client\_read\_shortcircuit | true | | output\_replace-datanode-on-failure | true | @@ -195,7 +195,7 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us #### ClickHouse extras {#clickhouse-extras} | **parameter** | **default value** | -| - | - | +| - | - | |hadoop\_kerberos\_keytab | "" | |hadoop\_kerberos\_principal | "" | |libhdfs3\_conf | "" | @@ -230,9 +230,9 @@ libhdfs3 support HDFS namenode HA. ## Virtual Columns {#virtual-columns} -- `_path` — Path to the file. -- `_file` — Name of the file. +- `_path` — Path to the file. +- `_file` — Name of the file. **See Also** -- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns) +- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns) diff --git a/docs/en/engines/table-engines/integrations/hive.md b/docs/en/engines/table-engines/integrations/hive.md index fd16e717c89..adcb73605bb 100644 --- a/docs/en/engines/table-engines/integrations/hive.md +++ b/docs/en/engines/table-engines/integrations/hive.md @@ -28,17 +28,17 @@ PARTITION BY expr See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) query. The table structure can differ from the original Hive table structure: -- Column names should be the same as in the original Hive table, but you can use just some of these columns and in any order, also you can use some alias columns calculated from other columns. -- Column types should be the same from those in the original Hive table. -- Partition by expression should be consistent with the original Hive table, and columns in partition by expression should be in the table structure. +- Column names should be the same as in the original Hive table, but you can use just some of these columns and in any order; you can also use alias columns calculated from other columns. +- Column types should be the same as those in the original Hive table. +- Partition by expression should be consistent with the original Hive table, and columns in the partition by expression should be in the table structure.
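For illustration, a hypothetical ClickHouse-side definition (all table and column names here are invented) that takes a subset of the Hive table's columns and adds an alias column, as the list above permits:

``` sql
CREATE TABLE hive_events_subset
(
    `id` UInt32,
    `day` String,
    -- alias column calculated from another column, as permitted above
    `id_str` String ALIAS toString(id)
)
ENGINE = Hive('thrift://metastore:9083', 'default', 'events')
PARTITION BY day
```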
**Engine Parameters** -- `thrift://host:port` — Hive Metastore address +- `thrift://host:port` — Hive Metastore address -- `database` — Remote database name. +- `database` — Remote database name. -- `table` — Remote table name. +- `table` — Remote table name. ## Usage Example {#usage-example} diff --git a/docs/en/engines/table-engines/integrations/hudi.md b/docs/en/engines/table-engines/integrations/hudi.md index a14134ecdfa..a11e915aa3d 100644 --- a/docs/en/engines/table-engines/integrations/hudi.md +++ b/docs/en/engines/table-engines/integrations/hudi.md @@ -18,8 +18,8 @@ CREATE TABLE hudi_table **Engine parameters** -- `url` — Bucket url with the path to an existing Hudi table. -- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. +- `url` — Bucket url with the path to an existing Hudi table. +- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. Engine parameters can be specified using [Named Collections](../../../operations/named-collections.md) @@ -49,4 +49,4 @@ CREATE TABLE hudi_table ENGINE=Hudi(hudi_conf, filename = 'test_table') ## See also -- [hudi table function](/docs/en/sql-reference/table-functions/hudi.md) +- [hudi table function](/docs/en/sql-reference/table-functions/hudi.md) diff --git a/docs/en/engines/table-engines/integrations/iceberg.md b/docs/en/engines/table-engines/integrations/iceberg.md index 4322fc6b773..77cefc9283d 100644 --- a/docs/en/engines/table-engines/integrations/iceberg.md +++ b/docs/en/engines/table-engines/integrations/iceberg.md @@ -18,8 +18,8 @@ CREATE TABLE iceberg_table **Engine parameters** -- `url` — url with the path to an existing Iceberg table. -- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. +- `url` — url with the path to an existing Iceberg table. +- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. 
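A minimal sketch of these parameters in use (the URL and credentials are placeholders):

``` sql
CREATE TABLE iceberg_table
    ENGINE = Iceberg('http://test.s3.amazonaws.com/clickhouse-bucket/test_table', 'test', 'test')
```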
Engine parameters can be specified using [Named Collections](../../../operations/named-collections.md) @@ -49,4 +49,4 @@ CREATE TABLE iceberg_table ENGINE=Iceberg(iceberg_conf, filename = 'test_table') ## See also -- [iceberg table function](/docs/en/sql-reference/table-functions/iceberg.md) +- [iceberg table function](/docs/en/sql-reference/table-functions/iceberg.md) diff --git a/docs/en/engines/table-engines/integrations/index.md b/docs/en/engines/table-engines/integrations/index.md index 7a8b537aea8..b321a644d32 100644 --- a/docs/en/engines/table-engines/integrations/index.md +++ b/docs/en/engines/table-engines/integrations/index.md @@ -10,20 +10,20 @@ ClickHouse provides various means for integrating with external systems, includi List of supported integrations: -- [ODBC](../../../engines/table-engines/integrations/odbc.md) -- [JDBC](../../../engines/table-engines/integrations/jdbc.md) -- [MySQL](../../../engines/table-engines/integrations/mysql.md) -- [MongoDB](../../../engines/table-engines/integrations/mongodb.md) -- [HDFS](../../../engines/table-engines/integrations/hdfs.md) -- [S3](../../../engines/table-engines/integrations/s3.md) -- [Kafka](../../../engines/table-engines/integrations/kafka.md) -- [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md) -- [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) -- [PostgreSQL](../../../engines/table-engines/integrations/postgresql.md) -- [SQLite](../../../engines/table-engines/integrations/sqlite.md) -- [Hive](../../../engines/table-engines/integrations/hive.md) -- [ExternalDistributed](../../../engines/table-engines/integrations/ExternalDistributed.md) -- [MaterializedPostgreSQL](../../../engines/table-engines/integrations/materialized-postgresql.md) -- [NATS](../../../engines/table-engines/integrations/nats.md) -- [DeltaLake](../../../engines/table-engines/integrations/deltalake.md) -- [Hudi](../../../engines/table-engines/integrations/hudi.md) +- [ODBC](../../../engines/table-engines/integrations/odbc.md) +- [JDBC](../../../engines/table-engines/integrations/jdbc.md) +- [MySQL](../../../engines/table-engines/integrations/mysql.md) +- [MongoDB](../../../engines/table-engines/integrations/mongodb.md) +- [HDFS](../../../engines/table-engines/integrations/hdfs.md) +- [S3](../../../engines/table-engines/integrations/s3.md) +- [Kafka](../../../engines/table-engines/integrations/kafka.md) +- [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md) +- [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) +- [PostgreSQL](../../../engines/table-engines/integrations/postgresql.md) +- [SQLite](../../../engines/table-engines/integrations/sqlite.md) +- [Hive](../../../engines/table-engines/integrations/hive.md) +- [ExternalDistributed](../../../engines/table-engines/integrations/ExternalDistributed.md) +- [MaterializedPostgreSQL](../../../engines/table-engines/integrations/materialized-postgresql.md) +- [NATS](../../../engines/table-engines/integrations/nats.md) +- [DeltaLake](../../../engines/table-engines/integrations/deltalake.md) +- [Hudi](../../../engines/table-engines/integrations/hudi.md) diff --git a/docs/en/engines/table-engines/integrations/jdbc.md b/docs/en/engines/table-engines/integrations/jdbc.md index 6853b5d1df7..99f851dcf3e 100644 --- a/docs/en/engines/table-engines/integrations/jdbc.md +++ b/docs/en/engines/table-engines/integrations/jdbc.md @@ -25,14 +25,14 @@ ENGINE = JDBC(datasource_uri, external_database, external_table) **Engine 
Parameters** -- `datasource_uri` — URI or name of an external DBMS. +- `datasource_uri` — URI or name of an external DBMS. URI Format: `jdbc:<driver_name>://<host_name>:<port>/?user=<username>&password=<password>`. Example for MySQL: `jdbc:mysql://localhost:3306/?user=root&password=root`. -- `external_database` — Database in an external DBMS. +- `external_database` — Database in an external DBMS. -- `external_table` — Name of the table in `external_database` or a select query like `select * from table1 where column1=1`. +- `external_table` — Name of the table in `external_database` or a select query like `select * from table1 where column1=1`. ## Usage Example {#usage-example} @@ -91,4 +91,4 @@ FROM system.numbers ## See Also {#see-also} -- [JDBC table function](../../../sql-reference/table-functions/jdbc.md). +- [JDBC table function](../../../sql-reference/table-functions/jdbc.md). diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md index ef422632d3e..ccfca4c1f1f 100644 --- a/docs/en/engines/table-engines/integrations/kafka.md +++ b/docs/en/engines/table-engines/integrations/kafka.md @@ -10,17 +10,17 @@ This engine works with [Apache Kafka](http://kafka.apache.org/). Kafka lets you: -- Publish or subscribe to data flows. -- Organize fault-tolerant storage. -- Process streams as they become available. +- Publish or subscribe to data flows. +- Organize fault-tolerant storage. +- Process streams as they become available. ## Creating a Table {#table_engine-kafka-creating-a-table} ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ( - name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], - name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], + name1 [type1] [ALIAS expr1], + name2 [type2] [ALIAS expr2], ... ) ENGINE = Kafka() SETTINGS @@ -46,27 +46,27 @@ SETTINGS Required parameters: -- `kafka_broker_list` — A comma-separated list of brokers (for example, `localhost:9092`). -- `kafka_topic_list` — A list of Kafka topics. -- `kafka_group_name` — A group of Kafka consumers. Reading margins are tracked for each group separately. If you do not want messages to be duplicated in the cluster, use the same group name everywhere. -- `kafka_format` — Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section. +- `kafka_broker_list` — A comma-separated list of brokers (for example, `localhost:9092`). +- `kafka_topic_list` — A list of Kafka topics. +- `kafka_group_name` — A group of Kafka consumers. Reading margins are tracked for each group separately. If you do not want messages to be duplicated in the cluster, use the same group name everywhere. +- `kafka_format` — Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section. Optional parameters: -- `kafka_row_delimiter` — Delimiter character, which ends the message. **This setting is deprecated and is no longer used, not left for compatibility reasons.** -- `kafka_schema` — Parameter that must be used if the format requires a schema definition. For example, [Cap’n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object. -- `kafka_num_consumers` — The number of consumers per table. Specify more consumers if the throughput of one consumer is insufficient.
The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition, and must not be greater than the number of physical cores on the server where ClickHouse is deployed. Default: `1`. -- `kafka_max_block_size` — The maximum batch size (in messages) for poll. Default: [max_insert_block_size](../../../operations/settings/settings.md#setting-max_insert_block_size). -- `kafka_skip_broken_messages` — Kafka message parser tolerance to schema-incompatible messages per block. If `kafka_skip_broken_messages = N` then the engine skips *N* Kafka messages that cannot be parsed (a message equals a row of data). Default: `0`. -- `kafka_commit_every_batch` — Commit every consumed and handled batch instead of a single commit after writing a whole block. Default: `0`. -- `kafka_client_id` — Client identifier. Empty by default. -- `kafka_poll_timeout_ms` — Timeout for single poll from Kafka. Default: [stream_poll_timeout_ms](../../../operations/settings/settings.md#stream_poll_timeout_ms). -- `kafka_poll_max_batch_size` — Maximum amount of messages to be polled in a single Kafka poll. Default: [max_block_size](../../../operations/settings/settings.md#setting-max_block_size). -- `kafka_flush_interval_ms` — Timeout for flushing data from Kafka. Default: [stream_flush_interval_ms](../../../operations/settings/settings.md#stream-flush-interval-ms). -- `kafka_thread_per_consumer` — Provide independent thread for each consumer. When enabled, every consumer flush the data independently, in parallel (otherwise — rows from several consumers squashed to form one block). Default: `0`. -- `kafka_handle_error_mode` — How to handle errors for Kafka engine. Possible values: default, stream. -- `kafka_commit_on_select` — Commit messages when select query is made. Default: `false`. -- `kafka_max_rows_per_message` — The maximum number of rows written in one kafka message for row-based formats. Default : `1`. +- `kafka_row_delimiter` — Delimiter character, which ends the message. **This setting is deprecated and is no longer used; it is kept only for compatibility.** +- `kafka_schema` — Parameter that must be used if the format requires a schema definition. For example, [Cap’n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object. +- `kafka_num_consumers` — The number of consumers per table. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition, and must not be greater than the number of physical cores on the server where ClickHouse is deployed. Default: `1`. +- `kafka_max_block_size` — The maximum batch size (in messages) for poll. Default: [max_insert_block_size](../../../operations/settings/settings.md#setting-max_insert_block_size). +- `kafka_skip_broken_messages` — Kafka message parser tolerance to schema-incompatible messages per block. If `kafka_skip_broken_messages = N` then the engine skips *N* Kafka messages that cannot be parsed (a message equals a row of data). Default: `0`. +- `kafka_commit_every_batch` — Commit every consumed and handled batch instead of a single commit after writing a whole block. Default: `0`. +- `kafka_client_id` — Client identifier. Empty by default. +- `kafka_poll_timeout_ms` — Timeout for single poll from Kafka. Default: [stream_poll_timeout_ms](../../../operations/settings/settings.md#stream_poll_timeout_ms). +- `kafka_poll_max_batch_size` — Maximum number of messages to be polled in a single Kafka poll. Default: [max_block_size](../../../operations/settings/settings.md#setting-max_block_size). +- `kafka_flush_interval_ms` — Timeout for flushing data from Kafka. Default: [stream_flush_interval_ms](../../../operations/settings/settings.md#stream-flush-interval-ms). +- `kafka_thread_per_consumer` — Provide an independent thread for each consumer. When enabled, every consumer flushes data independently and in parallel (otherwise, rows from several consumers are squashed to form one block). Default: `0`. +- `kafka_handle_error_mode` — How to handle errors for the Kafka engine. Possible values: default, stream. +- `kafka_commit_on_select` — Commit messages when a select query is made. Default: `false`. +- `kafka_max_rows_per_message` — The maximum number of rows written in one Kafka message for row-based formats. Default: `1`. Examples: @@ -102,7 +102,7 @@ Examples: Deprecated Method for Creating a Table -:::warning +:::note Do not use this method in new projects. If possible, switch old projects to the method described above. ::: @@ -113,6 +113,10 @@ Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format +:::info +The Kafka table engine doesn't support columns with a [default value](../../../sql-reference/statements/create/table.md#default_value). If you need columns with a default value, you can add them at the materialized view level (see below). +::: + ## Description {#description} The delivered messages are tracked automatically, so each message in a group is only counted once. If you want to get the data twice, then create a copy of the table with another group name. @@ -235,14 +239,14 @@ Example: ## Virtual Columns {#virtual-columns} -- `_topic` — Kafka topic. -- `_key` — Key of the message. -- `_offset` — Offset of the message. -- `_timestamp` — Timestamp of the message. -- `_timestamp_ms` — Timestamp in milliseconds of the message. -- `_partition` — Partition of Kafka topic. -- `_headers.name` — Array of message's headers keys. -- `_headers.value` — Array of message's headers values. +- `_topic` — Kafka topic. +- `_key` — Key of the message. +- `_offset` — Offset of the message. +- `_timestamp` — Timestamp of the message. +- `_timestamp_ms` — Timestamp in milliseconds of the message. +- `_partition` — Partition of Kafka topic. +- `_headers.name` — Array of message's headers keys. +- `_headers.value` — Array of message's headers values.
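For illustration, a sketch that reads these virtual columns alongside a payload column (broker, topic, and group names are placeholders; note that a plain SELECT consumes without committing offsets unless `kafka_commit_on_select` is enabled):

``` sql
CREATE TABLE queue
(
    timestamp UInt64,
    message String
)
ENGINE = Kafka('localhost:9092', 'topic1', 'group1', 'JSONEachRow');

-- virtual columns are not part of the table definition but can be selected
SELECT _topic, _partition, _offset, _key, message FROM queue LIMIT 5;
```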
## Data formats support {#data-formats-support} @@ -254,5 +258,5 @@ The number of rows in one Kafka message depends on whether the format is row-bas **See Also** -- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns) -- [background_message_broker_schedule_pool_size](../../../operations/settings/settings.md#background_message_broker_schedule_pool_size) +- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns) +- [background_message_broker_schedule_pool_size](../../../operations/server-configuration-parameters/settings.md#background_message_broker_schedule_pool_size) diff --git a/docs/en/engines/table-engines/integrations/materialized-postgresql.md b/docs/en/engines/table-engines/integrations/materialized-postgresql.md index 11e7928c3ed..e112ca3bbb1 100644 --- a/docs/en/engines/table-engines/integrations/materialized-postgresql.md +++ b/docs/en/engines/table-engines/integrations/materialized-postgresql.md @@ -19,11 +19,11 @@ PRIMARY KEY key; **Engine Parameters** -- `host:port` — PostgreSQL server address. -- `database` — Remote database name. -- `table` — Remote table name. -- `user` — PostgreSQL user. -- `password` — User password. +- `host:port` — PostgreSQL server address. +- `database` — Remote database name. +- `table` — Remote table name. +- `user` — PostgreSQL user. +- `password` — User password. ## Requirements {#requirements} @@ -33,11 +33,13 @@ PRIMARY KEY key; 3. Only database [Atomic](https://en.wikipedia.org/wiki/Atomicity_(database_systems)) is allowed. +4. The `MaterializedPostgreSQL` table engine only works for PostgreSQL versions >= 11 as the implementation requires the [pg_replication_slot_advance](https://pgpedia.info/p/pg_replication_slot_advance.html) PostgreSQL function. + ## Virtual columns {#virtual-columns} -- `_version` — Transaction counter. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `_version` — Transaction counter. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `_sign` — Deletion mark. Type: [Int8](../../../sql-reference/data-types/int-uint.md). Possible values: +- `_sign` — Deletion mark. Type: [Int8](../../../sql-reference/data-types/int-uint.md). Possible values: - `1` — Row is not deleted, - `-1` — Row is deleted. @@ -52,6 +54,6 @@ PRIMARY KEY key; SELECT key, value, _version FROM postgresql_db.postgresql_replica; ``` -:::warning +:::note Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.html) values is not supported. The default value for the data type will be used. ::: diff --git a/docs/en/engines/table-engines/integrations/mongodb.md b/docs/en/engines/table-engines/integrations/mongodb.md index be45ce88c67..a647ac9993f 100644 --- a/docs/en/engines/table-engines/integrations/mongodb.md +++ b/docs/en/engines/table-engines/integrations/mongodb.md @@ -21,17 +21,17 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name **Engine Parameters** -- `host:port` — MongoDB server address. +- `host:port` — MongoDB server address. -- `database` — Remote database name. +- `database` — Remote database name. -- `collection` — Remote collection name. +- `collection` — Remote collection name. -- `user` — MongoDB user. +- `user` — MongoDB user. -- `password` — User password. +- `password` — User password. -- `options` — MongoDB connection string options (optional parameter). +- `options` — MongoDB connection string options (optional parameter). 
## Usage Example {#usage-example} diff --git a/docs/en/engines/table-engines/integrations/mysql.md b/docs/en/engines/table-engines/integrations/mysql.md index e00347c3163..6ff6221c877 100644 --- a/docs/en/engines/table-engines/integrations/mysql.md +++ b/docs/en/engines/table-engines/integrations/mysql.md @@ -31,25 +31,25 @@ See a detailed description of the [CREATE TABLE](../../../sql-reference/statemen The table structure can differ from the original MySQL table structure: -- Column names should be the same as in the original MySQL table, but you can use just some of these columns and in any order. -- Column types may differ from those in the original MySQL table. ClickHouse tries to [cast](../../../engines/database-engines/mysql.md#data_types-support) values to the ClickHouse data types. -- The [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not make Nullable columns and inserts default values instead of nulls. This is also applicable for NULL values inside arrays. +- Column names should be the same as in the original MySQL table, but you can use just some of these columns and in any order. +- Column types may differ from those in the original MySQL table. ClickHouse tries to [cast](../../../engines/database-engines/mysql.md#data_types-support) values to the ClickHouse data types. +- The [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not make Nullable columns and inserts default values instead of nulls. This is also applicable for NULL values inside arrays. **Engine Parameters** -- `host:port` — MySQL server address. +- `host:port` — MySQL server address. -- `database` — Remote database name. +- `database` — Remote database name. -- `table` — Remote table name. +- `table` — Remote table name. -- `user` — MySQL user. +- `user` — MySQL user. -- `password` — User password. +- `password` — User password. -- `replace_query` — Flag that converts `INSERT INTO` queries to `REPLACE INTO`. If `replace_query=1`, the query is substituted. +- `replace_query` — Flag that converts `INSERT INTO` queries to `REPLACE INTO`. If `replace_query=1`, the query is substituted. -- `on_duplicate_clause` — The `ON DUPLICATE KEY on_duplicate_clause` expression that is added to the `INSERT` query. +- `on_duplicate_clause` — The `ON DUPLICATE KEY on_duplicate_clause` expression that is added to the `INSERT` query. Example: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1`, where `on_duplicate_clause` is `UPDATE c2 = c2 + 1`. See the [MySQL documentation](https://dev.mysql.com/doc/refman/8.0/en/insert-on-duplicate.html) to find which `on_duplicate_clause` you can use with the `ON DUPLICATE KEY` clause. @@ -121,8 +121,8 @@ Allows to automatically close the connection after query execution, i.e. disable Possible values: -- 1 — Auto-close connection is allowed, so the connection reuse is disabled -- 0 — Auto-close connection is not allowed, so the connection reuse is enabled +- 1 — Auto-close connection is allowed, so the connection reuse is disabled +- 0 — Auto-close connection is not allowed, so the connection reuse is enabled Default value: `1`. @@ -132,8 +132,8 @@ Sets the number of retries for pool with failover. Possible values: -- Positive integer. 
-- 0 — There are no retries for pool with failover. +- Positive integer. +- 0 — There are no retries for pool with failover. Default value: `3`. @@ -143,7 +143,7 @@ Size of connection pool (if all connections are in use, the query will wait unti Possible values: -- Positive integer. +- Positive integer. Default value: `16`. @@ -153,7 +153,7 @@ Timeout (in seconds) for waiting for free connection (in case of there is alread Possible values: -- Positive integer. +- Positive integer. Default value: `5`. @@ -163,7 +163,7 @@ Connect timeout (in seconds). Possible values: -- Positive integer. +- Positive integer. Default value: `10`. @@ -173,11 +173,11 @@ Read/write timeout (in seconds). Possible values: -- Positive integer. +- Positive integer. Default value: `300`. ## See Also {#see-also} -- [The mysql table function](../../../sql-reference/table-functions/mysql.md) -- [Using MySQL as a dictionary source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-mysql) +- [The mysql table function](../../../sql-reference/table-functions/mysql.md) +- [Using MySQL as a dictionary source](../../../sql-reference/dictionaries/index.md#dictionary-sources#dicts-external_dicts_dict_sources-mysql) diff --git a/docs/en/engines/table-engines/integrations/nats.md b/docs/en/engines/table-engines/integrations/nats.md index f2856c89238..7f09c516d6f 100644 --- a/docs/en/engines/table-engines/integrations/nats.md +++ b/docs/en/engines/table-engines/integrations/nats.md @@ -45,9 +45,9 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] Required parameters: -- `nats_url` – host:port (for example, `localhost:5672`).. -- `nats_subjects` – List of subject for NATS table to subscribe/publsh to. Supports wildcard subjects like `foo.*.bar` or `baz.>` -- `nats_format` – Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section. +- `nats_url` – host:port (for example, `localhost:5672`). +- `nats_subjects` – List of subjects for the NATS table to subscribe/publish to. Supports wildcard subjects like `foo.*.bar` or `baz.>`. +- `nats_format` – Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section. Optional parameters: diff --git a/docs/en/engines/table-engines/integrations/odbc.md b/docs/en/engines/table-engines/integrations/odbc.md index aabc37442f9..37e08dc1420 100644 --- a/docs/en/engines/table-engines/integrations/odbc.md +++ b/docs/en/engines/table-engines/integrations/odbc.md @@ -28,15 +28,15 @@ See a detailed description of the [CREATE TABLE](../../../sql-reference/statemen The table structure can differ from the source table structure: -- Column names should be the same as in the source table, but you can use just some of these columns and in any order. -- Column types may differ from those in the source table. ClickHouse tries to [cast](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types. -- The [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not make Nullable columns and inserts default values instead of nulls. This is also applicable for NULL values inside arrays.
+- Column names should be the same as in the source table, but you can use just some of these columns and in any order. +- Column types may differ from those in the source table. ClickHouse tries to [cast](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types. +- The [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not make Nullable columns and inserts default values instead of nulls. This is also applicable for NULL values inside arrays. **Engine Parameters** -- `connection_settings` — Name of the section with connection settings in the `odbc.ini` file. -- `external_database` — Name of a database in an external DBMS. -- `external_table` — Name of a table in the `external_database`. +- `connection_settings` — Name of the section with connection settings in the `odbc.ini` file. +- `external_database` — Name of a database in an external DBMS. +- `external_table` — Name of a table in the `external_database`. ## Usage Example {#usage-example} @@ -126,5 +126,5 @@ SELECT * FROM odbc_t ## See Also {#see-also} -- [ODBC dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-odbc) -- [ODBC table function](../../../sql-reference/table-functions/odbc.md) +- [ODBC dictionaries](../../../sql-reference/dictionaries/index.md#dictionary-sources#dicts-external_dicts_dict_sources-odbc) +- [ODBC table function](../../../sql-reference/table-functions/odbc.md) diff --git a/docs/en/engines/table-engines/integrations/postgresql.md b/docs/en/engines/table-engines/integrations/postgresql.md index b73d28c8508..f27d4d48f75 100644 --- a/docs/en/engines/table-engines/integrations/postgresql.md +++ b/docs/en/engines/table-engines/integrations/postgresql.md @@ -13,8 +13,8 @@ The PostgreSQL engine allows to perform `SELECT` and `INSERT` queries on data th ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ( - name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1], - name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2], + name1 type1 [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1], + name2 type2 [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2], ... ) ENGINE = PostgreSQL('host:port', 'database', 'table', 'user', 'password'[, `schema`]); ``` @@ -23,19 +23,19 @@ See a detailed description of the [CREATE TABLE](../../../sql-reference/statemen The table structure can differ from the original PostgreSQL table structure: -- Column names should be the same as in the original PostgreSQL table, but you can use just some of these columns and in any order. -- Column types may differ from those in the original PostgreSQL table. ClickHouse tries to [cast](../../../engines/database-engines/postgresql.md#data_types-support) values to the ClickHouse data types. -- The [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not make Nullable columns and inserts default values instead of nulls. This is also applicable for NULL values inside arrays. +- Column names should be the same as in the original PostgreSQL table, but you can use just some of these columns and in any order. 
+- Column types may differ from those in the original PostgreSQL table. ClickHouse tries to [cast](../../../engines/database-engines/postgresql.md#data_types-support) values to the ClickHouse data types. +- The [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not make Nullable columns and inserts default values instead of nulls. This is also applicable for NULL values inside arrays. **Engine Parameters** -- `host:port` — PostgreSQL server address. -- `database` — Remote database name. -- `table` — Remote table name. -- `user` — PostgreSQL user. -- `password` — User password. -- `schema` — Non-default table schema. Optional. -- `on conflict ...` — example: `ON CONFLICT DO NOTHING`. Optional. Note: adding this option will make insertion less efficient. +- `host:port` — PostgreSQL server address. +- `database` — Remote database name. +- `table` — Remote table name. +- `user` — PostgreSQL user. +- `password` — User password. +- `schema` — Non-default table schema. Optional. +- `on conflict ...` — example: `ON CONFLICT DO NOTHING`. Optional. Note: adding this option will make insertion less efficient. or via config (since version 21.11): @@ -74,7 +74,7 @@ All joins, aggregations, sorting, `IN [ array ]` conditions and the `LIMIT` samp PostgreSQL `Array` types are converted into ClickHouse arrays. -:::warning +:::note Be careful: in PostgreSQL, array data created as `type_name[]` may contain multi-dimensional arrays of different dimensions in different table rows of the same column. In ClickHouse, however, multidimensional arrays must have the same number of dimensions in all table rows of the same column. ::: @@ -111,7 +111,7 @@ In the example below replica `example01-1` has the highest priority: ## Usage Example {#usage-example} -Table in PostgreSQL: +### Table in PostgreSQL ``` text postgres=# CREATE TABLE "public"."test" ( @@ -134,7 +134,9 @@ postgresql> SELECT * FROM test; (1 row) ``` -Table in ClickHouse, retrieving data from the PostgreSQL table created above: +### Creating a table in ClickHouse and connecting it to the PostgreSQL table created above + +This example uses the [PostgreSQL table engine](/docs/en/engines/table-engines/integrations/postgresql.md) to connect the ClickHouse table to the PostgreSQL table: ``` sql CREATE TABLE default.postgresql_table @@ -146,6 +148,35 @@ CREATE TABLE default.postgresql_table ENGINE = PostgreSQL('localhost:5432', 'public', 'test', 'postgres_user', 'postgres_password'); ``` +### Inserting initial data from PostgreSQL table into ClickHouse table, using a SELECT query + +The [postgresql table function](/docs/en/sql-reference/table-functions/postgresql.md) copies the data from PostgreSQL to ClickHouse. This is often done to improve query performance by running analytics in ClickHouse rather than in PostgreSQL; it can also be used for migrating data from PostgreSQL to ClickHouse: + +``` sql +INSERT INTO default.postgresql_table +SELECT * FROM postgresql('localhost:5432', 'public', 'test', 'postgres_user', 'postgres_password'); +``` + +### Inserting incremental data from PostgreSQL table into ClickHouse table + +To perform ongoing synchronization between the PostgreSQL table and the ClickHouse table after the initial insert, you can use a WHERE clause in ClickHouse to insert only the data added to PostgreSQL after a given timestamp or unique sequence ID.
+ +This would require keeping track of the max ID or timestamp previously added, such as the following: + +``` sql +SELECT max(`int_id`) AS maxIntID FROM default.postgresql_table; +``` + +Then insert the values from the PostgreSQL table that are greater than the max: + +``` sql +INSERT INTO default.postgresql_table +SELECT * FROM postgresql('localhost:5432', 'public', 'test', 'postgres_user', 'postgres_password') +WHERE int_id > maxIntID; +``` + +### Selecting data from the resulting ClickHouse table + ``` sql SELECT * FROM postgresql_table WHERE str IN ('test'); ``` @@ -156,7 +187,7 @@ SELECT * FROM postgresql_table WHERE str IN ('test'); └────────────────┴──────┴────────┘ ``` -Using Non-default Schema: +### Using Non-default Schema ```text postgres=# CREATE SCHEMA "nice.schema"; @@ -173,8 +204,10 @@ CREATE TABLE pg_table_schema_with_dots (a UInt32) **See Also** -- [The `postgresql` table function](../../../sql-reference/table-functions/postgresql.md) -- [Using PostgreSQL as a dictionary source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql) +- [The `postgresql` table function](../../../sql-reference/table-functions/postgresql.md) +- [Using PostgreSQL as a dictionary source](../../../sql-reference/dictionaries/index.md#dictionary-sources#dicts-external_dicts_dict_sources-postgresql) ## Related content + - Blog: [ClickHouse and PostgreSQL - a match made in data heaven - part 1](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres) +- Blog: [ClickHouse and PostgreSQL - a Match Made in Data Heaven - part 2](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres-part-2) diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md index eec8691a165..08062278904 100644 --- a/docs/en/engines/table-engines/integrations/rabbitmq.md +++ b/docs/en/engines/table-engines/integrations/rabbitmq.md @@ -10,8 +10,8 @@ This engine allows integrating ClickHouse with [RabbitMQ](https://www.rabbitmq.c `RabbitMQ` lets you: -- Publish or subscribe to data flows. -- Process streams as they become available. +- Publish or subscribe to data flows. +- Process streams as they become available. ## Creating a Table {#table_engine-rabbitmq-creating-a-table} @@ -51,9 +51,9 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] Required parameters: -- `rabbitmq_host_port` – host:port (for example, `localhost:5672`). -- `rabbitmq_exchange_name` – RabbitMQ exchange name. -- `rabbitmq_format` – Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section. +- `rabbitmq_host_port` – host:port (for example, `localhost:5672`). +- `rabbitmq_exchange_name` – RabbitMQ exchange name. +- `rabbitmq_format` – Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section. Optional parameters: @@ -138,24 +138,24 @@ There can be no more than one exchange per table. One exchange can be shared bet Exchange type options: -- `direct` - Routing is based on the exact matching of keys. Example table key list: `key1,key2,key3,key4,key5`, message key can equal any of them. -- `fanout` - Routing to all tables (where exchange name is the same) regardless of the keys. -- `topic` - Routing is based on patterns with dot-separated keys.
Examples: `*.logs`, `records.*.*.2020`, `*.2018,*.2019,*.2020`. -- `headers` - Routing is based on `key=value` matches with a setting `x-match=all` or `x-match=any`. Example table key list: `x-match=all,format=logs,type=report,year=2020`. -- `consistent_hash` - Data is evenly distributed between all bound tables (where the exchange name is the same). Note that this exchange type must be enabled with RabbitMQ plugin: `rabbitmq-plugins enable rabbitmq_consistent_hash_exchange`. +- `direct` - Routing is based on the exact matching of keys. Example table key list: `key1,key2,key3,key4,key5`, message key can equal any of them. +- `fanout` - Routing to all tables (where exchange name is the same) regardless of the keys. +- `topic` - Routing is based on patterns with dot-separated keys. Examples: `*.logs`, `records.*.*.2020`, `*.2018,*.2019,*.2020`. +- `headers` - Routing is based on `key=value` matches with a setting `x-match=all` or `x-match=any`. Example table key list: `x-match=all,format=logs,type=report,year=2020`. +- `consistent_hash` - Data is evenly distributed between all bound tables (where the exchange name is the same). Note that this exchange type must be enabled with RabbitMQ plugin: `rabbitmq-plugins enable rabbitmq_consistent_hash_exchange`. Setting `rabbitmq_queue_base` may be used for the following cases: -- to let different tables share queues, so that multiple consumers could be registered for the same queues, which makes a better performance. If using `rabbitmq_num_consumers` and/or `rabbitmq_num_queues` settings, the exact match of queues is achieved in case these parameters are the same. -- to be able to restore reading from certain durable queues when not all messages were successfully consumed. To resume consumption from one specific queue - set its name in `rabbitmq_queue_base` setting and do not specify `rabbitmq_num_consumers` and `rabbitmq_num_queues` (defaults to 1). To resume consumption from all queues, which were declared for a specific table - just specify the same settings: `rabbitmq_queue_base`, `rabbitmq_num_consumers`, `rabbitmq_num_queues`. By default, queue names will be unique to tables. -- to reuse queues as they are declared durable and not auto-deleted. (Can be deleted via any of RabbitMQ CLI tools.) +- to let different tables share queues, so that multiple consumers can be registered for the same queues, which improves performance. If using `rabbitmq_num_consumers` and/or `rabbitmq_num_queues` settings, the exact match of queues is achieved in case these parameters are the same. +- to be able to restore reading from certain durable queues when not all messages were successfully consumed. To resume consumption from one specific queue, set its name in the `rabbitmq_queue_base` setting and do not specify `rabbitmq_num_consumers` and `rabbitmq_num_queues` (defaults to 1). To resume consumption from all queues that were declared for a specific table, just specify the same settings: `rabbitmq_queue_base`, `rabbitmq_num_consumers`, `rabbitmq_num_queues`. By default, queue names will be unique to tables. +- to reuse queues as they are declared durable and not auto-deleted. (Can be deleted via any of RabbitMQ CLI tools.) To improve performance, received messages are grouped into blocks the size of [max_insert_block_size](../../../operations/server-configuration-parameters/settings.md#settings-max_insert_block_size).
If the block wasn’t formed within [stream_flush_interval_ms](../../../operations/server-configuration-parameters/settings.md) milliseconds, the data will be flushed to the table regardless of the completeness of the block. If `rabbitmq_num_consumers` and/or `rabbitmq_num_queues` settings are specified along with `rabbitmq_exchange_type`, then: -- `rabbitmq-consistent-hash-exchange` plugin must be enabled. -- `message_id` property of the published messages must be specified (unique for each message/batch). +- `rabbitmq-consistent-hash-exchange` plugin must be enabled. +- `message_id` property of the published messages must be specified (unique for each message/batch). For an insert query, message metadata is added to each published message: a `messageID` and a `republished` flag (true if published more than once); it can be accessed via the message headers. diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index 723425429a5..595bc0c344f 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -12,17 +12,18 @@ This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ec ``` sql CREATE TABLE s3_engine_table (name String, value UInt32) - ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, [compression]) + ENGINE = S3(path [, NOSIGN | aws_access_key_id, aws_secret_access_key,] format, [compression]) [PARTITION BY expr] [SETTINGS ...] ``` **Engine parameters** -- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path). -- `format` — The [format](../../../interfaces/formats.md#formats) of the file. -- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3). -- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will autodetect compression by file extension. +- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path). +- `NOSIGN` - If this keyword is provided in place of credentials, the requests will not be signed. +- `format` — The [format](../../../interfaces/formats.md#formats) of the file. +- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3). +- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will autodetect compression by file extension.
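For illustration, a sketch of the new `NOSIGN` keyword used in place of credentials to read from a public bucket (the URL is a placeholder):

``` sql
CREATE TABLE s3_public_data (name String, value UInt32)
    ENGINE = S3('https://my-public-bucket.s3.amazonaws.com/some_folder/*.csv', NOSIGN, 'CSV');
```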
### PARTITION BY @@ -50,20 +51,20 @@ SELECT * FROM s3_engine_table LIMIT 2; ``` ## Virtual columns {#virtual-columns} -- `_path` — Path to the file. -- `_file` — Name of the file. +- `_path` — Path to the file. +- `_file` — Name of the file. For more information about virtual columns see [here](../../../engines/table-engines/index.md#table_engines-virtual_columns). ## Implementation Details {#implementation-details} -- Reads and writes can be parallel -- Not supported: - - `ALTER` and `SELECT...SAMPLE` operations. - - Indexes. - - [Zero-copy](../../../operations/storing-data.md#zero-copy) replication is possible, but not supported. +- Reads and writes can be parallel. +- Not supported: + - `ALTER` and `SELECT...SAMPLE` operations. + - Indexes. + - [Zero-copy](../../../operations/storing-data.md#zero-copy) replication is possible, but not supported. - :::warning Zero-copy replication is not ready for production + :::note Zero-copy replication is not ready for production Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. ::: @@ -71,14 +72,14 @@ For more information about virtual columns see [here](../../../engines/table-eng The `path` argument can specify multiple files using bash-like wildcards. To be processed, a file should exist and match the whole path pattern. The listing of files is determined during `SELECT` (not at `CREATE` moment). -- `*` — Substitutes any number of any characters except `/` including empty string. -- `?` — Substitutes any single character. -- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. -- `{N..M}` — Substitutes any number in range from N to M including both borders. N and M can have leading zeroes e.g. `000..078`. +- `*` — Substitutes any number of any characters except `/` including empty string. +- `?` — Substitutes any single character. +- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. +- `{N..M}` — Substitutes any number in range from N to M including both borders. N and M can have leading zeroes e.g. `000..078`. Constructions with `{}` are similar to the [remote](../../../sql-reference/table-functions/remote.md) table function. -:::warning +:::note If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
::: @@ -95,12 +96,12 @@ CREATE TABLE big_table (name String, value UInt32) Suppose we have several files in CSV format with the following URIs on S3: -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/some_folder/some_file_1.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/some_folder/some_file_2.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/some_folder/some_file_3.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/another_folder/some_file_1.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/another_folder/some_file_2.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/another_folder/some_file_3.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/some_folder/some_file_1.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/some_folder/some_file_2.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/some_folder/some_file_3.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/another_folder/some_file_1.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/another_folder/some_file_2.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/another_folder/some_file_3.csv' There are several ways to make a table consisting of all six files: @@ -130,14 +131,17 @@ CREATE TABLE table_with_asterisk (name String, value UInt32) The following settings can be set before query execution or placed into the configuration file. -- `s3_max_single_part_upload_size` — The maximum size of object to upload using singlepart upload to S3. Default value is `64Mb`. -- `s3_min_upload_part_size` — The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Default value is `512Mb`. -- `s3_max_redirects` — Max number of S3 redirects hops allowed. Default value is `10`. -- `s3_single_read_retries` — The maximum number of attempts during single read. Default value is `4`. -- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited). -- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`. -- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited). -- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`. +- `s3_max_single_part_upload_size` — The maximum size of object to upload using single-part upload to S3. Default value is `32Mb`. +- `s3_min_upload_part_size` — The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Default value is `16Mb`. +- `s3_max_redirects` — Max number of S3 redirect hops allowed. Default value is `10`. +- `s3_single_read_retries` — The maximum number of attempts during single read. Default value is `4`. +- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited). +- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`. +- `s3_max_get_rps` — Maximum GET requests per second rate before throttling.
Default value is `0` (unlimited). +- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`. +- `s3_upload_part_size_multiply_factor` - Multiply `s3_min_upload_part_size` by this factor each time `s3_upload_part_size_multiply_parts_count_threshold` parts were uploaded from a single write to S3. Default value is `2`. +- `s3_upload_part_size_multiply_parts_count_threshold` - Each time this number of parts has been uploaded to S3, `s3_min_upload_part_size` is multiplied by `s3_upload_part_size_multiply_factor`. Default value is `500`. +- `s3_max_inflight_parts_for_one_file` - Limits the number of PUT requests that can be run concurrently for one object. This number should be limited. The value `0` means unlimited. Default value is `20`. Each inflight part has a buffer of size `s3_min_upload_part_size` for the first `s3_upload_part_size_multiply_factor` parts, and more when the file is big enough (see `s3_upload_part_size_multiply_factor`). With default settings, one uploaded file smaller than `8G` consumes no more than `320Mb`. The consumption is greater for a larger file. Security consideration: if a malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in the server configuration. @@ -145,15 +149,20 @@ The following settings can be specified in configuration file for given endpoint (which will be matched by exact prefix of a URL): -- `endpoint` — Specifies prefix of an endpoint. Mandatory. -- `access_key_id` and `secret_access_key` — Specifies credentials to use with given endpoint. Optional. -- `use_environment_credentials` — If set to `true`, S3 client will try to obtain credentials from environment variables and [Amazon EC2](https://en.wikipedia.org/wiki/Amazon_Elastic_Compute_Cloud) metadata for given endpoint. Optional, default value is `false`. -- `region` — Specifies S3 region name. Optional. -- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata.
Optional, default value is `false`. +- `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`. +- `no_sign_request` - Ignore all the credentials so requests are not signed. Useful for accessing public buckets. +- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times. +- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. Optional. +- `server_side_encryption_kms_key_id` - If specified, required headers for accessing S3 objects with [SSE-KMS encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingKMSEncryption.html) will be set. If an empty string is specified, the AWS managed S3 key will be used. Optional. +- `server_side_encryption_kms_encryption_context` - If specified alongside `server_side_encryption_kms_key_id`, the given encryption context header for SSE-KMS will be set. Optional. +- `server_side_encryption_kms_bucket_key_enabled` - If specified alongside `server_side_encryption_kms_key_id`, the header to enable S3 bucket keys for SSE-KMS will be set. Optional, can be `true` or `false`, defaults to nothing (matches the bucket-level setting). +- `max_single_read_retries` — The maximum number of attempts during single read. Default value is `4`. Optional. +- `max_put_rps`, `max_put_burst`, `max_get_rps` and `max_get_burst` - Throttling settings (see description above) to use for specific endpoint instead of per query. Optional. **Example:** @@ -166,13 +175,29 @@ The following settings can be specified in configuration file for given endpoint + + + + + ``` +## Accessing public buckets + +ClickHouse tries to fetch credentials from many different types of sources. +Sometimes this can cause problems when accessing buckets that are public, making the client return a `403` error code. +This issue can be avoided by using the `NOSIGN` keyword, which forces the client to ignore all credentials and not sign the requests. + +``` sql +CREATE TABLE big_table (name String, value UInt32) + ENGINE = S3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/aapl_stock.csv', NOSIGN, 'CSVWithNames'); +``` + ## See also -- [s3 table function](../../../sql-reference/table-functions/s3.md) +- [s3 table function](../../../sql-reference/table-functions/s3.md) diff --git a/docs/en/engines/table-engines/integrations/sqlite.md b/docs/en/engines/table-engines/integrations/sqlite.md index ba11b73339d..20597d37a87 100644 --- a/docs/en/engines/table-engines/integrations/sqlite.md +++ b/docs/en/engines/table-engines/integrations/sqlite.md @@ -20,8 +20,8 @@ The engine allows to import and export data to SQLite and supports queries to SQ **Engine Parameters** -- `db_path` — Path to SQLite file with a database. -- `table` — Name of a table in the SQLite database. +- `db_path` — Path to SQLite file with a database. +- `table` — Name of a table in the SQLite database.
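As a quick sketch of these parameters in use (the database file and table name below are placeholders):

```sql
CREATE TABLE sqlite_table (col1 String, col2 UInt16)
    ENGINE = SQLite('sqlite.db', 'table1');
```

Queries against `sqlite_table` are then forwarded to the `table1` table inside the `sqlite.db` file.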
## Usage Example {#usage-example} @@ -56,5 +56,5 @@ SELECT * FROM sqlite_db.table2 ORDER BY col1; **See Also** -- [SQLite](../../../engines/database-engines/sqlite.md) engine -- [sqlite](../../../sql-reference/table-functions/sqlite.md) table function +- [SQLite](../../../engines/database-engines/sqlite.md) engine +- [sqlite](../../../sql-reference/table-functions/sqlite.md) table function diff --git a/docs/en/engines/table-engines/log-family/index.md b/docs/en/engines/table-engines/log-family/index.md index 9e671163bbf..aca24e68378 100644 --- a/docs/en/engines/table-engines/log-family/index.md +++ b/docs/en/engines/table-engines/log-family/index.md @@ -10,9 +10,9 @@ These engines were developed for scenarios when you need to quickly write many s Engines of the family: -- [StripeLog](/docs/en/engines/table-engines/log-family/stripelog.md) -- [Log](/docs/en/engines/table-engines/log-family/log.md) -- [TinyLog](/docs/en/engines/table-engines/log-family/tinylog.md) +- [StripeLog](/docs/en/engines/table-engines/log-family/stripelog.md) +- [Log](/docs/en/engines/table-engines/log-family/log.md) +- [TinyLog](/docs/en/engines/table-engines/log-family/tinylog.md) `Log` family table engines can store data to [HDFS](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-hdfs) or [S3](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3) distributed file systems. @@ -20,21 +20,21 @@ Engines of the family: Engines: -- Store data on a disk. +- Store data on a disk. -- Append data to the end of file when writing. +- Append data to the end of file when writing. -- Support locks for concurrent data access. +- Support locks for concurrent data access. During `INSERT` queries, the table is locked, and other queries for reading and writing data both wait for the table to unlock. If there are no data writing queries, any number of data reading queries can be performed concurrently. -- Do not support [mutations](/docs/en/sql-reference/statements/alter/index.md#alter-mutations). +- Do not support [mutations](/docs/en/sql-reference/statements/alter/index.md#alter-mutations). -- Do not support indexes. +- Do not support indexes. This means that `SELECT` queries for ranges of data are not efficient. -- Do not write data atomically. +- Do not write data atomically. You can get a table with corrupted data if something breaks the write operation, for example, abnormal server shutdown. diff --git a/docs/en/engines/table-engines/log-family/stripelog.md b/docs/en/engines/table-engines/log-family/stripelog.md index 747713fe69a..6aaa6caf654 100644 --- a/docs/en/engines/table-engines/log-family/stripelog.md +++ b/docs/en/engines/table-engines/log-family/stripelog.md @@ -29,8 +29,8 @@ The `StripeLog` engine stores all the columns in one file. For each `INSERT` que For each table ClickHouse writes the files: -- `data.bin` — Data file. -- `index.mrk` — File with marks. Marks contain offsets for each column of each data block inserted. +- `data.bin` — Data file. +- `index.mrk` — File with marks. Marks contain offsets for each column of each data block inserted. The `StripeLog` engine does not support the `ALTER UPDATE` and `ALTER DELETE` operations. 
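To make the file layout concrete, a minimal `StripeLog` table might be declared as follows (the table and column names are illustrative); each `INSERT` then appends one data block to `data.bin` and records the per-column offsets in `index.mrk`:

```sql
CREATE TABLE stripe_log_table
(
    timestamp DateTime,
    message_type String,
    message String
)
ENGINE = StripeLog;
```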
diff --git a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md index 9677f75a358..2b8b43802ea 100644 --- a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md @@ -43,7 +43,7 @@ When creating an `AggregatingMergeTree` table the same [clauses](../../../engine Deprecated Method for Creating a Table -:::warning +:::note Do not use this method in new projects and, if possible, switch the old projects to the method described above. ::: @@ -122,3 +122,7 @@ FROM test.mv_visits GROUP BY StartDate ORDER BY StartDate; ``` + +## Related Content + +- Blog: [Using Aggregate Combinators in ClickHouse](https://clickhouse.com/blog/aggregate-functions-combinators-in-clickhouse-for-arrays-maps-and-states) diff --git a/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md b/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md index 0bd665116f0..0043e1b6748 100644 --- a/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md @@ -45,7 +45,7 @@ When creating a `CollapsingMergeTree` table, the same [query clauses](../../../e Deprecated Method for Creating a Table -:::warning +:::note Do not use this method in new projects and, if possible, switch old projects to the method described above. ::: @@ -60,7 +60,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] All of the parameters excepting `sign` have the same meaning as in `MergeTree`. -- `sign` — Name of the column with the type of row: `1` — “state” row, `-1` — “cancel” row. +- `sign` — Name of the column with the type of row: `1` — “state” row, `-1` — “cancel” row. Column Data Type — `Int8`. diff --git a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md index b1e79c4c3fd..edb320a2507 100644 --- a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -6,7 +6,7 @@ sidebar_label: Custom Partitioning Key # Custom Partitioning Key -:::warning +:::note In most cases you do not need a partition key, and in most other cases you do not need a partition key more granular than by months. You should never use partitioning that is too granular. Don't partition your data by client identifiers or names. Instead, make a client identifier or name the first column in the ORDER BY expression. @@ -77,11 +77,11 @@ The `name` column contains the names of the partition data parts. You can use th Let’s break down the name of the part: `201901_1_9_2_11`: -- `201901` is the partition name. -- `1` is the minimum number of the data block. -- `9` is the maximum number of the data block. -- `2` is the chunk level (the depth of the merge tree it is formed from). -- `11` is the mutation version (if a part mutated) +- `201901` is the partition name. +- `1` is the minimum number of the data block. +- `9` is the maximum number of the data block. +- `2` is the chunk level (the depth of the merge tree it is formed from). +- `11` is the mutation version (if a part mutated). :::info The parts of old-type tables have the name: `20190117_20190123_2_2_0` (minimum date - maximum date - minimum block number - maximum block number - level).
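To see such part names for your own table, you can query the `system.parts` system table (the table name `visits` is a placeholder):

```sql
SELECT partition, name, active
FROM system.parts
WHERE table = 'visits';
```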
@@ -159,15 +159,15 @@ FROM session_log GROUP BY UserID; ``` -:::warning +:::note Performance of such a query heavily depends on the table layout. Because of that, the optimisation is not enabled by default. ::: The key factors for good performance: -- number of partitions involved in the query should be sufficiently large (more than `max_threads / 2`), otherwise query will underutilize the machine -- partitions shouldn't be too small, so batch processing won't degenerate into row-by-row processing -- partitions should be comparable in size, so all threads will do roughly the same amount of work +- number of partitions involved in the query should be sufficiently large (more than `max_threads / 2`), otherwise the query will underutilize the machine +- partitions shouldn't be too small, so batch processing won't degenerate into row-by-row processing +- partitions should be comparable in size, so all threads will do roughly the same amount of work :::info It's recommended to apply some hash function to columns in `partition by` clause in order to distribute data evenly between partitions. @@ -175,6 +175,6 @@ It's recommended to apply some hash function to columns in `partition by` clause Relevant settings are: -- `allow_aggregate_partitions_independently` - controls if the use of optimisation is enabled -- `force_aggregate_partitions_independently` - forces its use when it's applicable from the correctness standpoint, but getting disabled by internal logic that estimates its expediency -- `max_number_of_partitions_for_independent_aggregation` - hard limit on the maximal number of partitions table could have +- `allow_aggregate_partitions_independently` - controls if the use of optimisation is enabled +- `force_aggregate_partitions_independently` - forces its use when it's applicable from the correctness standpoint, but is disabled by internal logic that estimates its expediency +- `max_number_of_partitions_for_independent_aggregation` - hard limit on the maximal number of partitions a table could have diff --git a/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md b/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md index 104ec049ec4..c96e40d127c 100644 --- a/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md @@ -33,19 +33,19 @@ See a detailed description of the [CREATE TABLE](../../../sql-reference/statemen A table for the Graphite data should have the following columns for the following data: -- Metric name (Graphite sensor). Data type: `String`. +- Metric name (Graphite sensor). Data type: `String`. -- Time of measuring the metric. Data type: `DateTime`. +- Time of measuring the metric. Data type: `DateTime`. -- Value of the metric. Data type: `Float64`. +- Value of the metric. Data type: `Float64`. -- Version of the metric. Data type: any numeric (ClickHouse saves the rows with the highest version or the last written if versions are the same. Other rows are deleted during the merge of data parts). +- Version of the metric. Data type: any numeric (ClickHouse saves the rows with the highest version or the last written if versions are the same. Other rows are deleted during the merge of data parts). The names of these columns should be set in the rollup configuration. **GraphiteMergeTree parameters** -- `config_section` — Name of the section in the configuration file, where are the rules of rollup set.
+- `config_section` — Name of the section in the configuration file where the rollup rules are set. **Query clauses** @@ -55,7 +55,7 @@ When creating a `GraphiteMergeTree` table, the same [clauses](../../../engines/t Deprecated Method for Creating a Table -:::warning +:::note Do not use this method in new projects and, if possible, switch old projects to the method described above. ::: @@ -73,7 +73,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] All of the parameters excepting `config_section` have the same meaning as in `MergeTree`. -- `config_section` — Name of the section in the configuration file, where are the rules of rollup set. +- `config_section` — Name of the section in the configuration file where the rollup rules are set. @@ -129,7 +129,7 @@ default ... ``` -:::warning +:::important Patterns must be strictly ordered: 1. Patterns without `function` or `retention`. @@ -141,18 +141,18 @@ When processing a row, ClickHouse checks the rules in the `pattern` sections. Ea Fields for `pattern` and `default` sections: -- `rule_type` - a rule's type. It's applied only to a particular metrics. The engine use it to separate plain and tagged metrics. Optional parameter. Default value: `all`. It's unnecessary when performance is not critical, or only one metrics type is used, e.g. plain metrics. By default only one type of rules set is created. Otherwise, if any of special types is defined, two different sets are created. One for plain metrics (root.branch.leaf) and one for tagged metrics (root.branch.leaf;tag1=value1). The default rules are ended up in both sets. Valid values: - - `all` (default) - a universal rule, used when `rule_type` is omitted. - - `plain` - a rule for plain metrics. The field `regexp` is processed as regular expression. - - `tagged` - a rule for tagged metrics (metrics are stored in DB in the format of `someName?tag1=value1&tag2=value2&tag3=value3`). Regular expression must be sorted by tags' names, first tag must be `__name__` if exists. The field `regexp` is processed as regular expression. - - `tag_list` - a rule for tagged matrics, a simple DSL for easier metric description in graphite format `someName;tag1=value1;tag2=value2`, `someName`, or `tag1=value1;tag2=value2`. The field `regexp` is translated into a `tagged` rule. The sorting by tags' names is unnecessary, ti will be done automatically. A tag's value (but not a name) can be set as a regular expression, e.g. `env=(dev|staging)`. -- `regexp` – A pattern for the metric name (a regular or DSL). -- `age` – The minimum age of the data in seconds. -- `precision`– How precisely to define the age of the data in seconds. Should be a divisor for 86400 (seconds in a day). -- `function` – The name of the aggregating function to apply to data whose age falls within the range `[age, age + precision]`. Accepted functions: min / max / any / avg. The average is calculated imprecisely, like the average of the averages. + - `all` (default) - a universal rule, used when `rule_type` is omitted. + - `plain` - a rule for plain metrics. The field `regexp` is processed as regular expression. + - `tagged` - a rule for tagged metrics (metrics are stored in DB in the format of `someName?tag1=value1&tag2=value2&tag3=value3`). Regular expression must be sorted by tags' names, first tag must be `__name__` if exists.
The field `regexp` is processed as regular expression. + - `tag_list` - a rule for tagged metrics, a simple DSL for easier metric description in graphite format `someName;tag1=value1;tag2=value2`, `someName`, or `tag1=value1;tag2=value2`. The field `regexp` is translated into a `tagged` rule. The sorting by tags' names is unnecessary, it will be done automatically. A tag's value (but not a name) can be set as a regular expression, e.g. `env=(dev|staging)`. +- `regexp` – A pattern for the metric name (a regular expression or DSL). +- `age` – The minimum age of the data in seconds. +- `precision` – How precisely to define the age of the data in seconds. Should be a divisor for 86400 (seconds in a day). +- `function` – The name of the aggregating function to apply to data whose age falls within the range `[age, age + precision]`. Accepted functions: min / max / any / avg. The average is calculated imprecisely, like the average of the averages. ### Configuration Example without rules types {#configuration-example} @@ -263,6 +263,6 @@ Valid values: ``` -:::warning +:::note Data rollup is performed during merges. Usually, for old partitions, merges are not started, so for rollup it is necessary to trigger an unscheduled merge using [optimize](../../../sql-reference/statements/optimize.md). Or use additional tools, for example [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer). ::: diff --git a/docs/en/engines/table-engines/mergetree-family/invertedindexes.md b/docs/en/engines/table-engines/mergetree-family/invertedindexes.md index aa11258dc4a..31f5a87a2b6 100644 --- a/docs/en/engines/table-engines/mergetree-family/invertedindexes.md +++ b/docs/en/engines/table-engines/mergetree-family/invertedindexes.md @@ -15,7 +15,7 @@ tokenized cells of the string column. For example, the string cell "I will be a " wi", "wil", "ill", "ll ", "l b", " be" etc. The more fine-granular the input strings are tokenized, the bigger but also the more useful the resulting inverted index will be. -:::warning +:::note Inverted indexes are experimental and should not be used in production environments yet. They may change in the future in backward-incompatible ways, for example with respect to their DDL/DQL syntax or performance/compression characteristics. ::: @@ -191,3 +191,7 @@ is performance. In practice, users often search for multiple terms at once. For '%big%'` can be evaluated directly using an inverted index by forming the union of the row id lists for terms "little" and "big". This also means that the parameter `GRANULARITY` supplied to index creation has no meaning (it may be removed from the syntax in the future). ::: + +## Related Content + +- Blog: [Introducing Inverted Indices in ClickHouse](https://clickhouse.com/blog/clickhouse-search-with-inverted-indices) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 7c0416d3ea4..79ced0b6ce5 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -12,19 +12,19 @@ Engines in the `MergeTree` family are designed for inserting a very large amount Main features: -- Stores data sorted by primary key. +- Stores data sorted by primary key. This allows you to create a small sparse index that helps find data faster. -- Partitions can be used if the [partitioning key](/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md) is specified.
+- Partitions can be used if the [partitioning key](/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md) is specified. ClickHouse supports certain operations with partitions that are more efficient than general operations on the same data with the same result. ClickHouse also automatically cuts off the partition data where the partitioning key is specified in the query. -- Data replication support. +- Data replication support. The family of `ReplicatedMergeTree` tables provides data replication. For more information, see [Data replication](/docs/en/engines/table-engines/mergetree-family/replication.md). -- Data sampling support. +- Data sampling support. If necessary, you can set the data sampling method in the table. @@ -192,7 +192,7 @@ The `index_granularity` setting can be omitted because 8192 is the default value Deprecated Method for Creating a Table -:::warning +:::note Do not use this method in new projects. If possible, switch old projects to the method described above. ::: @@ -207,10 +207,10 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] **MergeTree() Parameters** -- `date-column` — The name of a column of the [Date](/docs/en/sql-reference/data-types/date.md) type. ClickHouse automatically creates partitions by month based on this column. The partition names are in the `"YYYYMM"` format. -- `sampling_expression` — An expression for sampling. -- `(primary, key)` — Primary key. Type: [Tuple()](/docs/en/sql-reference/data-types/tuple.md) -- `index_granularity` — The granularity of an index. The number of data rows between the “marks” of an index. The value 8192 is appropriate for most tasks. +- `date-column` — The name of a column of the [Date](/docs/en/sql-reference/data-types/date.md) type. ClickHouse automatically creates partitions by month based on this column. The partition names are in the `"YYYYMM"` format. +- `sampling_expression` — An expression for sampling. +- `(primary, key)` — Primary key. Type: [Tuple()](/docs/en/sql-reference/data-types/tuple.md) +- `index_granularity` — The granularity of an index. The number of data rows between the “marks” of an index. The value 8192 is appropriate for most tasks. **Example** @@ -250,9 +250,9 @@ Take the `(CounterID, Date)` primary key as an example. In this case, the sortin If the data query specifies: -- `CounterID in ('a', 'h')`, the server reads the data in the ranges of marks `[0, 3)` and `[6, 8)`. -- `CounterID IN ('a', 'h') AND Date = 3`, the server reads the data in the ranges of marks `[1, 3)` and `[7, 8)`. -- `Date = 3`, the server reads the data in the range of marks `[1, 10]`. +- `CounterID in ('a', 'h')`, the server reads the data in the ranges of marks `[0, 3)` and `[6, 8)`. +- `CounterID IN ('a', 'h') AND Date = 3`, the server reads the data in the ranges of marks `[1, 3)` and `[7, 8)`. +- `Date = 3`, the server reads the data in the range of marks `[1, 10]`. The examples above show that it is always more effective to use an index than a full scan. @@ -268,18 +268,18 @@ You can use `Nullable`-typed expressions in the `PRIMARY KEY` and `ORDER BY` cla The number of columns in the primary key is not explicitly limited. Depending on the data structure, you can include more or fewer columns in the primary key. This may: -- Improve the performance of an index. +- Improve the performance of an index. If the primary key is `(a, b)`, then adding another column `c` will improve the performance if the following conditions are met: - - There are queries with a condition on column `c`. 
- - Long data ranges (several times longer than the `index_granularity`) with identical values for `(a, b)` are common. In other words, when adding another column allows you to skip quite long data ranges. + - There are queries with a condition on column `c`. + - Long data ranges (several times longer than the `index_granularity`) with identical values for `(a, b)` are common. In other words, adding another column allows you to skip quite long data ranges. -- Improve data compression. +- Improve data compression. ClickHouse sorts data by primary key, so the higher the consistency, the better the compression. -- Provide additional logic when merging data parts in the [CollapsingMergeTree](/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md/#table_engine-collapsingmergetree) and [SummingMergeTree](/docs/en/engines/table-engines/mergetree-family/summingmergetree.md) engines. +- Provide additional logic when merging data parts in the [CollapsingMergeTree](/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md/#table_engine-collapsingmergetree) and [SummingMergeTree](/docs/en/engines/table-engines/mergetree-family/summingmergetree.md) engines. In this case it makes sense to specify the *sorting key* that is different from the primary key. @@ -377,8 +377,9 @@ CREATE TABLE table_name i32 Int32, s String, ... - INDEX a (u64 * i32, s) TYPE minmax GRANULARITY 3, - INDEX b (u64 * length(s)) TYPE set(1000) GRANULARITY 4 + INDEX idx1 u64 TYPE bloom_filter GRANULARITY 3, + INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 3, + INDEX idx3 u64 * length(s) TYPE set(1000) GRANULARITY 4 ) ENGINE = MergeTree() ... ``` @@ -386,8 +387,25 @@ Indices from the example can be used by ClickHouse to reduce the amount of data to read from disk in the following queries: ``` sql -SELECT count() FROM table WHERE s < 'z' -SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 +SELECT count() FROM table WHERE u64 == 10; +SELECT count() FROM table WHERE u64 * i32 >= 1234 +SELECT count() FROM table WHERE u64 * length(s) == 1234 +``` + +Data skipping indexes can also be created on composite columns: + +```sql +-- on columns of type Map: +INDEX map_key_index mapKeys(map_column) TYPE bloom_filter +INDEX map_value_index mapValues(map_column) TYPE bloom_filter + +-- on columns of type Tuple: +INDEX tuple_1_index tuple_column.1 TYPE bloom_filter +INDEX tuple_2_index tuple_column.2 TYPE bloom_filter + +-- on columns of type Nested: +INDEX nested_1_index col.nested_col1 TYPE bloom_filter +INDEX nested_2_index col.nested_col2 TYPE bloom_filter ``` ### Available Types of Indices {#available-types-of-indices} @@ -421,6 +439,50 @@ Syntax: `ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions, - `number_of_hash_functions` — The number of hash functions used in the Bloom filter. - `random_seed` — The seed for Bloom filter hash functions. +Users can create a [UDF](/docs/en/sql-reference/statements/create/function.md) to estimate the parameter set of `ngrambf_v1`.
Query statements are as follows: + +```sql +CREATE FUNCTION bfEstimateFunctions [ON CLUSTER cluster] +AS +(total_number_of_all_grams, size_of_bloom_filter_in_bits) -> round((size_of_bloom_filter_in_bits / total_number_of_all_grams) * log(2)); + +CREATE FUNCTION bfEstimateBmSize [ON CLUSTER cluster] +AS +(total_number_of_all_grams, probability_of_false_positives) -> ceil((total_number_of_all_grams * log(probability_of_false_positives)) / log(1 / pow(2, log(2)))); + +CREATE FUNCTION bfEstimateFalsePositive [ON CLUSTER cluster] +AS +(total_number_of_all_grams, number_of_hash_functions, size_of_bloom_filter_in_bytes) -> pow(1 - exp(-number_of_hash_functions/ (size_of_bloom_filter_in_bytes / total_number_of_all_grams)), number_of_hash_functions); + +CREATE FUNCTION bfEstimateGramNumber [ON CLUSTER cluster] +AS +(number_of_hash_functions, probability_of_false_positives, size_of_bloom_filter_in_bytes) -> ceil(size_of_bloom_filter_in_bytes / (-number_of_hash_functions / log(1 - exp(log(probability_of_false_positives) / number_of_hash_functions)))) + +``` +To use these functions, we need to specify at least two parameters. +For example, if there are 4300 ngrams in the granule and we expect false positives to be less than 0.0001, the other parameters can be estimated by executing the following queries: + + +```sql +--- estimate number of bits in the filter +SELECT bfEstimateBmSize(4300, 0.0001) / 8 as size_of_bloom_filter_in_bytes; + +┌─size_of_bloom_filter_in_bytes─┐ │ 10304 │ └───────────────────────────────┘ + +--- estimate number of hash functions +SELECT bfEstimateFunctions(4300, bfEstimateBmSize(4300, 0.0001)) as number_of_hash_functions + +┌─number_of_hash_functions─┐ │ 13 │ └──────────────────────────┘ + +``` +Of course, you can also use these functions to estimate the parameters under other conditions. +The functions are based on the formulas described [here](https://hur.st/bloomfilter). + + #### Token Bloom Filter The same as `ngrambf_v1`, but stores tokens instead of ngrams. Tokens are sequences separated by non-alphanumeric characters. @@ -432,67 +494,58 @@ Syntax: `tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, ran - An experimental index to support approximate nearest neighbor (ANN) search. See [here](annindexes.md) for details. - An experimental inverted index to support full-text search. See [here](invertedindexes.md) for details. -## Example of index creation for Map data type - -``` -INDEX map_key_index mapKeys(map_column) TYPE bloom_filter GRANULARITY 1 -INDEX map_key_index mapValues(map_column) TYPE bloom_filter GRANULARITY 1 -``` - - -``` sql -INDEX sample_index (u64 * length(s)) TYPE minmax GRANULARITY 4 -INDEX sample_index2 (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARITY 4 -INDEX sample_index3 (lower(str), str) TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 4 -``` - ### Functions Support {#functions-support} Conditions in the `WHERE` clause contain calls of functions that operate with columns. If the column is a part of an index, ClickHouse tries to use this index when performing the functions. ClickHouse supports different subsets of functions for using indexes. -The `set` index can be used with all functions. Function subsets for other indexes are shown in the table below. +Indexes of type `set` can be utilized by all functions.
The other index types are supported as follows: -| Function (operator) / Index | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter | -|------------------------------------------------------------------------------------------------------------|-------------|--------|-------------|-------------|---------------| -| [equals (=, ==)](/docs/en/sql-reference/functions/comparison-functions.md/#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [notEquals(!=, <>)](/docs/en/sql-reference/functions/comparison-functions.md/#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [like](/docs/en/sql-reference/functions/string-search-functions.md/#function-like) | ✔ | ✔ | ✔ | ✔ | ✗ | -| [notLike](/docs/en/sql-reference/functions/string-search-functions.md/#function-notlike) | ✔ | ✔ | ✔ | ✔ | ✗ | -| [startsWith](/docs/en/sql-reference/functions/string-functions.md/#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | -| [endsWith](/docs/en/sql-reference/functions/string-functions.md/#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ | -| [multiSearchAny](/docs/en/sql-reference/functions/string-search-functions.md/#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ | -| [in](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [notIn](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [less (<)](/docs/en/sql-reference/functions/comparison-functions.md/#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [greater (>)](/docs/en/sql-reference/functions/comparison-functions.md/#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [lessOrEquals (<=)](/docs/en/sql-reference/functions/comparison-functions.md/#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [greaterOrEquals (>=)](/docs/en/sql-reference/functions/comparison-functions.md/#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [empty](/docs/en/sql-reference/functions/array-functions#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [notEmpty](/docs/en/sql-reference/functions/array-functions#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | -| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ | -| hasTokenOrNull | ✗ | ✗ | ✗ | ✔ | ✗ | -| hasTokenCaseInsensitive | ✗ | ✗ | ✗ | ✔ | ✗ | -| hasTokenCaseInsensitiveOrNull | ✗ | ✗ | ✗ | ✔ | ✗ | +| Function (operator) / Index | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter | inverted | +|------------------------------------------------------------------------------------------------------------|-------------|--------|------------|------------|--------------|----------| +| [equals (=, ==)](/docs/en/sql-reference/functions/comparison-functions.md/#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notEquals(!=, <>)](/docs/en/sql-reference/functions/comparison-functions.md/#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | +| [like](/docs/en/sql-reference/functions/string-search-functions.md/#function-like) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ | +| [notLike](/docs/en/sql-reference/functions/string-search-functions.md/#function-notlike) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ | +| [startsWith](/docs/en/sql-reference/functions/string-functions.md/#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ | +| [endsWith](/docs/en/sql-reference/functions/string-functions.md/#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ | ✔ | +| [multiSearchAny](/docs/en/sql-reference/functions/string-search-functions.md/#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ | ✔ | +| [in](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notIn](/docs/en/sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | +| [less 
(<)](/docs/en/sql-reference/functions/comparison-functions.md/#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | +| [greater (>)](/docs/en/sql-reference/functions/comparison-functions.md/#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | +| [lessOrEquals (<=)](/docs/en/sql-reference/functions/comparison-functions.md/#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | +| [greaterOrEquals (>=)](/docs/en/sql-reference/functions/comparison-functions.md/#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | +| [empty](/docs/en/sql-reference/functions/array-functions#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | +| [notEmpty](/docs/en/sql-reference/functions/array-functions#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ | +| [has](/docs/en/sql-reference/functions/array-functions#function-has) | ✗ | ✗ | ✔ | ✔ | ✔ | ✔ | +| [hasAny](/docs/en/sql-reference/functions/array-functions#function-hasAny) | ✗ | ✗ | ✗ | ✗ | ✔ | ✗ | +| [hasAll](/docs/en/sql-reference/functions/array-functions#function-hasAll) | ✗ | ✗ | ✗ | ✗ | ✔ | ✗ | +| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ | ✔ | +| hasTokenOrNull | ✗ | ✗ | ✗ | ✔ | ✗ | ✔ | +| hasTokenCaseInsensitive (*) | ✗ | ✗ | ✗ | ✔ | ✗ | ✗ | +| hasTokenCaseInsensitiveOrNull (*) | ✗ | ✗ | ✗ | ✔ | ✗ | ✗ | Functions with a constant argument that is less than ngram size can’t be used by `ngrambf_v1` for query optimization. +(*) For `hasTokenCaseInsensitive` and `hasTokenCaseInsensitiveOrNull` to be effective, the `tokenbf_v1` index must be created on lowercased data, for example `INDEX idx (lower(str_col)) TYPE tokenbf_v1(512, 3, 0)`. + :::note Bloom filters can have false positive matches, so the `ngrambf_v1`, `tokenbf_v1`, and `bloom_filter` indexes can not be used for optimizing queries where the result of a function is expected to be false. For example: -- Can be optimized: - - `s LIKE '%test%'` - - `NOT s NOT LIKE '%test%'` - - `s = 1` - - `NOT s != 1` - - `startsWith(s, 'test')` -- Can not be optimized: - - `NOT s LIKE '%test%'` - - `s NOT LIKE '%test%'` - - `NOT s = 1` - - `s != 1` - - `NOT startsWith(s, 'test')` +- Can be optimized: + - `s LIKE '%test%'` + - `NOT s NOT LIKE '%test%'` + - `s = 1` + - `NOT s != 1` + - `startsWith(s, 'test')` +- Can not be optimized: + - `NOT s LIKE '%test%'` + - `s NOT LIKE '%test%'` + - `NOT s = 1` + - `s != 1` + - `NOT startsWith(s, 'test')` ::: @@ -605,11 +658,11 @@ TTL expr Type of TTL rule may follow each TTL expression. It affects an action which is to be done once the expression is satisfied (reaches current time): -- `DELETE` - delete expired rows (default action); -- `RECOMPRESS codec_name` - recompress data part with the `codec_name`; -- `TO DISK 'aaa'` - move part to the disk `aaa`; -- `TO VOLUME 'bbb'` - move part to the disk `bbb`; -- `GROUP BY` - aggregate expired rows. +- `DELETE` - delete expired rows (default action); +- `RECOMPRESS codec_name` - recompress data part with the `codec_name`; +- `TO DISK 'aaa'` - move part to the disk `aaa`; +- `TO VOLUME 'bbb'` - move part to the disk `bbb`; +- `GROUP BY` - aggregate expired rows. `DELETE` action can be used together with `WHERE` clause to delete only some of the expired rows based on a filtering condition: ``` sql @@ -674,7 +727,7 @@ TTL d + INTERVAL 1 MONTH RECOMPRESS CODEC(ZSTD(17)), d + INTERVAL 1 YEAR RECOMPR SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0; ``` -Creating a table, where expired rows are aggregated. In result rows `x` contains the maximum value accross the grouped rows, `y` — the minimum value, and `d` — any occasional value from grouped rows. 
+Creating a table where expired rows are aggregated. In the resulting rows, `x` contains the maximum value across the grouped rows, `y` — the minimum value, and `d` — any occasional value from the grouped rows. ``` sql CREATE TABLE table_for_aggregation @@ -713,16 +766,22 @@ Data part is the minimum movable unit for `MergeTree`-engine tables. The data be ### Terms {#terms} -- Disk — Block device mounted to the filesystem. -- Default disk — Disk that stores the path specified in the [path](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-path) server setting. -- Volume — Ordered set of equal disks (similar to [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures)). -- Storage policy — Set of volumes and the rules for moving data between them. +- Disk — Block device mounted to the filesystem. +- Default disk — Disk that stores the path specified in the [path](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-path) server setting. +- Volume — Ordered set of equal disks (similar to [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures)). +- Storage policy — Set of volumes and the rules for moving data between them. The names given to the described entities can be found in the system tables, [system.storage_policies](/docs/en/operations/system-tables/storage_policies.md/#system_tables-storage_policies) and [system.disks](/docs/en/operations/system-tables/disks.md/#system_tables-disks). To apply one of the configured storage policies for a table, use the `storage_policy` setting of `MergeTree`-engine family tables. ### Configuration {#table_engine-mergetree-multiple-volumes_configure} -Disks, volumes and storage policies should be declared inside the `<storage_configuration>` tag either in the main file `config.xml` or in a distinct file in the `config.d` directory. +Disks, volumes and storage policies should be declared inside the `<storage_configuration>` tag in a file in the `config.d` directory. + +:::tip +Disks can also be declared in the `SETTINGS` section of a query. This is useful +for ad-hoc analysis to temporarily attach a disk that is, for example, hosted at a URL. +See [dynamic storage](#dynamic-storage) for more details. +::: Configuration structure: @@ -750,9 +809,9 @@ Configuration structure: Tags: -- `<disk_name_N>` — Disk name. Names must be different for all disks. -- `path` — path under which a server will store data (`data` and `shadow` folders), should be terminated with ‘/’. -- `keep_free_space_bytes` — the amount of free disk space to be reserved. +- `<disk_name_N>` — Disk name. Names must be different for all disks. +- `path` — path under which a server will store data (`data` and `shadow` folders), should be terminated with ‘/’. +- `keep_free_space_bytes` — the amount of free disk space to be reserved. The order of the disk definition is not important. @@ -788,14 +847,14 @@ Storage policies configuration markup: Tags: -- `policy_name_N` — Policy name. Policy names must be unique. -- `volume_name_N` — Volume name. Volume names must be unique. -- `disk` — a disk within a volume. -- `max_data_part_size_bytes` — the maximum size of a part that can be stored on any of the volume’s disks. If the a size of a merged part estimated to be bigger than `max_data_part_size_bytes` then this part will be written to a next volume. Basically this feature allows to keep new/small parts on a hot (SSD) volume and move them to a cold (HDD) volume when they reach large size. Do not use this setting if your policy has only one volume.
-- `move_factor` — when the amount of available space gets lower than this factor, data automatically starts to move on the next volume if any (by default, 0.1). ClickHouse sorts existing parts by size from largest to smallest (in descending order) and selects parts with the total size that is sufficient to meet the `move_factor` condition. If the total size of all parts is insufficient, all parts will be moved. -- `prefer_not_to_merge` — Disables merging of data parts on this volume. When this setting is enabled, merging data on this volume is not allowed. This allows controlling how ClickHouse works with slow disks. -- `perform_ttl_move_on_insert` — Disables TTL move on data part INSERT. By default if we insert a data part that already expired by the TTL move rule it immediately goes to a volume/disk declared in move rule. This can significantly slowdown insert in case if destination volume/disk is slow (e.g. S3). -- `load_balancing` - Policy for disk balancing, `round_robin` or `least_used`. +- `policy_name_N` — Policy name. Policy names must be unique. +- `volume_name_N` — Volume name. Volume names must be unique. +- `disk` — a disk within a volume. +- `max_data_part_size_bytes` — the maximum size of a part that can be stored on any of the volume’s disks. If the size of a merged part is estimated to be bigger than `max_data_part_size_bytes`, then this part will be written to the next volume. Basically, this feature allows you to keep new/small parts on a hot (SSD) volume and move them to a cold (HDD) volume when they reach a large size. Do not use this setting if your policy has only one volume. +- `move_factor` — when the amount of available space gets lower than this factor, data automatically starts to move to the next volume, if any (by default, 0.1). ClickHouse sorts existing parts by size from largest to smallest (in descending order) and selects parts with the total size that is sufficient to meet the `move_factor` condition. If the total size of all parts is insufficient, all parts will be moved. +- `prefer_not_to_merge` — Disables merging of data parts on this volume. When this setting is enabled, merging data on this volume is not allowed. This allows controlling how ClickHouse works with slow disks. +- `perform_ttl_move_on_insert` — Disables TTL move on data part INSERT. By default, if we insert a data part that has already expired by the TTL move rule, it immediately goes to the volume/disk declared in the move rule. This can significantly slow down inserts if the destination volume/disk is slow (e.g. S3). +- `load_balancing` - Policy for disk balancing, `round_robin` or `least_used`. Configuration examples: @@ -865,16 +924,97 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' The `default` storage policy implies using only one volume, which consists of only one disk given in `<path>`. You can change the storage policy after table creation with the [ALTER TABLE ... MODIFY SETTING] query; the new policy should include all old disks and volumes with the same names. -The number of threads performing background moves of data parts can be changed by [background_move_pool_size](/docs/en/operations/settings/settings.md/#background_move_pool_size) setting. +The number of threads performing background moves of data parts can be changed by the [background_move_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_move_pool_size) setting. + +### Dynamic Storage + +This example query shows how to attach a table stored at a URL and configure the +remote storage within the query.
The web storage is not configured in the ClickHouse +configuration files; all the settings are in the CREATE/ATTACH query. + +:::note +The example uses `type=web`, but any disk type can be configured as dynamic, even Local disk. Local disks require a path argument to be inside the server config parameter `custom_local_disks_base_directory`, which has no default, so set that also when using local disk. +::: + +```sql +ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' +( + price UInt32, + date Date, + postcode1 LowCardinality(String), + postcode2 LowCardinality(String), + type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), + is_new UInt8, + duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), + addr1 String, + addr2 String, + street LowCardinality(String), + locality LowCardinality(String), + town LowCardinality(String), + district LowCardinality(String), + county LowCardinality(String) +) +ENGINE = MergeTree +ORDER BY (postcode1, postcode2, addr1, addr2) + # highlight-start + SETTINGS disk = disk( + type=web, + endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/' + ); + # highlight-end +``` + +### Nested Dynamic Storage + +This example query builds on the above dynamic disk configuration and shows how to +use a local disk to cache data from a table stored at a URL. Neither the cache disk +nor the web storage is configured in the ClickHouse configuration files; both are +configured in the CREATE/ATTACH query settings. + +In the settings highlighted below notice that the disk of `type=web` is nested within +the disk of `type=cache`. + +```sql +ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' +( + price UInt32, + date Date, + postcode1 LowCardinality(String), + postcode2 LowCardinality(String), + type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), + is_new UInt8, + duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), + addr1 String, + addr2 String, + street LowCardinality(String), + locality LowCardinality(String), + town LowCardinality(String), + district LowCardinality(String), + county LowCardinality(String) +) +ENGINE = MergeTree +ORDER BY (postcode1, postcode2, addr1, addr2) + # highlight-start + SETTINGS disk = disk( + type=cache, + max_size='1Gi', + path='/var/lib/clickhouse/custom_disk_cache/', + disk=disk( + type=web, + endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/' + ) + ); + # highlight-end +``` ### Details {#details} In the case of `MergeTree` tables, data is getting to disk in different ways: -- As a result of an insert (`INSERT` query). -- During background merges and [mutations](/docs/en/sql-reference/statements/alter/index.md#alter-mutations). -- When downloading from another replica. -- As a result of partition freezing [ALTER TABLE … FREEZE PARTITION](/docs/en/sql-reference/statements/alter/partition.md/#alter_freeze-partition). +- As a result of an insert (`INSERT` query). +- During background merges and [mutations](/docs/en/sql-reference/statements/alter/index.md#alter-mutations). +- When downloading from another replica. +- As a result of partition freezing [ALTER TABLE … FREEZE PARTITION](/docs/en/sql-reference/statements/alter/partition.md/#alter_freeze-partition). 
In all these cases except for mutations and partition freezing, a part is stored on a volume and a disk according to the given storage policy:

@@ -898,7 +1038,7 @@ User can assign new big parts to different disks of a [JBOD](https://en.wikipedi

## Using S3 for Data Storage {#table_engine-mergetree-s3}

:::note
-Google Cloud Storage (GCS) is also supported using the type `s3`. See [GCS backed MergeTree](/docs/en/integrations/data-ingestion/s3/gcs-merge-tree.md).
+Google Cloud Storage (GCS) is also supported using the type `s3`. See [GCS backed MergeTree](/docs/en/integrations/gcs).
:::

`MergeTree` family table engines can store data to [S3](https://aws.amazon.com/s3/) using a disk with type `s3`.

@@ -915,7 +1055,11 @@ Configuration markup:
            <access_key_id>your_access_key_id</access_key_id>
            <secret_access_key>your_secret_access_key</secret_access_key>
+           <header>Authorization: Bearer SOME-TOKEN</header>
            <server_side_encryption_customer_key_base64>your_base64_encoded_customer_key</server_side_encryption_customer_key_base64>
+           <server_side_encryption_kms_key_id>your_kms_key_id</server_side_encryption_kms_key_id>
+           <server_side_encryption_kms_encryption_context>your_kms_encryption_context</server_side_encryption_kms_encryption_context>
+           <server_side_encryption_kms_bucket_key_enabled>true</server_side_encryption_kms_bucket_key_enabled>
            <proxy>
                <uri>http://proxy1</uri>
                <uri>http://proxy2</uri>
            </proxy>

@@ -947,29 +1091,34 @@ ClickHouse versions 22.3 through 22.7 use a different cache configuration, see

Required parameters:

-- `endpoint` — S3 endpoint URL in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint URL should contain a bucket and root path to store data.
-- `access_key_id` — S3 access key id.
-- `secret_access_key` — S3 secret access key.
+- `endpoint` — S3 endpoint URL in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). The endpoint URL should contain a bucket and the root path to store data.
+- `access_key_id` — S3 access key id.
+- `secret_access_key` — S3 secret access key.

Optional parameters:

-- `region` — S3 region name.
-- `support_batch_delete` — This controls the check to see if batch deletes are supported. Set this to `false` when using Google Cloud Storage (GCS) as GCS does not support batch deletes and preventing the checks will prevent error messages in the logs.
-- `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`.
-- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`.
-- `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL.
-- `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`.
-- `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`.
-- `retry_attempts` — Number of retry attempts in case of failed request. Default value is `10`.
-- `single_read_retries` — Number of retry attempts in case of connection drop during read. Default value is `4`.
-- `min_bytes_for_seek` — Minimal number of bytes to use seek operation instead of sequential read. Default value is `1 Mb`.
-- `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks//`.
-- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`.
-- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set.
-- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited).
-- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`.
-- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
-- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
+- `region` — S3 region name.
+- `support_batch_delete` — This controls the check to see if batch deletes are supported. Set this to `false` when using Google Cloud Storage (GCS), as GCS does not support batch deletes; disabling the check prevents error messages in the logs.
+- `use_environment_credentials` — Reads AWS credentials from the environment variables `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, and `AWS_SESSION_TOKEN` if they exist.
Default value is `false`.
+- `use_insecure_imds_request` — If set to `true`, the S3 client will use an insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`.
+- `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`.
+- `proxy` — Proxy configuration for the S3 endpoint. Each `uri` element inside the `proxy` block should contain a proxy URL.
+- `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`.
+- `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`.
+- `retry_attempts` — Number of retry attempts in case of a failed request. Default value is `10`.
+- `single_read_retries` — Number of retry attempts in case of a connection drop during read. Default value is `4`.
+- `min_bytes_for_seek` — Minimal number of bytes to use a seek operation instead of sequential read. Default value is `1 Mb`.
+- `metadata_path` — Path on the local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
+- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`.
+- `header` — Adds the specified HTTP header to a request to the given endpoint. Optional, can be specified multiple times.
+- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set.
+- `server_side_encryption_kms_key_id` — If specified, required headers for accessing S3 objects with [SSE-KMS encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingKMSEncryption.html) will be set. If an empty string is specified, the AWS managed S3 key will be used. Optional.
+- `server_side_encryption_kms_encryption_context` — If specified alongside `server_side_encryption_kms_key_id`, the given encryption context header for SSE-KMS will be set. Optional.
+- `server_side_encryption_kms_bucket_key_enabled` — If specified alongside `server_side_encryption_kms_key_id`, the header to enable S3 bucket keys for SSE-KMS will be set. Optional, can be `true` or `false`, defaults to nothing (matches the bucket-level setting).
+- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited).
+- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting the requests-per-second limit. By default (`0`) it equals `s3_max_put_rps`.
+- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
+- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting the requests-per-second limit. By default (`0`) it equals `s3_max_get_rps`.

### Configuring the cache

@@ -984,12 +1133,12 @@ This is the cache configuration from above:
```

These parameters define the cache layer:
-- `type` — If a disk is of type `cache` it caches mark and index files in memory.
-- `disk` — The name of the disk that will be cached.
+- `type` — If a disk is of type `cache`, it caches mark and index files in memory.
+- `disk` — The name of the disk that will be cached.

Cache parameters:
-- `path` — The path where metadata for the cache is stored.
-- `max_size` — The size (amount of memory) that the cache can grow to.
+- `path` — The path where metadata for the cache is stored.
+- `max_size` — The size (amount of memory) that the cache can grow to.
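After reloading the configuration, you can confirm that the cache disk (and its underlying disk) were picked up by the server. A minimal sketch; the column names used here (`name`, `path`, `free_space`, `total_space`) are part of `system.disks`, though the exact set of columns available varies between ClickHouse versions:

```sql
-- List the disks the server currently knows about, including any
-- cache disks layered over remote storage.
SELECT name, path, free_space, total_space
FROM system.disks;
```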
:::tip
There are several other cache parameters that you can use to tune your storage; see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) for details.
:::

@@ -1070,11 +1219,12 @@ Authentication parameters (the disk will try all available methods **and** Manag

* `account_name` and `account_key` - For authentication using Shared Key.

Limit parameters (mainly for internal usage):
-* `max_single_part_upload_size` - Limits the size of a single block upload to Blob Storage.
+* `s3_max_single_part_upload_size` - Limits the size of a single block upload to Blob Storage.
* `min_bytes_for_seek` - Limits the size of a seekable region.
* `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage.
* `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage.
* `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated.
+* `s3_max_inflight_parts_for_one_file` - Limits the number of PUT requests that can be run concurrently for one object.

Other parameters:
* `metadata_path` - Path on the local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.

@@ -1084,15 +1234,15 @@ Other parameters:

Examples of working configurations can be found in the integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)).

-  :::warning Zero-copy replication is not ready for production
+  :::note Zero-copy replication is not ready for production
  Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use.
  :::

## Virtual Columns {#virtual-columns}

-- `_part` — Name of a part.
-- `_part_index` — Sequential index of the part in the query result.
-- `_partition_id` — Name of a partition.
-- `_part_uuid` — Unique part identifier (if enabled MergeTree setting `assign_part_uuids`).
-- `_partition_value` — Values (a tuple) of a `partition by` expression.
-- `_sample_factor` — Sample factor (from the query).
+- `_part` — Name of a part.
+- `_part_index` — Sequential index of the part in the query result.
+- `_partition_id` — Name of a partition.
+- `_part_uuid` — Unique part identifier (if the MergeTree setting `assign_part_uuids` is enabled).
+- `_partition_value` — Values (a tuple) of a `partition by` expression.
+- `_sample_factor` — Sample factor (from the query).

diff --git a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md
index f5d81182898..7db2f3b465a 100644
--- a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md
@@ -20,17 +20,17 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
     name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
     name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
     ...
-) ENGINE = ReplacingMergeTree([ver])
+) ENGINE = ReplacingMergeTree([ver [, is_deleted]])
[PARTITION BY expr]
[ORDER BY expr]
[PRIMARY KEY expr]
[SAMPLE BY expr]
-[SETTINGS name=value, ...]
+[SETTINGS name=value, clean_deleted_rows=value, ...]
```

For a description of request parameters, see [statement description](../../../sql-reference/statements/create/table.md).

-:::warning
+:::note
Uniqueness of rows is determined by the `ORDER BY` table section, not `PRIMARY KEY`.
:::

@@ -88,6 +88,20 @@ SELECT * FROM mySecondReplacingMT FINAL;
└─────┴─────────┴─────────────────────┘
```

+### is_deleted
+
+`is_deleted` — Name of a column used during a merge to determine whether the data in this row represents the state or is to be deleted; `1` is a “deleted” row, `0` is a “state” row.
+
+  Column data type — `UInt8`.
+
+:::note
+`is_deleted` can only be enabled when `ver` is used.
+
+The row is deleted when `OPTIMIZE ... FINAL CLEANUP` or `OPTIMIZE ... FINAL` is used, or if the engine setting `clean_deleted_rows` has been set to `Always`.
+
+No matter the operation on the data, the version must be increased. If two inserted rows have the same version number, the last inserted row is the one kept.
+:::
+
## Query clauses

When creating a `ReplacingMergeTree` table, the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required as when creating a `MergeTree` table.

@@ -96,7 +110,7 @@ When creating a `ReplacingMergeTree` table the same [clauses](../../../engines/t

Deprecated Method for Creating a Table

-:::warning
+:::note
Do not use this method in new projects and, if possible, switch old projects to the method described above.
:::

@@ -111,6 +125,6 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]

All of the parameters except `ver` have the same meaning as in `MergeTree`.

-- `ver` - column with the version. Optional parameter. For a description, see the text above.
+- `ver` - column with the version. Optional parameter. For a description, see the text above.

diff --git a/docs/en/engines/table-engines/mergetree-family/replication.md b/docs/en/engines/table-engines/mergetree-family/replication.md
index 37ab8ac9fd3..01782ac25bd 100644
--- a/docs/en/engines/table-engines/mergetree-family/replication.md
+++ b/docs/en/engines/table-engines/mergetree-family/replication.md
@@ -8,24 +8,31 @@ sidebar_label: Data Replication

:::note
In ClickHouse Cloud replication is managed for you. Please create your tables without adding arguments. For example, in the text below you would replace:
+
+```sql
+ENGINE = ReplicatedReplacingMergeTree(
+    '/clickhouse/tables/{shard}/table_name',
+    '{replica}',
+    ver
+)
```
-ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/table_name', '{replica}', ver)
-```
+

with:

-```
+
+```sql
ENGINE = ReplicatedReplacingMergeTree
```
:::

Replication is only supported for tables in the MergeTree family:

-- ReplicatedMergeTree
-- ReplicatedSummingMergeTree
-- ReplicatedReplacingMergeTree
-- ReplicatedAggregatingMergeTree
-- ReplicatedCollapsingMergeTree
-- ReplicatedVersionedCollapsingMergeTree
-- ReplicatedGraphiteMergeTree
+- ReplicatedMergeTree
+- ReplicatedSummingMergeTree
+- ReplicatedReplacingMergeTree
+- ReplicatedAggregatingMergeTree
+- ReplicatedCollapsingMergeTree
+- ReplicatedVersionedCollapsingMergeTree
+- ReplicatedGraphiteMergeTree

Replication works at the level of an individual table, not the entire server. A server can store both replicated and non-replicated tables at the same time.
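For self-managed deployments, a table becomes replicated simply by using one of the `Replicated*` engines. A minimal sketch; the table, column names, and the Keeper path here are illustrative (the engine parameters are described in detail later on this page):

```sql
-- The {shard} and {replica} macros are substituted from the server's
-- macros configuration, so the same statement can run on every replica.
CREATE TABLE replicated_demo
(
    EventDate Date,
    CounterID UInt32,
    Value UInt64
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/replicated_demo', '{replica}')
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate);
```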
@@ -35,15 +42,15 @@ Compressed data for `INSERT` and `ALTER` queries is replicated (for more informa

`CREATE`, `DROP`, `ATTACH`, `DETACH` and `RENAME` queries are executed on a single server and are not replicated:

-- The `CREATE TABLE` query creates a new replicatable table on the server where the query is run. If this table already exists on other servers, it adds a new replica.
-- The `DROP TABLE` query deletes the replica located on the server where the query is run.
-- The `RENAME` query renames the table on one of the replicas. In other words, replicated tables can have different names on different replicas.
+- The `CREATE TABLE` query creates a new replicatable table on the server where the query is run. If this table already exists on other servers, it adds a new replica.
+- The `DROP TABLE` query deletes the replica located on the server where the query is run.
+- The `RENAME` query renames the table on one of the replicas. In other words, replicated tables can have different names on different replicas.

-ClickHouse uses [ClickHouse Keeper](/docs/en/guides/sre/keeper/clickhouse-keeper.md) for storing replicas meta information. It is possible to use ZooKeeper version 3.4.5 or newer, but ClickHouse Keeper is recommended.
+ClickHouse uses [ClickHouse Keeper](/docs/en/guides/sre/keeper/index.md) for storing replica metadata. It is possible to use ZooKeeper version 3.4.5 or newer, but ClickHouse Keeper is recommended.

To use replication, set parameters in the [zookeeper](/docs/en/operations/server-configuration-parameters/settings.md/#server-settings_zookeeper) server configuration section.

-:::warning
+:::note
Don’t neglect the security setting. ClickHouse supports the `digest` [ACL scheme](https://zookeeper.apache.org/doc/current/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) of the ZooKeeper security subsystem.
:::

@@ -112,7 +119,7 @@ For each `INSERT` query, approximately ten entries are added to ZooKeeper throug

For very large clusters, you can use different ZooKeeper clusters for different shards. However, from our experience this has not proven necessary based on production clusters with approximately 300 servers.

-Replication is asynchronous and multi-master. `INSERT` queries (as well as `ALTER`) can be sent to any available server. Data is inserted on the server where the query is run, and then it is copied to the other servers. Because it is asynchronous, recently inserted data appears on the other replicas with some latency. If part of the replicas are not available, the data is written when they become available. If a replica is available, the latency is the amount of time it takes to transfer the block of compressed data over the network. The number of threads performing background tasks for replicated tables can be set by [background_schedule_pool_size](/docs/en/operations/settings/settings.md/#background_schedule_pool_size) setting.
+Replication is asynchronous and multi-master. `INSERT` queries (as well as `ALTER`) can be sent to any available server. Data is inserted on the server where the query is run, and then it is copied to the other servers. Because it is asynchronous, recently inserted data appears on the other replicas with some latency. If some of the replicas are not available, the data is written when they become available. If a replica is available, the latency is the amount of time it takes to transfer the block of compressed data over the network.
The number of threads performing background tasks for replicated tables can be set by the [background_schedule_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_schedule_pool_size) setting.

The `ReplicatedMergeTree` engine uses a separate thread pool for replicated fetches. The size of the pool is limited by the [background_fetches_pool_size](/docs/en/operations/settings/settings.md/#background_fetches_pool_size) setting, which can be tuned with a server restart.

@@ -144,7 +151,7 @@ ENGINE = ReplicatedReplacingMergeTree

The `Replicated` prefix is added to the table engine name. For example: `ReplicatedMergeTree`.

:::tip
-Adding `Replicated` is optional in ClickHouse Cloud, as all of the tables are replicated.
+Adding `Replicated` is optional in ClickHouse Cloud, as all of the tables are replicated.
:::

### Replicated\*MergeTree parameters

@@ -309,8 +316,8 @@ Create a MergeTree table with a different name. Move all the data from the direc

If you want to get rid of a `ReplicatedMergeTree` table without launching the server:

-- Delete the corresponding `.sql` file in the metadata directory (`/var/lib/clickhouse/metadata/`).
-- Delete the corresponding path in ClickHouse Keeper (`/path_to_table/replica_name`).
+- Delete the corresponding `.sql` file in the metadata directory (`/var/lib/clickhouse/metadata/`).
+- Delete the corresponding path in ClickHouse Keeper (`/path_to_table/replica_name`).

After this, you can launch the server, create a `MergeTree` table, move the data to its directory, and then restart the server.

@@ -320,8 +327,8 @@ If the data in ClickHouse Keeper was lost or damaged, you can save data by movin

**See Also**

-- [background_schedule_pool_size](/docs/en/operations/settings/settings.md/#background_schedule_pool_size)
-- [background_fetches_pool_size](/docs/en/operations/settings/settings.md/#background_fetches_pool_size)
-- [execute_merges_on_single_replica_time_threshold](/docs/en/operations/settings/settings.md/#execute-merges-on-single-replica-time-threshold)
-- [max_replicated_fetches_network_bandwidth](/docs/en/operations/settings/merge-tree-settings.md/#max_replicated_fetches_network_bandwidth)
-- [max_replicated_sends_network_bandwidth](/docs/en/operations/settings/merge-tree-settings.md/#max_replicated_sends_network_bandwidth)
+- [background_schedule_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_schedule_pool_size)
+- [background_fetches_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_fetches_pool_size)
+- [execute_merges_on_single_replica_time_threshold](/docs/en/operations/settings/settings.md/#execute-merges-on-single-replica-time-threshold)
+- [max_replicated_fetches_network_bandwidth](/docs/en/operations/settings/merge-tree-settings.md/#max_replicated_fetches_network_bandwidth)
+- [max_replicated_sends_network_bandwidth](/docs/en/operations/settings/merge-tree-settings.md/#max_replicated_sends_network_bandwidth)

diff --git a/docs/en/engines/table-engines/mergetree-family/summingmergetree.md b/docs/en/engines/table-engines/mergetree-family/summingmergetree.md
index b2b6272c58e..c3cbb0d2a03 100644
--- a/docs/en/engines/table-engines/mergetree-family/summingmergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/summingmergetree.md
@@ -44,7 +44,7 @@ When creating a `SummingMergeTree` table the same [clauses](../../../engines/tab

Deprecated Method for Creating a Table

-:::warning
+:::note
Do not use this method in new projects and, if possible, switch
old projects to the method described above.
:::

@@ -59,7 +59,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]

All of the parameters except `columns` have the same meaning as in `MergeTree`.

-- `columns` — tuple with names of columns values of which will be summarized. Optional parameter. For a description, see the text above.
+- `columns` — a tuple with the names of the columns whose values will be summarized. Optional parameter. For a description, see the text above.

@@ -122,8 +122,8 @@ Table can have nested data structures that are processed in a special way.

If the name of a nested table ends with `Map` and it contains at least two columns that meet the following criteria:

-- the first column is numeric `(*Int*, Date, DateTime)` or a string `(String, FixedString)`, let’s call it `key`,
-- the other columns are arithmetic `(*Int*, Float32/64)`, let’s call it `(values...)`,
+- the first column is numeric `(*Int*, Date, DateTime)` or a string `(String, FixedString)`, let’s call it `key`,
+- the other columns are arithmetic `(*Int*, Float32/64)`, let’s call them `(values...)`,

then this nested table is interpreted as a mapping of `key => (values...)`, and when merging its rows, the elements of the two data sets are merged by `key` with a summation of the corresponding `(values...)`.

@@ -186,3 +186,7 @@ ARRAY JOIN

When requesting data, use the [sumMap(key, value)](../../../sql-reference/aggregate-functions/reference/summap.md) function for aggregation of `Map`.

For a nested data structure, you do not need to specify its columns in the tuple of columns for summation.
+
+## Related Content
+
+- Blog: [Using Aggregate Combinators in ClickHouse](https://clickhouse.com/blog/aggregate-functions-combinators-in-clickhouse-for-arrays-maps-and-states)

diff --git a/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md b/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md
index 2891907f79a..22a15c0e15e 100644
--- a/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md
@@ -8,8 +8,8 @@ sidebar_label: VersionedCollapsingMergeTree

This engine:

-- Allows quick writing of object states that are continually changing.
-- Deletes old object states in the background. This significantly reduces the volume of storage.
+- Allows quick writing of object states that are continually changing.
+- Deletes old object states in the background. This significantly reduces the volume of storage.

See the section [Collapsing](#table_engines_versionedcollapsingmergetree) for details.

@@ -58,7 +58,7 @@ When creating a `VersionedCollapsingMergeTree` table, the same [clauses](../../.

Deprecated Method for Creating a Table

-:::warning
+:::note
Do not use this method in new projects. If possible, switch old projects to the method described above.
:::

@@ -73,11 +73,11 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]

All of the parameters except `sign` and `version` have the same meaning as in `MergeTree`.

-- `sign` — Name of the column with the type of row: `1` is a “state” row, `-1` is a “cancel” row.
+- `sign` — Name of the column with the type of row: `1` is a “state” row, `-1` is a “cancel” row.

    Column Data Type — `Int8`.

-- `version` — Name of the column with the version of the object state.
+- `version` — Name of the column with the version of the object state.

    The column data type should be `UInt*`.
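To illustrate how the two special columns fit into a table definition, here is a minimal sketch; the table and column names are hypothetical:

```sql
CREATE TABLE user_activity
(
    UserID UInt64,
    PageViews UInt8,
    Duration UInt8,
    Sign Int8,     -- 1 = "state" row, -1 = "cancel" row
    Version UInt8  -- version of the object state
)
ENGINE = VersionedCollapsingMergeTree(Sign, Version)
ORDER BY UserID;
```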
diff --git a/docs/en/engines/table-engines/special/buffer.md b/docs/en/engines/table-engines/special/buffer.md
index 5f28fafbc26..f7d84b9b452 100644
--- a/docs/en/engines/table-engines/special/buffer.md
+++ b/docs/en/engines/table-engines/special/buffer.md
@@ -86,7 +86,7 @@ If the set of columns in the Buffer table does not match the set of columns in a

If the types do not match for one of the columns in the Buffer table and a subordinate table, an error message is entered in the server log, and the buffer is cleared. The same happens if the subordinate table does not exist when the buffer is flushed.

-:::warning
+:::note
Running ALTER on the Buffer table in releases made before 26 Oct 2021 will cause a `Block structure mismatch` error (see [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117) and [#30565](https://github.com/ClickHouse/ClickHouse/pull/30565)), so deleting the Buffer table and then recreating it is the only option. Check that this error is fixed in your release before trying to run ALTER on the Buffer table.
:::

diff --git a/docs/en/engines/table-engines/special/dictionary.md b/docs/en/engines/table-engines/special/dictionary.md
index e487ca2002f..3517ebfcdc6 100644
--- a/docs/en/engines/table-engines/special/dictionary.md
+++ b/docs/en/engines/table-engines/special/dictionary.md
@@ -6,7 +6,7 @@ sidebar_label: Dictionary

# Dictionary Table Engine

-The `Dictionary` engine displays the [dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) data as a ClickHouse table.
+The `Dictionary` engine displays the [dictionary](../../../sql-reference/dictionaries/index.md) data as a ClickHouse table.

## Example {#example}

@@ -97,4 +97,4 @@ select * from products limit 1;

**See Also**

-- [Dictionary function](../../../sql-reference/table-functions/dictionary.md#dictionary-function)
+- [Dictionary function](../../../sql-reference/table-functions/dictionary.md#dictionary-function)

diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md
index f4f541843d3..d1a0b13b363 100644
--- a/docs/en/engines/table-engines/special/distributed.md
+++ b/docs/en/engines/table-engines/special/distributed.md
@@ -141,6 +141,10 @@ Clusters are configured in the [server configuration file](../../../operations/c

be used as current user for the query.
-->
+
+
+
+

1


@@ -184,7 +188,7 @@ The parameters `host`, `port`, and optionally `user`, `password`, `secure`, `com

- `host` – The address of the remote server. You can use either the domain or the IPv4 or IPv6 address. If you specify the domain, the server makes a DNS request when it starts, and the result is stored as long as the server is running. If the DNS request fails, the server does not start. If you change the DNS record, restart the server.
- `port` – The TCP port for messenger activity (`tcp_port` in the config, usually set to 9000). Not to be confused with `http_port`.
-- `user` – Name of the user for connecting to a remote server. Default value is the `default` user. This user must have access to connect to the specified server. Access is configured in the `users.xml` file. For more information, see the section [Access rights](../../../operations/access-rights.md).
+- `user` – Name of the user for connecting to a remote server. Default value is the `default` user. This user must have access to connect to the specified server. Access is configured in the `users.xml` file.
For more information, see the section [Access rights](../../../guides/sre/user-management/index.md).
- `password` – The password for connecting to a remote server (not masked). Default value: empty string.
- `secure` - Whether to use a secure SSL/TLS connection. Usually also requires specifying the port (the default secure port is `9440`). The server should listen on `9440` and be configured with correct certificates.
- `compression` - Use data compression. Default value: `true`.

@@ -225,8 +229,8 @@ A simple remainder from the division is a limited solution for sharding and isn

You should be concerned about the sharding scheme in the following cases:

-- Queries are used that require joining data (`IN` or `JOIN`) by a specific key. If data is sharded by this key, you can use local `IN` or `JOIN` instead of `GLOBAL IN` or `GLOBAL JOIN`, which is much more efficient.
-- A large number of servers is used (hundreds or more) with a large number of small queries, for example, queries for data of individual clients (e.g. websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, you can set up bi-level sharding: divide the entire cluster into “layers”, where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. `Distributed` tables are created for each layer, and a single shared distributed table is created for global queries.
+- Queries are used that require joining data (`IN` or `JOIN`) by a specific key. If data is sharded by this key, you can use local `IN` or `JOIN` instead of `GLOBAL IN` or `GLOBAL JOIN`, which is much more efficient.
+- A large number of servers is used (hundreds or more) with a large number of small queries, for example, queries for data of individual clients (e.g. websites, advertisers, or partners). In order for the small queries not to affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, you can set up bi-level sharding: divide the entire cluster into “layers”, where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. `Distributed` tables are created for each layer, and a single shared distributed table is created for global queries.

Data is written asynchronously. When data is inserted into the table, the block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The periodicity for sending data is managed by the [distributed_directory_monitor_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed_directory_monitor_max_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed_directory_monitor_batch_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources.
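The backlog of data waiting to be sent in the background can also be inspected from SQL. A minimal sketch using the `system.distribution_queue` table (the columns selected here exist in recent ClickHouse versions):

```sql
-- Each row describes one directory of pending inserts for a Distributed table.
SELECT database, table, is_blocked, error_count, data_files, data_compressed_bytes
FROM system.distribution_queue;
```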
You can verify whether data was sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by the [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting.

@@ -238,7 +242,7 @@ When querying a `Distributed` table, `SELECT` queries are sent to all shards and

When the `max_parallel_replicas` option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max_parallel_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas).

-To learn more about how distibuted `in` and `global in` queries are processed, refer to [this](../../../sql-reference/operators/in.md#select-distributed-subqueries) documentation.
+To learn more about how distributed `in` and `global in` queries are processed, refer to [this](../../../sql-reference/operators/in.md#select-distributed-subqueries) documentation.

## Virtual Columns {#virtual-columns}

@@ -252,6 +256,6 @@ Since [remote](../../../sql-reference/table-functions/remote.md) and [cluster](.

**See Also**

-- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns) description
-- [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting
-- [shardNum()](../../../sql-reference/functions/other-functions.md#shard-num) and [shardCount()](../../../sql-reference/functions/other-functions.md#shard-count) functions
+- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns) description
+- [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting
+- [shardNum()](../../../sql-reference/functions/other-functions.md#shardnum) and [shardCount()](../../../sql-reference/functions/other-functions.md#shardcount) functions

diff --git a/docs/en/engines/table-engines/special/executable.md b/docs/en/engines/table-engines/special/executable.md
index 5d01762f61b..25049d7b46e 100644
--- a/docs/en/engines/table-engines/special/executable.md
+++ b/docs/en/engines/table-engines/special/executable.md
@@ -120,7 +120,7 @@ Some comments about the `sentiment` table:

- The `TabSeparated` format means our Python script needs to generate rows of raw data that contain tab-separated values
- The query selects two columns from `hackernews`. The Python script will need to parse out those column values from the incoming rows

-Here is the defintion of `sentiment.py`:
+Here is the definition of `sentiment.py`:

```python
#!/usr/local/bin/python3.9

diff --git a/docs/en/engines/table-engines/special/file.md b/docs/en/engines/table-engines/special/file.md
index 8314c511236..9c4e87487b4 100644
--- a/docs/en/engines/table-engines/special/file.md
+++ b/docs/en/engines/table-engines/special/file.md
@@ -10,9 +10,9 @@ The File table engine keeps the data in a file in one of the supported [file for

Usage scenarios:

-- Data export from ClickHouse to file.
-- Convert data from one format to another.
-- Updating data in ClickHouse via editing a file on a disk.
+- Exporting data from ClickHouse to a file.
+- Converting data from one format to another.
+- Updating data in ClickHouse via editing a file on a disk.
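A minimal round trip through the engine looks like this (the table and column names are illustrative; the format argument is explained in the next section):

```sql
-- The data lives in a TabSeparated file under the table's data directory.
CREATE TABLE file_engine_table (name String, value UInt32) ENGINE = File(TabSeparated);
INSERT INTO file_engine_table VALUES ('one', 1), ('two', 2);
SELECT * FROM file_engine_table;
```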
## Usage in ClickHouse Server {#usage-in-clickhouse-server}

@@ -31,7 +31,7 @@ When creating table using `File(Format)` it creates empty subdirectory in that f

You may manually create this subfolder and file in the server filesystem and then [ATTACH](../../../sql-reference/statements/attach.md) it to table information with the matching name, so you can query data from that file.

-:::warning
+:::note
Be careful with this functionality, because ClickHouse does not keep track of external changes to such files. The result of simultaneous writes via ClickHouse and outside of ClickHouse is undefined.
:::

@@ -78,14 +78,14 @@ $ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64

## Details of Implementation {#details-of-implementation}

-- Multiple `SELECT` queries can be performed concurrently, but `INSERT` queries will wait each other.
-- Supported creating new file by `INSERT` query.
-- If file exists, `INSERT` would append new values in it.
-- Not supported:
-    - `ALTER`
-    - `SELECT ... SAMPLE`
-    - Indices
-    - Replication
+- Multiple `SELECT` queries can be performed concurrently, but `INSERT` queries will wait for each other.
+- Creating a new file by an `INSERT` query is supported.
+- If the file exists, `INSERT` appends new values to it.
+- Not supported:
+    - `ALTER`
+    - `SELECT ... SAMPLE`
+    - Indices
+    - Replication

## PARTITION BY

diff --git a/docs/en/engines/table-engines/special/generate.md b/docs/en/engines/table-engines/special/generate.md
index 77d90082ddc..9fcdb47e555 100644
--- a/docs/en/engines/table-engines/special/generate.md
+++ b/docs/en/engines/table-engines/special/generate.md
@@ -9,13 +9,13 @@ The GenerateRandom table engine produces random data for given table schema.

Usage examples:

-- Use in test to populate reproducible large table.
-- Generate random input for fuzzing tests.
+- Use in tests to populate a reproducible large table.
+- Generate random input for fuzzing tests.

## Usage in ClickHouse Server {#usage-in-clickhouse-server}

``` sql
-ENGINE = GenerateRandom([random_seed] [,max_string_length] [,max_array_length])
+ENGINE = GenerateRandom([random_seed [,max_string_length [,max_array_length]]])
```

The `max_array_length` and `max_string_length` parameters specify maximum length of all

@@ -49,9 +49,9 @@ SELECT * FROM generate_engine_table LIMIT 3

## Details of Implementation {#details-of-implementation}

-- Not supported:
-    - `ALTER`
-    - `SELECT ... SAMPLE`
-    - `INSERT`
-    - Indices
-    - Replication
+- Not supported:
+    - `ALTER`
+    - `SELECT ... SAMPLE`
+    - `INSERT`
+    - Indices
+    - Replication

diff --git a/docs/en/engines/table-engines/special/index.md b/docs/en/engines/table-engines/special/index.md
index 2247aeae5af..a1c5056de22 100644
--- a/docs/en/engines/table-engines/special/index.md
+++ b/docs/en/engines/table-engines/special/index.md
@@ -8,8 +8,8 @@ sidebar_label: Special

There are three main categories of table engines:

-- [MergeTree engine family](../../../engines/table-engines/mergetree-family/index.md) for main production use.
-- [Log engine family](../../../engines/table-engines/log-family/index.md) for small temporary data.
-- [Table engines for integrations](../../../engines/table-engines/integrations/index.md).
+- [MergeTree engine family](../../../engines/table-engines/mergetree-family/index.md) for main production use.
+- [Log engine family](../../../engines/table-engines/log-family/index.md) for small temporary data.
+- [Table engines for integrations](../../../engines/table-engines/integrations/index.md).
The remaining engines are unique in their purpose and are not grouped into families yet, thus they are placed in this “special” category.

diff --git a/docs/en/engines/table-engines/special/join.md b/docs/en/engines/table-engines/special/join.md
index a7cc525dd6c..e9f0678beee 100644
--- a/docs/en/engines/table-engines/special/join.md
+++ b/docs/en/engines/table-engines/special/join.md
@@ -54,8 +54,8 @@ You can use `INSERT` queries to add data to the `Join`-engine tables. If the tab

Main use cases for `Join`-engine tables are the following:

-- Place the table to the right side in a `JOIN` clause.
-- Call the [joinGet](/docs/en/sql-reference/functions/other-functions.md/#joinget) function, which lets you extract data from the table the same way as from a dictionary.
+- Place the table on the right side in a `JOIN` clause.
+- Call the [joinGet](/docs/en/sql-reference/functions/other-functions.md/#joinget) function, which lets you extract data from the table the same way as from a dictionary.

### Deleting Data {#deleting-data}

diff --git a/docs/en/engines/table-engines/special/keepermap.md b/docs/en/engines/table-engines/special/keepermap.md
index 680413039e7..a1c7009b712 100644
--- a/docs/en/engines/table-engines/special/keepermap.md
+++ b/docs/en/engines/table-engines/special/keepermap.md
@@ -78,7 +78,8 @@ Of course, it's possible to manually run `CREATE TABLE` with same path on nonrel

### Inserts

-When new rows are inserted into `KeeperMap`, if the key already exists, the value will be updated, otherwise new key is created.
+When new rows are inserted into `KeeperMap`, if the key does not exist, a new entry for the key is created.
+If the key exists and the setting `keeper_map_strict_mode` is set to `true`, an exception is thrown; otherwise, the value for the key is overwritten.

Example:

@@ -89,6 +90,7 @@ INSERT INTO keeper_map_table VALUES ('some key', 1, 'value', 3.2);

### Deletes

Rows can be deleted using the `DELETE` query or `TRUNCATE`.
+If the setting `keeper_map_strict_mode` is set to `true`, fetching and deleting data will succeed only if it can be executed atomically.

```sql
DELETE FROM keeper_map_table WHERE key LIKE 'some%' AND v1 > 1;

@@ -105,7 +107,12 @@ TRUNCATE TABLE keeper_map_table;

### Updates

Values can be updated using the `ALTER TABLE` query. The primary key cannot be updated.
+If the setting `keeper_map_strict_mode` is set to `true`, fetching and updating data will succeed only if it is executed atomically.

```sql
ALTER TABLE keeper_map_table UPDATE v1 = v1 * 10 + 2 WHERE key LIKE 'some%' AND v3 > 3.1;
```
+
+## Related content
+
+- Blog: [Building a Real-time Analytics Apps with ClickHouse and Hex](https://clickhouse.com/blog/building-real-time-applications-with-clickhouse-and-hex-notebook-keeper-engine)

diff --git a/docs/en/engines/table-engines/special/merge.md b/docs/en/engines/table-engines/special/merge.md
index ccdb5b5fad7..bd6149406a9 100644
--- a/docs/en/engines/table-engines/special/merge.md
+++ b/docs/en/engines/table-engines/special/merge.md
@@ -78,11 +78,11 @@ SELECT * FROM WatchLog;

## Virtual Columns {#virtual-columns}

-- `_table` — Contains the name of the table from which data was read. Type: [String](../../../sql-reference/data-types/string.md).
+- `_table` — Contains the name of the table from which data was read. Type: [String](../../../sql-reference/data-types/string.md).

You can set constant conditions on `_table` in the `WHERE/PREWHERE` clause (for example, `WHERE _table='xyz'`).
In this case the read operation is performed only for those tables where the condition on `_table` is satisfied, so the `_table` column acts as an index.

**See Also**

-- [Virtual columns](../../../engines/table-engines/special/index.md#table_engines-virtual_columns)
-- [merge](../../../sql-reference/table-functions/merge.md) table function
+- [Virtual columns](../../../engines/table-engines/special/index.md#table_engines-virtual_columns)
+- [merge](../../../sql-reference/table-functions/merge.md) table function

diff --git a/docs/en/engines/table-engines/special/url.md b/docs/en/engines/table-engines/special/url.md
index af8a80c75b0..a4530767e11 100644
--- a/docs/en/engines/table-engines/special/url.md
+++ b/docs/en/engines/table-engines/special/url.md
@@ -14,7 +14,7 @@ Syntax: `URL(URL [,Format] [,CompressionMethod])`

- The `Format` must be one that ClickHouse can use in `SELECT` queries and, if necessary, in `INSERTs`. For the full list of supported formats, see [Formats](../../../interfaces/formats.md#formats).

-  If this argument is not specified, ClickHouse detectes the format automatically from the suffix of the `URL` parameter. If the suffix of `URL` parameter does not match any supported formats, it fails to create table. For example, for engine expression `URL('http://localhost/test.json')`, `JSON` format is applied.
+  If this argument is not specified, ClickHouse detects the format automatically from the suffix of the `URL` parameter. If the suffix of the `URL` parameter does not match any supported format, the table creation fails. For example, for the engine expression `URL('http://localhost/test.json')`, the `JSON` format is applied.

- `CompressionMethod` indicates whether the HTTP body should be compressed. If compression is enabled, the HTTP packets sent by the URL engine contain the 'Content-Encoding' header to indicate which compression method is used.

@@ -91,11 +91,11 @@ SELECT * FROM url_engine_table

## Details of Implementation {#details-of-implementation}

-- Reads and writes can be parallel
-- Not supported:
-    - `ALTER` and `SELECT...SAMPLE` operations.
-    - Indexes.
-    - Replication.
+- Reads and writes can be parallel
+- Not supported:
+    - `ALTER` and `SELECT...SAMPLE` operations.
+    - Indexes.
+    - Replication.

## PARTITION BY

diff --git a/docs/en/getting-started/example-datasets/_category_.yml b/docs/en/getting-started/example-datasets/_category_.yml
deleted file mode 100644
index 2ee34c63e93..00000000000
--- a/docs/en/getting-started/example-datasets/_category_.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-position: 1
-label: 'Example Datasets'
-collapsible: true
-collapsed: true
-link:
-  type: doc
-  id: en/getting-started/example-datasets/

diff --git a/docs/en/getting-started/example-datasets/amazon-reviews.md b/docs/en/getting-started/example-datasets/amazon-reviews.md
new file mode 100644
index 00000000000..f35806aa66f
--- /dev/null
+++ b/docs/en/getting-started/example-datasets/amazon-reviews.md
@@ -0,0 +1,476 @@
+---
+slug: /en/getting-started/example-datasets/amazon-reviews
+sidebar_label: Amazon customer reviews
+---
+
+# Amazon customer reviews dataset
+
+[**Amazon Customer Reviews**](https://s3.amazonaws.com/amazon-reviews-pds/readme.html) (a.k.a. Product Reviews) is one of Amazon’s iconic products. In a period of over two decades since the first review in 1995, millions of Amazon customers have contributed over a hundred million reviews to express opinions and describe their experiences regarding products on the Amazon.com website.
This makes Amazon Customer Reviews a rich source of information for academic researchers in the fields of Natural Language Processing (NLP), Information Retrieval (IR), and Machine Learning (ML), amongst others. By accessing the dataset, you agree to the [license terms](https://s3.amazonaws.com/amazon-reviews-pds/license.txt).
+
+The data is in tab-separated format, in gzipped files hosted in AWS S3. Let's walk through the steps to insert it into ClickHouse.
+
+:::note
+The queries below were executed on a **Production** instance of [ClickHouse Cloud](https://clickhouse.cloud).
+:::
+
+
+1. Without inserting the data into ClickHouse, we can query it in place. Let's grab some rows so we can see what they look like:
+
+```sql
+SELECT *
+FROM s3('https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_Wireless_v1_00.tsv.gz',
+        'TabSeparatedWithNames',
+        'marketplace String,
+        customer_id Int64,
+        review_id String,
+        product_id String,
+        product_parent Int64,
+        product_title String,
+        product_category String,
+        star_rating Int64,
+        helpful_votes Int64,
+        total_votes Int64,
+        vine Bool,
+        verified_purchase Bool,
+        review_headline String,
+        review_body String,
+        review_date Date'
+)
+LIMIT 10;
+```
+
+The rows look like:
+
+```response
+┌─marketplace─┬─customer_id─┬─review_id──────┬─product_id─┬─product_parent─┬─product_title──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─product_category─┬─star_rating─┬─helpful_votes─┬─total_votes─┬─vine──┬─verified_purchase─┬─review_headline───────────┬─review_body────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─review_date─┐
+│ US │ 16414143 │ R3W4P9UBGNGH1U │ B00YL0EKWE │ 852431543 │ LG G4 Case Hard Transparent Slim Clear Cover for LG G4 │ Wireless │ 2 │ 1 │ 3 │ false │ true │ Looks good, functions meh │ 2 issues  -  Once I turned on the circle apps and installed this case, my battery drained twice as fast as usual. I ended up turning off the circle apps, which kind of makes the case just a case... with a hole in it. Second, the wireless charging doesn't work. I have a Motorola 360 watch and a Qi charging pad. The watch charges fine but this case doesn't. But hey, it looks nice. │  2015-08-31 │
+│ US │ 50800750 │ R15V54KBMTQWAY │ B00XK95RPQ │ 516894650 │ Selfie Stick Fiblastiq™ Extendable Wireless Bluetooth Selfie Stick with built-in Bluetooth Adjustable Phone Holder │ Wireless │ 4 │ 0 │ 0 │ false │ false │ A fun little gadget │ I’m embarrassed to admit that until recently, I have had a very negative opinion about “selfie sticks” aka “monopods” aka “narcissticks.” But having reviewed a number of them recently, they’re growing on me. This one is pretty nice and simple to set up and with easy instructions illustrated on the back of the box (not sure why some reviewers have stated that there are no instructions when they are clearly printed on the box unless they received different packaging than I did). Once assembled, the pairing via bluetooth and use of the stick are easy and intuitive. Nothing to it.

The stick comes with a USB charging cable but arrived with a charge so you can use it immediately, though it’s probably a good idea to charge it right away so that you have no interruption of use out of the box. Make sure the stick is switched to on (it will light up) and extend your stick to the length you desire up to about a yard’s length and snap away.

The phone clamp held the phone sturdily so I wasn’t worried about it slipping out. But the longer you extend the stick, the harder it is to maneuver. But that will happen with any stick and is not specific to this one in particular.

Two things that could improve this: 1) add the option to clamp this in portrait orientation instead of having to try and hold the stick at the portrait angle, which makes it feel unstable; 2) add the opening for a tripod so that this can be used to sit upright on a table for skyping and facetime eliminating the need to hold the phone up with your hand, causing fatigue.

But other than that, this is a nice quality monopod for a variety of picture taking opportunities.

I received a sample in exchange for my honest opinion. │ 2015-08-31 │ +│ US │ 15184378 │ RY8I449HNXSVF │ B00SXRXUKO │ 984297154 │ Tribe AB40 Water Resistant Sports Armband with Key Holder for 4.7-Inch iPhone 6S/6/5/5S/5C, Galaxy S4 + Screen Protector - Dark Pink │ Wireless │ 5 │ 0 │ 0 │ false │ true │ Five Stars │ Fits iPhone 6 well │ 2015-08-31 │ +│ US │ 10203548 │ R18TLJYCKJFLSR │ B009V5X1CE │ 279912704 │ RAVPower® Element 10400mAh External Battery USB Portable Charger (Dual USB Outputs, Ultra Compact Design), Travel Charger for iPhone 6,iPhone 6 plus,iPhone 5, 5S, 5C, 4S, 4, iPad Air, 4, 3, 2, Mini 2 (Apple adapters not included); Samsung Galaxy S5, S4, S3, S2, Note 3, Note 2; HTC One, EVO, Thunderbolt, Incredible, Droid DNA, Motorola ATRIX, Droid, Moto X, Google Glass, Nexus 4, Nexus 5, Nexus 7, │ Wireless │ 5 │ 0 │ 0 │ false │ true │ Great charger │ Great charger. I easily get 3+ charges on a Samsung Galaxy 3. Works perfectly for camping trips or long days on the boat. │ 2015-08-31 │ +│ US │ 488280 │ R1NK26SWS53B8Q │ B00D93OVF0 │ 662791300 │ Fosmon Micro USB Value Pack Bundle for Samsung Galaxy Exhilarate - Includes Home / Travel Charger, Car / Vehicle Charger and USB Cable │ Wireless │ 5 │ 0 │ 0 │ false │ true │ Five Stars │ Great for the price :-) │ 2015-08-31 │ +│ US │ 13334021 │ R11LOHEDYJALTN │ B00XVGJMDQ │ 421688488 │ iPhone 6 Case, Vofolen Impact Resistant Protective Shell iPhone 6S Wallet Cover Shockproof Rubber Bumper Case Anti-scratches Hard Cover Skin Card Slot Holder for iPhone 6 6S │ Wireless │ 5 │ 0 │ 0 │ false │ true │ Five Stars │ Great Case, better customer service! │ 2015-08-31 │ +│ US │ 27520697 │ R3ALQVQB2P9LA7 │ B00KQW1X1C │ 554285554 │ Nokia Lumia 630 RM-978 White Factory Unlocked - International Version No Warranty │ Wireless │ 4 │ 0 │ 0 │ false │ true │ Four Stars │ Easy to set up and use. Great functions for the price │ 2015-08-31 │ +│ US │ 48086021 │ R3MWLXLNO21PDQ │ B00IP1MQNK │ 488006702 │ Lumsing 10400mah external battery │ Wireless │ 5 │ 0 │ 0 │ false │ true │ Five Stars │ Works great │ 2015-08-31 │ +│ US │ 12738196 │ R2L15IS24CX0LI │ B00HVORET8 │ 389677711 │ iPhone 5S Battery Case - iPhone 5 Battery Case , Maxboost Atomic S [MFI Certified] External Protective Battery Charging Case Power Bank Charger All Versions of Apple iPhone 5/5S [Juice Battery Pack] │ Wireless │ 5 │ 0 │ 0 │ false │ true │ So far so good │ So far so good. It is essentially identical to the one it replaced from another company. That one stopped working after 7 months so I am a bit apprehensive about this one. │ 2015-08-31 │ +│ US │ 15867807 │ R1DJ8976WPWVZU │ B00HX3G6J6 │ 299654876 │ HTC One M8 Screen Protector, Skinomi TechSkin Full Coverage Screen Protector for HTC One M8 Clear HD Anti-Bubble Film │ Wireless │ 3 │ 0 │ 0 │ false │ true │ seems durable but these are always harder to get on ... │ seems durable but these are always harder to get on right than people make them out to be. also send to curl up at the edges after a while. with today's smartphones, you hardly need screen protectors anyway. 
│  2015-08-31 │
+└─────────────┴─────────────┴────────────────┴────────────┴────────────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────────────────┴─────────────┴───────────────┴─────────────┴───────┴───────────────────┴─────────────────────────────────────────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴─────────────┘
+```
+
+:::note
+Normally you would not need to pass the schema into the `s3` table function - ClickHouse can infer the names and data types of the columns. However, this particular dataset uses a non-standard tab-separated format; the `s3` function handles it fine if you include the schema.
+:::
+
+2. Let's define a new table named `amazon_reviews`. We'll optimize some of the column data types - and choose a primary key (the `ORDER BY` clause):
+
+```sql
+CREATE TABLE amazon_reviews
+(
+    review_date Date,
+    marketplace LowCardinality(String),
+    customer_id UInt64,
+    review_id String,
+    product_id String,
+    product_parent UInt64,
+    product_title String,
+    product_category LowCardinality(String),
+    star_rating UInt8,
+    helpful_votes UInt32,
+    total_votes UInt32,
+    vine Bool,
+    verified_purchase Bool,
+    review_headline String,
+    review_body String
+)
+ENGINE = MergeTree
+ORDER BY (marketplace, review_date, product_category);
+```
+
+3. We are now ready to insert the data into ClickHouse. Before we do, check out the [list of files in the dataset](https://s3.amazonaws.com/amazon-reviews-pds/tsv/index.txt) and decide which ones you want to include.
+
+4. We will insert all of the US reviews - which is about 151M rows. The following `INSERT` command uses the `s3Cluster` table function, which allows the processing of multiple S3 files in parallel using all the nodes of your cluster. We also use a wildcard to insert any file that starts with the name `https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_`:
+
+```sql
+INSERT INTO amazon_reviews
+WITH
+   transform(vine, ['Y','N'],[true, false]) AS vine,
+   transform(verified_purchase, ['Y','N'],[true, false]) AS verified_purchase
+SELECT
+   *
+FROM s3Cluster(
+    'default',
+    'https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_*.tsv.gz',
+    'TSVWithNames',
+    'review_date Date,
+    marketplace LowCardinality(String),
+    customer_id UInt64,
+    review_id String,
+    product_id String,
+    product_parent UInt64,
+    product_title String,
+    product_category LowCardinality(String),
+    star_rating UInt8,
+    helpful_votes UInt32,
+    total_votes UInt32,
+    vine FixedString(1),
+    verified_purchase FixedString(1),
+    review_headline String,
+    review_body String'
+    )
+SETTINGS input_format_allow_errors_num = 1000000;
+```
+
+:::tip
+In ClickHouse Cloud, there is a cluster named `default`. Change `default` to the name of your cluster...or use the `s3` table function (instead of `s3Cluster`) if you do not have a cluster.
+:::
+
+5. That query doesn't take long - within 5 minutes or so you should see all the rows inserted:
+
+```sql
+SELECT formatReadableQuantity(count())
+FROM amazon_reviews
+```
+
+```response
+┌─formatReadableQuantity(count())─┐
+│ 150.96 million                  │
+└─────────────────────────────────┘
+```
+
+6.
Let's see how much space our data is using: + +```sql +SELECT + disk_name, + formatReadableSize(sum(data_compressed_bytes) AS size) AS compressed, + formatReadableSize(sum(data_uncompressed_bytes) AS usize) AS uncompressed, + round(usize / size, 2) AS compr_rate, + sum(rows) AS rows, + count() AS part_count +FROM system.parts +WHERE (active = 1) AND (table = 'amazon_reviews') +GROUP BY disk_name +ORDER BY size DESC; +``` +The original data was about 70G, but compressed in ClickHouse it takes up about 30G: + +```response +┌─disk_name─┬─compressed─┬─uncompressed─┬─compr_rate─┬──────rows─┬─part_count─┐ +│ s3disk │ 30.00 GiB │ 70.61 GiB │ 2.35 │ 150957260 │ 9 │ +└───────────┴────────────┴──────────────┴────────────┴───────────┴────────────┘ +``` + +7. Let's run some queries...here are the top 10 most-helpful reviews on Amazon: + +```sql +SELECT + product_title, + review_headline +FROM amazon_reviews +ORDER BY helpful_votes DESC +LIMIT 10; +``` + +Notice the query has to process all 151M rows, and it takes about 17 seconds: + +```response +┌─product_title────────────────────────────────────────────────────────────────────────────┬─review_headline───────────────────────────────────────────────────────┐ +│ Kindle: Amazon's Original Wireless Reading Device (1st generation) │ Why and how the Kindle changes everything │ +│ BIC Cristal For Her Ball Pen, 1.0mm, Black, 16ct (MSLP16-Blk) │ FINALLY! │ +│ The Mountain Kids 100% Cotton Three Wolf Moon T-Shirt │ Dual Function Design │ +│ Kindle Keyboard 3G, Free 3G + Wi-Fi, 6" E Ink Display │ Kindle vs. Nook (updated) │ +│ Kindle Fire HD 7", Dolby Audio, Dual-Band Wi-Fi │ You Get What You Pay For │ +│ Kindle Fire (Previous Generation - 1st) │ A great device WHEN you consider price and function, with a few flaws │ +│ Fifty Shades of Grey: Book One of the Fifty Shades Trilogy (Fifty Shades of Grey Series) │ Did a teenager write this??? │ +│ Wheelmate Laptop Steering Wheel Desk │ Perfect for an Starfleet Helmsman │ +│ Kindle Wireless Reading Device (6" Display, U.S. Wireless) │ BEWARE of the SIGNIFICANT DIFFERENCES between Kindle 1 and Kindle 2! │ +│ Tuscan Dairy Whole Vitamin D Milk, Gallon, 128 oz │ Make this your only stock and store │ +└──────────────────────────────────────────────────────────────────────────────────────────┴───────────────────────────────────────────────────────────────────────┘ + +10 rows in set. Elapsed: 17.595 sec. Processed 150.96 million rows, 15.36 GB (8.58 million rows/s., 872.89 MB/s.) +``` + +8. Here are the top 10 products on Amazon with the most reviews: + +```sql +SELECT + any(product_title), + count() +FROM amazon_reviews +GROUP BY product_id +ORDER BY 2 DESC +LIMIT 10; +``` + +```response +┌─any(product_title)────────────────────────────┬─count()─┐ +│ Candy Crush Saga │ 50051 │ +│ The Secret Society® - Hidden Mystery │ 41255 │ +│ Google Chromecast HDMI Streaming Media Player │ 35977 │ +│ Minecraft │ 35129 │ +│ Bosch Season 1 │ 33610 │ +│ Gone Girl: A Novel │ 33240 │ +│ Subway Surfers │ 32328 │ +│ The Fault in Our Stars │ 30149 │ +│ Amazon.com eGift Cards │ 28879 │ +│ Crossy Road │ 28111 │ +└───────────────────────────────────────────────┴─────────┘ + +10 rows in set. Elapsed: 16.684 sec. Processed 195.05 million rows, 20.86 GB (11.69 million rows/s., 1.25 GB/s.) +```
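+ +Note the use of `any(product_title)`: since we group by `product_id`, an aggregate function is needed to pick a title for each group, and `any` simply returns one of the values. If an approximate answer is good enough, the `topK` aggregate function can find the most frequent titles in a single pass (a sketch - `topK` trades exactness for speed, and it counts duplicate titles rather than product IDs): + +```sql +SELECT topK(10)(product_title) +FROM amazon_reviews; +``` + +9. 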
Here are the average review ratings per month for each product (an actual [Amazon job interview question](https://datalemur.com/questions/sql-avg-review-ratings)!): + +```sql +SELECT + toStartOfMonth(review_date) AS month, + any(product_title), + avg(star_rating) AS avg_stars +FROM amazon_reviews +GROUP BY + month, + product_id +ORDER BY + month DESC, + product_id ASC +LIMIT 20; +``` + +The query calculates the monthly average for every product, but we only return 20 rows: + +```response +┌──────month─┬─any(product_title)──────────────────────────────────────────────────────────────────────┬─avg_stars─┐ +│ 2015-08-01 │ Mystiqueshapes Girls Ballet Tutu Neon Lime Green │ 4 │ +│ 2015-08-01 │ Adult Ballet Tutu Yellow │ 5 │ +│ 2015-08-01 │ The Way Things Work: An Illustrated Encyclopedia of Technology │ 5 │ +│ 2015-08-01 │ Hilda Boswell's Treasury of Poetry │ 5 │ +│ 2015-08-01 │ Treasury of Poetry │ 5 │ +│ 2015-08-01 │ Uncle Remus Stories │ 5 │ +│ 2015-08-01 │ The Book of Daniel │ 5 │ +│ 2015-08-01 │ Berenstains' B Book │ 5 │ +│ 2015-08-01 │ The High Hills (Brambly Hedge) │ 4.5 │ +│ 2015-08-01 │ Fuzzypeg Goes to School (The Little Grey Rabbit library) │ 5 │ +│ 2015-08-01 │ Dictionary in French: The Cat in the Hat (Beginner Series) │ 5 │ +│ 2015-08-01 │ Windfallen │ 5 │ +│ 2015-08-01 │ The Monk Who Sold His Ferrari: A Remarkable Story About Living Your Dreams │ 5 │ +│ 2015-08-01 │ Illustrissimi: The Letters of Pope John Paul I │ 5 │ +│ 2015-08-01 │ Social Contract: A Personal Inquiry into the Evolutionary Sources of Order and Disorder │ 5 │ +│ 2015-08-01 │ Mexico The Beautiful Cookbook: Authentic Recipes from the Regions of Mexico │ 4.5 │ +│ 2015-08-01 │ Alanbrooke │ 5 │ +│ 2015-08-01 │ Back to Cape Horn │ 4 │ +│ 2015-08-01 │ Ovett: An Autobiography (Willow books) │ 5 │ +│ 2015-08-01 │ The Birds of West Africa (Collins Field Guides) │ 4 │ +└────────────┴─────────────────────────────────────────────────────────────────────────────────────────┴───────────┘ + +20 rows in set. Elapsed: 52.827 sec. Processed 251.46 million rows, 35.26 GB (4.76 million rows/s., 667.55 MB/s.) +```
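+ +If you only care about recent activity, the same query can be narrowed with a date filter (the cutoff below is just an example): + +```sql +SELECT + toStartOfMonth(review_date) AS month, + any(product_title), + avg(star_rating) AS avg_stars +FROM amazon_reviews +WHERE review_date >= '2015-01-01' +GROUP BY + month, + product_id +ORDER BY + month DESC, + product_id ASC +LIMIT 20; +``` + +10. Here is the total number of votes per product category. 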
This query is fast because `product_category` is in the primary key: + +```sql +SELECT + sum(total_votes), + product_category +FROM amazon_reviews +GROUP BY product_category +ORDER BY 1 DESC; +``` + +```response +┌─sum(total_votes)─┬─product_category─────────┐ +│ 103877874 │ Books │ +│ 25330411 │ Digital_Ebook_Purchase │ +│ 23065953 │ Video DVD │ +│ 18048069 │ Music │ +│ 17292294 │ Mobile_Apps │ +│ 15977124 │ Health & Personal Care │ +│ 13554090 │ PC │ +│ 13065746 │ Kitchen │ +│ 12537926 │ Home │ +│ 11067538 │ Beauty │ +│ 10418643 │ Wireless │ +│ 9089085 │ Toys │ +│ 9071484 │ Sports │ +│ 7335647 │ Electronics │ +│ 6885504 │ Apparel │ +│ 6710085 │ Video Games │ +│ 6556319 │ Camera │ +│ 6305478 │ Lawn and Garden │ +│ 5954422 │ Office Products │ +│ 5339437 │ Home Improvement │ +│ 5284343 │ Outdoors │ +│ 5125199 │ Pet Products │ +│ 4733251 │ Grocery │ +│ 4697750 │ Shoes │ +│ 4666487 │ Automotive │ +│ 4361518 │ Digital_Video_Download │ +│ 4033550 │ Tools │ +│ 3559010 │ Baby │ +│ 3317662 │ Home Entertainment │ +│ 2559501 │ Video │ +│ 2204328 │ Furniture │ +│ 2157587 │ Musical Instruments │ +│ 1881662 │ Software │ +│ 1676081 │ Jewelry │ +│ 1499945 │ Watches │ +│ 1224071 │ Digital_Music_Purchase │ +│ 847918 │ Luggage │ +│ 503939 │ Major Appliances │ +│ 392001 │ Digital_Video_Games │ +│ 348990 │ Personal_Care_Appliances │ +│ 321372 │ Digital_Software │ +│ 169585 │ Mobile_Electronics │ +│ 72970 │ Gift Card │ +└──────────────────┴──────────────────────────┘ + +43 rows in set. Elapsed: 0.423 sec. Processed 150.96 million rows, 756.20 MB (356.70 million rows/s., 1.79 GB/s.) +``` + +11. Let's find the products with the word **"awful"** occurring most frequently in the review. This is a big task - over 151M strings have to be parsed looking for a single word: + +```sql +SELECT + product_id, + any(product_title), + avg(star_rating), + count() AS count +FROM amazon_reviews +WHERE position(review_body, 'awful') > 0 +GROUP BY product_id +ORDER BY count DESC +LIMIT 50; +``` + +The query takes a couple of minutes, but the results are a fun read: + +```response + +┌─product_id─┬─any(product_title)───────────────────────────────────────────────────────────────────────┬───avg(star_rating)─┬─count─┐ +│ 0345803485 │ Fifty Shades of Grey: Book One of the Fifty Shades Trilogy (Fifty Shades of Grey Series) │ 1.3870967741935485 │ 248 │ +│ B007J4T2G8 │ Fifty Shades of Grey (Fifty Shades, Book 1) │ 1.4439834024896265 │ 241 │ +│ B006LSZECO │ Gone Girl: A Novel │ 2.2986425339366514 │ 221 │ +│ B00008OWZG │ St. 
Anger │ 1.6565656565656566 │ 198 │ +│ B00BD99JMW │ Allegiant (Divergent Trilogy, Book 3) │ 1.8342541436464088 │ 181 │ +│ B0000YUXI0 │ Mavala Switzerland Mavala Stop Nail Biting │ 4.473684210526316 │ 171 │ +│ B004S8F7QM │ Cards Against Humanity │ 4.753012048192771 │ 166 │ +│ 031606792X │ Breaking Dawn (The Twilight Saga, Book 4) │ 1.796875 │ 128 │ +│ 006202406X │ Allegiant (Divergent Series) │ 1.4242424242424243 │ 99 │ +│ B0051VVOB2 │ Kindle Fire (Previous Generation - 1st) │ 2.7448979591836733 │ 98 │ +│ B00I3MP3SG │ Pilot │ 1.8762886597938144 │ 97 │ +│ 030758836X │ Gone Girl │ 2.15625 │ 96 │ +│ B0009X29WK │ Precious Cat Ultra Premium Clumping Cat Litter │ 3.0759493670886076 │ 79 │ +│ B00JB3MVCW │ Noah │ 1.2027027027027026 │ 74 │ +│ B00BAXFECK │ The Goldfinch: A Novel (Pulitzer Prize for Fiction) │ 2.643835616438356 │ 73 │ +│ B00N28818A │ Amazon Prime Video │ 1.4305555555555556 │ 72 │ +│ B007FTE2VW │ SimCity - Limited Edition │ 1.2794117647058822 │ 68 │ +│ 0439023513 │ Mockingjay (The Hunger Games) │ 2.6417910447761193 │ 67 │ +│ B00178630A │ Diablo III - PC/Mac │ 1.671875 │ 64 │ +│ B000OCEWGW │ Liquid Ass │ 4.8125 │ 64 │ +│ B005ZOBNOI │ The Fault in Our Stars │ 4.316666666666666 │ 60 │ +│ B00L9B7IKE │ The Girl on the Train: A Novel │ 2.0677966101694913 │ 59 │ +│ B007S6Y6VS │ Garden of Life Raw Organic Meal │ 2.8793103448275863 │ 58 │ +│ B0064X7B4A │ Words With Friends │ 2.2413793103448274 │ 58 │ +│ B003WUYPPG │ Unbroken: A World War II Story of Survival, Resilience, and Redemption │ 4.620689655172414 │ 58 │ +│ B00006HBUJ │ Star Wars: Episode II - Attack of the Clones (Widescreen Edition) │ 2.2982456140350878 │ 57 │ +│ B000XUBFE2 │ The Book Thief │ 4.526315789473684 │ 57 │ +│ B0006399FS │ How to Dismantle an Atomic Bomb │ 1.9821428571428572 │ 56 │ +│ B003ZSJ212 │ Star Wars: The Complete Saga (Episodes I-VI) (Packaging May Vary) [Blu-ray] │ 2.309090909090909 │ 55 │ +│ 193700788X │ Dead Ever After (Sookie Stackhouse/True Blood) │ 1.5185185185185186 │ 54 │ +│ B004FYEZMQ │ Mass Effect 3 │ 2.056603773584906 │ 53 │ +│ B000CFYAMC │ The Room │ 3.9615384615384617 │ 52 │ +│ B0031JK95S │ Garden of Life Raw Organic Meal │ 3.3137254901960786 │ 51 │ +│ B0012JY4G4 │ Color Oops Hair Color Remover Extra Strength 1 Each │ 3.9019607843137254 │ 51 │ +│ B007VTVRFA │ SimCity - Limited Edition │ 1.2040816326530612 │ 49 │ +│ B00CE18P0K │ Pilot │ 1.7142857142857142 │ 49 │ +│ 0316015849 │ Twilight (The Twilight Saga, Book 1) │ 1.8979591836734695 │ 49 │ +│ B00DR0PDNE │ Google Chromecast HDMI Streaming Media Player │ 2.5416666666666665 │ 48 │ +│ B000056OWC │ The First Years: 4-Stage Bath System │ 1.2127659574468086 │ 47 │ +│ B007IXWKUK │ Fifty Shades Darker (Fifty Shades, Book 2) │ 1.6304347826086956 │ 46 │ +│ 1892112000 │ To Train Up a Child │ 1.4130434782608696 │ 46 │ +│ 043935806X │ Harry Potter and the Order of the Phoenix (Book 5) │ 3.977272727272727 │ 44 │ +│ B00BGO0Q9O │ Fitbit Flex Wireless Wristband with Sleep Function, Black │ 1.9318181818181819 │ 44 │ +│ B003XF1XOQ │ Mockingjay (Hunger Games Trilogy, Book 3) │ 2.772727272727273 │ 44 │ +│ B00DD2B52Y │ Spring Breakers │ 1.2093023255813953 │ 43 │ +│ B0064X7FVE │ The Weather Channel: Forecast, Radar & Alerts │ 1.5116279069767442 │ 43 │ +│ B0083PWAPW │ Kindle Fire HD 7", Dolby Audio, Dual-Band Wi-Fi │ 2.627906976744186 │ 43 │ +│ B00192KCQ0 │ Death Magnetic │ 3.5714285714285716 │ 42 │ +│ B007S6Y74O │ Garden of Life Raw Organic Meal │ 3.292682926829268 │ 41 │ +│ B0052QYLUM │ Infant Optics DXR-5 Portable Video Baby Monitor │ 2.1463414634146343 │ 41 │ 
+└────────────┴──────────────────────────────────────────────────────────────────────────────────────────┴────────────────────┴───────┘ + +50 rows in set. Elapsed: 60.052 sec. Processed 150.96 million rows, 68.93 GB (2.51 million rows/s., 1.15 GB/s.) +``` + +12. We can run the same query again, except this time we search for **awesome** in the reviews: + +```sql +SELECT + product_id, + any(product_title), + avg(star_rating), + count() AS count +FROM amazon_reviews +WHERE position(review_body, 'awesome') > 0 +GROUP BY product_id +ORDER BY count DESC +LIMIT 50; +``` + +It runs quite a bit faster - which means the cache is helping us out here: + +```response + +┌─product_id─┬─any(product_title)────────────────────────────────────────────────────┬───avg(star_rating)─┬─count─┐ +│ B00992CF6W │ Minecraft │ 4.848130353039482 │ 4787 │ +│ B009UX2YAC │ Subway Surfers │ 4.866720955483171 │ 3684 │ +│ B00QW8TYWO │ Crossy Road │ 4.935217903415784 │ 2547 │ +│ B00DJFIMW6 │ Minion Rush: Despicable Me Official Game │ 4.850450450450451 │ 2220 │ +│ B00AREIAI8 │ My Horse │ 4.865313653136531 │ 2168 │ +│ B00I8Q77Y0 │ Flappy Wings (not Flappy Bird) │ 4.8246561886051085 │ 2036 │ +│ B0054JZC6E │ 101-in-1 Games │ 4.792542016806722 │ 1904 │ +│ B00G5LQ5MU │ Escape The Titanic │ 4.724673710379117 │ 1609 │ +│ B0086700CM │ Temple Run │ 4.87636130685458 │ 1561 │ +│ B009HKL4B8 │ The Sims Freeplay │ 4.763942931258106 │ 1542 │ +│ B00I6IKSZ0 │ Pixel Gun 3D (Pocket Edition) - multiplayer shooter with skin creator │ 4.849894291754757 │ 1419 │ +│ B006OC2ANS │ BLOOD & GLORY │ 4.8561538461538465 │ 1300 │ +│ B00FATEJYE │ Injustice: Gods Among Us (Kindle Tablet Edition) │ 4.789265982636149 │ 1267 │ +│ B00B2V66VS │ Temple Run 2 │ 4.764705882352941 │ 1173 │ +│ B00JOT3HQ2 │ Geometry Dash Lite │ 4.909747292418772 │ 1108 │ +│ B00DUGCLY4 │ Guess The Emoji │ 4.813606710158434 │ 1073 │ +│ B00DR0PDNE │ Google Chromecast HDMI Streaming Media Player │ 4.607276119402985 │ 1072 │ +│ B00FAPF5U0 │ Candy Crush Saga │ 4.825757575757576 │ 1056 │ +│ B0051VVOB2 │ Kindle Fire (Previous Generation - 1st) │ 4.600407747196738 │ 981 │ +│ B007JPG04E │ FRONTLINE COMMANDO │ 4.8125 │ 912 │ +│ B00PTB7B34 │ Call of Duty®: Heroes │ 4.876404494382022 │ 890 │ +│ B00846GKTW │ Style Me Girl - Free 3D Fashion Dressup │ 4.785714285714286 │ 882 │ +│ B004S8F7QM │ Cards Against Humanity │ 4.931034482758621 │ 754 │ +│ B00FAX6XQC │ DEER HUNTER CLASSIC │ 4.700272479564033 │ 734 │ +│ B00PSGW79I │ Buddyman: Kick │ 4.888736263736264 │ 728 │ +│ B00CTQ6SIG │ The Simpsons: Tapped Out │ 4.793948126801153 │ 694 │ +│ B008JK6W5K │ Logo Quiz │ 4.782106782106782 │ 693 │ +│ B00EDTSKLU │ Geometry Dash │ 4.942028985507246 │ 690 │ +│ B00CSR2J9I │ Hill Climb Racing │ 4.880059970014993 │ 667 │ +│ B005ZXWMUS │ Netflix │ 4.722306525037936 │ 659 │ +│ B00CRFAAYC │ Fab Tattoo Artist FREE │ 4.907435508345979 │ 659 │ +│ B00DHQHQCE │ Battle Beach │ 4.863287250384024 │ 651 │ +│ B00BGA9WK2 │ PlayStation 4 500GB Console [Old Model] │ 4.688751926040061 │ 649 │ +│ B008Y7SMQU │ Logo Quiz - Fun Plus Free │ 4.7888 │ 625 │ +│ B0083PWAPW │ Kindle Fire HD 7", Dolby Audio, Dual-Band Wi-Fi │ 4.593900481540931 │ 623 │ +│ B008XG1X18 │ Pinterest │ 4.8148760330578515 │ 605 │ +│ B007SYWFRM │ Ice Age Village │ 4.8566666666666665 │ 600 │ +│ B00K7WGUKA │ Don't Tap The White Tile (Piano Tiles) │ 4.922689075630252 │ 595 │ +│ B00BWYQ9YE │ Kindle Fire HDX 7", HDX Display (Previous Generation - 3rd) │ 4.649913344887349 │ 577 │ +│ B00IZLM8MY │ High School Story │ 4.840425531914893 │ 564 │ +│ B004MC8CA2 │ Bible │ 
4.884476534296029 │ 554 │ +│ B00KNWYDU8 │ Dragon City │ 4.861111111111111 │ 540 │ +│ B009ZKSPDK │ Survivalcraft │ 4.738317757009346 │ 535 │ +│ B00A4O6NMG │ My Singing Monsters │ 4.845559845559846 │ 518 │ +│ B002MQYOFW │ The Hunger Games (Hunger Games Trilogy, Book 1) │ 4.846899224806202 │ 516 │ +│ B005ZFOOE8 │ iHeartRadio – Free Music & Internet Radio │ 4.837301587301587 │ 504 │ +│ B00AIUUXHC │ Hungry Shark Evolution │ 4.846311475409836 │ 488 │ +│ B00E8KLWB4 │ The Secret Society® - Hidden Mystery │ 4.669438669438669 │ 481 │ +│ B006D1ONE4 │ Where's My Water? │ 4.916317991631799 │ 478 │ +│ B00G6ZTM3Y │ Terraria │ 4.728421052631579 │ 475 │ +└────────────┴───────────────────────────────────────────────────────────────────────┴────────────────────┴───────┘ + +50 rows in set. Elapsed: 33.954 sec. Processed 150.96 million rows, 68.95 GB (4.45 million rows/s., 2.03 GB/s.) +``` \ No newline at end of file diff --git a/docs/en/getting-started/example-datasets/cell-towers.md b/docs/en/getting-started/example-datasets/cell-towers.md index b19d09c777a..048eecb285b 100644 --- a/docs/en/getting-started/example-datasets/cell-towers.md +++ b/docs/en/getting-started/example-datasets/cell-towers.md @@ -1,9 +1,10 @@ --- slug: /en/getting-started/example-datasets/cell-towers -sidebar_label: Cell Towers +sidebar_label: Geo Data sidebar_position: 3 -title: "Cell Towers" +title: "Geo Data using the Cell Tower Dataset" --- + import ConnectionDetails from '@site/docs/en/_snippets/_gather_your_details_http.mdx'; import Tabs from '@theme/Tabs'; @@ -163,7 +164,7 @@ SELECT mcc, count() FROM cell_towers GROUP BY mcc ORDER BY count() DESC LIMIT 10 Based on the above query and the [MCC list](https://en.wikipedia.org/wiki/Mobile_country_code), the countries with the most cell towers are: the USA, Germany, and Russia. -You may want to create a [Dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) in ClickHouse to decode these values. +You may want to create a [Dictionary](../../sql-reference/dictionaries/index.md) in ClickHouse to decode these values. ## Use case: Incorporate geo data {#use-case} @@ -307,7 +308,7 @@ To build a Superset dashboard using the OpenCelliD dataset you should: ![Choose clickhouse connect as database type](@site/docs/en/getting-started/example-datasets/images/superset-choose-a-database.png) :::note - If **ClickHouse Connect** is not one of your options, then you will need to install it. The comand is `pip install clickhouse-connect`, and more info is [available here](https://pypi.org/project/clickhouse-connect/). + If **ClickHouse Connect** is not one of your options, then you will need to install it. The command is `pip install clickhouse-connect`, and more info is [available here](https://pypi.org/project/clickhouse-connect/). ::: #### Add your connection details: diff --git a/docs/en/getting-started/example-datasets/covid19.md b/docs/en/getting-started/example-datasets/covid19.md new file mode 100644 index 00000000000..3a7fae89ae0 --- /dev/null +++ b/docs/en/getting-started/example-datasets/covid19.md @@ -0,0 +1,265 @@ +--- +slug: /en/getting-started/example-datasets/covid19 +sidebar_label: COVID-19 Open-Data +--- + +# COVID-19 Open-Data + +COVID-19 Open-Data attempts to assemble the largest Covid-19 epidemiological database, in addition to a powerful set of expansive covariates. It includes open, publicly sourced, licensed data relating to demographics, economy, epidemiology, geography, health, hospitalizations, mobility, government response, weather, and more. 
+ +The details are on GitHub [here](https://github.com/GoogleCloudPlatform/covid-19-open-data). + +It's easy to insert this data into ClickHouse... + +:::note +The following commands were executed on a **Production** instance of [ClickHouse Cloud](https://clickhouse.cloud). You can easily run them on a local install as well. +::: + +1. Let's see what the data looks like: + +```sql +DESCRIBE url( + 'https://storage.googleapis.com/covid19-open-data/v3/epidemiology.csv', + 'CSVWithNames' +); +``` + +The CSV file has 10 columns: + +```response +┌─name─────────────────┬─type─────────────┐ +│ date │ Nullable(String) │ +│ location_key │ Nullable(String) │ +│ new_confirmed │ Nullable(Int64) │ +│ new_deceased │ Nullable(Int64) │ +│ new_recovered │ Nullable(Int64) │ +│ new_tested │ Nullable(Int64) │ +│ cumulative_confirmed │ Nullable(Int64) │ +│ cumulative_deceased │ Nullable(Int64) │ +│ cumulative_recovered │ Nullable(Int64) │ +│ cumulative_tested │ Nullable(Int64) │ +└──────────────────────┴──────────────────┘ + +10 rows in set. Elapsed: 0.745 sec. +``` + +2. Now let's view some of the rows: + +```sql +SELECT * +FROM url('https://storage.googleapis.com/covid19-open-data/v3/epidemiology.csv') +LIMIT 100; +``` + +Notice the `url` function easily reads data from a CSV file: + +```response +┌─c1─────────┬─c2───────────┬─c3────────────┬─c4───────────┬─c5────────────┬─c6─────────┬─c7───────────────────┬─c8──────────────────┬─c9───────────────────┬─c10───────────────┐ +│ date │ location_key │ new_confirmed │ new_deceased │ new_recovered │ new_tested │ cumulative_confirmed │ cumulative_deceased │ cumulative_recovered │ cumulative_tested │ +│ 2020-04-03 │ AD │ 24 │ 1 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 466 │ 17 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ +│ 2020-04-04 │ AD │ 57 │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 523 │ 17 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ +│ 2020-04-05 │ AD │ 17 │ 4 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 540 │ 21 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ +│ 2020-04-06 │ AD │ 11 │ 1 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 551 │ 22 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ +│ 2020-04-07 │ AD │ 15 │ 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 566 │ 24 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ +│ 2020-04-08 │ AD │ 23 │ 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 589 │ 26 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ +└────────────┴──────────────┴───────────────┴──────────────┴───────────────┴────────────┴──────────────────────┴─────────────────────┴──────────────────────┴───────────────────┘ +``` + +3. Now that we know what the data looks like, let's create a table: + +```sql +CREATE TABLE covid19 ( + date Date, + location_key LowCardinality(String), + new_confirmed Int32, + new_deceased Int32, + new_recovered Int32, + new_tested Int32, + cumulative_confirmed Int32, + cumulative_deceased Int32, + cumulative_recovered Int32, + cumulative_tested Int32 +) +ENGINE = MergeTree +ORDER BY (location_key, date); +``` + +4. The following command inserts the entire dataset into the `covid19` table: + +```sql +INSERT INTO covid19 + SELECT * + FROM + url( + 'https://storage.googleapis.com/covid19-open-data/v3/epidemiology.csv', + CSVWithNames, + 'date Date, + location_key LowCardinality(String), + new_confirmed Int32, + new_deceased Int32, + new_recovered Int32, + new_tested Int32, + cumulative_confirmed Int32, + cumulative_deceased Int32, + cumulative_recovered Int32, + cumulative_tested Int32' + ); +``` + +5. The insert runs quickly - let's see how many rows were inserted: + +```sql +SELECT formatReadableQuantity(count()) +FROM covid19; +``` + +```response +┌─formatReadableQuantity(count())─┐ +│ 12.53 million │ +└─────────────────────────────────┘ +```
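+ +You can also check how many distinct locations are covered (`uniqExact` returns an exact distinct count, unlike the faster but approximate `uniq`): + +```sql +SELECT uniqExact(location_key) +FROM covid19; +``` + +6. 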
Let's see how many total cases of COVID-19 were recorded: + +```sql +SELECT formatReadableQuantity(sum(new_confirmed)) +FROM covid19; +``` + +```response +┌─formatReadableQuantity(sum(new_confirmed))─┐ +│ 1.39 billion │ +└────────────────────────────────────────────┘ +``` + +7. You will notice the data has a lot of 0's for certain dates - either weekends or days when numbers were not reported. We can use a window function to smooth out the daily averages of new cases: + +```sql +SELECT + AVG(new_confirmed) OVER (PARTITION BY location_key ORDER BY date ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) AS cases_smoothed, + new_confirmed, + location_key, + date +FROM covid19; +``` + +8. This query determines the latest values for each location. We can't use `max(date)` because not all countries reported every day, so we grab the last row using `ROW_NUMBER`: + +```sql +WITH latest_deaths_data AS + ( SELECT location_key, + date, + new_deceased, + new_confirmed, + ROW_NUMBER() OVER (PARTITION BY location_key ORDER BY date DESC) as rn + FROM covid19) +SELECT location_key, + date, + new_deceased, + new_confirmed, + rn +FROM latest_deaths_data +WHERE rn=1; +``` + +9. We can use `lagInFrame` to determine the `LAG` of new cases each day. In this query we filter by the `US_DC` location: + +```sql +SELECT + new_confirmed - lagInFrame(new_confirmed,1) OVER (PARTITION BY location_key ORDER BY date) AS confirmed_cases_delta, + new_confirmed, + location_key, + date +FROM covid19 +WHERE location_key = 'US_DC'; +``` + +The response looks like: + +```response +┌─confirmed_cases_delta─┬─new_confirmed─┬─location_key─┬───────date─┐ +│ 0 │ 0 │ US_DC │ 2020-03-08 │ +│ 2 │ 2 │ US_DC │ 2020-03-09 │ +│ -2 │ 0 │ US_DC │ 2020-03-10 │ +│ 6 │ 6 │ US_DC │ 2020-03-11 │ +│ -6 │ 0 │ US_DC │ 2020-03-12 │ +│ 0 │ 0 │ US_DC │ 2020-03-13 │ +│ 6 │ 6 │ US_DC │ 2020-03-14 │ +│ -5 │ 1 │ US_DC │ 2020-03-15 │ +│ 4 │ 5 │ US_DC │ 2020-03-16 │ +│ 4 │ 9 │ US_DC │ 2020-03-17 │ +│ -1 │ 8 │ US_DC │ 2020-03-18 │ +│ 24 │ 32 │ US_DC │ 2020-03-19 │ +│ -26 │ 6 │ US_DC │ 2020-03-20 │ +│ 15 │ 21 │ US_DC │ 2020-03-21 │ +│ -3 │ 18 │ US_DC │ 2020-03-22 │ +│ 3 │ 21 │ US_DC │ 2020-03-23 │ +```
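+ +To find the days with the largest jumps, you can sort by the computed delta (a sketch - the `LIMIT 10` cutoff is arbitrary): + +```sql +SELECT + date, + new_confirmed, + new_confirmed - lagInFrame(new_confirmed,1) OVER (PARTITION BY location_key ORDER BY date) AS confirmed_cases_delta +FROM covid19 +WHERE location_key = 'US_DC' +ORDER BY confirmed_cases_delta DESC +LIMIT 10; +``` + +10. 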
This query calculates the percent change in new cases each day, and includes a simple `increase` or `decrease` column in the result set: + +```sql +WITH confirmed_lag AS ( + SELECT + *, + lagInFrame(new_confirmed) OVER( + PARTITION BY location_key + ORDER BY date + ) AS confirmed_previous_day + FROM covid19 +), +confirmed_percent_change AS ( + SELECT + *, + COALESCE(ROUND((new_confirmed - confirmed_previous_day) / confirmed_previous_day * 100), 0) AS percent_change + FROM confirmed_lag +) +SELECT + date, + new_confirmed, + percent_change, + CASE + WHEN percent_change > 0 THEN 'increase' + WHEN percent_change = 0 THEN 'no change' + ELSE 'decrease' + END AS trend +FROM confirmed_percent_change +WHERE location_key = 'US_DC'; +``` + +The results look like: + +```response +┌───────date─┬─new_confirmed─┬─percent_change─┬─trend─────┐ +│ 2020-03-08 │ 0 │ nan │ decrease │ +│ 2020-03-09 │ 2 │ inf │ increase │ +│ 2020-03-10 │ 0 │ -100 │ decrease │ +│ 2020-03-11 │ 6 │ inf │ increase │ +│ 2020-03-12 │ 0 │ -100 │ decrease │ +│ 2020-03-13 │ 0 │ nan │ decrease │ +│ 2020-03-14 │ 6 │ inf │ increase │ +│ 2020-03-15 │ 1 │ -83 │ decrease │ +│ 2020-03-16 │ 5 │ 400 │ increase │ +│ 2020-03-17 │ 9 │ 80 │ increase │ +│ 2020-03-18 │ 8 │ -11 │ decrease │ +│ 2020-03-19 │ 32 │ 300 │ increase │ +│ 2020-03-20 │ 6 │ -81 │ decrease │ +│ 2020-03-21 │ 21 │ 250 │ increase │ +│ 2020-03-22 │ 18 │ -14 │ decrease │ +│ 2020-03-23 │ 21 │ 17 │ increase │ +│ 2020-03-24 │ 46 │ 119 │ increase │ +│ 2020-03-25 │ 48 │ 4 │ increase │ +│ 2020-03-26 │ 36 │ -25 │ decrease │ +│ 2020-03-27 │ 37 │ 3 │ increase │ +│ 2020-03-28 │ 38 │ 3 │ increase │ +│ 2020-03-29 │ 59 │ 55 │ increase │ +│ 2020-03-30 │ 94 │ 59 │ increase │ +│ 2020-03-31 │ 91 │ -3 │ decrease │ +│ 2020-04-01 │ 67 │ -26 │ decrease │ +│ 2020-04-02 │ 104 │ 55 │ increase │ +│ 2020-04-03 │ 145 │ 39 │ increase │ +``` + +:::note +As mentioned in the [GitHub repo](https://github.com/GoogleCloudPlatform/covid-19-open-data), the dataset is no longer updated as of September 15, 2022. 
+::: \ No newline at end of file diff --git a/docs/en/getting-started/example-datasets/criteo.md b/docs/en/getting-started/example-datasets/criteo.md index 3bd0230d4cc..a2e0fda0cb0 100644 --- a/docs/en/getting-started/example-datasets/criteo.md +++ b/docs/en/getting-started/example-datasets/criteo.md @@ -3,14 +3,56 @@ slug: /en/getting-started/example-datasets/criteo sidebar_label: Terabyte Click Logs from Criteo --- -# Terabyte of Click Logs from Criteo +# Terabyte of Click Logs from Criteo Download the data from http://labs.criteo.com/downloads/download-terabyte-click-logs/ Create a table to import the log to: ``` sql -CREATE TABLE criteo_log (date Date, clicked UInt8, int1 Int32, int2 Int32, int3 Int32, int4 Int32, int5 Int32, int6 Int32, int7 Int32, int8 Int32, int9 Int32, int10 Int32, int11 Int32, int12 Int32, int13 Int32, cat1 String, cat2 String, cat3 String, cat4 String, cat5 String, cat6 String, cat7 String, cat8 String, cat9 String, cat10 String, cat11 String, cat12 String, cat13 String, cat14 String, cat15 String, cat16 String, cat17 String, cat18 String, cat19 String, cat20 String, cat21 String, cat22 String, cat23 String, cat24 String, cat25 String, cat26 String) ENGINE = Log +CREATE TABLE criteo_log ( + date Date, + clicked UInt8, + int1 Int32, + int2 Int32, + int3 Int32, + int4 Int32, + int5 Int32, + int6 Int32, + int7 Int32, + int8 Int32, + int9 Int32, + int10 Int32, + int11 Int32, + int12 Int32, + int13 Int32, + cat1 String, + cat2 String, + cat3 String, + cat4 String, + cat5 String, + cat6 String, + cat7 String, + cat8 String, + cat9 String, + cat10 String, + cat11 String, + cat12 String, + cat13 String, + cat14 String, + cat15 String, + cat16 String, + cat17 String, + cat18 String, + cat19 String, + cat20 String, + cat21 String, + cat22 String, + cat23 String, + cat24 String, + cat25 String, + cat26 String +) ENGINE = Log; ``` Download the data: @@ -73,7 +115,52 @@ ORDER BY (date, icat1) Transform data from the raw log and put it in the second table: ``` sql -INSERT INTO criteo SELECT date, clicked, int1, int2, int3, int4, int5, int6, int7, int8, int9, int10, int11, int12, int13, reinterpretAsUInt32(unhex(cat1)) AS icat1, reinterpretAsUInt32(unhex(cat2)) AS icat2, reinterpretAsUInt32(unhex(cat3)) AS icat3, reinterpretAsUInt32(unhex(cat4)) AS icat4, reinterpretAsUInt32(unhex(cat5)) AS icat5, reinterpretAsUInt32(unhex(cat6)) AS icat6, reinterpretAsUInt32(unhex(cat7)) AS icat7, reinterpretAsUInt32(unhex(cat8)) AS icat8, reinterpretAsUInt32(unhex(cat9)) AS icat9, reinterpretAsUInt32(unhex(cat10)) AS icat10, reinterpretAsUInt32(unhex(cat11)) AS icat11, reinterpretAsUInt32(unhex(cat12)) AS icat12, reinterpretAsUInt32(unhex(cat13)) AS icat13, reinterpretAsUInt32(unhex(cat14)) AS icat14, reinterpretAsUInt32(unhex(cat15)) AS icat15, reinterpretAsUInt32(unhex(cat16)) AS icat16, reinterpretAsUInt32(unhex(cat17)) AS icat17, reinterpretAsUInt32(unhex(cat18)) AS icat18, reinterpretAsUInt32(unhex(cat19)) AS icat19, reinterpretAsUInt32(unhex(cat20)) AS icat20, reinterpretAsUInt32(unhex(cat21)) AS icat21, reinterpretAsUInt32(unhex(cat22)) AS icat22, reinterpretAsUInt32(unhex(cat23)) AS icat23, reinterpretAsUInt32(unhex(cat24)) AS icat24, reinterpretAsUInt32(unhex(cat25)) AS icat25, reinterpretAsUInt32(unhex(cat26)) AS icat26 FROM criteo_log; +INSERT INTO + criteo +SELECT + date, + clicked, + int1, + int2, + int3, + int4, + int5, + int6, + int7, + int8, + int9, + int10, + int11, + int12, + int13, + reinterpretAsUInt32(unhex(cat1)) AS icat1, + reinterpretAsUInt32(unhex(cat2)) AS icat2, + 
reinterpretAsUInt32(unhex(cat3)) AS icat3, + reinterpretAsUInt32(unhex(cat4)) AS icat4, + reinterpretAsUInt32(unhex(cat5)) AS icat5, + reinterpretAsUInt32(unhex(cat6)) AS icat6, + reinterpretAsUInt32(unhex(cat7)) AS icat7, + reinterpretAsUInt32(unhex(cat8)) AS icat8, + reinterpretAsUInt32(unhex(cat9)) AS icat9, + reinterpretAsUInt32(unhex(cat10)) AS icat10, + reinterpretAsUInt32(unhex(cat11)) AS icat11, + reinterpretAsUInt32(unhex(cat12)) AS icat12, + reinterpretAsUInt32(unhex(cat13)) AS icat13, + reinterpretAsUInt32(unhex(cat14)) AS icat14, + reinterpretAsUInt32(unhex(cat15)) AS icat15, + reinterpretAsUInt32(unhex(cat16)) AS icat16, + reinterpretAsUInt32(unhex(cat17)) AS icat17, + reinterpretAsUInt32(unhex(cat18)) AS icat18, + reinterpretAsUInt32(unhex(cat19)) AS icat19, + reinterpretAsUInt32(unhex(cat20)) AS icat20, + reinterpretAsUInt32(unhex(cat21)) AS icat21, + reinterpretAsUInt32(unhex(cat22)) AS icat22, + reinterpretAsUInt32(unhex(cat23)) AS icat23, + reinterpretAsUInt32(unhex(cat24)) AS icat24, + reinterpretAsUInt32(unhex(cat25)) AS icat25, + reinterpretAsUInt32(unhex(cat26)) AS icat26 +FROM + criteo_log; DROP TABLE criteo_log; ``` diff --git a/docs/en/getting-started/example-datasets/environmental-sensors.md b/docs/en/getting-started/example-datasets/environmental-sensors.md new file mode 100644 index 00000000000..309a6dc6c0f --- /dev/null +++ b/docs/en/getting-started/example-datasets/environmental-sensors.md @@ -0,0 +1,172 @@ +--- +slug: /en/getting-started/example-datasets/environmental-sensors +sidebar_label: Environmental Sensors Data +--- + +# Environmental Sensors Data + +[Sensor.Community](https://sensor.community/en/) is a contributor-driven global sensor network that creates Open Environmental Data. The data is collected from sensors all over the globe. Anyone can purchase a sensor and place it wherever they like. The APIs for downloading the data are on [GitHub](https://github.com/opendata-stuttgart/meta/wiki/APIs) and the data is freely available under the [Database Contents License (DbCL)](https://opendatacommons.org/licenses/dbcl/1-0/). + +:::important +The dataset has over 20 billion records, so be careful about simply copying-and-pasting the commands below unless your resources can handle that volume. The commands below were executed on a **Production** instance of [ClickHouse Cloud](https://clickhouse.cloud). +::: + +1. The data is in S3, so we can use the `s3` table function to create a table from the files. We can also query the data in place. Let's look at a few rows before attempting to insert it into ClickHouse: + +```sql +SELECT * +FROM s3( + 'https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/sensors/monthly/2019-06_bmp180.csv.zst', + 'CSVWithNames' + ) +LIMIT 10 +SETTINGS format_csv_delimiter = ';'; +``` + +The data is in CSV files but uses a semicolon as the delimiter. 
The rows look like: + +```response +┌─sensor_id─┬─sensor_type─┬─location─┬────lat─┬────lon─┬─timestamp───────────┬──pressure─┬─altitude─┬─pressure_sealevel─┬─temperature─┐ +│ 9119 │ BMP180 │ 4594 │ 50.994 │ 7.126 │ 2019-06-01T00:00:00 │ 101471 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 19.9 │ +│ 21210 │ BMP180 │ 10762 │ 42.206 │ 25.326 │ 2019-06-01T00:00:00 │ 99525 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 19.3 │ +│ 19660 │ BMP180 │ 9978 │ 52.434 │ 17.056 │ 2019-06-01T00:00:04 │ 101570 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 15.3 │ +│ 12126 │ BMP180 │ 6126 │ 57.908 │ 16.49 │ 2019-06-01T00:00:05 │ 101802.56 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 8.07 │ +│ 15845 │ BMP180 │ 8022 │ 52.498 │ 13.466 │ 2019-06-01T00:00:05 │ 101878 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 23 │ +│ 16415 │ BMP180 │ 8316 │ 49.312 │ 6.744 │ 2019-06-01T00:00:06 │ 100176 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 14.7 │ +│ 7389 │ BMP180 │ 3735 │ 50.136 │ 11.062 │ 2019-06-01T00:00:06 │ 98905 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 12.1 │ +│ 13199 │ BMP180 │ 6664 │ 52.514 │ 13.44 │ 2019-06-01T00:00:07 │ 101855.54 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 19.74 │ +│ 12753 │ BMP180 │ 6440 │ 44.616 │ 2.032 │ 2019-06-01T00:00:07 │ 99475 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 17 │ +│ 16956 │ BMP180 │ 8594 │ 52.052 │ 8.354 │ 2019-06-01T00:00:08 │ 101322 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 17.2 │ +└───────────┴─────────────┴──────────┴────────┴───────┴─────────────────────┴──────────┴──────────┴───────────────────┴─────────────┘ +``` + +2. We will use the following `MergeTree` table to store the data in ClickHouse: + +```sql +CREATE TABLE sensors +( + sensor_id UInt16, + sensor_type Enum('BME280', 'BMP180', 'BMP280', 'DHT22', 'DS18B20', 'HPM', 'HTU21D', 'PMS1003', 'PMS3003', 'PMS5003', 'PMS6003', 'PMS7003', 'PPD42NS', 'SDS011'), + location UInt32, + lat Float32, + lon Float32, + timestamp DateTime, + P1 Float32, + P2 Float32, + P0 Float32, + durP1 Float32, + ratioP1 Float32, + durP2 Float32, + ratioP2 Float32, + pressure Float32, + altitude Float32, + pressure_sealevel Float32, + temperature Float32, + humidity Float32, + date Date MATERIALIZED toDate(timestamp) +) +ENGINE = MergeTree +ORDER BY (timestamp, sensor_id); +``` + +3. ClickHouse Cloud services have a cluster named `default`. We will use the `s3Cluster` table function, which reads S3 files in parallel from the nodes in your cluster. (If you do not have a cluster, just use the `s3` function and remove the cluster name.) + +This query will take a while - it's about 1.67T of data uncompressed: + +```sql +INSERT INTO sensors + SELECT * + FROM s3Cluster( + 'default', + 'https://clickhouse-public-datasets.s3.amazonaws.com/sensors/monthly/*.csv.zst', + 'CSVWithNames', + $$ sensor_id UInt16, + sensor_type String, + location UInt32, + lat Float32, + lon Float32, + timestamp DateTime, + P1 Float32, + P2 Float32, + P0 Float32, + durP1 Float32, + ratioP1 Float32, + durP2 Float32, + ratioP2 Float32, + pressure Float32, + altitude Float32, + pressure_sealevel Float32, + temperature Float32, + humidity Float32 $$ + ) +SETTINGS + format_csv_delimiter = ';', + input_format_allow_errors_ratio = '0.5', + input_format_allow_errors_num = 10000, + input_format_parallel_parsing = 0, + date_time_input_format = 'best_effort', + max_insert_threads = 32, + parallel_distributed_insert_select = 1; +``` + +Here is the response - showing the number of rows and the speed of processing. It is input at a rate of over 6M rows per second! + +```response +0 rows in set. Elapsed: 3419.330 sec. Processed 20.69 billion rows, 1.67 TB (6.05 million rows/s., 488.52 MB/s.) +``` + +4. 
Let's see how much disk storage is needed for the `sensors` table: + +```sql +SELECT + disk_name, + formatReadableSize(sum(data_compressed_bytes) AS size) AS compressed, + formatReadableSize(sum(data_uncompressed_bytes) AS usize) AS uncompressed, + round(usize / size, 2) AS compr_rate, + sum(rows) AS rows, + count() AS part_count +FROM system.parts +WHERE (active = 1) AND (table = 'sensors') +GROUP BY + disk_name +ORDER BY size DESC; +``` + +The 1.67T is compressed down to 310 GiB, and there are 20.69 billion rows: + +```response +┌─disk_name─┬─compressed─┬─uncompressed─┬─compr_rate─┬────────rows─┬─part_count─┐ +│ s3disk │ 310.21 GiB │ 1.30 TiB │ 4.29 │ 20693971809 │ 472 │ +└───────────┴────────────┴──────────────┴────────────┴─────────────┴────────────┘ +``` + +5. Let's analyze the data now that it's in ClickHouse. Notice the quantity of data increases over time as more sensors are deployed: + +```sql +SELECT + date, + count() +FROM sensors +GROUP BY date +ORDER BY date ASC; +``` + +We can create a chart in the SQL Console to visualize the results: + +![Number of events per day](./images/sensors_01.png) + +6. This query counts the number of overly hot and humid days: + +```sql +WITH + toYYYYMMDD(timestamp) AS day +SELECT day, count() FROM sensors +WHERE temperature >= 40 AND temperature <= 50 AND humidity >= 90 +GROUP BY day +ORDER BY day asc; +``` + +Here's a visualization of the result: + +![Hot and humid days](./images/sensors_02.png) + diff --git a/docs/en/getting-started/example-datasets/github.md b/docs/en/getting-started/example-datasets/github.md index 239637a34e9..02965ed5e33 100644 --- a/docs/en/getting-started/example-datasets/github.md +++ b/docs/en/getting-started/example-datasets/github.md @@ -1,12 +1,13 @@ --- slug: /en/getting-started/example-datasets/github -sidebar_label: GitHub Repo Analysis +sidebar_label: Github Repo +sidebar_position: 1 description: Analyze the ClickHouse GitHub repo or any repository of your choosing --- -# ClickHouse GitHub data +# Writing Queries in ClickHouse using GitHub Data -This dataset contains all of the commits and changes for the ClickHouse repository. It can be generated using the native `git-import` tool distributed with ClickHouse. +This dataset contains all of the commits and changes for the ClickHouse repository. It can be generated using the native `git-import` tool distributed with ClickHouse. The generated data provides a `tsv` file for each of the following tables: @@ -323,7 +324,7 @@ Note a more complex variant of this query exists where we find the [line-by-line ## Find the current active files -This is important for later analysis when we only want to consider the current files in the repository. We estimate this set as the files which haven't been renamed or deleted (and then re-added/re-named). +This is important for later analysis when we only want to consider the current files in the repository. We estimate this set as the files which haven't been renamed or deleted (and then re-added/re-named). **Note there appears to have been a broken commit history in relation to files under the `dbms`, `libs`, `tests/testflows/` directories during their renames. We also thus exclude these.** @@ -417,7 +418,7 @@ git ls-files | grep -v -E 'generated\.cpp|^(contrib|docs?|website|libs/(libcityh The difference here is caused by a few factors: -- A rename can occur alongside other modifications to the file. These are listed as separate events in file_changes but with the same time. 
The `argMax` function has no way of distinguishing these - it picks the first value. The natural ordering of the inserts (the only means of knowing the correct order) is not maintained across the union so modified events can be selected. For example, below the `src/Functions/geometryFromColumn.h` file has several modifications before being renamed to `src/Functions/geometryConverters.h`. Our current solution may pick a Modify event as the latest change causing `src/Functions/geometryFromColumn.h` to be retained. +- A rename can occur alongside other modifications to the file. These are listed as separate events in file_changes but with the same time. The `argMax` function has no way of distinguishing these - it picks the first value. The natural ordering of the inserts (the only means of knowing the correct order) is not maintained across the union so modified events can be selected. For example, below the `src/Functions/geometryFromColumn.h` file has several modifications before being renamed to `src/Functions/geometryConverters.h`. Our current solution may pick a Modify event as the latest change causing `src/Functions/geometryFromColumn.h` to be retained. [play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICAgIGNoYW5nZV90eXBlLAogICAgICBwYXRoLAogICAgICBvbGRfcGF0aCwKICAgICAgdGltZSwKICAgICAgY29tbWl0X2hhc2gKICBGUk9NIGdpdF9jbGlja2hvdXNlLmZpbGVfY2hhbmdlcwogIFdIRVJFIChwYXRoID0gJ3NyYy9GdW5jdGlvbnMvZ2VvbWV0cnlGcm9tQ29sdW1uLmgnKSBPUiAob2xkX3BhdGggPSAnc3JjL0Z1bmN0aW9ucy9nZW9tZXRyeUZyb21Db2x1bW4uaCcpCg==) @@ -1386,7 +1387,7 @@ LIMIT 1 BY day_of_week 7 rows in set. Elapsed: 0.004 sec. Processed 21.82 thousand rows, 140.02 KB (4.88 million rows/s., 31.29 MB/s.) ``` -This is still a little simple and doesn't reflect people's work. +This is still a little simple and doesn't reflect people's work. A better metric might be who is the top contributor each day as a fraction of the total work performed in the last year. Note that we treat the deletion and adding code equally. @@ -1952,7 +1953,7 @@ SELECT Most contributors write more code than tests, as you'd expect. -What about who adds the most comments when contributing code? +What about who adds the most comments when contributing code? [play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBhdXRob3IsCiAgICBhdmcocmF0aW9fY29tbWVudHMpIEFTIGF2Z19yYXRpb19jb21tZW50cywKICAgIHN1bShjb2RlKSBBUyBjb2RlCkZST00KKAogICAgU0VMRUNUCiAgICAgICAgYXV0aG9yLAogICAgICAgIGNvbW1pdF9oYXNoLAogICAgICAgIGNvdW50SWYobGluZV90eXBlID0gJ0NvbW1lbnQnKSBBUyBjb21tZW50cywKICAgICAgICBjb3VudElmKGxpbmVfdHlwZSA9ICdDb2RlJykgQVMgY29kZSwKICAgICAgICBpZihjb21tZW50cyA+IDAsIGNvbW1lbnRzIC8gKGNvbW1lbnRzICsgY29kZSksIDApIEFTIHJhdGlvX2NvbW1lbnRzCiAgICBGUk9NIGdpdF9jbGlja2hvdXNlLmxpbmVfY2hhbmdlcwogICAgR1JPVVAgQlkKICAgICAgICBhdXRob3IsCiAgICAgICAgY29tbWl0X2hhc2gKKQpHUk9VUCBCWSBhdXRob3IKT1JERVIgQlkgY29kZSBERVNDCkxJTUlUIDEwCg==) @@ -2393,7 +2394,7 @@ WHERE (path = 'src/Storages/StorageReplicatedMergeTree.cpp') AND (change_type = This makes viewing the full history of a file challenging since we don't have a single value connecting all line or file changes. -To address this, we can use User Defined Functions (UDFs). These cannot, currently, be recursive, so to identify the history of a file we must define a series of UDFs which call each other explicitly. +To address this, we can use User Defined Functions (UDFs). These cannot, currently, be recursive, so to identify the history of a file we must define a series of UDFs which call each other explicitly. 
This means we can only track renames to a maximum depth - the below example is 5 deep. It is unlikely a file will be renamed more times than this, so for now, this is sufficient. @@ -2498,7 +2499,9 @@ LIMIT 20 We welcome exact and improved solutions here. -# Related Content +## Related Content -- [Git commits and our community](https://clickhouse.com/blog/clickhouse-git-community-commits) -- [Window and array functions for Git commit sequences](https://clickhouse.com/blog/clickhouse-window-array-functions-git-commits) +- Blog: [Git commits and our community](https://clickhouse.com/blog/clickhouse-git-community-commits) +- Blog: [Window and array functions for Git commit sequences](https://clickhouse.com/blog/clickhouse-window-array-functions-git-commits) +- Blog: [Building a Real-time Analytics Apps with ClickHouse and Hex](https://clickhouse.com/blog/building-real-time-applications-with-clickhouse-and-hex-notebook-keeper-engine) +- Blog: [A Story of Open-source GitHub Activity using ClickHouse + Grafana](https://clickhouse.com/blog/introduction-to-clickhouse-and-grafana-webinar) diff --git a/docs/en/getting-started/example-datasets/images/sensors_01.png b/docs/en/getting-started/example-datasets/images/sensors_01.png new file mode 100644 index 00000000000..1804bda6d1b Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/sensors_01.png differ diff --git a/docs/en/getting-started/example-datasets/images/sensors_02.png b/docs/en/getting-started/example-datasets/images/sensors_02.png new file mode 100644 index 00000000000..8226f4578d0 Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/sensors_02.png differ diff --git a/docs/en/getting-started/example-datasets/metrica.md b/docs/en/getting-started/example-datasets/metrica.md index e966f6c20d6..e21237f39bb 100644 --- a/docs/en/getting-started/example-datasets/metrica.md +++ b/docs/en/getting-started/example-datasets/metrica.md @@ -84,7 +84,7 @@ clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1" 1680609 ``` -## An example JOIN +## An example JOIN The hits and visits dataset is used in the ClickHouse test routines, this is one of the queries from the test suite. The rest @@ -131,10 +131,10 @@ FORMAT PrettyCompact" ## Next Steps -[A Practical Introduction to Sparse Primary Indexes in ClickHouse](../../guides/improving-query-performance/sparse-primary-indexes/sparse-primary-indexes-intro.md) uses the hits dataset to discuss the differences in ClickHouse indexing compared to traditional relational databases, how ClickHouse builds and uses a sparse primary index, and indexing best practices. +[A Practical Introduction to Sparse Primary Indexes in ClickHouse](/docs/en/guides/best-practices/sparse-primary-indexes.md) uses the hits dataset to discuss the differences in ClickHouse indexing compared to traditional relational databases, how ClickHouse builds and uses a sparse primary index, and indexing best practices. Additional examples of queries to these tables can be found among the ClickHouse [stateful tests](https://github.com/ClickHouse/ClickHouse/blob/d7129855757f38ceec3e4ecc6dafacdabe9b178f/tests/queries/1_stateful/00172_parallel_join.sql). :::note -The test suite uses a database name `test`, and the tables are named `hits` and `visits`. You can rename your database and tables, or edit the SQL from the test file. +The test suite uses a database name `test`, and the tables are named `hits` and `visits`. You can rename your database and tables, or edit the SQL from the test file. 
::: diff --git a/docs/en/getting-started/example-datasets/nyc-taxi.md b/docs/en/getting-started/example-datasets/nyc-taxi.md index 69098f63037..9730faa873c 100644 --- a/docs/en/getting-started/example-datasets/nyc-taxi.md +++ b/docs/en/getting-started/example-datasets/nyc-taxi.md @@ -5,17 +5,19 @@ sidebar_position: 2 description: Data for billions of taxi and for-hire vehicle (Uber, Lyft, etc.) trips originating in New York City since 2009 --- +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + # New York Taxi Data The New York taxi data consists of 3+ billion taxi and for-hire vehicle (Uber, Lyft, etc.) trips originating in New York City since 2009. The dataset can be obtained in a couple of ways: -- insert the data directly into ClickHouse Cloud from S3 +- insert the data directly into ClickHouse Cloud from S3 or GCS - download prepared partitions -## Retrieve the Data from S3 +## Create the table trips -Let's grab a small subset of the data for getting familiar with it. The data is in TSV files in AWS S3, which is easily streamed into -ClickHouse Cloud using the `s3` table function. Start by creating a table for the taxi rides: +Start by creating a table for the taxi rides: ```sql CREATE TABLE trips ( @@ -38,9 +40,50 @@ CREATE TABLE trips ( dropoff_ntaname LowCardinality(String) ) ENGINE = MergeTree -PRIMARY KEY (pickup_datetime, dropoff_datetime) +PRIMARY KEY (pickup_datetime, dropoff_datetime); ``` +## Load the Data directly from Object Storage + +Let's grab a small subset of the data for getting familiar with it. The data is in TSV files in object storage, which is easily streamed into +ClickHouse Cloud using the `s3` table function. + +The same data is stored in both S3 and GCS; choose either tab. + + + + +The following command streams three files from a GCS bucket into the `trips` table (the `{0..2}` syntax is a wildcard for the values 0, 1, and 2): + +```sql +INSERT INTO trips +SELECT + trip_id, + pickup_datetime, + dropoff_datetime, + pickup_longitude, + pickup_latitude, + dropoff_longitude, + dropoff_latitude, + passenger_count, + trip_distance, + fare_amount, + extra, + tip_amount, + tolls_amount, + total_amount, + payment_type, + pickup_ntaname, + dropoff_ntaname +FROM s3( + 'https://storage.googleapis.com/clickhouse-public-datasets/nyc-taxi/trips_{0..2}.gz', + 'TabSeparatedWithNames' +); +``` + + + + The following command streams three files from an S3 bucket into the `trips` table (the `{0..2}` syntax is a wildcard for the values 0, 1, and 2): ```sql @@ -66,14 +109,19 @@ SELECT FROM s3( 'https://datasets-documentation.s3.eu-west-3.amazonaws.com/nyc-taxi/trips_{0..2}.gz', 'TabSeparatedWithNames' -) +); ``` + + + +## Sample Queries + Let's see how many rows were inserted: ```sql SELECT count() -FROM trips +FROM trips; ``` Each TSV file has about 1M rows, and the three files have 3,000,317 rows. Let's look at a few rows: @@ -81,7 +129,7 @@ Each TSV file has about 1M rows, and the three files have 3,000,317 rows. 
Let's ```sql SELECT * FROM trips -LIMIT 10 +LIMIT 10; ``` Notice there are columns for the pickup and dropoff dates, geo coordinates, fare details, New York neighborhoods, and more: @@ -110,7 +158,7 @@ SELECT FROM trips GROUP BY pickup_ntaname ORDER BY count DESC -LIMIT 10 +LIMIT 10; ``` The result is: @@ -137,7 +185,7 @@ SELECT passenger_count, avg(total_amount) FROM trips -GROUP BY passenger_count +GROUP BY passenger_count; ``` ```response @@ -165,7 +213,7 @@ SELECT count(*) FROM trips GROUP BY passenger_count, year, distance -ORDER BY year, count(*) DESC +ORDER BY year, count(*) DESC; ``` The first part of the result is: @@ -189,6 +237,10 @@ The first part of the result is: ## Download of Prepared Partitions {#download-of-prepared-partitions} +:::note +The following steps provide information about the original dataset, and a method for loading prepared partitions into a self-managed ClickHouse server environment. +::: + See https://github.com/toddwschneider/nyc-taxi-data and http://tech.marksblogg.com/billion-nyc-taxi-rides-redshift.html for the description of a dataset and instructions for downloading. Downloading will result in about 227 GB of uncompressed data in CSV files. The download takes about an hour over a 1 Gbit connection (parallel downloading from s3.amazonaws.com recovers at least half of a 1 Gbit channel). @@ -211,7 +263,7 @@ If you will run the queries described below, you have to use the full table name Q1: ``` sql -SELECT cab_type, count(*) FROM trips_mergetree GROUP BY cab_type +SELECT cab_type, count(*) FROM trips_mergetree GROUP BY cab_type; ``` 0.490 seconds. @@ -219,7 +271,7 @@ SELECT cab_type, count(*) FROM trips_mergetree GROUP BY cab_type Q2: ``` sql -SELECT passenger_count, avg(total_amount) FROM trips_mergetree GROUP BY passenger_count +SELECT passenger_count, avg(total_amount) FROM trips_mergetree GROUP BY passenger_count; ``` 1.224 seconds. @@ -227,7 +279,7 @@ SELECT passenger_count, avg(total_amount) FROM trips_mergetree GROUP BY passenge Q3: ``` sql -SELECT passenger_count, toYear(pickup_date) AS year, count(*) FROM trips_mergetree GROUP BY passenger_count, year +SELECT passenger_count, toYear(pickup_date) AS year, count(*) FROM trips_mergetree GROUP BY passenger_count, year; ``` 2.104 seconds. @@ -238,7 +290,7 @@ Q4: SELECT passenger_count, toYear(pickup_date) AS year, round(trip_distance) AS distance, count(*) FROM trips_mergetree GROUP BY passenger_count, year, distance -ORDER BY year, count(*) DESC +ORDER BY year, count(*) DESC; ``` 3.593 seconds. 
@@ -254,19 +306,19 @@ Creating a table on three servers: On each server: ``` sql -CREATE TABLE default.trips_mergetree_third ( trip_id UInt32, vendor_id Enum8('1' = 1, '2' = 2, 'CMT' = 3, 'VTS' = 4, 'DDS' = 5, 'B02512' = 10, 'B02598' = 11, 'B02617' = 12, 'B02682' = 13, 'B02764' = 14), pickup_date Date, pickup_datetime DateTime, dropoff_date Date, dropoff_datetime DateTime, store_and_fwd_flag UInt8, rate_code_id UInt8, pickup_longitude Float64, pickup_latitude Float64, dropoff_longitude Float64, dropoff_latitude Float64, passenger_count UInt8, trip_distance Float64, fare_amount Float32, extra Float32, mta_tax Float32, tip_amount Float32, tolls_amount Float32, ehail_fee Float32, improvement_surcharge Float32, total_amount Float32, payment_type_ Enum8('UNK' = 0, 'CSH' = 1, 'CRE' = 2, 'NOC' = 3, 'DIS' = 4), trip_type UInt8, pickup FixedString(25), dropoff FixedString(25), cab_type Enum8('yellow' = 1, 'green' = 2, 'uber' = 3), pickup_nyct2010_gid UInt8, pickup_ctlabel Float32, pickup_borocode UInt8, pickup_boroname Enum8('' = 0, 'Manhattan' = 1, 'Bronx' = 2, 'Brooklyn' = 3, 'Queens' = 4, 'Staten Island' = 5), pickup_ct2010 FixedString(6), pickup_boroct2010 FixedString(7), pickup_cdeligibil Enum8(' ' = 0, 'E' = 1, 'I' = 2), pickup_ntacode FixedString(4), pickup_ntaname Enum16('' = 0, 'Airport' = 1, 'Allerton-Pelham Gardens' = 2, 'Annadale-Huguenot-Prince\'s Bay-Eltingville' = 3, 'Arden Heights' = 4, 'Astoria' = 5, 'Auburndale' = 6, 'Baisley Park' = 7, 'Bath Beach' = 8, 'Battery Park City-Lower Manhattan' = 9, 'Bay Ridge' = 10, 'Bayside-Bayside Hills' = 11, 'Bedford' = 12, 'Bedford Park-Fordham North' = 13, 'Bellerose' = 14, 'Belmont' = 15, 'Bensonhurst East' = 16, 'Bensonhurst West' = 17, 'Borough Park' = 18, 'Breezy Point-Belle Harbor-Rockaway Park-Broad Channel' = 19, 'Briarwood-Jamaica Hills' = 20, 'Brighton Beach' = 21, 'Bronxdale' = 22, 'Brooklyn Heights-Cobble Hill' = 23, 'Brownsville' = 24, 'Bushwick North' = 25, 'Bushwick South' = 26, 'Cambria Heights' = 27, 'Canarsie' = 28, 'Carroll Gardens-Columbia Street-Red Hook' = 29, 'Central Harlem North-Polo Grounds' = 30, 'Central Harlem South' = 31, 'Charleston-Richmond Valley-Tottenville' = 32, 'Chinatown' = 33, 'Claremont-Bathgate' = 34, 'Clinton' = 35, 'Clinton Hill' = 36, 'Co-op City' = 37, 'College Point' = 38, 'Corona' = 39, 'Crotona Park East' = 40, 'Crown Heights North' = 41, 'Crown Heights South' = 42, 'Cypress Hills-City Line' = 43, 'DUMBO-Vinegar Hill-Downtown Brooklyn-Boerum Hill' = 44, 'Douglas Manor-Douglaston-Little Neck' = 45, 'Dyker Heights' = 46, 'East Concourse-Concourse Village' = 47, 'East Elmhurst' = 48, 'East Flatbush-Farragut' = 49, 'East Flushing' = 50, 'East Harlem North' = 51, 'East Harlem South' = 52, 'East New York' = 53, 'East New York (Pennsylvania Ave)' = 54, 'East Tremont' = 55, 'East Village' = 56, 'East Williamsburg' = 57, 'Eastchester-Edenwald-Baychester' = 58, 'Elmhurst' = 59, 'Elmhurst-Maspeth' = 60, 'Erasmus' = 61, 'Far Rockaway-Bayswater' = 62, 'Flatbush' = 63, 'Flatlands' = 64, 'Flushing' = 65, 'Fordham South' = 66, 'Forest Hills' = 67, 'Fort Greene' = 68, 'Fresh Meadows-Utopia' = 69, 'Ft. Totten-Bay Terrace-Clearview' = 70, 'Georgetown-Marine Park-Bergen Beach-Mill Basin' = 71, 'Glen Oaks-Floral Park-New Hyde Park' = 72, 'Glendale' = 73, 'Gramercy' = 74, 'Grasmere-Arrochar-Ft. 
Wadsworth' = 75, 'Gravesend' = 76, 'Great Kills' = 77, 'Greenpoint' = 78, 'Grymes Hill-Clifton-Fox Hills' = 79, 'Hamilton Heights' = 80, 'Hammels-Arverne-Edgemere' = 81, 'Highbridge' = 82, 'Hollis' = 83, 'Homecrest' = 84, 'Hudson Yards-Chelsea-Flatiron-Union Square' = 85, 'Hunters Point-Sunnyside-West Maspeth' = 86, 'Hunts Point' = 87, 'Jackson Heights' = 88, 'Jamaica' = 89, 'Jamaica Estates-Holliswood' = 90, 'Kensington-Ocean Parkway' = 91, 'Kew Gardens' = 92, 'Kew Gardens Hills' = 93, 'Kingsbridge Heights' = 94, 'Laurelton' = 95, 'Lenox Hill-Roosevelt Island' = 96, 'Lincoln Square' = 97, 'Lindenwood-Howard Beach' = 98, 'Longwood' = 99, 'Lower East Side' = 100, 'Madison' = 101, 'Manhattanville' = 102, 'Marble Hill-Inwood' = 103, 'Mariner\'s Harbor-Arlington-Port Ivory-Graniteville' = 104, 'Maspeth' = 105, 'Melrose South-Mott Haven North' = 106, 'Middle Village' = 107, 'Midtown-Midtown South' = 108, 'Midwood' = 109, 'Morningside Heights' = 110, 'Morrisania-Melrose' = 111, 'Mott Haven-Port Morris' = 112, 'Mount Hope' = 113, 'Murray Hill' = 114, 'Murray Hill-Kips Bay' = 115, 'New Brighton-Silver Lake' = 116, 'New Dorp-Midland Beach' = 117, 'New Springville-Bloomfield-Travis' = 118, 'North Corona' = 119, 'North Riverdale-Fieldston-Riverdale' = 120, 'North Side-South Side' = 121, 'Norwood' = 122, 'Oakland Gardens' = 123, 'Oakwood-Oakwood Beach' = 124, 'Ocean Hill' = 125, 'Ocean Parkway South' = 126, 'Old Astoria' = 127, 'Old Town-Dongan Hills-South Beach' = 128, 'Ozone Park' = 129, 'Park Slope-Gowanus' = 130, 'Parkchester' = 131, 'Pelham Bay-Country Club-City Island' = 132, 'Pelham Parkway' = 133, 'Pomonok-Flushing Heights-Hillcrest' = 134, 'Port Richmond' = 135, 'Prospect Heights' = 136, 'Prospect Lefferts Gardens-Wingate' = 137, 'Queens Village' = 138, 'Queensboro Hill' = 139, 'Queensbridge-Ravenswood-Long Island City' = 140, 'Rego Park' = 141, 'Richmond Hill' = 142, 'Ridgewood' = 143, 'Rikers Island' = 144, 'Rosedale' = 145, 'Rossville-Woodrow' = 146, 'Rugby-Remsen Village' = 147, 'Schuylerville-Throgs Neck-Edgewater Park' = 148, 'Seagate-Coney Island' = 149, 'Sheepshead Bay-Gerritsen Beach-Manhattan Beach' = 150, 'SoHo-TriBeCa-Civic Center-Little Italy' = 151, 'Soundview-Bruckner' = 152, 'Soundview-Castle Hill-Clason Point-Harding Park' = 153, 'South Jamaica' = 154, 'South Ozone Park' = 155, 'Springfield Gardens North' = 156, 'Springfield Gardens South-Brookville' = 157, 'Spuyten Duyvil-Kingsbridge' = 158, 'St. Albans' = 159, 'Stapleton-Rosebank' = 160, 'Starrett City' = 161, 'Steinway' = 162, 'Stuyvesant Heights' = 163, 'Stuyvesant Town-Cooper Village' = 164, 'Sunset Park East' = 165, 'Sunset Park West' = 166, 'Todt Hill-Emerson Hill-Heartland Village-Lighthouse Hill' = 167, 'Turtle Bay-East Midtown' = 168, 'University Heights-Morris Heights' = 169, 'Upper East Side-Carnegie Hill' = 170, 'Upper West Side' = 171, 'Van Cortlandt Village' = 172, 'Van Nest-Morris Park-Westchester Square' = 173, 'Washington Heights North' = 174, 'Washington Heights South' = 175, 'West Brighton' = 176, 'West Concourse' = 177, 'West Farms-Bronx River' = 178, 'West New Brighton-New Brighton-St. 
George' = 179, 'West Village' = 180, 'Westchester-Unionport' = 181, 'Westerleigh' = 182, 'Whitestone' = 183, 'Williamsbridge-Olinville' = 184, 'Williamsburg' = 185, 'Windsor Terrace' = 186, 'Woodhaven' = 187, 'Woodlawn-Wakefield' = 188, 'Woodside' = 189, 'Yorkville' = 190, 'park-cemetery-etc-Bronx' = 191, 'park-cemetery-etc-Brooklyn' = 192, 'park-cemetery-etc-Manhattan' = 193, 'park-cemetery-etc-Queens' = 194, 'park-cemetery-etc-Staten Island' = 195), pickup_puma UInt16, dropoff_nyct2010_gid UInt8, dropoff_ctlabel Float32, dropoff_borocode UInt8, dropoff_boroname Enum8('' = 0, 'Manhattan' = 1, 'Bronx' = 2, 'Brooklyn' = 3, 'Queens' = 4, 'Staten Island' = 5), dropoff_ct2010 FixedString(6), dropoff_boroct2010 FixedString(7), dropoff_cdeligibil Enum8(' ' = 0, 'E' = 1, 'I' = 2), dropoff_ntacode FixedString(4), dropoff_ntaname Enum16('' = 0, 'Airport' = 1, 'Allerton-Pelham Gardens' = 2, 'Annadale-Huguenot-Prince\'s Bay-Eltingville' = 3, 'Arden Heights' = 4, 'Astoria' = 5, 'Auburndale' = 6, 'Baisley Park' = 7, 'Bath Beach' = 8, 'Battery Park City-Lower Manhattan' = 9, 'Bay Ridge' = 10, 'Bayside-Bayside Hills' = 11, 'Bedford' = 12, 'Bedford Park-Fordham North' = 13, 'Bellerose' = 14, 'Belmont' = 15, 'Bensonhurst East' = 16, 'Bensonhurst West' = 17, 'Borough Park' = 18, 'Breezy Point-Belle Harbor-Rockaway Park-Broad Channel' = 19, 'Briarwood-Jamaica Hills' = 20, 'Brighton Beach' = 21, 'Bronxdale' = 22, 'Brooklyn Heights-Cobble Hill' = 23, 'Brownsville' = 24, 'Bushwick North' = 25, 'Bushwick South' = 26, 'Cambria Heights' = 27, 'Canarsie' = 28, 'Carroll Gardens-Columbia Street-Red Hook' = 29, 'Central Harlem North-Polo Grounds' = 30, 'Central Harlem South' = 31, 'Charleston-Richmond Valley-Tottenville' = 32, 'Chinatown' = 33, 'Claremont-Bathgate' = 34, 'Clinton' = 35, 'Clinton Hill' = 36, 'Co-op City' = 37, 'College Point' = 38, 'Corona' = 39, 'Crotona Park East' = 40, 'Crown Heights North' = 41, 'Crown Heights South' = 42, 'Cypress Hills-City Line' = 43, 'DUMBO-Vinegar Hill-Downtown Brooklyn-Boerum Hill' = 44, 'Douglas Manor-Douglaston-Little Neck' = 45, 'Dyker Heights' = 46, 'East Concourse-Concourse Village' = 47, 'East Elmhurst' = 48, 'East Flatbush-Farragut' = 49, 'East Flushing' = 50, 'East Harlem North' = 51, 'East Harlem South' = 52, 'East New York' = 53, 'East New York (Pennsylvania Ave)' = 54, 'East Tremont' = 55, 'East Village' = 56, 'East Williamsburg' = 57, 'Eastchester-Edenwald-Baychester' = 58, 'Elmhurst' = 59, 'Elmhurst-Maspeth' = 60, 'Erasmus' = 61, 'Far Rockaway-Bayswater' = 62, 'Flatbush' = 63, 'Flatlands' = 64, 'Flushing' = 65, 'Fordham South' = 66, 'Forest Hills' = 67, 'Fort Greene' = 68, 'Fresh Meadows-Utopia' = 69, 'Ft. Totten-Bay Terrace-Clearview' = 70, 'Georgetown-Marine Park-Bergen Beach-Mill Basin' = 71, 'Glen Oaks-Floral Park-New Hyde Park' = 72, 'Glendale' = 73, 'Gramercy' = 74, 'Grasmere-Arrochar-Ft. 
Wadsworth' = 75, 'Gravesend' = 76, 'Great Kills' = 77, 'Greenpoint' = 78, 'Grymes Hill-Clifton-Fox Hills' = 79, 'Hamilton Heights' = 80, 'Hammels-Arverne-Edgemere' = 81, 'Highbridge' = 82, 'Hollis' = 83, 'Homecrest' = 84, 'Hudson Yards-Chelsea-Flatiron-Union Square' = 85, 'Hunters Point-Sunnyside-West Maspeth' = 86, 'Hunts Point' = 87, 'Jackson Heights' = 88, 'Jamaica' = 89, 'Jamaica Estates-Holliswood' = 90, 'Kensington-Ocean Parkway' = 91, 'Kew Gardens' = 92, 'Kew Gardens Hills' = 93, 'Kingsbridge Heights' = 94, 'Laurelton' = 95, 'Lenox Hill-Roosevelt Island' = 96, 'Lincoln Square' = 97, 'Lindenwood-Howard Beach' = 98, 'Longwood' = 99, 'Lower East Side' = 100, 'Madison' = 101, 'Manhattanville' = 102, 'Marble Hill-Inwood' = 103, 'Mariner\'s Harbor-Arlington-Port Ivory-Graniteville' = 104, 'Maspeth' = 105, 'Melrose South-Mott Haven North' = 106, 'Middle Village' = 107, 'Midtown-Midtown South' = 108, 'Midwood' = 109, 'Morningside Heights' = 110, 'Morrisania-Melrose' = 111, 'Mott Haven-Port Morris' = 112, 'Mount Hope' = 113, 'Murray Hill' = 114, 'Murray Hill-Kips Bay' = 115, 'New Brighton-Silver Lake' = 116, 'New Dorp-Midland Beach' = 117, 'New Springville-Bloomfield-Travis' = 118, 'North Corona' = 119, 'North Riverdale-Fieldston-Riverdale' = 120, 'North Side-South Side' = 121, 'Norwood' = 122, 'Oakland Gardens' = 123, 'Oakwood-Oakwood Beach' = 124, 'Ocean Hill' = 125, 'Ocean Parkway South' = 126, 'Old Astoria' = 127, 'Old Town-Dongan Hills-South Beach' = 128, 'Ozone Park' = 129, 'Park Slope-Gowanus' = 130, 'Parkchester' = 131, 'Pelham Bay-Country Club-City Island' = 132, 'Pelham Parkway' = 133, 'Pomonok-Flushing Heights-Hillcrest' = 134, 'Port Richmond' = 135, 'Prospect Heights' = 136, 'Prospect Lefferts Gardens-Wingate' = 137, 'Queens Village' = 138, 'Queensboro Hill' = 139, 'Queensbridge-Ravenswood-Long Island City' = 140, 'Rego Park' = 141, 'Richmond Hill' = 142, 'Ridgewood' = 143, 'Rikers Island' = 144, 'Rosedale' = 145, 'Rossville-Woodrow' = 146, 'Rugby-Remsen Village' = 147, 'Schuylerville-Throgs Neck-Edgewater Park' = 148, 'Seagate-Coney Island' = 149, 'Sheepshead Bay-Gerritsen Beach-Manhattan Beach' = 150, 'SoHo-TriBeCa-Civic Center-Little Italy' = 151, 'Soundview-Bruckner' = 152, 'Soundview-Castle Hill-Clason Point-Harding Park' = 153, 'South Jamaica' = 154, 'South Ozone Park' = 155, 'Springfield Gardens North' = 156, 'Springfield Gardens South-Brookville' = 157, 'Spuyten Duyvil-Kingsbridge' = 158, 'St. Albans' = 159, 'Stapleton-Rosebank' = 160, 'Starrett City' = 161, 'Steinway' = 162, 'Stuyvesant Heights' = 163, 'Stuyvesant Town-Cooper Village' = 164, 'Sunset Park East' = 165, 'Sunset Park West' = 166, 'Todt Hill-Emerson Hill-Heartland Village-Lighthouse Hill' = 167, 'Turtle Bay-East Midtown' = 168, 'University Heights-Morris Heights' = 169, 'Upper East Side-Carnegie Hill' = 170, 'Upper West Side' = 171, 'Van Cortlandt Village' = 172, 'Van Nest-Morris Park-Westchester Square' = 173, 'Washington Heights North' = 174, 'Washington Heights South' = 175, 'West Brighton' = 176, 'West Concourse' = 177, 'West Farms-Bronx River' = 178, 'West New Brighton-New Brighton-St. 
George' = 179, 'West Village' = 180, 'Westchester-Unionport' = 181, 'Westerleigh' = 182, 'Whitestone' = 183, 'Williamsbridge-Olinville' = 184, 'Williamsburg' = 185, 'Windsor Terrace' = 186, 'Woodhaven' = 187, 'Woodlawn-Wakefield' = 188, 'Woodside' = 189, 'Yorkville' = 190, 'park-cemetery-etc-Bronx' = 191, 'park-cemetery-etc-Brooklyn' = 192, 'park-cemetery-etc-Manhattan' = 193, 'park-cemetery-etc-Queens' = 194, 'park-cemetery-etc-Staten Island' = 195), dropoff_puma UInt16) ENGINE = MergeTree(pickup_date, pickup_datetime, 8192) +CREATE TABLE default.trips_mergetree_third ( trip_id UInt32, vendor_id Enum8('1' = 1, '2' = 2, 'CMT' = 3, 'VTS' = 4, 'DDS' = 5, 'B02512' = 10, 'B02598' = 11, 'B02617' = 12, 'B02682' = 13, 'B02764' = 14), pickup_date Date, pickup_datetime DateTime, dropoff_date Date, dropoff_datetime DateTime, store_and_fwd_flag UInt8, rate_code_id UInt8, pickup_longitude Float64, pickup_latitude Float64, dropoff_longitude Float64, dropoff_latitude Float64, passenger_count UInt8, trip_distance Float64, fare_amount Float32, extra Float32, mta_tax Float32, tip_amount Float32, tolls_amount Float32, ehail_fee Float32, improvement_surcharge Float32, total_amount Float32, payment_type_ Enum8('UNK' = 0, 'CSH' = 1, 'CRE' = 2, 'NOC' = 3, 'DIS' = 4), trip_type UInt8, pickup FixedString(25), dropoff FixedString(25), cab_type Enum8('yellow' = 1, 'green' = 2, 'uber' = 3), pickup_nyct2010_gid UInt8, pickup_ctlabel Float32, pickup_borocode UInt8, pickup_boroname Enum8('' = 0, 'Manhattan' = 1, 'Bronx' = 2, 'Brooklyn' = 3, 'Queens' = 4, 'Staten Island' = 5), pickup_ct2010 FixedString(6), pickup_boroct2010 FixedString(7), pickup_cdeligibil Enum8(' ' = 0, 'E' = 1, 'I' = 2), pickup_ntacode FixedString(4), pickup_ntaname Enum16('' = 0, 'Airport' = 1, 'Allerton-Pelham Gardens' = 2, 'Annadale-Huguenot-Prince\'s Bay-Eltingville' = 3, 'Arden Heights' = 4, 'Astoria' = 5, 'Auburndale' = 6, 'Baisley Park' = 7, 'Bath Beach' = 8, 'Battery Park City-Lower Manhattan' = 9, 'Bay Ridge' = 10, 'Bayside-Bayside Hills' = 11, 'Bedford' = 12, 'Bedford Park-Fordham North' = 13, 'Bellerose' = 14, 'Belmont' = 15, 'Bensonhurst East' = 16, 'Bensonhurst West' = 17, 'Borough Park' = 18, 'Breezy Point-Belle Harbor-Rockaway Park-Broad Channel' = 19, 'Briarwood-Jamaica Hills' = 20, 'Brighton Beach' = 21, 'Bronxdale' = 22, 'Brooklyn Heights-Cobble Hill' = 23, 'Brownsville' = 24, 'Bushwick North' = 25, 'Bushwick South' = 26, 'Cambria Heights' = 27, 'Canarsie' = 28, 'Carroll Gardens-Columbia Street-Red Hook' = 29, 'Central Harlem North-Polo Grounds' = 30, 'Central Harlem South' = 31, 'Charleston-Richmond Valley-Tottenville' = 32, 'Chinatown' = 33, 'Claremont-Bathgate' = 34, 'Clinton' = 35, 'Clinton Hill' = 36, 'Co-op City' = 37, 'College Point' = 38, 'Corona' = 39, 'Crotona Park East' = 40, 'Crown Heights North' = 41, 'Crown Heights South' = 42, 'Cypress Hills-City Line' = 43, 'DUMBO-Vinegar Hill-Downtown Brooklyn-Boerum Hill' = 44, 'Douglas Manor-Douglaston-Little Neck' = 45, 'Dyker Heights' = 46, 'East Concourse-Concourse Village' = 47, 'East Elmhurst' = 48, 'East Flatbush-Farragut' = 49, 'East Flushing' = 50, 'East Harlem North' = 51, 'East Harlem South' = 52, 'East New York' = 53, 'East New York (Pennsylvania Ave)' = 54, 'East Tremont' = 55, 'East Village' = 56, 'East Williamsburg' = 57, 'Eastchester-Edenwald-Baychester' = 58, 'Elmhurst' = 59, 'Elmhurst-Maspeth' = 60, 'Erasmus' = 61, 'Far Rockaway-Bayswater' = 62, 'Flatbush' = 63, 'Flatlands' = 64, 'Flushing' = 65, 'Fordham South' = 66, 'Forest Hills' = 67, 'Fort Greene' = 68, 
'Fresh Meadows-Utopia' = 69, 'Ft. Totten-Bay Terrace-Clearview' = 70, 'Georgetown-Marine Park-Bergen Beach-Mill Basin' = 71, 'Glen Oaks-Floral Park-New Hyde Park' = 72, 'Glendale' = 73, 'Gramercy' = 74, 'Grasmere-Arrochar-Ft. Wadsworth' = 75, 'Gravesend' = 76, 'Great Kills' = 77, 'Greenpoint' = 78, 'Grymes Hill-Clifton-Fox Hills' = 79, 'Hamilton Heights' = 80, 'Hammels-Arverne-Edgemere' = 81, 'Highbridge' = 82, 'Hollis' = 83, 'Homecrest' = 84, 'Hudson Yards-Chelsea-Flatiron-Union Square' = 85, 'Hunters Point-Sunnyside-West Maspeth' = 86, 'Hunts Point' = 87, 'Jackson Heights' = 88, 'Jamaica' = 89, 'Jamaica Estates-Holliswood' = 90, 'Kensington-Ocean Parkway' = 91, 'Kew Gardens' = 92, 'Kew Gardens Hills' = 93, 'Kingsbridge Heights' = 94, 'Laurelton' = 95, 'Lenox Hill-Roosevelt Island' = 96, 'Lincoln Square' = 97, 'Lindenwood-Howard Beach' = 98, 'Longwood' = 99, 'Lower East Side' = 100, 'Madison' = 101, 'Manhattanville' = 102, 'Marble Hill-Inwood' = 103, 'Mariner\'s Harbor-Arlington-Port Ivory-Graniteville' = 104, 'Maspeth' = 105, 'Melrose South-Mott Haven North' = 106, 'Middle Village' = 107, 'Midtown-Midtown South' = 108, 'Midwood' = 109, 'Morningside Heights' = 110, 'Morrisania-Melrose' = 111, 'Mott Haven-Port Morris' = 112, 'Mount Hope' = 113, 'Murray Hill' = 114, 'Murray Hill-Kips Bay' = 115, 'New Brighton-Silver Lake' = 116, 'New Dorp-Midland Beach' = 117, 'New Springville-Bloomfield-Travis' = 118, 'North Corona' = 119, 'North Riverdale-Fieldston-Riverdale' = 120, 'North Side-South Side' = 121, 'Norwood' = 122, 'Oakland Gardens' = 123, 'Oakwood-Oakwood Beach' = 124, 'Ocean Hill' = 125, 'Ocean Parkway South' = 126, 'Old Astoria' = 127, 'Old Town-Dongan Hills-South Beach' = 128, 'Ozone Park' = 129, 'Park Slope-Gowanus' = 130, 'Parkchester' = 131, 'Pelham Bay-Country Club-City Island' = 132, 'Pelham Parkway' = 133, 'Pomonok-Flushing Heights-Hillcrest' = 134, 'Port Richmond' = 135, 'Prospect Heights' = 136, 'Prospect Lefferts Gardens-Wingate' = 137, 'Queens Village' = 138, 'Queensboro Hill' = 139, 'Queensbridge-Ravenswood-Long Island City' = 140, 'Rego Park' = 141, 'Richmond Hill' = 142, 'Ridgewood' = 143, 'Rikers Island' = 144, 'Rosedale' = 145, 'Rossville-Woodrow' = 146, 'Rugby-Remsen Village' = 147, 'Schuylerville-Throgs Neck-Edgewater Park' = 148, 'Seagate-Coney Island' = 149, 'Sheepshead Bay-Gerritsen Beach-Manhattan Beach' = 150, 'SoHo-TriBeCa-Civic Center-Little Italy' = 151, 'Soundview-Bruckner' = 152, 'Soundview-Castle Hill-Clason Point-Harding Park' = 153, 'South Jamaica' = 154, 'South Ozone Park' = 155, 'Springfield Gardens North' = 156, 'Springfield Gardens South-Brookville' = 157, 'Spuyten Duyvil-Kingsbridge' = 158, 'St. Albans' = 159, 'Stapleton-Rosebank' = 160, 'Starrett City' = 161, 'Steinway' = 162, 'Stuyvesant Heights' = 163, 'Stuyvesant Town-Cooper Village' = 164, 'Sunset Park East' = 165, 'Sunset Park West' = 166, 'Todt Hill-Emerson Hill-Heartland Village-Lighthouse Hill' = 167, 'Turtle Bay-East Midtown' = 168, 'University Heights-Morris Heights' = 169, 'Upper East Side-Carnegie Hill' = 170, 'Upper West Side' = 171, 'Van Cortlandt Village' = 172, 'Van Nest-Morris Park-Westchester Square' = 173, 'Washington Heights North' = 174, 'Washington Heights South' = 175, 'West Brighton' = 176, 'West Concourse' = 177, 'West Farms-Bronx River' = 178, 'West New Brighton-New Brighton-St. 
George' = 179, 'West Village' = 180, 'Westchester-Unionport' = 181, 'Westerleigh' = 182, 'Whitestone' = 183, 'Williamsbridge-Olinville' = 184, 'Williamsburg' = 185, 'Windsor Terrace' = 186, 'Woodhaven' = 187, 'Woodlawn-Wakefield' = 188, 'Woodside' = 189, 'Yorkville' = 190, 'park-cemetery-etc-Bronx' = 191, 'park-cemetery-etc-Brooklyn' = 192, 'park-cemetery-etc-Manhattan' = 193, 'park-cemetery-etc-Queens' = 194, 'park-cemetery-etc-Staten Island' = 195), pickup_puma UInt16, dropoff_nyct2010_gid UInt8, dropoff_ctlabel Float32, dropoff_borocode UInt8, dropoff_boroname Enum8('' = 0, 'Manhattan' = 1, 'Bronx' = 2, 'Brooklyn' = 3, 'Queens' = 4, 'Staten Island' = 5), dropoff_ct2010 FixedString(6), dropoff_boroct2010 FixedString(7), dropoff_cdeligibil Enum8(' ' = 0, 'E' = 1, 'I' = 2), dropoff_ntacode FixedString(4), dropoff_ntaname Enum16('' = 0, 'Airport' = 1, 'Allerton-Pelham Gardens' = 2, 'Annadale-Huguenot-Prince\'s Bay-Eltingville' = 3, 'Arden Heights' = 4, 'Astoria' = 5, 'Auburndale' = 6, 'Baisley Park' = 7, 'Bath Beach' = 8, 'Battery Park City-Lower Manhattan' = 9, 'Bay Ridge' = 10, 'Bayside-Bayside Hills' = 11, 'Bedford' = 12, 'Bedford Park-Fordham North' = 13, 'Bellerose' = 14, 'Belmont' = 15, 'Bensonhurst East' = 16, 'Bensonhurst West' = 17, 'Borough Park' = 18, 'Breezy Point-Belle Harbor-Rockaway Park-Broad Channel' = 19, 'Briarwood-Jamaica Hills' = 20, 'Brighton Beach' = 21, 'Bronxdale' = 22, 'Brooklyn Heights-Cobble Hill' = 23, 'Brownsville' = 24, 'Bushwick North' = 25, 'Bushwick South' = 26, 'Cambria Heights' = 27, 'Canarsie' = 28, 'Carroll Gardens-Columbia Street-Red Hook' = 29, 'Central Harlem North-Polo Grounds' = 30, 'Central Harlem South' = 31, 'Charleston-Richmond Valley-Tottenville' = 32, 'Chinatown' = 33, 'Claremont-Bathgate' = 34, 'Clinton' = 35, 'Clinton Hill' = 36, 'Co-op City' = 37, 'College Point' = 38, 'Corona' = 39, 'Crotona Park East' = 40, 'Crown Heights North' = 41, 'Crown Heights South' = 42, 'Cypress Hills-City Line' = 43, 'DUMBO-Vinegar Hill-Downtown Brooklyn-Boerum Hill' = 44, 'Douglas Manor-Douglaston-Little Neck' = 45, 'Dyker Heights' = 46, 'East Concourse-Concourse Village' = 47, 'East Elmhurst' = 48, 'East Flatbush-Farragut' = 49, 'East Flushing' = 50, 'East Harlem North' = 51, 'East Harlem South' = 52, 'East New York' = 53, 'East New York (Pennsylvania Ave)' = 54, 'East Tremont' = 55, 'East Village' = 56, 'East Williamsburg' = 57, 'Eastchester-Edenwald-Baychester' = 58, 'Elmhurst' = 59, 'Elmhurst-Maspeth' = 60, 'Erasmus' = 61, 'Far Rockaway-Bayswater' = 62, 'Flatbush' = 63, 'Flatlands' = 64, 'Flushing' = 65, 'Fordham South' = 66, 'Forest Hills' = 67, 'Fort Greene' = 68, 'Fresh Meadows-Utopia' = 69, 'Ft. Totten-Bay Terrace-Clearview' = 70, 'Georgetown-Marine Park-Bergen Beach-Mill Basin' = 71, 'Glen Oaks-Floral Park-New Hyde Park' = 72, 'Glendale' = 73, 'Gramercy' = 74, 'Grasmere-Arrochar-Ft. 
Wadsworth' = 75, 'Gravesend' = 76, 'Great Kills' = 77, 'Greenpoint' = 78, 'Grymes Hill-Clifton-Fox Hills' = 79, 'Hamilton Heights' = 80, 'Hammels-Arverne-Edgemere' = 81, 'Highbridge' = 82, 'Hollis' = 83, 'Homecrest' = 84, 'Hudson Yards-Chelsea-Flatiron-Union Square' = 85, 'Hunters Point-Sunnyside-West Maspeth' = 86, 'Hunts Point' = 87, 'Jackson Heights' = 88, 'Jamaica' = 89, 'Jamaica Estates-Holliswood' = 90, 'Kensington-Ocean Parkway' = 91, 'Kew Gardens' = 92, 'Kew Gardens Hills' = 93, 'Kingsbridge Heights' = 94, 'Laurelton' = 95, 'Lenox Hill-Roosevelt Island' = 96, 'Lincoln Square' = 97, 'Lindenwood-Howard Beach' = 98, 'Longwood' = 99, 'Lower East Side' = 100, 'Madison' = 101, 'Manhattanville' = 102, 'Marble Hill-Inwood' = 103, 'Mariner\'s Harbor-Arlington-Port Ivory-Graniteville' = 104, 'Maspeth' = 105, 'Melrose South-Mott Haven North' = 106, 'Middle Village' = 107, 'Midtown-Midtown South' = 108, 'Midwood' = 109, 'Morningside Heights' = 110, 'Morrisania-Melrose' = 111, 'Mott Haven-Port Morris' = 112, 'Mount Hope' = 113, 'Murray Hill' = 114, 'Murray Hill-Kips Bay' = 115, 'New Brighton-Silver Lake' = 116, 'New Dorp-Midland Beach' = 117, 'New Springville-Bloomfield-Travis' = 118, 'North Corona' = 119, 'North Riverdale-Fieldston-Riverdale' = 120, 'North Side-South Side' = 121, 'Norwood' = 122, 'Oakland Gardens' = 123, 'Oakwood-Oakwood Beach' = 124, 'Ocean Hill' = 125, 'Ocean Parkway South' = 126, 'Old Astoria' = 127, 'Old Town-Dongan Hills-South Beach' = 128, 'Ozone Park' = 129, 'Park Slope-Gowanus' = 130, 'Parkchester' = 131, 'Pelham Bay-Country Club-City Island' = 132, 'Pelham Parkway' = 133, 'Pomonok-Flushing Heights-Hillcrest' = 134, 'Port Richmond' = 135, 'Prospect Heights' = 136, 'Prospect Lefferts Gardens-Wingate' = 137, 'Queens Village' = 138, 'Queensboro Hill' = 139, 'Queensbridge-Ravenswood-Long Island City' = 140, 'Rego Park' = 141, 'Richmond Hill' = 142, 'Ridgewood' = 143, 'Rikers Island' = 144, 'Rosedale' = 145, 'Rossville-Woodrow' = 146, 'Rugby-Remsen Village' = 147, 'Schuylerville-Throgs Neck-Edgewater Park' = 148, 'Seagate-Coney Island' = 149, 'Sheepshead Bay-Gerritsen Beach-Manhattan Beach' = 150, 'SoHo-TriBeCa-Civic Center-Little Italy' = 151, 'Soundview-Bruckner' = 152, 'Soundview-Castle Hill-Clason Point-Harding Park' = 153, 'South Jamaica' = 154, 'South Ozone Park' = 155, 'Springfield Gardens North' = 156, 'Springfield Gardens South-Brookville' = 157, 'Spuyten Duyvil-Kingsbridge' = 158, 'St. Albans' = 159, 'Stapleton-Rosebank' = 160, 'Starrett City' = 161, 'Steinway' = 162, 'Stuyvesant Heights' = 163, 'Stuyvesant Town-Cooper Village' = 164, 'Sunset Park East' = 165, 'Sunset Park West' = 166, 'Todt Hill-Emerson Hill-Heartland Village-Lighthouse Hill' = 167, 'Turtle Bay-East Midtown' = 168, 'University Heights-Morris Heights' = 169, 'Upper East Side-Carnegie Hill' = 170, 'Upper West Side' = 171, 'Van Cortlandt Village' = 172, 'Van Nest-Morris Park-Westchester Square' = 173, 'Washington Heights North' = 174, 'Washington Heights South' = 175, 'West Brighton' = 176, 'West Concourse' = 177, 'West Farms-Bronx River' = 178, 'West New Brighton-New Brighton-St. 
George' = 179, 'West Village' = 180, 'Westchester-Unionport' = 181, 'Westerleigh' = 182, 'Whitestone' = 183, 'Williamsbridge-Olinville' = 184, 'Williamsburg' = 185, 'Windsor Terrace' = 186, 'Woodhaven' = 187, 'Woodlawn-Wakefield' = 188, 'Woodside' = 189, 'Yorkville' = 190, 'park-cemetery-etc-Bronx' = 191, 'park-cemetery-etc-Brooklyn' = 192, 'park-cemetery-etc-Manhattan' = 193, 'park-cemetery-etc-Queens' = 194, 'park-cemetery-etc-Staten Island' = 195), dropoff_puma UInt16) ENGINE = MergeTree(pickup_date, pickup_datetime, 8192); ``` On the source server: ``` sql -CREATE TABLE trips_mergetree_x3 AS trips_mergetree_third ENGINE = Distributed(perftest, default, trips_mergetree_third, rand()) +CREATE TABLE trips_mergetree_x3 AS trips_mergetree_third ENGINE = Distributed(perftest, default, trips_mergetree_third, rand()); ``` The following query redistributes data: ``` sql -INSERT INTO trips_mergetree_x3 SELECT * FROM trips_mergetree +INSERT INTO trips_mergetree_x3 SELECT * FROM trips_mergetree; ``` This takes 2454 seconds. diff --git a/docs/en/getting-started/example-datasets/nypd_complaint_data.md b/docs/en/getting-started/example-datasets/nypd_complaint_data.md index 8b02ac23cf9..a178fe456a6 100644 --- a/docs/en/getting-started/example-datasets/nypd_complaint_data.md +++ b/docs/en/getting-started/example-datasets/nypd_complaint_data.md @@ -16,7 +16,7 @@ While working through this guide you will: The dataset used in this guide comes from the NYC Open Data team, and contains data about "all valid felony, misdemeanor, and violation crimes reported to the New York City Police Department (NYPD)". At the time of writing, the data file is 166MB, but it is updated regularly. -**Source**: [data.cityofnewyork.us](https://data.cityofnewyork.us/Public-Safety/NYPD-Complaint-Data-Current-Year-To-Date-/5uac-w243) +**Source**: [data.cityofnewyork.us](https://data.cityofnewyork.us/Public-Safety/NYPD-Complaint-Data-Current-Year-To-Date-/5uac-w243) **Terms of use**: https://www1.nyc.gov/home/terms-of-use.page ## Prerequisites @@ -35,7 +35,7 @@ The examples in this guide assume that you have saved the TSV file to `${HOME}/N ## Familiarize yourself with the TSV file -Before starting to work with the ClickHouse database familiarize yourself with the data. +Before starting to work with the ClickHouse database familiarize yourself with the data. ### Look at the fields in the source TSV file @@ -47,15 +47,15 @@ clickhouse-local --query \ Sample response ```response -CMPLNT_NUM Nullable(Float64) -ADDR_PCT_CD Nullable(Float64) -BORO_NM Nullable(String) -CMPLNT_FR_DT Nullable(String) -CMPLNT_FR_TM Nullable(String) +CMPLNT_NUM Nullable(Float64) +ADDR_PCT_CD Nullable(Float64) +BORO_NM Nullable(String) +CMPLNT_FR_DT Nullable(String) +CMPLNT_FR_TM Nullable(String) ``` :::tip -Most of the time the above command will let you know which fields in the input data are numeric, and which are strings, and which are tuples. This is not always the case. Because ClickHouse is routineley used with datasets containing billions of records there is a default number (100) of rows examined to [infer the schema](../../guides/developer/working-with-json/json-semi-structured.md/#relying-on-schema-inference) in order to avoid parsing billions of rows to infer the schema. The response below may not match what you see, as the dataset is updated several times each year. Looking at the Data Dictionary you can see that CMPLNT_NUM is specified as text, and not numeric. 
By overriding the default of 100 rows for inference with the setting `SETTINGS input_format_max_rows_to_read_for_schema_inference=2000` +Most of the time the above command will let you know which fields in the input data are numeric, and which are strings, and which are tuples. This is not always the case. Because ClickHouse is routinely used with datasets containing billions of records there is a default number (100) of rows examined to [infer the schema](/docs/en/integrations/data-ingestion/data-formats/json.md#relying-on-schema-inference) in order to avoid parsing billions of rows to infer the schema. The response below may not match what you see, as the dataset is updated several times each year. Looking at the Data Dictionary you can see that CMPLNT_NUM is specified as text, and not numeric. By overriding the default of 100 rows for inference with the setting `SETTINGS input_format_max_rows_to_read_for_schema_inference=2000` you can get a better idea of the content. Note: as of version 22.5 the default is now 25,000 rows for inferring the schema, so only change the setting if you are on an older version or if you need more than 25,000 rows to be sampled. @@ -65,46 +65,46 @@ Run this command at your command prompt. You will be using `clickhouse-local` t ```sh clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \ --query \ -"describe file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')" +"describe file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')" ``` Result: ```response -CMPLNT_NUM Nullable(String) -ADDR_PCT_CD Nullable(Float64) -BORO_NM Nullable(String) -CMPLNT_FR_DT Nullable(String) -CMPLNT_FR_TM Nullable(String) -CMPLNT_TO_DT Nullable(String) -CMPLNT_TO_TM Nullable(String) -CRM_ATPT_CPTD_CD Nullable(String) -HADEVELOPT Nullable(String) -HOUSING_PSA Nullable(Float64) -JURISDICTION_CODE Nullable(Float64) -JURIS_DESC Nullable(String) -KY_CD Nullable(Float64) -LAW_CAT_CD Nullable(String) -LOC_OF_OCCUR_DESC Nullable(String) -OFNS_DESC Nullable(String) -PARKS_NM Nullable(String) -PATROL_BORO Nullable(String) -PD_CD Nullable(Float64) -PD_DESC Nullable(String) -PREM_TYP_DESC Nullable(String) -RPT_DT Nullable(String) -STATION_NAME Nullable(String) -SUSP_AGE_GROUP Nullable(String) -SUSP_RACE Nullable(String) -SUSP_SEX Nullable(String) -TRANSIT_DISTRICT Nullable(Float64) -VIC_AGE_GROUP Nullable(String) -VIC_RACE Nullable(String) -VIC_SEX Nullable(String) -X_COORD_CD Nullable(Float64) -Y_COORD_CD Nullable(Float64) -Latitude Nullable(Float64) -Longitude Nullable(Float64) -Lat_Lon Tuple(Nullable(Float64), Nullable(Float64)) +CMPLNT_NUM Nullable(String) +ADDR_PCT_CD Nullable(Float64) +BORO_NM Nullable(String) +CMPLNT_FR_DT Nullable(String) +CMPLNT_FR_TM Nullable(String) +CMPLNT_TO_DT Nullable(String) +CMPLNT_TO_TM Nullable(String) +CRM_ATPT_CPTD_CD Nullable(String) +HADEVELOPT Nullable(String) +HOUSING_PSA Nullable(Float64) +JURISDICTION_CODE Nullable(Float64) +JURIS_DESC Nullable(String) +KY_CD Nullable(Float64) +LAW_CAT_CD Nullable(String) +LOC_OF_OCCUR_DESC Nullable(String) +OFNS_DESC Nullable(String) +PARKS_NM Nullable(String) +PATROL_BORO Nullable(String) +PD_CD Nullable(Float64) +PD_DESC Nullable(String) +PREM_TYP_DESC Nullable(String) +RPT_DT Nullable(String) +STATION_NAME Nullable(String) +SUSP_AGE_GROUP Nullable(String) +SUSP_RACE Nullable(String) +SUSP_SEX Nullable(String) +TRANSIT_DISTRICT Nullable(Float64) +VIC_AGE_GROUP Nullable(String) +VIC_RACE Nullable(String) +VIC_SEX Nullable(String) +X_COORD_CD
Nullable(Float64) +Y_COORD_CD Nullable(Float64) +Latitude Nullable(Float64) +Longitude Nullable(Float64) +Lat_Lon Tuple(Nullable(Float64), Nullable(Float64)) New Georeferenced Column Nullable(String) ``` @@ -362,7 +362,7 @@ The dates shown as `1925` above are from errors in the data. There are several The decisions made above on the data types used for the columns are reflected in the table schema below. We also need to decide on the `ORDER BY` and `PRIMARY KEY` used for the table. At least one -of `ORDER BY` or `PRIMARY KEY` must be specified. Here are some guidelines on deciding on the +of `ORDER BY` or `PRIMARY KEY` must be specified. Here are some guidelines on deciding on the columns to includes in `ORDER BY`, and more information is in the *Next Steps* section at the end of this document. @@ -380,7 +380,7 @@ decide that we would look at the types of crimes reported over time in the five New York City. These fields might be then included in the `ORDER BY`: | Column | Description (from the data dictionary) | -| ----------- | --------------------------------------------------- | +| ----------- | --------------------------------------------------- | | OFNS_DESC | Description of offense corresponding with key code | | RPT_DT | Date event was reported to police | | BORO_NM | The name of the borough in which the incident occurred | @@ -420,7 +420,7 @@ ORDER BY ( borough, offense_description, date_reported ) Putting together the changes to data types and the `ORDER BY` tuple gives this table structure: ```sql -CREATE TABLE NYPD_Complaint ( +CREATE TABLE NYPD_Complaint ( complaint_number String, precinct UInt8, borough LowCardinality(String), @@ -429,7 +429,7 @@ CREATE TABLE NYPD_Complaint ( was_crime_completed String, housing_authority String, housing_level_code UInt32, - jurisdiction_code UInt8, + jurisdiction_code UInt8, jurisdiction LowCardinality(String), offense_code UInt8, offense_level LowCardinality(String), @@ -478,7 +478,7 @@ Query id: 6a5b10bf-9333-4090-b36e-c7f08b1d9e01 Row 1: ────── -partition_key: +partition_key: sorting_key: borough, offense_description, date_reported primary_key: borough, offense_description, date_reported table: NYPD_Complaint @@ -495,7 +495,7 @@ We will use `clickhouse-local` tool for data preprocessing and `clickhouse-clien :::tip `table='input'` appears in the arguments to clickhouse-local below. clickhouse-local takes the provided input (`cat ${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv`) and inserts the input into a table. By default the table is named `table`. In this guide the name of the table is set to `input` to make the data flow clearer. The final argument to clickhouse-local is a query that selects from the table (`FROM input`) which is then piped to `clickhouse-client` to populate the table `NYPD_Complaint`. 
::: - + ```sql cat ${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv \ | clickhouse-local --table='input' --input-format='TSVWithNames' \ @@ -512,12 +512,12 @@ cat ${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv \ CRM_ATPT_CPTD_CD AS was_crime_completed, HADEVELOPT AS housing_authority_development, HOUSING_PSA AS housing_level_code, - JURISDICTION_CODE AS jurisdiction_code, + JURISDICTION_CODE AS jurisdiction_code, JURIS_DESC AS jurisdiction, KY_CD AS offense_code, LAW_CAT_CD AS offense_level, LOC_OF_OCCUR_DESC AS location_descriptor, - OFNS_DESC AS offense_description, + OFNS_DESC AS offense_description, PARKS_NM AS park_name, PATROL_BORO AS patrol_borough, PD_CD, @@ -529,7 +529,7 @@ cat ${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv \ SUSP_RACE AS suspect_race, SUSP_SEX AS suspect_sex, TRANSIT_DISTRICT AS transit_district, - VIC_AGE_GROUP AS victim_age_group, + VIC_AGE_GROUP AS victim_age_group, VIC_RACE AS victim_race, VIC_SEX AS victim_sex, X_COORD_CD AS NY_x_coordinate, @@ -538,7 +538,7 @@ cat ${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv \ Longitude FROM input" \ | clickhouse-client --query='INSERT INTO NYPD_Complaint FORMAT TSV' -``` +``` ## Validate the Data {#validate-data} @@ -560,7 +560,7 @@ Result: │ 208993 │ └─────────┘ -1 row in set. Elapsed: 0.001 sec. +1 row in set. Elapsed: 0.001 sec. ``` The size of the dataset in ClickHouse is just 12% of the original TSV file, compare the size of the original TSV file with the size of the table: @@ -651,4 +651,4 @@ Query id: 8cdcdfd4-908f-4be0-99e3-265722a2ab8d ## Next Steps -[A Practical Introduction to Sparse Primary Indexes in ClickHouse](../../guides/improving-query-performance/sparse-primary-indexes/sparse-primary-indexes-intro.md) discusses the differences in ClickHouse indexing compared to traditional relational databases, how ClickHouse builds and uses a sparse primary index, and indexing best practices. +[A Practical Introduction to Sparse Primary Indexes in ClickHouse](/docs/en/guides/best-practices/sparse-primary-indexes.md) discusses the differences in ClickHouse indexing compared to traditional relational databases, how ClickHouse builds and uses a sparse primary index, and indexing best practices. diff --git a/docs/en/getting-started/example-datasets/ontime.md b/docs/en/getting-started/example-datasets/ontime.md index 7a6e78206b9..9efa1afb5c4 100644 --- a/docs/en/getting-started/example-datasets/ontime.md +++ b/docs/en/getting-started/example-datasets/ontime.md @@ -390,9 +390,9 @@ You can also play with the data in Playground, [example](https://play.clickhouse This performance test was created by Vadim Tkachenko. 
See: -- https://www.percona.com/blog/2009/10/02/analyzing-air-traffic-performance-with-infobright-and-monetdb/ -- https://www.percona.com/blog/2009/10/26/air-traffic-queries-in-luciddb/ -- https://www.percona.com/blog/2009/11/02/air-traffic-queries-in-infinidb-early-alpha/ -- https://www.percona.com/blog/2014/04/21/using-apache-hadoop-and-impala-together-with-mysql-for-data-analysis/ -- https://www.percona.com/blog/2016/01/07/apache-spark-with-air-ontime-performance-data/ -- http://nickmakos.blogspot.ru/2012/08/analyzing-air-traffic-performance-with.html +- https://www.percona.com/blog/2009/10/02/analyzing-air-traffic-performance-with-infobright-and-monetdb/ +- https://www.percona.com/blog/2009/10/26/air-traffic-queries-in-luciddb/ +- https://www.percona.com/blog/2009/11/02/air-traffic-queries-in-infinidb-early-alpha/ +- https://www.percona.com/blog/2014/04/21/using-apache-hadoop-and-impala-together-with-mysql-for-data-analysis/ +- https://www.percona.com/blog/2016/01/07/apache-spark-with-air-ontime-performance-data/ +- http://nickmakos.blogspot.ru/2012/08/analyzing-air-traffic-performance-with.html diff --git a/docs/en/getting-started/example-datasets/recipes.md b/docs/en/getting-started/example-datasets/recipes.md index 4cc94c3ce5b..e0a66022d37 100644 --- a/docs/en/getting-started/example-datasets/recipes.md +++ b/docs/en/getting-started/example-datasets/recipes.md @@ -50,13 +50,13 @@ clickhouse-client --query " This is a showcase how to parse custom CSV, as it requires multiple tunes. Explanation: -- The dataset is in CSV format, but it requires some preprocessing on insertion; we use table function [input](../../sql-reference/table-functions/input.md) to perform preprocessing; -- The structure of CSV file is specified in the argument of the table function `input`; -- The field `num` (row number) is unneeded - we parse it from file and ignore; -- We use `FORMAT CSVWithNames` but the header in CSV will be ignored (by command line parameter `--input_format_with_names_use_header 0`), because the header does not contain the name for the first field; -- File is using only double quotes to enclose CSV strings; some strings are not enclosed in double quotes, and single quote must not be parsed as the string enclosing - that's why we also add the `--format_csv_allow_single_quote 0` parameter; -- Some strings from CSV cannot parse, because they contain `\M/` sequence at the beginning of the value; the only value starting with backslash in CSV can be `\N` that is parsed as SQL NULL. We add `--input_format_allow_errors_num 10` parameter and up to ten malformed records can be skipped; -- There are arrays for ingredients, directions and NER fields; these arrays are represented in unusual form: they are serialized into string as JSON and then placed in CSV - we parse them as String and then use [JSONExtract](../../sql-reference/functions/json-functions.md) function to transform it to Array. 
+- The dataset is in CSV format, but it requires some preprocessing on insertion; we use the table function [input](../../sql-reference/table-functions/input.md) to perform the preprocessing; +- The structure of the CSV file is specified in the argument of the table function `input`; +- The field `num` (row number) is not needed - we parse it from the file and ignore it; +- We use `FORMAT CSVWithNames`, but the header in the CSV will be ignored (via the command line parameter `--input_format_with_names_use_header 0`), because the header does not contain the name of the first field; +- The file uses only double quotes to enclose CSV strings; some strings are not enclosed in double quotes, and a single quote must not be parsed as a string delimiter - that's why we also add the `--format_csv_allow_single_quote 0` parameter; +- Some strings from the CSV cannot be parsed, because they contain the `\M/` sequence at the beginning of the value; the only value that can start with a backslash in CSV is `\N`, which is parsed as SQL NULL. We add the `--input_format_allow_errors_num 10` parameter, so up to ten malformed records can be skipped; +- The ingredients, directions and NER fields are arrays represented in an unusual form: they are serialized into a string as JSON and then placed in the CSV - we parse them as String and then use the [JSONExtract](../../sql-reference/functions/json-functions.md) function to transform them into an Array. ## Validate the Inserted Data @@ -80,7 +80,7 @@ Result: ### Top Components by the Number of Recipes: -In this example we learn how to use [arrayJoin](../../sql-reference/functions/array-join/) function to expand an array into a set of rows. +In this example we learn how to use the [arrayJoin](../../sql-reference/functions/array-join.md) function to expand an array into a set of rows. Query: @@ -185,7 +185,7 @@ Result: 10 rows in set. Elapsed: 0.215 sec. Processed 2.23 million rows, 1.48 GB (10.35 million rows/s., 6.86 GB/s.) ``` -In this example, we involve [has](../../sql-reference/functions/array-functions/#hasarr-elem) function to filter by array elements and sort by the number of directions. +In this example, we use the [has](../../sql-reference/functions/array-functions.md#hasarr-elem) function to filter by array elements and sort by the number of directions. There is a wedding cake that requires the whole 126 steps to produce! Show that directions: diff --git a/docs/en/getting-started/example-datasets/reddit-comments.md b/docs/en/getting-started/example-datasets/reddit-comments.md new file mode 100644 index 00000000000..e1e372746c9 --- /dev/null +++ b/docs/en/getting-started/example-datasets/reddit-comments.md @@ -0,0 +1,636 @@ +--- +slug: /en/getting-started/example-datasets/reddit-comments +sidebar_label: Reddit comments +--- + +# Reddit comments dataset + +This dataset contains publicly available comments on Reddit from December 2005 through March 2023, totaling over 7B rows of data.
The raw data is in JSON format in compressed `.zst` files and the rows look like the following: + +```json +{"controversiality":0,"body":"A look at Vietnam and Mexico exposes the myth of market liberalisation.","subreddit_id":"t5_6","link_id":"t3_17863","stickied":false,"subreddit":"reddit.com","score":2,"ups":2,"author_flair_css_class":null,"created_utc":1134365188,"author_flair_text":null,"author":"frjo","id":"c13","edited":false,"parent_id":"t3_17863","gilded":0,"distinguished":null,"retrieved_on":1473738411} +{"created_utc":1134365725,"author_flair_css_class":null,"score":1,"ups":1,"subreddit":"reddit.com","stickied":false,"link_id":"t3_17866","subreddit_id":"t5_6","controversiality":0,"body":"The site states \"What can I use it for? Meeting notes, Reports, technical specs Sign-up sheets, proposals and much more...\", just like any other new breeed of sites that want us to store everything we have on the web. And they even guarantee multiple levels of security and encryption etc. But what prevents these web site operators fom accessing and/or stealing Meeting notes, Reports, technical specs Sign-up sheets, proposals and much more, for competitive or personal gains...? I am pretty sure that most of them are honest, but what's there to prevent me from setting up a good useful site and stealing all your data? Call me paranoid - I am.","retrieved_on":1473738411,"distinguished":null,"gilded":0,"id":"c14","edited":false,"parent_id":"t3_17866","author":"zse7zse","author_flair_text":null} +{"gilded":0,"distinguished":null,"retrieved_on":1473738411,"author":"[deleted]","author_flair_text":null,"edited":false,"id":"c15","parent_id":"t3_17869","subreddit":"reddit.com","score":0,"ups":0,"created_utc":1134366848,"author_flair_css_class":null,"body":"Jython related topics by Frank Wierzbicki","controversiality":0,"subreddit_id":"t5_6","stickied":false,"link_id":"t3_17869"} +{"gilded":0,"retrieved_on":1473738411,"distinguished":null,"author_flair_text":null,"author":"[deleted]","edited":false,"parent_id":"t3_17870","id":"c16","subreddit":"reddit.com","created_utc":1134367660,"author_flair_css_class":null,"score":1,"ups":1,"body":"[deleted]","controversiality":0,"stickied":false,"link_id":"t3_17870","subreddit_id":"t5_6"} +{"gilded":0,"retrieved_on":1473738411,"distinguished":null,"author_flair_text":null,"author":"rjoseph","edited":false,"id":"c17","parent_id":"t3_17817","subreddit":"reddit.com","author_flair_css_class":null,"created_utc":1134367754,"score":1,"ups":1,"body":"Saft is by far the best extension you could tak onto your Safari","controversiality":0,"link_id":"t3_17817","stickied":false,"subreddit_id":"t5_6"} +``` + +A shoutout to Percona for the [motivation behind ingesting this dataset](https://www.percona.com/blog/big-data-set-reddit-comments-analyzing-clickhouse/), which we have downloaded and stored in an S3 bucket. + +:::note +The following commands were executed on ClickHouse Cloud. To run this on your own cluster, replace `default` in the `s3Cluster` function call with the name of your cluster. If you do not have a cluster, then replace the `s3Cluster` function with the `s3` function. +::: + +1. 
Let's create a table for the Reddit data: + +```sql +CREATE TABLE reddit +( + subreddit LowCardinality(String), + subreddit_id LowCardinality(String), + subreddit_type Enum('public' = 1, 'restricted' = 2, 'user' = 3, 'archived' = 4, 'gold_restricted' = 5, 'private' = 6), + author LowCardinality(String), + body String CODEC(ZSTD(6)), + created_date Date DEFAULT toDate(created_utc), + created_utc DateTime, + retrieved_on DateTime, + id String, + parent_id String, + link_id String, + score Int32, + total_awards_received UInt16, + controversiality UInt8, + gilded UInt8, + collapsed_because_crowd_control UInt8, + collapsed_reason Enum('' = 0, 'comment score below threshold' = 1, 'may be sensitive content' = 2, 'potentially toxic' = 3, 'potentially toxic content' = 4), + distinguished Enum('' = 0, 'moderator' = 1, 'admin' = 2, 'special' = 3), + removal_reason Enum('' = 0, 'legal' = 1), + author_created_utc DateTime, + author_fullname LowCardinality(String), + author_patreon_flair UInt8, + author_premium UInt8, + can_gild UInt8, + can_mod_post UInt8, + collapsed UInt8, + is_submitter UInt8, + _edited String, + locked UInt8, + quarantined UInt8, + no_follow UInt8, + send_replies UInt8, + stickied UInt8, + author_flair_text LowCardinality(String) +) +ENGINE = MergeTree +ORDER BY (subreddit, created_date, author); +``` + +:::note +The names of the files in S3 start with `RC_YYYY-MM` where `YYYY-MM` goes from `2005-12` to `2023-02`. The compression changes a couple of times though, so the file extensions are not consistent. For example: + +- the file names are initially `RC_2005-12.bz2` to `RC_2017-11.bz2` +- then they look like `RC_2017-12.xz` to `RC_2018-09.xz` +- and finally `RC_2018-10.zst` to `RC_2023-02.zst` +::: + +2. We are going to start with one month of data, but if you want to simply insert every row - skip ahead to step 8 below. The following file has 86M records from December, 2017: + +```sql +INSERT INTO reddit + SELECT * + FROM s3Cluster( + 'default', + 'https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/reddit/original/RC_2017-12.xz', + 'JSONEachRow' + ); +``` + +If you do not have a cluster, use `s3` instead of `s3Cluster`: + +```sql +INSERT INTO reddit + SELECT * + FROM s3( + 'https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/reddit/original/RC_2017-12.xz', + 'JSONEachRow' + ); +``` + +3. It will take a while depending on your resources, but when it's done verify it worked: + +```sql +SELECT formatReadableQuantity(count()) +FROM reddit; +``` + +```response +┌─formatReadableQuantity(count())─┐ +│ 85.97 million │ +└─────────────────────────────────┘ +``` + +4. Let's see how many unique subreddits were in December of 2017: + +```sql +SELECT uniqExact(subreddit) +FROM reddit; +``` + +```response +┌─uniqExact(subreddit)─┐ +│ 91613 │ +└──────────────────────┘ + +1 row in set. Elapsed: 1.572 sec. Processed 85.97 million rows, 367.43 MB (54.71 million rows/s., 233.80 MB/s.) +``` + +5. 
This query returns the top 20 subreddits (in terms of number of comments): + +```sql +SELECT + subreddit, + count() AS c +FROM reddit +GROUP BY subreddit +ORDER BY c DESC +LIMIT 20; +``` + +```response +┌─subreddit───────┬───────c─┐ +│ AskReddit │ 5245881 │ +│ politics │ 1753120 │ +│ nfl │ 1220266 │ +│ nba │ 960388 │ +│ The_Donald │ 931857 │ +│ news │ 796617 │ +│ worldnews │ 765709 │ +│ CFB │ 710360 │ +│ gaming │ 602761 │ +│ movies │ 601966 │ +│ soccer │ 590628 │ +│ Bitcoin │ 583783 │ +│ pics │ 563408 │ +│ StarWars │ 562514 │ +│ funny │ 547563 │ +│ leagueoflegends │ 517213 │ +│ teenagers │ 492020 │ +│ DestinyTheGame │ 477377 │ +│ todayilearned │ 472650 │ +│ videos │ 450581 │ +└─────────────────┴─────────┘ + +20 rows in set. Elapsed: 0.368 sec. Processed 85.97 million rows, 367.43 MB (233.34 million rows/s., 997.25 MB/s.) +``` + +6. Here are the top 10 authors in December of 2017, in terms of number of comments posted: + +```sql +SELECT + author, + count() AS c +FROM reddit +GROUP BY author +ORDER BY c DESC +LIMIT 10; +``` + +```response +┌─author──────────┬───────c─┐ +│ [deleted] │ 5913324 │ +│ AutoModerator │ 784886 │ +│ ImagesOfNetwork │ 83241 │ +│ BitcoinAllBot │ 54484 │ +│ imguralbumbot │ 45822 │ +│ RPBot │ 29337 │ +│ WikiTextBot │ 25982 │ +│ Concise_AMA_Bot │ 19974 │ +│ MTGCardFetcher │ 19103 │ +│ TotesMessenger │ 19057 │ +└─────────────────┴─────────┘ + +10 rows in set. Elapsed: 8.143 sec. Processed 85.97 million rows, 711.05 MB (10.56 million rows/s., 87.32 MB/s.) +``` + +7. We already inserted some data, but we will start over: + +```sql +TRUNCATE TABLE reddit; +``` + +8. This is a fun dataset and it looks like we can find some great information, so let's go ahead and insert the entire dataset from 2005 to 2023. When you're ready, run this command to insert all the rows. (It takes a while - up to 17 hours!) + +```sql +INSERT INTO reddit +SELECT * +FROM s3Cluster( + 'default', + 'https://clickhouse-public-datasets.s3.amazonaws.com/reddit/original/RC*', + 'JSONEachRow' + ) +SETTINGS zstd_window_log_max = 31; +``` + +The response looks like: + +```response +0 rows in set. Elapsed: 61187.839 sec. Processed 6.74 billion rows, 2.06 TB (110.17 thousand rows/s., 33.68 MB/s.) +``` + +9. Let's see how many rows were inserted and how much disk space the table is using: + + +```sql +SELECT + sum(rows) AS count, + formatReadableQuantity(count), + formatReadableSize(sum(bytes)) AS disk_size, + formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed_size +FROM system.parts +WHERE (table = 'reddit') AND active +``` + +Notice the compression of disk storage is about 1/3 of the uncompressed size: + +```response +┌──────count─┬─formatReadableQuantity(sum(rows))─┬─disk_size──┬─uncompressed_size─┐ +│ 6739503568 │ 6.74 billion │ 501.10 GiB │ 1.51 TiB │ +└────────────┴───────────────────────────────────┴────────────┴───────────────────┘ + +1 row in set. Elapsed: 0.010 sec. +```
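+
+To drill down further, a per-column breakdown of the same information is also available. The following is a minimal sketch, assuming the `reddit` table lives in the `default` database; it reads the standard `system.columns` table instead of `system.parts`:
+
+```sql
+-- Hypothetical follow-up query: compressed vs. uncompressed bytes per column.
+-- Adjust the database filter if you created the table elsewhere.
+SELECT
+    name,
+    formatReadableSize(data_compressed_bytes) AS compressed,
+    formatReadableSize(data_uncompressed_bytes) AS uncompressed
+FROM system.columns
+WHERE (database = 'default') AND (table = 'reddit')
+ORDER BY data_compressed_bytes DESC
+LIMIT 10;
+```
+
+The `body` column, which was defined with `CODEC(ZSTD(6))` above, will typically top this list - which is why it was given a heavier codec in the first place.
+
+10.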
The following query shows how many comments, authors and subreddits we have for each month: + +```sql +SELECT + toStartOfMonth(created_utc) AS firstOfMonth, + count() AS c, + bar(c, 0, 50000000, 25) AS bar_count, + uniq(author) AS authors, + bar(authors, 0, 5000000, 25) AS bar_authors, + uniq(subreddit) AS subreddits, + bar(subreddits, 0, 100000, 25) AS bar_subreddits +FROM reddit +GROUP BY firstOfMonth +ORDER BY firstOfMonth ASC; +``` + +This is a substantial query that has to process all 6.74 billion rows, but we still get an impressive response time (about 3 minutes): + +```response +┌─firstOfMonth─┬─────────c─┬─bar_count─────────────────┬─authors─┬─bar_authors───────────────┬─subreddits─┬─bar_subreddits────────────┐ +│ 2005-12-01 │ 1075 │ │ 394 │ │ 1 │ │ +│ 2006-01-01 │ 3666 │ │ 791 │ │ 2 │ │ +│ 2006-02-01 │ 9095 │ │ 1464 │ │ 18 │ │ +│ 2006-03-01 │ 13859 │ │ 1958 │ │ 15 │ │ +│ 2006-04-01 │ 19090 │ │ 2334 │ │ 21 │ │ +│ 2006-05-01 │ 26859 │ │ 2698 │ │ 21 │ │ +│ 2006-06-01 │ 29163 │ │ 3043 │ │ 19 │ │ +│ 2006-07-01 │ 37031 │ │ 3532 │ │ 22 │ │ +│ 2006-08-01 │ 50559 │ │ 4750 │ │ 24 │ │ +│ 2006-09-01 │ 50675 │ │ 4908 │ │ 21 │ │ +│ 2006-10-01 │ 54148 │ │ 5654 │ │ 31 │ │ +│ 2006-11-01 │ 62021 │ │ 6490 │ │ 23 │ │ +│ 2006-12-01 │ 61018 │ │ 6707 │ │ 24 │ │ +│ 2007-01-01 │ 81341 │ │ 7931 │ │ 23 │ │ +│ 2007-02-01 │ 95634 │ │ 9020 │ │ 21 │ │ +│ 2007-03-01 │ 112444 │ │ 10842 │ │ 23 │ │ +│ 2007-04-01 │ 126773 │ │ 10701 │ │ 26 │ │ +│ 2007-05-01 │ 170097 │ │ 11365 │ │ 25 │ │ +│ 2007-06-01 │ 178800 │ │ 11267 │ │ 22 │ │ +│ 2007-07-01 │ 203319 │ │ 12482 │ │ 25 │ │ +│ 2007-08-01 │ 225111 │ │ 14124 │ │ 30 │ │ +│ 2007-09-01 │ 259497 │ ▏ │ 15416 │ │ 33 │ │ +│ 2007-10-01 │ 274170 │ ▏ │ 15302 │ │ 36 │ │ +│ 2007-11-01 │ 372983 │ ▏ │ 15134 │ │ 43 │ │ +│ 2007-12-01 │ 363390 │ ▏ │ 15915 │ │ 31 │ │ +│ 2008-01-01 │ 452990 │ ▏ │ 18857 │ │ 126 │ │ +│ 2008-02-01 │ 441768 │ ▏ │ 18266 │ │ 173 │ │ +│ 2008-03-01 │ 463728 │ ▏ │ 18947 │ │ 292 │ │ +│ 2008-04-01 │ 468317 │ ▏ │ 18590 │ │ 323 │ │ +│ 2008-05-01 │ 536380 │ ▎ │ 20861 │ │ 375 │ │ +│ 2008-06-01 │ 577684 │ ▎ │ 22557 │ │ 575 │ ▏ │ +│ 2008-07-01 │ 592610 │ ▎ │ 23123 │ │ 657 │ ▏ │ +│ 2008-08-01 │ 595959 │ ▎ │ 23729 │ │ 707 │ ▏ │ +│ 2008-09-01 │ 680892 │ ▎ │ 26374 │ ▏ │ 801 │ ▏ │ +│ 2008-10-01 │ 789874 │ ▍ │ 28970 │ ▏ │ 893 │ ▏ │ +│ 2008-11-01 │ 792310 │ ▍ │ 30272 │ ▏ │ 1024 │ ▎ │ +│ 2008-12-01 │ 850359 │ ▍ │ 34073 │ ▏ │ 1103 │ ▎ │ +│ 2009-01-01 │ 1051649 │ ▌ │ 38978 │ ▏ │ 1316 │ ▎ │ +│ 2009-02-01 │ 944711 │ ▍ │ 43390 │ ▏ │ 1132 │ ▎ │ +│ 2009-03-01 │ 1048643 │ ▌ │ 46516 │ ▏ │ 1203 │ ▎ │ +│ 2009-04-01 │ 1094599 │ ▌ │ 48284 │ ▏ │ 1334 │ ▎ │ +│ 2009-05-01 │ 1201257 │ ▌ │ 52512 │ ▎ │ 1395 │ ▎ │ +│ 2009-06-01 │ 1258750 │ ▋ │ 57728 │ ▎ │ 1473 │ ▎ │ +│ 2009-07-01 │ 1470290 │ ▋ │ 60098 │ ▎ │ 1686 │ ▍ │ +│ 2009-08-01 │ 1750688 │ ▉ │ 67347 │ ▎ │ 1777 │ ▍ │ +│ 2009-09-01 │ 2032276 │ █ │ 78051 │ ▍ │ 1784 │ ▍ │ +│ 2009-10-01 │ 2242017 │ █ │ 93409 │ ▍ │ 2071 │ ▌ │ +│ 2009-11-01 │ 2207444 │ █ │ 95940 │ ▍ │ 2141 │ ▌ │ +│ 2009-12-01 │ 2560510 │ █▎ │ 104239 │ ▌ │ 2141 │ ▌ │ +│ 2010-01-01 │ 2884096 │ █▍ │ 114314 │ ▌ │ 2313 │ ▌ │ +│ 2010-02-01 │ 2687779 │ █▎ │ 115683 │ ▌ │ 2522 │ ▋ │ +│ 2010-03-01 │ 3228254 │ █▌ │ 125775 │ ▋ │ 2890 │ ▋ │ +│ 2010-04-01 │ 3209898 │ █▌ │ 128936 │ ▋ │ 3170 │ ▊ │ +│ 2010-05-01 │ 3267363 │ █▋ │ 131851 │ ▋ │ 3166 │ ▊ │ +│ 2010-06-01 │ 3532867 │ █▊ │ 139522 │ ▋ │ 3301 │ ▊ │ +│ 2010-07-01 │ 4032737 │ ██ │ 153451 │ ▊ │ 3662 │ ▉ │ +│ 2010-08-01 │ 4247982 │ ██ │ 164071 │ ▊ │ 3653 │ ▉ │ +│ 2010-09-01 │ 4704069 │ ██▎ │ 186613 │ ▉ │ 4009 │ █ │ +│ 2010-10-01 │ 5032368 │ ██▌ │ 
203800 │ █ │ 4154 │ █ │ +│ 2010-11-01 │ 5689002 │ ██▊ │ 226134 │ █▏ │ 4383 │ █ │ +│ 2010-12-01 │ 5972642 │ ██▉ │ 245824 │ █▏ │ 4692 │ █▏ │ +│ 2011-01-01 │ 6603329 │ ███▎ │ 270025 │ █▎ │ 5141 │ █▎ │ +│ 2011-02-01 │ 6363114 │ ███▏ │ 277593 │ █▍ │ 5202 │ █▎ │ +│ 2011-03-01 │ 7556165 │ ███▊ │ 314748 │ █▌ │ 5445 │ █▎ │ +│ 2011-04-01 │ 7571398 │ ███▊ │ 329920 │ █▋ │ 6128 │ █▌ │ +│ 2011-05-01 │ 8803949 │ ████▍ │ 365013 │ █▊ │ 6834 │ █▋ │ +│ 2011-06-01 │ 9766511 │ ████▉ │ 393945 │ █▉ │ 7519 │ █▉ │ +│ 2011-07-01 │ 10557466 │ █████▎ │ 424235 │ ██ │ 8293 │ ██ │ +│ 2011-08-01 │ 12316144 │ ██████▏ │ 475326 │ ██▍ │ 9657 │ ██▍ │ +│ 2011-09-01 │ 12150412 │ ██████ │ 503142 │ ██▌ │ 10278 │ ██▌ │ +│ 2011-10-01 │ 13470278 │ ██████▋ │ 548801 │ ██▋ │ 10922 │ ██▋ │ +│ 2011-11-01 │ 13621533 │ ██████▊ │ 574435 │ ██▊ │ 11572 │ ██▉ │ +│ 2011-12-01 │ 14509469 │ ███████▎ │ 622849 │ ███ │ 12335 │ ███ │ +│ 2012-01-01 │ 16350205 │ ████████▏ │ 696110 │ ███▍ │ 14281 │ ███▌ │ +│ 2012-02-01 │ 16015695 │ ████████ │ 722892 │ ███▌ │ 14949 │ ███▋ │ +│ 2012-03-01 │ 17881943 │ ████████▉ │ 789664 │ ███▉ │ 15795 │ ███▉ │ +│ 2012-04-01 │ 19044534 │ █████████▌ │ 842491 │ ████▏ │ 16440 │ ████ │ +│ 2012-05-01 │ 20388260 │ ██████████▏ │ 886176 │ ████▍ │ 16974 │ ████▏ │ +│ 2012-06-01 │ 21897913 │ ██████████▉ │ 946798 │ ████▋ │ 17952 │ ████▍ │ +│ 2012-07-01 │ 24087517 │ ████████████ │ 1018636 │ █████ │ 19069 │ ████▊ │ +│ 2012-08-01 │ 25703326 │ ████████████▊ │ 1094445 │ █████▍ │ 20553 │ █████▏ │ +│ 2012-09-01 │ 23419524 │ ███████████▋ │ 1088491 │ █████▍ │ 20831 │ █████▏ │ +│ 2012-10-01 │ 24788236 │ ████████████▍ │ 1131885 │ █████▋ │ 21868 │ █████▍ │ +│ 2012-11-01 │ 24648302 │ ████████████▎ │ 1167608 │ █████▊ │ 21791 │ █████▍ │ +│ 2012-12-01 │ 26080276 │ █████████████ │ 1218402 │ ██████ │ 22622 │ █████▋ │ +│ 2013-01-01 │ 30365867 │ ███████████████▏ │ 1341703 │ ██████▋ │ 24696 │ ██████▏ │ +│ 2013-02-01 │ 27213960 │ █████████████▌ │ 1304756 │ ██████▌ │ 24514 │ ██████▏ │ +│ 2013-03-01 │ 30771274 │ ███████████████▍ │ 1391703 │ ██████▉ │ 25730 │ ██████▍ │ +│ 2013-04-01 │ 33259557 │ ████████████████▋ │ 1485971 │ ███████▍ │ 27294 │ ██████▊ │ +│ 2013-05-01 │ 33126225 │ ████████████████▌ │ 1506473 │ ███████▌ │ 27299 │ ██████▊ │ +│ 2013-06-01 │ 32648247 │ ████████████████▎ │ 1506650 │ ███████▌ │ 27450 │ ██████▊ │ +│ 2013-07-01 │ 34922133 │ █████████████████▍ │ 1561771 │ ███████▊ │ 28294 │ ███████ │ +│ 2013-08-01 │ 34766579 │ █████████████████▍ │ 1589781 │ ███████▉ │ 28943 │ ███████▏ │ +│ 2013-09-01 │ 31990369 │ ███████████████▉ │ 1570342 │ ███████▊ │ 29408 │ ███████▎ │ +│ 2013-10-01 │ 35940040 │ █████████████████▉ │ 1683770 │ ████████▍ │ 30273 │ ███████▌ │ +│ 2013-11-01 │ 37396497 │ ██████████████████▋ │ 1757467 │ ████████▊ │ 31173 │ ███████▊ │ +│ 2013-12-01 │ 39810216 │ ███████████████████▉ │ 1846204 │ █████████▏ │ 32326 │ ████████ │ +│ 2014-01-01 │ 42420655 │ █████████████████████▏ │ 1927229 │ █████████▋ │ 35603 │ ████████▉ │ +│ 2014-02-01 │ 38703362 │ ███████████████████▎ │ 1874067 │ █████████▎ │ 37007 │ █████████▎ │ +│ 2014-03-01 │ 42459956 │ █████████████████████▏ │ 1959888 │ █████████▊ │ 37948 │ █████████▍ │ +│ 2014-04-01 │ 42440735 │ █████████████████████▏ │ 1951369 │ █████████▊ │ 38362 │ █████████▌ │ +│ 2014-05-01 │ 42514094 │ █████████████████████▎ │ 1970197 │ █████████▊ │ 39078 │ █████████▊ │ +│ 2014-06-01 │ 41990650 │ ████████████████████▉ │ 1943850 │ █████████▋ │ 38268 │ █████████▌ │ +│ 2014-07-01 │ 46868899 │ ███████████████████████▍ │ 2059346 │ ██████████▎ │ 40634 │ ██████████▏ │ +│ 2014-08-01 │ 46990813 │ ███████████████████████▍ │ 
2117335 │ ██████████▌ │ 41764 │ ██████████▍ │ +│ 2014-09-01 │ 44992201 │ ██████████████████████▍ │ 2124708 │ ██████████▌ │ 41890 │ ██████████▍ │ +│ 2014-10-01 │ 47497520 │ ███████████████████████▋ │ 2206535 │ ███████████ │ 43109 │ ██████████▊ │ +│ 2014-11-01 │ 46118074 │ ███████████████████████ │ 2239747 │ ███████████▏ │ 43718 │ ██████████▉ │ +│ 2014-12-01 │ 48807699 │ ████████████████████████▍ │ 2372945 │ ███████████▊ │ 43823 │ ██████████▉ │ +│ 2015-01-01 │ 53851542 │ █████████████████████████ │ 2499536 │ ████████████▍ │ 47172 │ ███████████▊ │ +│ 2015-02-01 │ 48342747 │ ████████████████████████▏ │ 2448496 │ ████████████▏ │ 47229 │ ███████████▊ │ +│ 2015-03-01 │ 54564441 │ █████████████████████████ │ 2550534 │ ████████████▊ │ 48156 │ ████████████ │ +│ 2015-04-01 │ 55005780 │ █████████████████████████ │ 2609443 │ █████████████ │ 49865 │ ████████████▍ │ +│ 2015-05-01 │ 54504410 │ █████████████████████████ │ 2585535 │ ████████████▉ │ 50137 │ ████████████▌ │ +│ 2015-06-01 │ 54258492 │ █████████████████████████ │ 2595129 │ ████████████▉ │ 49598 │ ████████████▍ │ +│ 2015-07-01 │ 58451788 │ █████████████████████████ │ 2720026 │ █████████████▌ │ 55022 │ █████████████▊ │ +│ 2015-08-01 │ 58075327 │ █████████████████████████ │ 2743994 │ █████████████▋ │ 55302 │ █████████████▊ │ +│ 2015-09-01 │ 55574825 │ █████████████████████████ │ 2672793 │ █████████████▎ │ 53960 │ █████████████▍ │ +│ 2015-10-01 │ 59494045 │ █████████████████████████ │ 2816426 │ ██████████████ │ 70210 │ █████████████████▌ │ +│ 2015-11-01 │ 57117500 │ █████████████████████████ │ 2847146 │ ██████████████▏ │ 71363 │ █████████████████▊ │ +│ 2015-12-01 │ 58523312 │ █████████████████████████ │ 2854840 │ ██████████████▎ │ 94559 │ ███████████████████████▋ │ +│ 2016-01-01 │ 61991732 │ █████████████████████████ │ 2920366 │ ██████████████▌ │ 108438 │ █████████████████████████ │ +│ 2016-02-01 │ 59189875 │ █████████████████████████ │ 2854683 │ ██████████████▎ │ 109916 │ █████████████████████████ │ +│ 2016-03-01 │ 63918864 │ █████████████████████████ │ 2969542 │ ██████████████▊ │ 84787 │ █████████████████████▏ │ +│ 2016-04-01 │ 64271256 │ █████████████████████████ │ 2999086 │ ██████████████▉ │ 61647 │ ███████████████▍ │ +│ 2016-05-01 │ 65212004 │ █████████████████████████ │ 3034674 │ ███████████████▏ │ 67465 │ ████████████████▊ │ +│ 2016-06-01 │ 65867743 │ █████████████████████████ │ 3057604 │ ███████████████▎ │ 75170 │ ██████████████████▊ │ +│ 2016-07-01 │ 66974735 │ █████████████████████████ │ 3199374 │ ███████████████▉ │ 77732 │ ███████████████████▍ │ +│ 2016-08-01 │ 69654819 │ █████████████████████████ │ 3239957 │ ████████████████▏ │ 63080 │ ███████████████▊ │ +│ 2016-09-01 │ 67024973 │ █████████████████████████ │ 3190864 │ ███████████████▉ │ 62324 │ ███████████████▌ │ +│ 2016-10-01 │ 71826553 │ █████████████████████████ │ 3284340 │ ████████████████▍ │ 62549 │ ███████████████▋ │ +│ 2016-11-01 │ 71022319 │ █████████████████████████ │ 3300822 │ ████████████████▌ │ 69718 │ █████████████████▍ │ +│ 2016-12-01 │ 72942967 │ █████████████████████████ │ 3430324 │ █████████████████▏ │ 71705 │ █████████████████▉ │ +│ 2017-01-01 │ 78946585 │ █████████████████████████ │ 3572093 │ █████████████████▊ │ 78198 │ ███████████████████▌ │ +│ 2017-02-01 │ 70609487 │ █████████████████████████ │ 3421115 │ █████████████████ │ 69823 │ █████████████████▍ │ +│ 2017-03-01 │ 79723106 │ █████████████████████████ │ 3638122 │ ██████████████████▏ │ 73865 │ ██████████████████▍ │ +│ 2017-04-01 │ 77478009 │ █████████████████████████ │ 3620591 │ ██████████████████ │ 74387 │ 
██████████████████▌ │ +│ 2017-05-01 │ 79810360 │ █████████████████████████ │ 3650820 │ ██████████████████▎ │ 74356 │ ██████████████████▌ │ +│ 2017-06-01 │ 79901711 │ █████████████████████████ │ 3737614 │ ██████████████████▋ │ 72114 │ ██████████████████ │ +│ 2017-07-01 │ 81798725 │ █████████████████████████ │ 3872330 │ ███████████████████▎ │ 76052 │ ███████████████████ │ +│ 2017-08-01 │ 84658503 │ █████████████████████████ │ 3960093 │ ███████████████████▊ │ 77798 │ ███████████████████▍ │ +│ 2017-09-01 │ 83165192 │ █████████████████████████ │ 3880501 │ ███████████████████▍ │ 78402 │ ███████████████████▌ │ +│ 2017-10-01 │ 85828912 │ █████████████████████████ │ 3980335 │ ███████████████████▉ │ 80685 │ ████████████████████▏ │ +│ 2017-11-01 │ 84965681 │ █████████████████████████ │ 4026749 │ ████████████████████▏ │ 82659 │ ████████████████████▋ │ +│ 2017-12-01 │ 85973810 │ █████████████████████████ │ 4196354 │ ████████████████████▉ │ 91984 │ ██████████████████████▉ │ +│ 2018-01-01 │ 91558594 │ █████████████████████████ │ 4364443 │ █████████████████████▊ │ 102577 │ █████████████████████████ │ +│ 2018-02-01 │ 86467179 │ █████████████████████████ │ 4277899 │ █████████████████████▍ │ 104610 │ █████████████████████████ │ +│ 2018-03-01 │ 96490262 │ █████████████████████████ │ 4422470 │ ██████████████████████ │ 112559 │ █████████████████████████ │ +│ 2018-04-01 │ 98101232 │ █████████████████████████ │ 4572434 │ ██████████████████████▊ │ 105284 │ █████████████████████████ │ +│ 2018-05-01 │ 100109100 │ █████████████████████████ │ 4698908 │ ███████████████████████▍ │ 103910 │ █████████████████████████ │ +│ 2018-06-01 │ 100009462 │ █████████████████████████ │ 4697426 │ ███████████████████████▍ │ 101107 │ █████████████████████████ │ +│ 2018-07-01 │ 108151359 │ █████████████████████████ │ 5099492 │ █████████████████████████ │ 106184 │ █████████████████████████ │ +│ 2018-08-01 │ 107330940 │ █████████████████████████ │ 5084082 │ █████████████████████████ │ 109985 │ █████████████████████████ │ +│ 2018-09-01 │ 104473929 │ █████████████████████████ │ 5011953 │ █████████████████████████ │ 109710 │ █████████████████████████ │ +│ 2018-10-01 │ 112346556 │ █████████████████████████ │ 5320405 │ █████████████████████████ │ 112533 │ █████████████████████████ │ +│ 2018-11-01 │ 112573001 │ █████████████████████████ │ 5353282 │ █████████████████████████ │ 112211 │ █████████████████████████ │ +│ 2018-12-01 │ 121953600 │ █████████████████████████ │ 5611543 │ █████████████████████████ │ 118291 │ █████████████████████████ │ +│ 2019-01-01 │ 129386587 │ █████████████████████████ │ 6016687 │ █████████████████████████ │ 125725 │ █████████████████████████ │ +│ 2019-02-01 │ 120645639 │ █████████████████████████ │ 5974488 │ █████████████████████████ │ 125420 │ █████████████████████████ │ +│ 2019-03-01 │ 137650471 │ █████████████████████████ │ 6410197 │ █████████████████████████ │ 135924 │ █████████████████████████ │ +│ 2019-04-01 │ 138473643 │ █████████████████████████ │ 6416384 │ █████████████████████████ │ 139844 │ █████████████████████████ │ +│ 2019-05-01 │ 142463421 │ █████████████████████████ │ 6574836 │ █████████████████████████ │ 142012 │ █████████████████████████ │ +│ 2019-06-01 │ 134172939 │ █████████████████████████ │ 6601267 │ █████████████████████████ │ 140997 │ █████████████████████████ │ +│ 2019-07-01 │ 145965083 │ █████████████████████████ │ 6901822 │ █████████████████████████ │ 147802 │ █████████████████████████ │ +│ 2019-08-01 │ 146854393 │ █████████████████████████ │ 6993882 │ █████████████████████████ │ 151888 │ 
█████████████████████████ │ +│ 2019-09-01 │ 137540219 │ █████████████████████████ │ 7001362 │ █████████████████████████ │ 148839 │ █████████████████████████ │ +│ 2019-10-01 │ 129771456 │ █████████████████████████ │ 6825690 │ █████████████████████████ │ 144453 │ █████████████████████████ │ +│ 2019-11-01 │ 107990259 │ █████████████████████████ │ 6368286 │ █████████████████████████ │ 141768 │ █████████████████████████ │ +│ 2019-12-01 │ 112895934 │ █████████████████████████ │ 6640902 │ █████████████████████████ │ 148277 │ █████████████████████████ │ +│ 2020-01-01 │ 54354879 │ █████████████████████████ │ 4782339 │ ███████████████████████▉ │ 111658 │ █████████████████████████ │ +│ 2020-02-01 │ 22696923 │ ███████████▎ │ 3135175 │ ███████████████▋ │ 79521 │ ███████████████████▉ │ +│ 2020-03-01 │ 3466677 │ █▋ │ 987960 │ ████▉ │ 40901 │ ██████████▏ │ +└──────────────┴───────────┴───────────────────────────┴─────────┴───────────────────────────┴────────────┴───────────────────────────┘ + +172 rows in set. Elapsed: 184.809 sec. Processed 6.74 billion rows, 89.56 GB (36.47 million rows/s., 484.62 MB/s.) +``` + +10. Here are the top 10 subreddits of 2022: + +```sql +SELECT + subreddit, + count() AS count +FROM reddit +WHERE toYear(created_utc) = 2022 +GROUP BY subreddit +ORDER BY count DESC +LIMIT 10; +``` + +The response is: + +```response +┌─subreddit────────┬───count─┐ +│ AskReddit │ 3858203 │ +│ politics │ 1356782 │ +│ memes │ 1249120 │ +│ nfl │ 883667 │ +│ worldnews │ 866065 │ +│ teenagers │ 777095 │ +│ AmItheAsshole │ 752720 │ +│ dankmemes │ 657932 │ +│ nba │ 514184 │ +│ unpopularopinion │ 473649 │ +└──────────────────┴─────────┘ + +10 rows in set. Elapsed: 27.824 sec. Processed 6.74 billion rows, 53.26 GB (242.22 million rows/s., 1.91 GB/s.) +``` + +11. Let's see which subreddits had the biggest increase in comments from 2018 to 2019: + +```sql +SELECT + subreddit, + newcount - oldcount AS diff +FROM +( + SELECT + subreddit, + count(*) AS newcount + FROM reddit + WHERE toYear(created_utc) = 2019 + GROUP BY subreddit +) +ALL INNER JOIN +( + SELECT + subreddit, + count(*) AS oldcount + FROM reddit + WHERE toYear(created_utc) = 2018 + GROUP BY subreddit +) USING (subreddit) +ORDER BY diff DESC +LIMIT 50 +SETTINGS joined_subquery_requires_alias = 0; +``` + +It looks like memes and teenagers were busy on Reddit in 2019: + +```response +┌─subreddit────────────┬─────diff─┐ +│ memes │ 15368369 │ +│ AskReddit │ 14663662 │ +│ teenagers │ 12266991 │ +│ AmItheAsshole │ 11561538 │ +│ dankmemes │ 11305158 │ +│ unpopularopinion │ 6332772 │ +│ PewdiepieSubmissions │ 5930818 │ +│ Market76 │ 5014668 │ +│ relationship_advice │ 3776383 │ +│ freefolk │ 3169236 │ +│ Minecraft │ 3160241 │ +│ classicwow │ 2907056 │ +│ Animemes │ 2673398 │ +│ gameofthrones │ 2402835 │ +│ PublicFreakout │ 2267605 │ +│ ShitPostCrusaders │ 2207266 │ +│ RoastMe │ 2195715 │ +│ gonewild │ 2148649 │ +│ AnthemTheGame │ 1803818 │ +│ entitledparents │ 1706270 │ +│ MortalKombat │ 1679508 │ +│ Cringetopia │ 1620555 │ +│ pokemon │ 1615266 │ +│ HistoryMemes │ 1608289 │ +│ Brawlstars │ 1574977 │ +│ iamatotalpieceofshit │ 1558315 │ +│ trashy │ 1518549 │ +│ ChapoTrapHouse │ 1505748 │ +│ Pikabu │ 1501001 │ +│ Showerthoughts │ 1475101 │ +│ cursedcomments │ 1465607 │ +│ ukpolitics │ 1386043 │ +│ wallstreetbets │ 1384431 │ +│ interestingasfuck │ 1378900 │ +│ wholesomememes │ 1353333 │ +│ AskOuija │ 1233263 │ +│ borderlands3 │ 1197192 │ +│ aww │ 1168257 │ +│ insanepeoplefacebook │ 1155473 │ +│ FortniteCompetitive │ 1122778 │ +│ EpicSeven │ 1117380 │ +│ FreeKarma4U │ 1116423 │ +│ YangForPresidentHQ │ 1086700 │ +│ SquaredCircle │ 1044089 │ +│ MurderedByWords │ 1042511 │ +│ AskMen │ 1024434 │ +│ thedivision │ 1016634 │ +│ barstoolsports │ 985032 │ +│ nfl │ 978340 │ +│ BattlefieldV │ 971408 │ +└──────────────────────┴──────────┘ + +50 rows in set. Elapsed: 65.954 sec. Processed 13.48 billion rows, 79.67 GB (204.37 million rows/s., 1.21 GB/s.) +``` + +12. One more query: let's compare ClickHouse mentions to other technologies like Snowflake and Postgres. This query is a big one because it has to search all the comments three times for a substring, and unfortunately ClickHouse users are obviously not very active on Reddit yet: + +```sql +SELECT + toStartOfQuarter(created_utc) AS quarter, + sum(if(positionCaseInsensitive(body, 'clickhouse') > 0, 1, 0)) AS clickhouse, + sum(if(positionCaseInsensitive(body, 'snowflake') > 0, 1, 0)) AS snowflake, + sum(if(positionCaseInsensitive(body, 'postgres') > 0, 1, 0)) AS postgres +FROM reddit +GROUP BY quarter +ORDER BY quarter ASC; +``` + +```response +┌────Quarter─┬─clickhouse─┬─snowflake─┬─postgres─┐ +│ 2005-10-01 │ 0 │ 0 │ 0 │ +│ 2006-01-01 │ 0 │ 2 │ 23 │ +│ 2006-04-01 │ 0 │ 2 │ 24 │ +│ 2006-07-01 │ 0 │ 4 │ 13 │ +│ 2006-10-01 │ 0 │ 23 │ 73 │ +│ 2007-01-01 │ 0 │ 14 │ 91 │ +│ 2007-04-01 │ 0 │ 10 │ 59 │ +│ 2007-07-01 │ 0 │ 39 │ 116 │ +│ 2007-10-01 │ 0 │ 45 │ 125 │ +│ 2008-01-01 │ 0 │ 53 │ 234 │ +│ 2008-04-01 │ 0 │ 79 │ 303 │ +│ 2008-07-01 │ 0 │ 102 │ 174 │ +│ 2008-10-01 │ 0 │ 156 │ 323 │ +│ 2009-01-01 │ 0 │ 206 │ 208 │ +│ 2009-04-01 │ 0 │ 178 │ 417 │ +│ 2009-07-01 │ 0 │ 300 │ 295 │ +│ 2009-10-01 │ 0 │ 633 │ 589 │ +│ 2010-01-01 │ 0 │ 555 │ 501 │ +│ 2010-04-01 │ 0 │ 587 │ 469 │ +│ 2010-07-01 │ 0 │ 770 │ 821 │ +│ 2010-10-01 │ 0 │ 1480 │ 550 │ +│ 2011-01-01 │ 0 │ 1482 │ 568 │ +│ 2011-04-01 │ 0 │ 1558 │ 406 │ +│ 2011-07-01 │ 0 │ 2163 │ 628 │ +│ 2011-10-01 │ 0 │ 4064 │ 566 │ +│ 2012-01-01 │ 0 │ 4621 │ 662 │ +│ 2012-04-01 │ 0 │ 5737 │ 785 │ +│ 2012-07-01 │ 0 │ 6097 │ 1127 │ +│ 2012-10-01 │ 0 │ 7986 │ 600 │ +│ 2013-01-01 │ 0 │ 9704 │ 839 │ +│ 2013-04-01 │ 0 │ 8161 │ 853 │ +│ 2013-07-01 │ 0 │ 9704 │ 1028 │ +│ 2013-10-01 │ 0 │ 12879 │ 1404 │ +│ 2014-01-01 │ 0 │ 12317 │ 1548 │ +│ 2014-04-01 │ 0 │ 13181 │ 1577 │ +│ 2014-07-01 │ 0 │ 15640 │ 1710 │ +│ 2014-10-01 │ 0 │ 19479 │ 1959 │ +│ 2015-01-01 │ 0 │ 20411 │ 2104 │ +│ 2015-04-01 │ 1 │ 20309 │ 9112 │ +│ 2015-07-01 │ 0 │ 20325 │ 4771 │ +│ 2015-10-01 │ 0 │ 25087 │ 3030 │ +│ 2016-01-01 │ 0 │ 23462 │ 3126 │ +│ 2016-04-01 │ 3 │ 25496 │ 2757 │ +│ 2016-07-01 │ 4 │ 28233 │ 2928 │ +│ 2016-10-01 │ 2 │ 45445 │ 2449 │ +│ 2017-01-01 │ 9 │ 76019 │ 2808 │ +│ 2017-04-01 │ 9 │ 67919 │ 2803 │ +│ 2017-07-01 │ 13 │ 68974 │ 2771 │ +│ 2017-10-01 │ 12 │ 69730 │ 2906 │ +│ 2018-01-01 │ 17 │ 67476 │ 3152 │ +│ 2018-04-01 │ 3 │ 67139 │ 3986 │ +│ 2018-07-01 │ 14 │ 67979 │ 3609 │ +│ 2018-10-01 │ 28 │ 74147 │ 3850 │ +│ 2019-01-01 │ 14 │ 80250 │ 4305 │ +│ 2019-04-01 │ 30 │ 70307 │ 3872 │ +│ 2019-07-01 │ 33 │ 77149 │ 4164 │ +│ 2019-10-01 │ 13 │ 76746 │ 3541 │ +│ 2020-01-01 │ 16 │ 54475 │ 846 │ +└────────────┴────────────┴───────────┴──────────┘ + +58 rows in set. Elapsed: 2663.751 sec. Processed 6.74 billion rows, 1.21 TB (2.53 million rows/s., 454.37 MB/s.)
+``` \ No newline at end of file diff --git a/docs/en/getting-started/example-datasets/star-schema.md b/docs/en/getting-started/example-datasets/star-schema.md index 1702be70410..72ced87ef55 100644 --- a/docs/en/getting-started/example-datasets/star-schema.md +++ b/docs/en/getting-started/example-datasets/star-schema.md @@ -18,7 +18,7 @@ $ make Generating data: -:::warning +:::note With `-s 100` dbgen generates 600 million rows (67 GB), while with `-s 1000` it generates 6 billion rows (which takes a lot of time) ::: diff --git a/docs/en/getting-started/example-datasets/uk-price-paid.md b/docs/en/getting-started/example-datasets/uk-price-paid.md index 2a89bfda2e7..8ed79c3986f 100644 --- a/docs/en/getting-started/example-datasets/uk-price-paid.md +++ b/docs/en/getting-started/example-datasets/uk-price-paid.md @@ -1,17 +1,17 @@ --- slug: /en/getting-started/example-datasets/uk-price-paid -sidebar_label: UK Property Price Paid +sidebar_label: UK Property Prices sidebar_position: 1 -title: "UK Property Price Paid" --- -The dataset contains data about prices paid for real-estate property in England and Wales. The data is available since year 1995. -The size of the dataset in uncompressed form is about 4 GiB and it will take about 278 MiB in ClickHouse. +# The UK property prices dataset -Source: https://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads -Description of the fields: https://www.gov.uk/guidance/about-the-price-paid-data +Projections are a great way to improve the performance of queries that you run frequently. We will demonstrate the power of projections +using the UK property dataset, which contains data about prices paid for real-estate property in England and Wales. The data is available since 1995, and the size of the dataset in uncompressed form is about 4 GiB (which will only take about 278 MiB in ClickHouse). -Contains HM Land Registry data © Crown copyright and database right 2021. This data is licensed under the Open Government Licence v3.0. +- Source: https://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads +- Description of the fields: https://www.gov.uk/guidance/about-the-price-paid-data +- Contains HM Land Registry data © Crown copyright and database right 2021. This data is licensed under the Open Government Licence v3.0. ## Create the Table {#create-table} diff --git a/docs/en/getting-started/example-datasets/youtube-dislikes.md b/docs/en/getting-started/example-datasets/youtube-dislikes.md new file mode 100644 index 00000000000..5f4ef696b8b --- /dev/null +++ b/docs/en/getting-started/example-datasets/youtube-dislikes.md @@ -0,0 +1,487 @@ +--- +slug: /en/getting-started/example-datasets/youtube-dislikes +sidebar_label: YouTube Dislikes +description: A collection of dislikes of YouTube videos. +--- + +# YouTube dataset of dislikes + +In November of 2021, YouTube removed the public ***dislike*** count from all of its videos. While creators can still see the number of dislikes, viewers can only see how many ***likes*** a video has received. + +:::important +The dataset has over 4.55 billion records, so be careful just copying-and-pasting the commands below unless your resources can handle that type of volume. The commands below were executed on a **Production** instance of [ClickHouse Cloud](https://clickhouse.cloud). +::: + +The data is in a JSON format and can be downloaded from [archive.org](https://archive.org/download/dislikes_youtube_2021_12_video_json_files).
We have made this same data available in S3 so that it can be downloaded more efficiently into a ClickHouse Cloud instance. + +Here are the steps to create a table in ClickHouse Cloud and insert the data. + +:::note +The steps below will easily work on a local install of ClickHouse too. The only change would be to use the `s3` function instead of `s3Cluster` (unless you have a cluster configured - in which case change `default` to the name of your cluster). +::: + +## Step-by-step instructions + +1. Let's see what the data looks like. The `s3Cluster` table function returns a table, so we can `DESCRIBE` the result: + +```sql +DESCRIBE s3Cluster( + 'default', + 'https://clickhouse-public-datasets.s3.amazonaws.com/youtube/original/files/*.zst', + 'JSONLines' +); +``` + +ClickHouse infers the following schema from the JSON file: + +```response +┌─name────────────────┬─type─────────────────────────────────┐ +│ id │ Nullable(String) │ +│ fetch_date │ Nullable(Int64) │ +│ upload_date │ Nullable(String) │ +│ title │ Nullable(String) │ +│ uploader_id │ Nullable(String) │ +│ uploader │ Nullable(String) │ +│ uploader_sub_count │ Nullable(Int64) │ +│ is_age_limit │ Nullable(Bool) │ +│ view_count │ Nullable(Int64) │ +│ like_count │ Nullable(Int64) │ +│ dislike_count │ Nullable(Int64) │ +│ is_crawlable │ Nullable(Bool) │ +│ is_live_content │ Nullable(Bool) │ +│ has_subtitles │ Nullable(Bool) │ +│ is_ads_enabled │ Nullable(Bool) │ +│ is_comments_enabled │ Nullable(Bool) │ +│ description │ Nullable(String) │ +│ rich_metadata │ Array(Map(String, Nullable(String))) │ +│ super_titles │ Array(Map(String, Nullable(String))) │ +│ uploader_badges │ Nullable(String) │ +│ video_badges │ Nullable(String) │ +└─────────────────────┴──────────────────────────────────────┘ +``` + +2. Based on the inferred schema, we cleaned up the data types and added a primary key. Define the following table: + +```sql +CREATE TABLE youtube +( + `id` String, + `fetch_date` DateTime, + `upload_date_str` String, + `upload_date` Date, + `title` String, + `uploader_id` String, + `uploader` String, + `uploader_sub_count` Int64, + `is_age_limit` Bool, + `view_count` Int64, + `like_count` Int64, + `dislike_count` Int64, + `is_crawlable` Bool, + `has_subtitles` Bool, + `is_ads_enabled` Bool, + `is_comments_enabled` Bool, + `description` String, + `rich_metadata` Array(Map(String, String)), + `super_titles` Array(Map(String, String)), + `uploader_badges` String, + `video_badges` String +) +ENGINE = MergeTree +ORDER BY (uploader, upload_date); +``` + +3. The following command streams the records from the S3 files into the `youtube` table. + +:::important +This inserts a lot of data - 4.65 billion rows. If you do not want the entire dataset, simply add a `LIMIT` clause with the desired number of rows.
+::: + +```sql +INSERT INTO youtube +SETTINGS input_format_null_as_default = 1 +SELECT + id, + parseDateTimeBestEffortUSOrZero(toString(fetch_date)) AS fetch_date, + upload_date AS upload_date_str, + toDate(parseDateTimeBestEffortUSOrZero(upload_date::String)) AS upload_date, + ifNull(title, '') AS title, + uploader_id, + ifNull(uploader, '') AS uploader, + uploader_sub_count, + is_age_limit, + view_count, + like_count, + dislike_count, + is_crawlable, + has_subtitles, + is_ads_enabled, + is_comments_enabled, + ifNull(description, '') AS description, + rich_metadata, + super_titles, + ifNull(uploader_badges, '') AS uploader_badges, + ifNull(video_badges, '') AS video_badges +FROM s3( + 'https://clickhouse-public-datasets.s3.amazonaws.com/youtube/original/files/*.zst', + 'JSONLines' +) +``` + +Some comments about our `INSERT` command: + +- The `parseDateTimeBestEffortUSOrZero` function is handy when the incoming date fields may not be in the proper format. If `fetch_date` does not get parsed properly, it will be set to `0` +- The `upload_date` column contains valid dates, but it also contains strings like "4 hours ago" - which is certainly not a valid date. We decided to store the original value in `upload_date_str` and attempt to parse it with `toDate(parseDateTimeBestEffortUSOrZero(upload_date::String))`. If the parsing fails, we just get `0` +- We used `ifNull` to avoid getting `NULL` values in our table. If an incoming value is `NULL`, the `ifNull` function sets the value to an empty string + +4. Open a new tab in the SQL Console of ClickHouse Cloud (or a new `clickhouse-client` window) and watch the count increase. It will take a while to insert 4.56B rows, depending on your server resources. (Without any tweaking of settings, it takes about 4.5 hours.) + +```sql +SELECT formatReadableQuantity(count()) +FROM youtube +``` + +```response +┌─formatReadableQuantity(count())─┐ +│ 4.56 billion │ +└─────────────────────────────────┘ +``` + +5. Once the data is inserted, go ahead and count the number of dislikes of your favorite videos or channels. Let's see how many videos were uploaded by ClickHouse: + +```sql +SELECT count() +FROM youtube +WHERE uploader = 'ClickHouse'; +``` + +```response +┌─count()─┐ +│ 84 │ +└─────────┘ + +1 row in set. Elapsed: 0.570 sec. Processed 237.57 thousand rows, 5.77 MB (416.54 thousand rows/s., 10.12 MB/s.) +``` + +:::note +The query above runs so quickly because we chose `uploader` as the first column of the primary key - so it only had to process 237k rows. +::: + +6. Let's look at the likes and dislikes of ClickHouse videos: + +```sql +SELECT + title, + like_count, + dislike_count +FROM youtube +WHERE uploader = 'ClickHouse' +ORDER BY dislike_count DESC; +``` + +The response looks like: + +```response +┌─title────────────────────────────────────────────────────────────────────────────────────────────────┬─like_count─┬─dislike_count─┐ +│ ClickHouse v21.11 Release Webinar │ 52 │ 3 │ +│ ClickHouse Introduction │ 97 │ 3 │ +│ Casa Modelo Algarve │ 180 │ 3 │ +│ Профайлер запросов: трудный путь │ 33 │ 3 │ +│ ClickHouse в Курсометре │ 4 │ 2 │ +│ 10 Good Reasons to Use ClickHouse │ 27 │ 2 │ +... + +84 rows in set. Elapsed: 0.013 sec. Processed 155.65 thousand rows, 16.94 MB (11.96 million rows/s., 1.30 GB/s.) +``` + +7. 
Here is a search for videos with **ClickHouse** in the `title` or `description` fields: + +```sql +SELECT + view_count, + like_count, + dislike_count, + concat('https://youtu.be/', id) AS url, + title +FROM youtube +WHERE (title ILIKE '%ClickHouse%') OR (description ILIKE '%ClickHouse%') +ORDER BY + like_count DESC, + view_count DESC; +``` + +This query has to process every row, and also parse through two columns of strings. Even then, we get decent performance at 4.15M rows/second: + +```response +1174 rows in set. Elapsed: 1099.368 sec. Processed 4.56 billion rows, 1.98 TB (4.15 million rows/s., 1.80 GB/s.) +``` + +The results look like: + +```response +┌─view_count─┬─like_count─┬─dislike_count─┬─url──────────────────────────┬─title──────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ 1919 │ 63 │ 1 │ https://youtu.be/b9MeoOtAivQ │ ClickHouse v21.10 Release Webinar │ +│ 8710 │ 62 │ 4 │ https://youtu.be/PeV1mC2z--M │ What is JDBC DriverManager? | JDBC │ +│ 3534 │ 62 │ 1 │ https://youtu.be/8nWRhK9gw10 │ CLICKHOUSE - Arquitetura Modular │ +``` + +## Questions + +### If someone disables comments, does it lower the chance someone will actually click like or dislike? + +When commenting is disabled, are people more likely to like or dislike to express their feelings about a video? + +```sql +SELECT + concat('< ', formatReadableQuantity(view_range)) AS views, + is_comments_enabled, + total_clicks / num_views AS prob_like_dislike +FROM +( + SELECT + is_comments_enabled, + power(10, CEILING(log10(view_count + 1))) AS view_range, + sum(like_count + dislike_count) AS total_clicks, + sum(view_count) AS num_views + FROM youtube + GROUP BY + view_range, + is_comments_enabled +) WHERE view_range > 1 +ORDER BY + is_comments_enabled ASC, + num_views ASC; +``` + +```response +┌─views─────────────┬─is_comments_enabled─┬────prob_like_dislike─┐ +│ < 10.00 │ false │ 0.08224180712685371 │ +│ < 100.00 │ false │ 0.06346337759167248 │ +│ < 1.00 thousand │ false │ 0.03201883652987105 │ +│ < 10.00 thousand │ false │ 0.01716073540410903 │ +│ < 10.00 billion │ false │ 0.004555639481829971 │ +│ < 100.00 thousand │ false │ 0.01293351460515323 │ +│ < 1.00 billion │ false │ 0.004761811192464957 │ +│ < 1.00 million │ false │ 0.010472604018980551 │ +│ < 10.00 million │ false │ 0.00788902538420125 │ +│ < 100.00 million │ false │ 0.00579152804250582 │ +│ < 10.00 │ true │ 0.09819517478134059 │ +│ < 100.00 │ true │ 0.07403784478585775 │ +│ < 1.00 thousand │ true │ 0.03846294910067627 │ +│ < 10.00 billion │ true │ 0.005615217329358215 │ +│ < 10.00 thousand │ true │ 0.02505881391701455 │ +│ < 1.00 billion │ true │ 0.007434998802482997 │ +│ < 100.00 thousand │ true │ 0.022694648130822004 │ +│ < 100.00 million │ true │ 0.011761563746575625 │ +│ < 1.00 million │ true │ 0.020776022304589435 │ +│ < 10.00 million │ true │ 0.016917095718089584 │ +└───────────────────┴─────────────────────┴──────────────────────┘ + +22 rows in set. Elapsed: 8.460 sec. Processed 4.56 billion rows, 77.48 GB (538.73 million rows/s., 9.16 GB/s.) +``` + +Enabling comments seems to be correlated with a higher rate of engagement. + + +### How does the number of videos change over time - notable events?
+ +```sql +SELECT + toStartOfMonth(toDateTime(upload_date)) AS month, + uniq(uploader_id) AS uploaders, + count() as num_videos, + sum(view_count) as view_count +FROM youtube +GROUP BY month +ORDER BY month ASC; +``` + +```response +┌──────month─┬─uploaders─┬─num_videos─┬───view_count─┐ +│ 2005-04-01 │ 5 │ 6 │ 213597737 │ +│ 2005-05-01 │ 6 │ 9 │ 2944005 │ +│ 2005-06-01 │ 165 │ 351 │ 18624981 │ +│ 2005-07-01 │ 395 │ 1168 │ 94164872 │ +│ 2005-08-01 │ 1171 │ 3128 │ 124540774 │ +│ 2005-09-01 │ 2418 │ 5206 │ 475536249 │ +│ 2005-10-01 │ 6750 │ 13747 │ 737593613 │ +│ 2005-11-01 │ 13706 │ 28078 │ 1896116976 │ +│ 2005-12-01 │ 24756 │ 49885 │ 2478418930 │ +│ 2006-01-01 │ 49992 │ 100447 │ 4532656581 │ +│ 2006-02-01 │ 67882 │ 138485 │ 5677516317 │ +│ 2006-03-01 │ 103358 │ 212237 │ 8430301366 │ +│ 2006-04-01 │ 114615 │ 234174 │ 9980760440 │ +│ 2006-05-01 │ 152682 │ 332076 │ 14129117212 │ +│ 2006-06-01 │ 193962 │ 429538 │ 17014143263 │ +│ 2006-07-01 │ 234401 │ 530311 │ 18721143410 │ +│ 2006-08-01 │ 281280 │ 614128 │ 20473502342 │ +│ 2006-09-01 │ 312434 │ 679906 │ 23158422265 │ +│ 2006-10-01 │ 404873 │ 897590 │ 27357846117 │ +``` + +A spike of uploaders [around covid is noticeable](https://www.theverge.com/2020/3/27/21197642/youtube-with-me-style-videos-views-coronavirus-cook-workout-study-home-beauty). + + +### More subtitles over time and when + +With advances in speech recognition, it's easier than ever to create subtitles for videos. YouTube added auto-captioning in late 2009 - was there a jump then? + +```sql +SELECT + toStartOfMonth(upload_date) AS month, + countIf(has_subtitles) / count() AS percent_subtitles, + percent_subtitles - any(percent_subtitles) OVER ( + ORDER BY month ASC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) AS previous +FROM youtube +GROUP BY month +ORDER BY month ASC; +``` + +```response +┌──────month─┬───percent_subtitles─┬────────────────previous─┐ +│ 2015-01-01 │ 0.2652653881082824 │ 0.2652653881082824 │ +│ 2015-02-01 │ 0.3147556050309162 │ 0.049490216922633834 │ +│ 2015-03-01 │ 0.32460464492371877 │ 0.009849039892802558 │ +│ 2015-04-01 │ 0.33471963051468445 │ 0.010114985590965686 │ +│ 2015-05-01 │ 0.3168087575501062 │ -0.017910872964578273 │ +│ 2015-06-01 │ 0.3162609788438222 │ -0.0005477787062839745 │ +│ 2015-07-01 │ 0.31828767677518033 │ 0.0020266979313581235 │ +│ 2015-08-01 │ 0.3045551564286859 │ -0.013732520346494415 │ +│ 2015-09-01 │ 0.311221133995152 │ 0.006665977566466086 │ +│ 2015-10-01 │ 0.30574870926812175 │ -0.005472424727030245 │ +│ 2015-11-01 │ 0.31125409712077234 │ 0.0055053878526505895 │ +│ 2015-12-01 │ 0.3190967954651779 │ 0.007842698344405541 │ +│ 2016-01-01 │ 0.32636021432496176 │ 0.007263418859783877 │ + +``` + +The data results show a spike in 2009. Apparently at that time, YouTube was removing their community captions feature, which allowed you to upload captions for other people's videos. +This prompted a very successful campaign to have creators add captions to their videos for hard of hearing and deaf viewers.
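+
+To check 2009 directly, here is a sketch (this query is not part of the original walkthrough, but it uses only the same `youtube` table and columns) that restricts the aggregation to the months around the auto-captioning launch:
+
+```sql
+SELECT
+    toStartOfMonth(upload_date) AS month,
+    countIf(has_subtitles) / count() AS percent_subtitles
+FROM youtube
+WHERE upload_date BETWEEN '2009-01-01' AND '2010-12-31'
+GROUP BY month
+ORDER BY month ASC;
+```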
+ + +### Top uploaders over time + +```sql +WITH uploaders AS + ( + SELECT uploader + FROM youtube + GROUP BY uploader + ORDER BY sum(view_count) DESC + LIMIT 10 + ) +SELECT + month, + uploader, + sum(view_count) AS total_views, + avg(dislike_count / like_count) AS like_to_dislike_ratio +FROM youtube +WHERE uploader IN (uploaders) +GROUP BY + toStartOfMonth(upload_date) AS month, + uploader +ORDER BY + month ASC, + total_views DESC; +``` + +```response +┌──────month─┬─uploader───────────────────┬─total_views─┬─like_to_dislike_ratio─┐ +│ 1970-01-01 │ T-Series │ 10957099 │ 0.022784656361208206 │ +│ 1970-01-01 │ Ryan's World │ 0 │ 0.003035559410234172 │ +│ 1970-01-01 │ SET India │ 0 │ nan │ +│ 2006-09-01 │ Cocomelon - Nursery Rhymes │ 256406497 │ 0.7005566715978622 │ +│ 2007-06-01 │ Cocomelon - Nursery Rhymes │ 33641320 │ 0.7088650914344298 │ +│ 2008-02-01 │ WWE │ 43733469 │ 0.07198856488734842 │ +│ 2008-03-01 │ WWE │ 16514541 │ 0.1230603715431997 │ +│ 2008-04-01 │ WWE │ 5907295 │ 0.2089399470159618 │ +│ 2008-05-01 │ WWE │ 7779627 │ 0.09101676560436774 │ +│ 2008-06-01 │ WWE │ 7018780 │ 0.0974184753155297 │ +│ 2008-07-01 │ WWE │ 4686447 │ 0.1263845422065158 │ +│ 2008-08-01 │ WWE │ 4514312 │ 0.08384574274791441 │ +│ 2008-09-01 │ WWE │ 3717092 │ 0.07872802579349912 │ +``` + +### How does the like ratio change as views go up? + +```sql +SELECT + concat('< ', formatReadableQuantity(view_range)) AS view_range, + is_comments_enabled, + round(like_ratio, 2) AS like_ratio +FROM +( +SELECT + power(10, CEILING(log10(view_count + 1))) as view_range, + is_comments_enabled, + avg(like_count / dislike_count) as like_ratio +FROM youtube WHERE dislike_count > 0 +GROUP BY + view_range, + is_comments_enabled HAVING view_range > 1 +ORDER BY + view_range ASC, + is_comments_enabled ASC +); +``` + +```response +┌─view_range────────┬─is_comments_enabled─┬─like_ratio─┐ +│ < 10.00 │ false │ 0.66 │ +│ < 10.00 │ true │ 0.66 │ +│ < 100.00 │ false │ 3 │ +│ < 100.00 │ true │ 3.95 │ +│ < 1.00 thousand │ false │ 8.45 │ +│ < 1.00 thousand │ true │ 13.07 │ +│ < 10.00 thousand │ false │ 18.57 │ +│ < 10.00 thousand │ true │ 30.92 │ +│ < 100.00 thousand │ false │ 23.55 │ +│ < 100.00 thousand │ true │ 42.13 │ +│ < 1.00 million │ false │ 19.23 │ +│ < 1.00 million │ true │ 37.86 │ +│ < 10.00 million │ false │ 12.13 │ +│ < 10.00 million │ true │ 30.72 │ +│ < 100.00 million │ false │ 6.67 │ +│ < 100.00 million │ true │ 23.32 │ +│ < 1.00 billion │ false │ 3.08 │ +│ < 1.00 billion │ true │ 20.69 │ +│ < 10.00 billion │ false │ 1.77 │ +│ < 10.00 billion │ true │ 19.5 │ +└───────────────────┴─────────────────────┴────────────┘ +``` + +### How are views distributed?
+ +```sql +SELECT + labels AS percentile, + round(quantiles) AS views +FROM +( + SELECT + quantiles(0.999, 0.99, 0.95, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1)(view_count) AS quantiles, + ['99.9th', '99th', '95th', '90th', '80th', '70th','60th', '50th', '40th', '30th', '20th', '10th'] AS labels + FROM youtube +) +ARRAY JOIN + quantiles, + labels; +``` + +```response +┌─percentile─┬───views─┐ +│ 99.9th │ 1216624 │ +│ 99th │ 143519 │ +│ 95th │ 13542 │ +│ 90th │ 4054 │ +│ 80th │ 950 │ +│ 70th │ 363 │ +│ 60th │ 177 │ +│ 50th │ 97 │ +│ 40th │ 57 │ +│ 30th │ 32 │ +│ 20th │ 16 │ +│ 10th │ 6 │ +└────────────┴─────────┘ +``` \ No newline at end of file diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 0867f3a0795..3f6c2577c94 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -14,75 +14,35 @@ import CodeBlock from '@theme/CodeBlock'; You have three options for getting up and running with ClickHouse: - **[ClickHouse Cloud](https://clickhouse.com/cloud/):** The official ClickHouse as a service, - built by, maintained and supported by the creators of ClickHouse -- **[Self-managed ClickHouse](#self-managed-install):** ClickHouse can run on any Linux, FreeBSD, or macOS with x86-64, ARM, or PowerPC64LE CPU architecture -- **[Docker Image](https://hub.docker.com/r/clickhouse/clickhouse-server/):** Read the guide with the official image in Docker Hub +- **[Quick Install](#quick-install):** an easy-to-download binary for testing and developing with ClickHouse +- **[Production Deployments](#available-installation-options):** ClickHouse can run on any Linux, FreeBSD, or macOS with x86-64, ARM, or PowerPC64LE CPU architecture +- **[Docker Image](https://hub.docker.com/r/clickhouse/clickhouse-server/):** use the official Docker image in Docker Hub ## ClickHouse Cloud The quickest and easiest way to get up and running with ClickHouse is to create a new service in [ClickHouse Cloud](https://clickhouse.cloud/). -## Self-Managed Install +## Quick Install :::tip For production installs of a specific release version see the [installation options](#available-installation-options) down below. ::: - - +On Linux and macOS: -1. The simplest way to download ClickHouse locally is to run the following command. If your operating system is supported, an appropriate ClickHouse binary will be downloaded and made runnable: +1. If you are just getting started and want to see what ClickHouse can do, the simplest way to download ClickHouse locally is to run the following command. It downloads a single binary for your operating system that can be used to run the ClickHouse server, clickhouse-client, clickhouse-local, +ClickHouse Keeper, and other tools: ```bash curl https://clickhouse.com/ | sh ``` -1. Run the `install` command, which defines a collection of useful symlinks along with the files and folders used by ClickHouse - all of which you can see in the output of the install script: - - ```bash - sudo ./clickhouse install - ``` - -1. At the end of the install script, you are prompted for a password for the `default` user. Feel free to enter a password, or you can optionally leave it blank: - - ```response - Creating log directory /var/log/clickhouse-server. - Creating data directory /var/lib/clickhouse. - Creating pid directory /var/run/clickhouse-server. 
- chown -R clickhouse:clickhouse '/var/log/clickhouse-server' - chown -R clickhouse:clickhouse '/var/run/clickhouse-server' - chown clickhouse:clickhouse '/var/lib/clickhouse' - Enter password for default user: - ``` - You should see the following output: - - ```response - ClickHouse has been successfully installed. - - Start clickhouse-server with: - sudo clickhouse start - - Start clickhouse-client with: - clickhouse-client - ``` - 1. Run the following command to start the ClickHouse server: ```bash - sudo clickhouse start + ./clickhouse server ``` - - - -1. The simplest way to download ClickHouse locally is to run the following command. If your operating system is supported, an appropriate ClickHouse binary will be downloaded and made runnable: - ```bash - curl https://clickhouse.com/ | sh - ``` - -1. Run the ClickHouse server: - - ```bash - ./clickhouse server - ``` + The first time you run this script, the necessary files and folders are created in the current directory, then the server starts. 1. Open a new terminal and use the **clickhouse-client** to connect to your service: @@ -101,15 +61,14 @@ For production installs of a specific release version see the [installation opti You are ready to start sending DDL and SQL commands to ClickHouse! - - - :::tip -The [Quick Start](/docs/en/quick-start.mdx/#step-1-get-clickhouse) walks through the steps to download and run ClickHouse, connect to it, and insert data. +The [Quick Start](/docs/en/quick-start.mdx) walks through the steps for creating tables and inserting data. ::: -## Available Installation Options {#available-installation-options} +## Production Deployments {#available-installation-options} + +For production deployments of ClickHouse, choose from one of the following install options. ### From DEB Packages {#install-from-deb-packages} @@ -118,9 +77,12 @@ It is recommended to use official pre-compiled `deb` packages for Debian or Ubun #### Setup the Debian repository ``` bash sudo apt-get install -y apt-transport-https ca-certificates dirmngr -sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754 +GNUPGHOME=$(mktemp -d) +sudo GNUPGHOME="$GNUPGHOME" gpg --no-default-keyring --keyring /usr/share/keyrings/clickhouse-keyring.gpg --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 8919F6BD2B48D754 +sudo rm -r "$GNUPGHOME" +sudo chmod +r /usr/share/keyrings/clickhouse-keyring.gpg -echo "deb https://packages.clickhouse.com/deb stable main" | sudo tee \ +echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" | sudo tee \ /etc/apt/sources.list.d/clickhouse.list sudo apt-get update ``` @@ -174,15 +136,16 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password. -You can replace `stable` with `lts` to use different [release kinds](/docs/en/faq/operations/production.md) based on your needs. +You can replace `stable` with `lts` to use different [release kinds](/knowledgebase/production) based on your needs. You can also download and install packages manually from [here](https://packages.clickhouse.com/deb/pool/main/c/). #### Install standalone ClickHouse Keeper :::tip -If you are going to run ClickHouse Keeper on the same server as ClickHouse server you -do not need to install ClickHouse Keeper as it is included with ClickHouse server. This command is only needed on standalone ClickHouse Keeper servers. 
+In production environments we [strongly recommend](/docs/en/operations/tips.md#L143-L144) running ClickHouse Keeper on dedicated nodes. +In test environments, if you decide to run ClickHouse Server and ClickHouse Keeper on the same server, you do not need to install ClickHouse Keeper as it is included with ClickHouse server. +This command is only needed on standalone ClickHouse Keeper servers. ::: ```bash sudo apt-get install -y clickhouse-keeper ``` #### Enable and start ClickHouse Keeper ```bash sudo systemctl enable clickhouse-keeper sudo systemctl start clickhouse-keeper sudo systemctl status clickhouse-keeper ``` #### Packages {#packages} -- `clickhouse-common-static` — Installs ClickHouse compiled binary files. -- `clickhouse-server` — Creates a symbolic link for `clickhouse-server` and installs the default server configuration. -- `clickhouse-client` — Creates a symbolic link for `clickhouse-client` and other client-related tools. and installs client configuration files. -- `clickhouse-common-static-dbg` — Installs ClickHouse compiled binary files with debug info. -- `clickhouse-keeper` - Used to install ClickHouse Keeper on dedicated ClickHouse Keeper nodes. If you are running ClickHouse Keeper on the same server as ClickHouse server, then you do not need to install this package. Installs ClickHouse Keeper and the default ClickHouse Keeper configuration files. +- `clickhouse-common-static` — Installs ClickHouse compiled binary files. +- `clickhouse-server` — Creates a symbolic link for `clickhouse-server` and installs the default server configuration. +- `clickhouse-client` — Creates a symbolic link for `clickhouse-client` and other client-related tools, and installs client configuration files. +- `clickhouse-common-static-dbg` — Installs ClickHouse compiled binary files with debug info. +- `clickhouse-keeper` - Used to install ClickHouse Keeper on dedicated ClickHouse Keeper nodes. If you are running ClickHouse Keeper on the same server as ClickHouse server, then you do not need to install this package. Installs ClickHouse Keeper and the default ClickHouse Keeper configuration files. :::info If you need to install a specific version of ClickHouse, you have to install all packages with the same version: @@ -222,6 +185,15 @@ sudo yum install -y yum-utils sudo yum-config-manager --add-repo https://packages.clickhouse.com/rpm/clickhouse.repo ``` +For systems with `zypper` package manager (openSUSE, SLES): + +``` bash +sudo zypper addrepo -r https://packages.clickhouse.com/rpm/clickhouse.repo -g +sudo zypper --gpg-auto-import-keys refresh clickhouse-stable +``` + +Later any `yum install` can be replaced by `zypper install`. To specify a particular version, add `-$VERSION` to the end of the package name, e.g. `clickhouse-client-22.2.2.22`. + +#### Install ClickHouse server and client ```bash sudo yum install -y clickhouse-server clickhouse-client ``` #### Start ClickHouse server ```bash sudo systemctl enable clickhouse-server sudo systemctl start clickhouse-server sudo systemctl status clickhouse-server clickhouse-client # or "clickhouse-client --password" if you set up a password. ``` #### Install standalone ClickHouse Keeper :::tip -If you are going to run ClickHouse Keeper on the same server as ClickHouse server you -do not need to install ClickHouse Keeper as it is included with ClickHouse server. This command is only needed on standalone ClickHouse Keeper servers. +In production environments we [strongly recommend](/docs/en/operations/tips.md#L143-L144) running ClickHouse Keeper on dedicated nodes. +In test environments, if you decide to run ClickHouse Server and ClickHouse Keeper on the same server, you do not need to install ClickHouse Keeper as it is included with ClickHouse server. +This command is only needed on standalone ClickHouse Keeper servers.
::: ```bash @@ -272,7 +245,7 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password. -You can replace `stable` with `lts` to use different [release kinds](/docs/en/faq/operations/production.md) based on your needs. +You can replace `stable` with `lts` to use different [release kinds](/knowledgebase/production) based on your needs. Then run these commands to install packages: @@ -467,8 +440,8 @@ We recommend using a minimum of 4GB of RAM to perform non-trivial queries. The C The required volume of RAM generally depends on: -- The complexity of queries. -- The amount of data that is processed in queries. +- The complexity of queries. +- The amount of data that is processed in queries. To calculate the required volume of RAM, you may estimate the size of temporary data for [GROUP BY](/docs/en/sql-reference/statements/select/group-by.md#select-group-by-clause), [DISTINCT](/docs/en/sql-reference/statements/select/distinct.md#select-distinct), [JOIN](/docs/en/sql-reference/statements/select/join.md#select-join) and other operations you use. @@ -480,11 +453,11 @@ The ClickHouse binary requires at least 2.5 GB of disk space for installation. The volume of storage required for your data may be calculated separately based on -- an estimation of the data volume. +- an estimation of the data volume. You can take a sample of the data and get the average size of a row from it. Then multiply the value by the number of rows you plan to store. -- The data compression coefficient. +- The data compression coefficient. To estimate the data compression coefficient, load a sample of your data into ClickHouse, and compare the actual size of the data with the size of the table stored. For example, clickstream data is usually compressed by 6-10 times. diff --git a/docs/en/getting-started/playground.md b/docs/en/getting-started/playground.md index e995ea6ef8b..6a6d4092177 100644 --- a/docs/en/getting-started/playground.md +++ b/docs/en/getting-started/playground.md @@ -1,5 +1,5 @@ --- -sidebar_label: Playground +sidebar_label: ClickHouse Playground sidebar_position: 2 keywords: [clickhouse, playground, getting, started, docs] description: The ClickHouse Playground allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster. @@ -11,7 +11,7 @@ slug: /en/getting-started/playground [ClickHouse Playground](https://play.clickhouse.com/play?user=play) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster. Several example datasets are available in Playground. -You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces). +You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../integrations/index.mdx). ## Credentials {#credentials} @@ -26,8 +26,8 @@ You can make queries to Playground using any HTTP client, for example [curl](htt The queries are executed as a read-only user. 
It implies some limitations: -- DDL queries are not allowed -- INSERT queries are not allowed +- DDL queries are not allowed +- INSERT queries are not allowed The service also has quotas on its usage. diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index e3b40d83efe..f670d464006 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -119,7 +119,7 @@ When processing a query, the client shows: 1. Progress, which is updated no more than 10 times per second (by default). For quick queries, the progress might not have time to be displayed. 2. The formatted query after parsing, for debugging. 3. The result in the specified format. -4. The number of lines in the result, the time passed, and the average speed of query processing. +4. The number of lines in the result, the time passed, and the average speed of query processing. All data amounts refer to uncompressed data. You can cancel a long query by pressing Ctrl+C. However, you will still need to wait a little for the server to abort the request. It is not possible to cancel a query at certain stages. If you do not wait and press Ctrl+C a second time, the client will exit. @@ -148,8 +148,8 @@ Format a query as usual, then place the values that you want to pass from the ap {<name>:<data type>} ``` -- `name` — Placeholder identifier. In the console client it should be used in app parameters as `--param_ = value`. -- `data type` — [Data type](../sql-reference/data-types/index.md) of the app parameter value. For example, a data structure like `(integer, ('string', integer))` can have the `Tuple(UInt8, Tuple(String, UInt8))` data type (you can also use another [integer](../sql-reference/data-types/int-uint.md) types). It's also possible to pass table, database, column names as a parameter, in that case you would need to use `Identifier` as a data type. +- `name` — Placeholder identifier. In the console client it should be used in app parameters as `--param_<name> = value`. +- `data type` — [Data type](../sql-reference/data-types/index.md) of the app parameter value. For example, a data structure like `(integer, ('string', integer))` can have the `Tuple(UInt8, Tuple(String, UInt8))` data type (you can also use other [integer](../sql-reference/data-types/int-uint.md) types). It's also possible to pass table, database, column names as a parameter, in that case you would need to use `Identifier` as a data type. #### Example {#example} @@ -162,37 +162,37 @@ $ clickhouse-client --param_tbl="numbers" --param_db="system" --param_col="numbe You can pass parameters to `clickhouse-client` (all parameters have a default value) using: -- From the Command Line +- From the Command Line Command-line options override the default values and settings in configuration files. -- Configuration files. +- Configuration files. Settings in the configuration files override the default values. ### Command Line Options {#command-line-options} -- `--host, -h` – The server name, ‘localhost’ by default. You can use either the name or the IPv4 or IPv6 address. -- `--port` – The port to connect to. Default value: 9000. Note that the HTTP interface and the native interface use different ports. -- `--user, -u` – The username. Default value: default. -- `--password` – The password. Default value: empty string. -- `--ask-password` - Prompt the user to enter a password. -- `--query, -q` – The query to process when using non-interactive mode. You must specify either `query` or `queries-file` option. -- `--queries-file` – file path with queries to execute. 
You must specify either `query` or `queries-file` option. -- `--database, -d` – Select the current default database. Default value: the current database from the server settings (‘default’ by default). -- `--multiline, -m` – If specified, allow multiline queries (do not send the query on Enter). -- `--multiquery, -n` – If specified, allow processing multiple queries separated by semicolons. -- `--format, -f` – Use the specified default format to output the result. -- `--vertical, -E` – If specified, use the [Vertical format](../interfaces/formats.md#vertical) by default to output the result. This is the same as `–format=Vertical`. In this format, each value is printed on a separate line, which is helpful when displaying wide tables. -- `--time, -t` – If specified, print the query execution time to ‘stderr’ in non-interactive mode. -- `--stacktrace` – If specified, also print the stack trace if an exception occurs. -- `--config-file` – The name of the configuration file. -- `--secure` – If specified, will connect to server over secure connection (TLS). You might need to configure your CA certificates in the [configuration file](#configuration_files). The available configuration settings are the same as for [server-side TLS configuration](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-openssl). -- `--history_file` — Path to a file containing command history. -- `--param_` — Value for a [query with parameters](#cli-queries-with-parameters). -- `--hardware-utilization` — Print hardware utilization information in progress bar. -- `--print-profile-events` – Print `ProfileEvents` packets. -- `--profile-events-delay-ms` – Delay between printing `ProfileEvents` packets (-1 - print only totals, 0 - print every single packet). +- `--host, -h` – The server name, ‘localhost’ by default. You can use either the name or the IPv4 or IPv6 address. +- `--port` – The port to connect to. Default value: 9000. Note that the HTTP interface and the native interface use different ports. +- `--user, -u` – The username. Default value: default. +- `--password` – The password. Default value: empty string. +- `--ask-password` - Prompt the user to enter a password. +- `--query, -q` – The query to process when using non-interactive mode. Cannot be used simultaneously with `--queries-file`. +- `--queries-file` – file path with queries to execute. Cannot be used simultaneously with `--query`. +- `--multiquery, -n` – If specified, multiple queries separated by semicolons can be listed after the `--query` option. For convenience, it is also possible to omit `--query` and pass the queries directly after `--multiquery`. +- `--multiline, -m` – If specified, allow multiline queries (do not send the query on Enter). +- `--database, -d` – Select the current default database. Default value: the current database from the server settings (‘default’ by default). +- `--format, -f` – Use the specified default format to output the result. +- `--vertical, -E` – If specified, use the [Vertical format](../interfaces/formats.md#vertical) by default to output the result. This is the same as `--format=Vertical`. In this format, each value is printed on a separate line, which is helpful when displaying wide tables. +- `--time, -t` – If specified, print the query execution time to ‘stderr’ in non-interactive mode. +- `--stacktrace` – If specified, also print the stack trace if an exception occurs. +- `--config-file` – The name of the configuration file. 
+- `--secure` – If specified, will connect to server over secure connection (TLS). You might need to configure your CA certificates in the [configuration file](#configuration_files). The available configuration settings are the same as for [server-side TLS configuration](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-openssl). +- `--history_file` — Path to a file containing command history. +- `--param_<name>` — Value for a [query with parameters](#cli-queries-with-parameters). +- `--hardware-utilization` — Print hardware utilization information in progress bar. +- `--print-profile-events` – Print `ProfileEvents` packets. +- `--profile-events-delay-ms` – Delay between printing `ProfileEvents` packets (-1 - print only totals, 0 - print every single packet). Since version 20.5, `clickhouse-client` has automatic syntax highlighting (always enabled). @@ -200,10 +200,10 @@ Since version 20.5, `clickhouse-client` has automatic syntax highlighting (alway `clickhouse-client` uses the first existing file of the following: -- Defined in the `--config-file` parameter. -- `./clickhouse-client.xml` -- `~/.clickhouse-client/config.xml` -- `/etc/clickhouse-client/config.xml` +- Defined in the `--config-file` parameter. +- `./clickhouse-client.xml` +- `~/.clickhouse-client/config.xml` +- `/etc/clickhouse-client/config.xml` Example of a config file: diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index b2b2c6d5b1e..2ab9e8caec4 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1,7 +1,7 @@ --- slug: /en/interfaces/formats sidebar_position: 21 -sidebar_label: Input and Output Formats +sidebar_label: View all formats... title: Formats for Input and Output Data --- @@ -38,6 +38,7 @@ The supported formats are: | [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ | | [JSONCompactColumns](#jsoncompactcolumns) | ✔ | ✔ | | [JSONEachRow](#jsoneachrow) | ✔ | ✔ | +| [PrettyJSONEachRow](#prettyjsoneachrow) | ✗ | ✔ | | [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ | | [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ | | [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ | @@ -68,6 +69,7 @@ The supported formats are: | [Avro](#data-format-avro) | ✔ | ✔ | | [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ | | [Parquet](#data-format-parquet) | ✔ | ✔ | +| [ParquetMetadata](#data-format-parquet-metadata) | ✔ | ✗ | | [Arrow](#data-format-arrow) | ✔ | ✔ | | [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ | | [ORC](#data-format-orc) | ✔ | ✔ | @@ -78,7 +80,7 @@ The supported formats are: | [Null](#null) | ✗ | ✔ | | [XML](#xml) | ✗ | ✔ | | [CapnProto](#capnproto) | ✔ | ✔ | -| [LineAsString](#lineasstring) | ✔ | ✗ | +| [LineAsString](#lineasstring) | ✔ | ✔ | | [Regexp](#data-format-regexp) | ✔ | ✗ | | [RawBLOB](#rawblob) | ✔ | ✔ | | [MsgPack](#msgpack) | ✔ | ✔ | @@ -154,7 +156,7 @@ Arrays are written as a list of comma-separated values in square brackets. Numbe In input data, ENUM values can be represented as names or as ids. First, we try to match the input value to the ENUM name. If we fail and the input value is a number, we try to match this number to the ENUM id. If input data contains only ENUM ids, it's recommended to enable the setting [input_format_tsv_enum_as_number](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_enum_as_number) to optimize ENUM parsing.
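For example, here is a minimal sketch of that behavior (the table name and values are hypothetical): with the setting enabled, the TSV value `2` is matched to the ENUM id and stored as `'second'`:

```sql
CREATE TABLE tsv_enum_sketch (value Enum('first' = 1, 'second' = 2)) ENGINE = Memory;
SET input_format_tsv_enum_as_number = 1;
-- the TSV payload below is the single id 2, which maps to 'second'
INSERT INTO tsv_enum_sketch FORMAT TSV 2
```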
-Each element of [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) structures is represented as an array. +Each element of [Nested](/docs/en/sql-reference/data-types/nested-data-structures/index.md) structures is represented as an array. For example: @@ -205,7 +207,7 @@ Differs from the `TabSeparated` format in that the column names are written in t During parsing, the first row is expected to contain the column names. You can use column names to determine their position and to check their correctness. -:::warning +:::note If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from the input data will be mapped to the columns of the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. @@ -217,7 +219,7 @@ This format is also available under the name `TSVWithNames`. Differs from the `TabSeparated` format in that the column names are written to the first row, while the column types are in the second row. -:::warning +:::note If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from the input data will be mapped to the columns in the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. @@ -255,11 +257,11 @@ where `delimiter_i` is a delimiter between values (`$` symbol can be escaped as `column_i` is a name or index of a column whose values are to be selected or inserted (if empty, then the column will be skipped), `serializeAs_i` is an escaping rule for the column values. The following escaping rules are supported: -- `CSV`, `JSON`, `XML` (similar to the formats of the same names) -- `Escaped` (similar to `TSV`) -- `Quoted` (similar to `Values`) -- `Raw` (without escaping, similar to `TSVRaw`) -- `None` (no escaping rule, see further) +- `CSV`, `JSON`, `XML` (similar to the formats of the same names) +- `Escaped` (similar to `TSV`) +- `Quoted` (similar to `Values`) +- `Raw` (without escaping, similar to `TSVRaw`) +- `None` (no escaping rule, see further) If an escaping rule is omitted, then `None` will be used. `XML` is suitable only for output. @@ -275,15 +277,15 @@ The `format_template_rows_between_delimiter` setting specifies the delimiter bet Setting `format_template_resultset` specifies the path to the file, which contains a format string for resultset. Format string for resultset has the same syntax as a format string for row and allows specifying a prefix, a suffix, and a way to print some additional information. It contains the following placeholders instead of column names: -- `data` is the rows with data in `format_template_row` format, separated by `format_template_rows_between_delimiter`. This placeholder must be the first placeholder in the format string.
-- `totals` is the row with total values in `format_template_row` format (when using WITH TOTALS) -- `min` is the row with minimum values in `format_template_row` format (when extremes are set to 1) -- `max` is the row with maximum values in `format_template_row` format (when extremes are set to 1) -- `rows` is the total number of output rows -- `rows_before_limit` is the minimal number of rows there would have been without LIMIT. Output only if the query contains LIMIT. If the query contains GROUP BY, rows_before_limit_at_least is the exact number of rows there would have been without a LIMIT. -- `time` is the request execution time in seconds -- `rows_read` is the number of rows has been read -- `bytes_read` is the number of bytes (uncompressed) has been read +- `data` is the rows with data in `format_template_row` format, separated by `format_template_rows_between_delimiter`. This placeholder must be the first placeholder in the format string. +- `totals` is the row with total values in `format_template_row` format (when using WITH TOTALS) +- `min` is the row with minimum values in `format_template_row` format (when extremes are set to 1) +- `max` is the row with maximum values in `format_template_row` format (when extremes are set to 1) +- `rows` is the total number of output rows +- `rows_before_limit` is the minimal number of rows there would have been without LIMIT. Output only if the query contains LIMIT. If the query contains GROUP BY, rows_before_limit_at_least is the exact number of rows there would have been without a LIMIT. +- `time` is the request execution time in seconds +- `rows_read` is the number of rows that have been read +- `bytes_read` is the number of bytes (uncompressed) that have been read The placeholders `data`, `totals`, `min` and `max` must not have an escaping rule specified (or `None` must be specified explicitly). The remaining placeholders may have any escaping rule specified. If the `format_template_resultset` setting is an empty string, `${data}` is used as the default value. @@ -470,7 +472,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe Also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames). -:::warning +:::note If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. @@ -480,7 +482,7 @@ Also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). -:::warning +:::note If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped.
@@ -500,7 +502,7 @@ There is also `CustomSeparatedIgnoreSpaces` format, which is similar to [Templat

Also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).

-:::warning
+:::note
If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
@@ -510,7 +512,7 @@ Otherwise, the first row will be skipped.

Also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).

-:::warning
+:::note
If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
@@ -619,8 +621,8 @@ ClickHouse supports [NULL](/docs/en/sql-reference/syntax.md), which is displayed

**See Also**

-- [JSONEachRow](#jsoneachrow) format
-- [output_format_json_array_of_rows](/docs/en/operations/settings/settings-formats.md/#output_format_json_array_of_rows) setting
+- [JSONEachRow](#jsoneachrow) format
+- [output_format_json_array_of_rows](/docs/en/operations/settings/settings-formats.md/#output_format_json_array_of_rows) setting

For JSON input format, if setting [input_format_json_validate_types_from_metadata](/docs/en/operations/settings/settings-formats.md/#input_format_json_validate_types_from_metadata) is set to 1,
the types from metadata in input data will be compared with the types of the corresponding columns from the table.
@@ -684,7 +686,7 @@ Example:

## JSONColumns {#jsoncolumns}

:::tip
-The output of the JSONColumns* formats provides the ClickHouse field name and then the content of each row of the table for that field;
+The output of the JSONColumns* formats provides the ClickHouse field name and then the content of each row of the table for that field;
visually, the data is rotated 90 degrees to the left.
:::
@@ -917,6 +919,40 @@ Example:

While importing data, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1.

+## PrettyJSONEachRow {#prettyjsoneachrow}
+
+Differs from JSONEachRow only in that JSON is pretty-formatted with newline delimiters and 4-space indents. Suitable only for output.
+
+Example:
+
+```json
+{
+    "num": "42",
+    "str": "hello",
+    "arr": [
+        "0",
+        "1"
+    ],
+    "tuple": {
+        "num": 42,
+        "str": "world"
+    }
+}
+{
+    "num": "43",
+    "str": "hello",
+    "arr": [
+        "0",
+        "1",
+        "2"
+    ],
+    "tuple": {
+        "num": 43,
+        "str": "world"
+    }
+}
+```
+
## JSONStringsEachRow {#jsonstringseachrow}

Differs from JSONEachRow only in that data fields are output in strings, not in typed JSON values.
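For illustration, assuming the same hypothetical `num`/`str`/`arr` columns as in the example above, the output would look roughly like this, with every value (including numbers and arrays) rendered as a string:

```json
{"num":"42","str":"hello","arr":"[0,1]"}
{"num":"43","str":"hello","arr":"[0,1,2]"}
```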
@@ -969,7 +1005,7 @@ Differs from `JSONEachRow`/`JSONStringsEachRow` in that ClickHouse will also yie

Differs from `JSONCompactEachRow` format in that it also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).

-:::warning
+:::note
If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
@@ -979,7 +1015,7 @@ Otherwise, the first row will be skipped.

Differs from `JSONCompactEachRow` format in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).

-:::warning
+:::note
If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
@@ -991,7 +1027,7 @@ the types from input data will be compared with the types of the corresponding c

Differs from `JSONCompactStringsEachRow` in that it also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).

-:::warning
+:::note
If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
@@ -1001,7 +1037,7 @@ Otherwise, the first row will be skipped.

Differs from `JSONCompactStringsEachRow` in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).

-:::warning
+:::note
If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
@@ -1096,8 +1132,8 @@ INSERT INTO UserActivity FORMAT JSONEachRow {"PageViews":5, "UserID":"4324182021

ClickHouse allows:

-- Any order of key-value pairs in the object.
-- Omitting some values.
+- Any order of key-value pairs in the object.
+- Omitting some values.

ClickHouse ignores spaces between elements and commas after the objects. You can pass all the objects in one line. You do not have to separate them with line breaks.
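To make this concrete, a short sketch (the `UserID` values here are placeholders): all three objects below are accepted in a single `INSERT`, even though the key order differs between them and `PageViews` is omitted from the last one:

```sql
INSERT INTO UserActivity FORMAT JSONEachRow {"PageViews":5, "UserID":"1"} {"UserID":"2", "PageViews":7} {"UserID":"3"}
```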
@@ -1117,10 +1153,10 @@ CREATE TABLE IF NOT EXISTS example_table ) ENGINE = Memory; ``` -- If `input_format_defaults_for_omitted_fields = 0`, then the default value for `x` and `a` equals `0` (as the default value for the `UInt32` data type). -- If `input_format_defaults_for_omitted_fields = 1`, then the default value for `x` equals `0`, but the default value of `a` equals `x * 2`. +- If `input_format_defaults_for_omitted_fields = 0`, then the default value for `x` and `a` equals `0` (as the default value for the `UInt32` data type). +- If `input_format_defaults_for_omitted_fields = 1`, then the default value for `x` equals `0`, but the default value of `a` equals `x * 2`. -:::warning +:::note When inserting data with `input_format_defaults_for_omitted_fields = 1`, ClickHouse consumes more computational resources, compared to insertion with `input_format_defaults_for_omitted_fields = 0`. ::: @@ -1150,7 +1186,7 @@ Any set of bytes can be output in the strings. Use the `JSONEachRow` format if y ### Usage of Nested Structures {#jsoneachrow-nested} -If you have a table with [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) data type columns, you can insert JSON data with the same structure. Enable this feature with the [input_format_import_nested_json](/docs/en/operations/settings/settings-formats.md/#input_format_import_nested_json) setting. +If you have a table with [Nested](/docs/en/sql-reference/data-types/nested-data-structures/index.md) data type columns, you can insert JSON data with the same structure. Enable this feature with the [input_format_import_nested_json](/docs/en/operations/settings/settings-formats.md/#input_format_import_nested_json) setting. For example, consider the following table: @@ -1232,51 +1268,53 @@ Each row is formatted as a single document and each column is formatted as a sin For output it uses the following correspondence between ClickHouse types and BSON types: -| ClickHouse type | BSON Type | -|-----------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------| -| [Bool](/docs/en/sql-reference/data-types/boolean.md) | `\x08` boolean | -| [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 | -| [Int16UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 | -| [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 | -| [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 | -| [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 | -| [Float32/Float64](/docs/en/sql-reference/data-types/float.md) | `\x01` double | -| [Date](/docs/en/sql-reference/data-types/date.md)/[Date32](/docs/en/sql-reference/data-types/date32.md) | `\x10` int32 | -| [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `\x12` int64 | -| [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `\x09` datetime | -| [Decimal32](/docs/en/sql-reference/data-types/decimal.md) | `\x10` int32 | -| [Decimal64](/docs/en/sql-reference/data-types/decimal.md) | `\x12` int64 | -| [Decimal128](/docs/en/sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 16 | -| [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 32 | -| [Int128/UInt128](/docs/en/sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 16 | -| 
[Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 32 | +| ClickHouse type | BSON Type | +|-----------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------| +| [Bool](/docs/en/sql-reference/data-types/boolean.md) | `\x08` boolean | +| [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md)/[Enum8](/docs/en/sql-reference/data-types/enum.md) | `\x10` int32 | +| [Int16/UInt16](/docs/en/sql-reference/data-types/int-uint.md)/[Enum16](/docs/en/sql-reference/data-types/enum.md) | `\x10` int32 | +| [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 | +| [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 | +| [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 | +| [Float32/Float64](/docs/en/sql-reference/data-types/float.md) | `\x01` double | +| [Date](/docs/en/sql-reference/data-types/date.md)/[Date32](/docs/en/sql-reference/data-types/date32.md) | `\x10` int32 | +| [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `\x12` int64 | +| [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `\x09` datetime | +| [Decimal32](/docs/en/sql-reference/data-types/decimal.md) | `\x10` int32 | +| [Decimal64](/docs/en/sql-reference/data-types/decimal.md) | `\x12` int64 | +| [Decimal128](/docs/en/sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 16 | +| [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 32 | +| [Int128/UInt128](/docs/en/sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 16 | +| [Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 32 | | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `\x05` binary, `\x00` binary subtype or \x02 string if setting output_format_bson_string_as_string is enabled | -| [UUID](/docs/en/sql-reference/data-types/uuid.md) | `\x05` binary, `\x04` uuid subtype, size = 16 | -| [Array](/docs/en/sql-reference/data-types/array.md) | `\x04` array | -| [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x04` array | -| [Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x03` document | -| [Map](/docs/en/sql-reference/data-types/map.md) (with String keys) | `\x03` document | +| [UUID](/docs/en/sql-reference/data-types/uuid.md) | `\x05` binary, `\x04` uuid subtype, size = 16 | +| [Array](/docs/en/sql-reference/data-types/array.md) | `\x04` array | +| [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x04` array | +| [Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x03` document | +| [Map](/docs/en/sql-reference/data-types/map.md) | `\x03` document | +| [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `\x10` int32 | +| [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `\x05` binary, `\x00` binary subtype | For input it uses the following correspondence between BSON types and ClickHouse types: -| BSON Type | ClickHouse Type | -|------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `\x01` double | 
[Float32/Float64](/docs/en/sql-reference/data-types/float.md) | -| `\x02` string | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | -| `\x03` document | [Map](/docs/en/sql-reference/data-types/map.md)/[Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | -| `\x04` array | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) | -| `\x05` binary, `\x00` binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | -| `\x05` binary, `\x02` old binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | -| `\x05` binary, `\x03` old uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) | -| `\x05` binary, `\x04` uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) | -| `\x07` ObjectId | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | -| `\x08` boolean | [Bool](/docs/en/sql-reference/data-types/boolean.md) | -| `\x09` datetime | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | -| `\x0A` null value | [NULL](/docs/en/sql-reference/data-types/nullable.md) | -| `\x0D` JavaScript code | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | -| `\x0E` symbol | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | -| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md) | -| `\x12` int64 | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal64](/docs/en/sql-reference/data-types/decimal.md)/[DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | +| BSON Type | ClickHouse Type | +|------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `\x01` double | [Float32/Float64](/docs/en/sql-reference/data-types/float.md) | +| `\x02` string | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | +| `\x03` document | [Map](/docs/en/sql-reference/data-types/map.md)/[Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | +| `\x04` array | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) | +| `\x05` binary, `\x00` binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md)/[IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | +| `\x05` binary, `\x02` old binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | +| `\x05` binary, `\x03` old uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) | +| `\x05` binary, `\x04` uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) | +| `\x07` ObjectId | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | +| `\x08` boolean | 
[Bool](/docs/en/sql-reference/data-types/boolean.md) | +| `\x09` datetime | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | +| `\x0A` null value | [NULL](/docs/en/sql-reference/data-types/nullable.md) | +| `\x0D` JavaScript code | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | +| `\x0E` symbol | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | +| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md)/[IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md)/[Enum8/Enum16](/docs/en/sql-reference/data-types/enum.md) | +| `\x12` int64 | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal64](/docs/en/sql-reference/data-types/decimal.md)/[DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | Other BSON types are not supported. Also, it performs conversion between different integer types (for example, you can insert BSON int32 value into ClickHouse UInt8). Big integers and decimals (Int128/UInt128/Int256/UInt256/Decimal128/Decimal256) can be parsed from BSON Binary value with `\x00` binary subtype. In this case this format will validate that the size of binary data equals the size of expected value. @@ -1445,10 +1483,10 @@ For [NULL](/docs/en/sql-reference/syntax.md/#null-literal) support, an additiona Similar to [RowBinary](#rowbinary), but with added header: -- [LEB128](https://en.wikipedia.org/wiki/LEB128)-encoded number of columns (N) -- N `String`s specifying column names +- [LEB128](https://en.wikipedia.org/wiki/LEB128)-encoded number of columns (N) +- N `String`s specifying column names -:::warning +:::note If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. @@ -1458,11 +1496,11 @@ Otherwise, the first row will be skipped. Similar to [RowBinary](#rowbinary), but with added header: -- [LEB128](https://en.wikipedia.org/wiki/LEB128)-encoded number of columns (N) -- N `String`s specifying column names -- N `String`s specifying column types +- [LEB128](https://en.wikipedia.org/wiki/LEB128)-encoded number of columns (N) +- N `String`s specifying column names +- N `String`s specifying column types -:::warning +:::note If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. @@ -1608,27 +1646,34 @@ See also [Format Schema](#formatschema). The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries. 
-| CapnProto data type (`INSERT`) | ClickHouse data type | CapnProto data type (`SELECT`) |
-|--------------------------------|-----------------------------------------------------------|--------------------------------|
-| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
-| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
-| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) | `UINT16` |
-| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
-| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
-| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
-| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
-| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [DateTime64](/docs/en/sql-reference/data-types/datetime.md) | `INT64` |
-| `FLOAT32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` |
-| `FLOAT64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` |
-| `TEXT, DATA` | [String](/docs/en/sql-reference/data-types/string.md), [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `TEXT, DATA` |
-| `union(T, Void), union(Void, T)` | [Nullable(T)](/docs/en/sql-reference/data-types/date.md) | `union(T, Void), union(Void, T)` |
-| `ENUM` | [Enum(8\|16)](/docs/en/sql-reference/data-types/enum.md) | `ENUM` |
-| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
-| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
+| CapnProto data type (`INSERT`) | ClickHouse data type | CapnProto data type (`SELECT`) |
+|------------------------------------------------------|--------------------------------------------------------------------------------|------------------------------------------------------|
+| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
+| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
+| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) | `UINT16` |
+| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
+| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
+| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md), [Decimal32](/docs/en/sql-reference/data-types/decimal.md) | `INT32` |
+| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
+| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [DateTime64](/docs/en/sql-reference/data-types/datetime64.md), [Decimal64](/docs/en/sql-reference/data-types/decimal.md) | `INT64` |
+| `FLOAT32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` |
+| `FLOAT64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` |
+| `TEXT, DATA` | [String](/docs/en/sql-reference/data-types/string.md), [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `TEXT, DATA` |
+| `union(T, Void), union(Void, T)` | [Nullable(T)](/docs/en/sql-reference/data-types/nullable.md) | `union(T, Void), union(Void, T)` |
+| `ENUM` | [Enum(8/16)](/docs/en/sql-reference/data-types/enum.md) | `ENUM` |
+| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
+| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
+| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
+| `DATA` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `DATA` |
+| `DATA` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `DATA` |
+| `DATA` | [Decimal128/Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `DATA` |
+| `STRUCT(entries LIST(STRUCT(key Key, value Value)))` | [Map](/docs/en/sql-reference/data-types/map.md) | `STRUCT(entries LIST(STRUCT(key Key, value Value)))` |
+
+Integer types can be converted into each other during input/output.

For working with `Enum` in CapnProto format use the [format_capn_proto_enum_comparising_mode](/docs/en/operations/settings/settings-formats.md/#format_capn_proto_enum_comparising_mode) setting.

-Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` type also can be nested.
+Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types can also be nested.

### Inserting and Selecting Data {#inserting-and-selecting-data-capnproto}
@@ -1772,7 +1817,7 @@ message MessageType {
```

ClickHouse tries to find a column named `x.y.z` (or `x_y_z` or `X.y_Z` and so on).
-Nested messages are suitable to input or output a [nested data structures](/docs/en/sql-reference/data-types/nested-data-structures/nested.md).
+Nested messages are suitable for input or output of [nested data structures](/docs/en/sql-reference/data-types/nested-data-structures/index.md).

Default values defined in a protobuf schema like this
@@ -1804,21 +1849,30 @@ ClickHouse Avro format supports reading and writing [Avro data files](https://av

The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
-| Avro data type `INSERT` | ClickHouse data type | Avro data type `SELECT` |
-|---------------------------------------------|----------------------------------------------------------------------------------------------------|------------------------------|
-| `boolean`, `int`, `long`, `float`, `double` | [Int(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md), [UInt(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md) | `int` |
-| `boolean`, `int`, `long`, `float`, `double` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `long` |
-| `boolean`, `int`, `long`, `float`, `double` | [Float32](/docs/en/sql-reference/data-types/float.md) | `float` |
-| `boolean`, `int`, `long`, `float`, `double` | [Float64](/docs/en/sql-reference/data-types/float.md) | `double` |
-| `bytes`, `string`, `fixed`, `enum` | [String](/docs/en/sql-reference/data-types/string.md) | `bytes` or `string` \* |
-| `bytes`, `string`, `fixed` | [FixedString(N)](/docs/en/sql-reference/data-types/fixedstring.md) | `fixed(N)` |
-| `enum` | [Enum(8\|16)](/docs/en/sql-reference/data-types/enum.md) | `enum` |
-| `array(T)` | [Array(T)](/docs/en/sql-reference/data-types/array.md) | `array(T)` |
-| `union(null, T)`, `union(T, null)` | [Nullable(T)](/docs/en/sql-reference/data-types/date.md) | `union(null, T)` |
-| `null` | [Nullable(Nothing)](/docs/en/sql-reference/data-types/special-data-types/nothing.md) | `null` |
-| `int (date)` \** | [Date](/docs/en/sql-reference/data-types/date.md) | `int (date)` \** |
-| `long (timestamp-millis)` \** | [DateTime64(3)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \* |
-| `long (timestamp-micros)` \** | [DateTime64(6)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-micros)` \* |
+| Avro data type `INSERT` | ClickHouse data type | Avro data type `SELECT` |
+|---------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------|-------------------------------|
+| `boolean`, `int`, `long`, `float`, `double` | [Int(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md), [UInt(8\|16\|32)](/docs/en/sql-reference/data-types/int-uint.md) | `int` |
+| `boolean`, `int`, `long`, `float`, `double` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `long` |
+| `boolean`, `int`, `long`, `float`, `double` | [Float32](/docs/en/sql-reference/data-types/float.md) | `float` |
+| `boolean`, `int`, `long`, `float`, `double` | [Float64](/docs/en/sql-reference/data-types/float.md) | `double` |
+| `bytes`, `string`, `fixed`, `enum` | [String](/docs/en/sql-reference/data-types/string.md) | `bytes` or `string` \* |
+| `bytes`, `string`, `fixed` | [FixedString(N)](/docs/en/sql-reference/data-types/fixedstring.md) | `fixed(N)` |
+| `enum` | [Enum(8\|16)](/docs/en/sql-reference/data-types/enum.md) | `enum` |
+| `array(T)` | [Array(T)](/docs/en/sql-reference/data-types/array.md) | `array(T)` |
+| `map(V, K)` | [Map(V, K)](/docs/en/sql-reference/data-types/map.md) | `map(string, K)` |
+| `union(null, T)`, `union(T, null)` | [Nullable(T)](/docs/en/sql-reference/data-types/nullable.md) | `union(null, T)` |
+| `null` | [Nullable(Nothing)](/docs/en/sql-reference/data-types/special-data-types/nothing.md) | `null` |
+| `int (date)` \** | [Date](/docs/en/sql-reference/data-types/date.md), [Date32](/docs/en/sql-reference/data-types/date32.md) | `int (date)` \** |
+| `long (timestamp-millis)` \** | [DateTime64(3)](/docs/en/sql-reference/data-types/datetime64.md) | `long (timestamp-millis)` \** |
+| `long (timestamp-micros)` \** | [DateTime64(6)](/docs/en/sql-reference/data-types/datetime64.md) | `long (timestamp-micros)` \** |
+| `bytes (decimal)` \** | [DateTime64(N)](/docs/en/sql-reference/data-types/datetime64.md) | `bytes (decimal)` \** |
+| `int` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `int` |
+| `fixed(16)` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `fixed(16)` |
+| `bytes (decimal)` \** | [Decimal(P, S)](/docs/en/sql-reference/data-types/decimal.md) | `bytes (decimal)` \** |
+| `string (uuid)` \** | [UUID](/docs/en/sql-reference/data-types/uuid.md) | `string (uuid)` \** |
+| `fixed(16)` | [Int128/UInt128](/docs/en/sql-reference/data-types/int-uint.md) | `fixed(16)` |
+| `fixed(32)` | [Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `fixed(32)` |
+
\* `bytes` is default, controlled by [output_format_avro_string_column_pattern](/docs/en/operations/settings/settings-formats.md/#output_format_avro_string_column_pattern)
\** [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types)
@@ -1854,11 +1908,18 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Avro" > file.avro

Column names must:

-- start with `[A-Za-z_]`
-- subsequently contain only `[A-Za-z0-9_]`
+- start with `[A-Za-z_]`
+- subsequently contain only `[A-Za-z0-9_]`

Output Avro file compression and sync interval can be configured with [output_format_avro_codec](/docs/en/operations/settings/settings-formats.md/#output_format_avro_codec) and [output_format_avro_sync_interval](/docs/en/operations/settings/settings-formats.md/#output_format_avro_sync_interval) respectively.

+### Example Data {#example-data-avro}
+
+Using the ClickHouse [DESCRIBE](/docs/en/sql-reference/statements/describe-table) statement, you can quickly inspect the inferred schema of an Avro file, as in the following example. The example uses the URL of a publicly accessible Avro file in the ClickHouse S3 public bucket:
+
+```sql
+DESCRIBE url('https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/hits.avro','Avro');
+```
+
## AvroConfluent {#data-format-avro-confluent}

AvroConfluent supports decoding single-object Avro messages commonly used with [Kafka](https://kafka.apache.org/) and [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html).
@@ -1906,7 +1967,7 @@ SET format_avro_schema_registry_url = 'http://schema-registry';
SELECT * FROM topic1_stream;
```

-:::warning
+:::note
Setting `format_avro_schema_registry_url` needs to be configured in `users.xml` to maintain its value after a restart. You can also use the `format_avro_schema_registry_url` setting of the `Kafka` table engine.
:::
@@ -1918,28 +1979,31 @@ Setting `format_avro_schema_registry_url` needs to be configured in `users.xml`

The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
-| Parquet data type (`INSERT`) | ClickHouse data type | Parquet data type (`SELECT`) | -|-----------------------------------------------|-----------------------------------------------------------------|------------------------------| -| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` | -| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` | -| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` | -| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` | -| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` | -| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` | -| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` | -| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` | -| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` | -| `FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT` | -| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `DOUBLE` | -| `DATE` | [Date32](/docs/en/sql-reference/data-types/date.md) | `DATE` | -| `TIME (ms)` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` | -| `TIMESTAMP`, `TIME (us, ns)` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `TIMESTAMP` | -| `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` | -| `STRING`, `BINARY`, `FIXED_LENGTH_BYTE_ARRAY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `FIXED_LENGTH_BYTE_ARRAY` | -| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` | -| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` | -| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` | -| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` | +| Parquet data type (`INSERT`) | ClickHouse data type | Parquet data type (`SELECT`) | +|-----------------------------------------------|------------------------------------------------------------------------------------------------------------|-------------------------------| +| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` | +| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` | +| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md)/[Enum8](/docs/en/sql-reference/data-types/enum.md) | `INT8` | +| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` | +| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md)/[Enum16](/docs/en/sql-reference/data-types/enum.md) | `INT16` | +| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` | +| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` | +| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` | +| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` | +| `FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT` | +| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `DOUBLE` | +| `DATE` | [Date32](/docs/en/sql-reference/data-types/date.md) | `DATE` | +| `TIME (ms)` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` | +| `TIMESTAMP`, `TIME (us, ns)` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `TIMESTAMP` | +| `STRING`, `BINARY` | 
[String](/docs/en/sql-reference/data-types/string.md) | `BINARY` | +| `STRING`, `BINARY`, `FIXED_LENGTH_BYTE_ARRAY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `FIXED_LENGTH_BYTE_ARRAY` | +| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` | +| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` | +| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` | +| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` | +| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` | +| `FIXED_LENGTH_BYTE_ARRAY`, `BINARY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_LENGTH_BYTE_ARRAY` | +| `FIXED_LENGTH_BYTE_ARRAY`, `BINARY` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `FIXED_LENGTH_BYTE_ARRAY` | Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested. @@ -1967,12 +2031,145 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t - [output_format_parquet_row_group_size](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_row_group_size) - row group size in rows while data output. Default value - `1000000`. - [output_format_parquet_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_string_as_string) - use Parquet String type instead of Binary for String columns. Default value - `false`. -- [input_format_parquet_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_import_nested) - allow inserting array of structs into [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) table in Parquet input format. Default value - `false`. +- [input_format_parquet_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_import_nested) - allow inserting array of structs into [Nested](/docs/en/sql-reference/data-types/nested-data-structures/index.md) table in Parquet input format. Default value - `false`. - [input_format_parquet_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`. - [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`. - [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`. - [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`. - [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`. 
+- [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `snappy`.
+
+## ParquetMetadata {#data-format-parquet-metadata}
+
+Special format for reading [Parquet file metadata](https://parquet.apache.org/docs/file-format/metadata/). It always outputs one row with the following structure:
+- num_columns - the number of columns
+- num_rows - the total number of rows
+- num_row_groups - the total number of row groups
+- format_version - Parquet format version, either 1.0 or 2.6
+- total_uncompressed_size - total uncompressed bytes size of the data, calculated as the sum of total_byte_size from all row groups
+- total_compressed_size - total compressed bytes size of the data, calculated as the sum of total_compressed_size from all row groups
+- columns - the list of column metadata with the following structure:
+  - name - column name
+  - path - column path (differs from name for nested column)
+  - max_definition_level - maximum definition level
+  - max_repetition_level - maximum repetition level
+  - physical_type - column physical type
+  - logical_type - column logical type
+  - compression - compression used for this column
+  - total_uncompressed_size - total uncompressed bytes size of the column, calculated as the sum of total_uncompressed_size of the column from all row groups
+  - total_compressed_size - total compressed bytes size of the column, calculated as the sum of total_compressed_size of the column from all row groups
+  - space_saved - percent of space saved by compression, calculated as (1 - total_compressed_size/total_uncompressed_size).
+  - encodings - the list of encodings used for this column
+- row_groups - the list of row group metadata with the following structure:
+  - num_columns - the number of columns in the row group
+  - num_rows - the number of rows in the row group
+  - total_uncompressed_size - total uncompressed bytes size of the row group
+  - total_compressed_size - total compressed bytes size of the row group
+  - columns - the list of column chunk metadata with the following structure:
+    - name - column name
+    - path - column path
+    - total_compressed_size - total compressed bytes size of the column
+    - total_uncompressed_size - total uncompressed bytes size of the column
+    - have_statistics - boolean flag that indicates if column chunk metadata contains column statistics
+    - statistics - column chunk statistics (all fields are NULL if have_statistics = false) with the following structure:
+      - num_values - the number of non-null values in the column chunk
+      - null_count - the number of NULL values in the column chunk
+      - distinct_count - the number of distinct values in the column chunk
+      - min - the minimum value of the column chunk
+      - max - the maximum value of the column chunk
+
+Example:
+
+```sql
+SELECT * FROM file(data.parquet, ParquetMetadata) format PrettyJSONEachRow
+```
+
+```json
+{
+    "num_columns": "2",
+    "num_rows": "100000",
+    "num_row_groups": "2",
+    "format_version": "2.6",
+    "metadata_size": "577",
+    "total_uncompressed_size": "282436",
+    "total_compressed_size": "26633",
+    "columns": [
+        {
+            "name": "number",
+            "path": "number",
+            "max_definition_level": "0",
+            "max_repetition_level": "0",
+            "physical_type": "INT32",
+            "logical_type": "Int(bitWidth=16, isSigned=false)",
+            "compression": "LZ4",
+            "total_uncompressed_size": "133321",
+            "total_compressed_size": "13293",
+            "space_saved": "90.03%",
+            "encodings": [
"RLE_DICTIONARY", + "PLAIN", + "RLE" + ] + }, + { + "name": "concat('Hello', toString(modulo(number, 1000)))", + "path": "concat('Hello', toString(modulo(number, 1000)))", + "max_definition_level": "0", + "max_repetition_level": "0", + "physical_type": "BYTE_ARRAY", + "logical_type": "None", + "compression": "LZ4", + "total_uncompressed_size": "149115", + "total_compressed_size": "13340", + "space_saved": "91.05%", + "encodings": [ + "RLE_DICTIONARY", + "PLAIN", + "RLE" + ] + } + ], + "row_groups": [ + { + "num_columns": "2", + "num_rows": "65409", + "total_uncompressed_size": "179809", + "total_compressed_size": "14163", + "columns": [ + { + "name": "number", + "path": "number", + "total_compressed_size": "7070", + "total_uncompressed_size": "85956", + "have_statistics": true, + "statistics": { + "num_values": "65409", + "null_count": "0", + "distinct_count": null, + "min": "0", + "max": "999" + } + }, + { + "name": "concat('Hello', toString(modulo(number, 1000)))", + "path": "concat('Hello', toString(modulo(number, 1000)))", + "total_compressed_size": "7093", + "total_uncompressed_size": "93853", + "have_statistics": true, + "statistics": { + "num_values": "65409", + "null_count": "0", + "distinct_count": null, + "min": "Hello0", + "max": "Hello999" + } + } + ] + }, + ... + ] +} +``` ## Arrow {#data-format-arrow} @@ -1984,29 +2181,32 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries. -| Arrow data type (`INSERT`) | ClickHouse data type | Arrow data type (`SELECT`) | -|-----------------------------------------|-----------------------------------------------------------------|----------------------------| -| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` | -| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` | -| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` | -| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` | -| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` | -| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` | -| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` | -| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` | -| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` | -| `FLOAT`, `HALF_FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` | -| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` | -| `DATE32` | [Date32](/docs/en/sql-reference/data-types/date32.md) | `UINT16` | -| `DATE64` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` | -| `TIMESTAMP`, `TIME32`, `TIME64` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `UINT32` | -| `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` | -| `STRING`, `BINARY`, `FIXED_SIZE_BINARY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `FIXED_SIZE_BINARY` | -| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` | -| `DECIMAL256` | [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL256` | -| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` | -| `STRUCT` | 
[Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` | -| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` | +| Arrow data type (`INSERT`) | ClickHouse data type | Arrow data type (`SELECT`) | +|-----------------------------------------|------------------------------------------------------------------------------------------------------------|----------------------------| +| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` | +| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` | +| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md)/[Enum8](/docs/en/sql-reference/data-types/enum.md) | `INT8` | +| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` | +| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md)/[Enum16](/docs/en/sql-reference/data-types/enum.md) | `INT16` | +| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` | +| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` | +| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` | +| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` | +| `FLOAT`, `HALF_FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` | +| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` | +| `DATE32` | [Date32](/docs/en/sql-reference/data-types/date32.md) | `UINT16` | +| `DATE64` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` | +| `TIMESTAMP`, `TIME32`, `TIME64` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `UINT32` | +| `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` | +| `STRING`, `BINARY`, `FIXED_SIZE_BINARY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `FIXED_SIZE_BINARY` | +| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` | +| `DECIMAL256` | [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL256` | +| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` | +| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` | +| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` | +| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` | +| `FIXED_SIZE_BINARY`, `BINARY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_SIZE_BINARY` | +| `FIXED_SIZE_BINARY`, `BINARY` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `FIXED_SIZE_BINARY` | Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested. @@ -2041,6 +2241,7 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam - [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. - [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`. 
- [output_format_arrow_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_fixed_string_as_fixed_byte_array) - use Arrow FIXED_SIZE_BINARY type instead of Binary/String for FixedString columns. Default value - `true`.
+- [output_format_arrow_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_compression_method) - compression method used in output Arrow format. Default value - `lz4_frame`.

## ArrowStream {#data-format-arrow-stream}
@@ -2054,22 +2255,26 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam

The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.

-| ORC data type (`INSERT`) | ClickHouse data type | ORC data type (`SELECT`) |
-|---------------------------------------|---------------------------------------------------------|--------------------------|
-| `Boolean` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `Boolean` |
-| `Tinyint` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `Tinyint` |
-| `Smallint` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `Smallint` |
-| `Int` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `Int` |
-| `Bigint` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `Bigint` |
-| `Float` | [Float32](/docs/en/sql-reference/data-types/float.md) | `Float` |
-| `Double` | [Float64](/docs/en/sql-reference/data-types/float.md) | `Double` |
-| `Decimal` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `Decimal` |
-| `Date` | [Date32](/docs/en/sql-reference/data-types/date32.md) | `Date` |
-| `Timestamp` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `Timestamp` |
-| `String`, `Char`, `Varchar`, `Binary` | [String](/docs/en/sql-reference/data-types/string.md) | `Binary` |
-| `List` | [Array](/docs/en/sql-reference/data-types/array.md) | `List` |
-| `Struct` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `Struct` |
-| `Map` | [Map](/docs/en/sql-reference/data-types/map.md) | `Map` |
+| ORC data type (`INSERT`) | ClickHouse data type | ORC data type (`SELECT`) |
+|---------------------------------------|--------------------------------------------------------------------------------------------------------------------|--------------------------|
+| `Boolean` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `Boolean` |
+| `Tinyint` | [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md)/[Enum8](/docs/en/sql-reference/data-types/enum.md) | `Tinyint` |
+| `Smallint` | [Int16/UInt16](/docs/en/sql-reference/data-types/int-uint.md)/[Enum16](/docs/en/sql-reference/data-types/enum.md) | `Smallint` |
+| `Int` | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `Int` |
+| `Bigint` | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `Bigint` |
+| `Float` | [Float32](/docs/en/sql-reference/data-types/float.md) | `Float` |
+| `Double` | [Float64](/docs/en/sql-reference/data-types/float.md) | `Double` |
+| `Decimal` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `Decimal` |
+| `Date` | [Date32](/docs/en/sql-reference/data-types/date32.md) | `Date` |
+| `Timestamp` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `Timestamp` |
+| `String`, `Char`, `Varchar`, `Binary` | [String](/docs/en/sql-reference/data-types/string.md) | `Binary` |
+| `List` | [Array](/docs/en/sql-reference/data-types/array.md) | `List` |
+| `Struct` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `Struct` |
+| `Map` | [Map](/docs/en/sql-reference/data-types/map.md) | `Map` |
+| `Int` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `Int` |
+| `Binary` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `Binary` |
+| `Binary` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `Binary` |
+| `Binary` | [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `Binary` |

Other types are not supported.
@@ -2096,6 +2301,7 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename.

### Arrow format settings {#parquet-format-settings}

- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`.
+- [output_format_orc_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_orc_compression_method) - compression method used in output ORC format. Default value - `none`.
- [input_format_arrow_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`.
- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`.
- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`.
@@ -2133,17 +2339,17 @@ Each line of imported data is parsed according to the regular expression.

When working with the `Regexp` format, you can use the following settings:

-- `format_regexp` — [String](/docs/en/sql-reference/data-types/string.md). Contains regular expression in the [re2](https://github.com/google/re2/wiki/Syntax) format.
+- `format_regexp` — [String](/docs/en/sql-reference/data-types/string.md). Contains regular expression in the [re2](https://github.com/google/re2/wiki/Syntax) format.

-- `format_regexp_escaping_rule` — [String](/docs/en/sql-reference/data-types/string.md). The following escaping rules are supported:
+- `format_regexp_escaping_rule` — [String](/docs/en/sql-reference/data-types/string.md). The following escaping rules are supported:

-  - CSV (similarly to [CSV](#csv))
-  - JSON (similarly to [JSONEachRow](#jsoneachrow))
-  - Escaped (similarly to [TSV](#tabseparated))
-  - Quoted (similarly to [Values](#data-format-values))
-  - Raw (extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](#tabseparatedraw))
+  - CSV (similarly to [CSV](#csv))
+  - JSON (similarly to [JSONEachRow](#jsoneachrow))
+  - Escaped (similarly to [TSV](#tabseparated))
+  - Quoted (similarly to [Values](#data-format-values))
+  - Raw (extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](#tabseparatedraw))

-- `format_regexp_skip_unmatched` — [UInt8](/docs/en/sql-reference/data-types/int-uint.md). Defines the need to throw an exception in case the `format_regexp` expression does not match the imported data. Can be set to `0` or `1`.
+- `format_regexp_skip_unmatched` — [UInt8](/docs/en/sql-reference/data-types/int-uint.md). Defines the need to throw an exception in case the `format_regexp` expression does not match the imported data. Can be set to `0` or `1`.

**Usage**

@@ -2264,20 +2470,28 @@ ClickHouse supports reading and writing [MessagePack](https://msgpack.org/) data

### Data Types Matching {#data-types-matching-msgpack}

-| MessagePack data type (`INSERT`) | ClickHouse data type | MessagePack data type (`SELECT`) |
-|--------------------------------------------------------------------|-----------------------------------------------------------|------------------------------------|
-| `uint N`, `positive fixint` | [UIntN](/docs/en/sql-reference/data-types/int-uint.md) | `uint N` |
-| `int N`, `negative fixint` | [IntN](/docs/en/sql-reference/data-types/int-uint.md) | `int N` |
-| `bool` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `uint 8` |
-| `fixstr`, `str 8`, `str 16`, `str 32`, `bin 8`, `bin 16`, `bin 32` | [String](/docs/en/sql-reference/data-types/string.md) | `bin 8`, `bin 16`, `bin 32` |
-| `fixstr`, `str 8`, `str 16`, `str 32`, `bin 8`, `bin 16`, `bin 32` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `bin 8`, `bin 16`, `bin 32` |
-| `float 32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `float 32` |
-| `float 64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `float 64` |
-| `uint 16` | [Date](/docs/en/sql-reference/data-types/date.md) | `uint 16` |
-| `uint 32` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `uint 32` |
-| `uint 64` | [DateTime64](/docs/en/sql-reference/data-types/datetime.md) | `uint 64` |
-| `fixarray`, `array 16`, `array 32` | [Array](/docs/en/sql-reference/data-types/array.md) | `fixarray`, `array 16`, `array 32` |
-| `fixmap`, `map 16`, `map 32` | [Map](/docs/en/sql-reference/data-types/map.md) | `fixmap`, `map 16`, `map 32` |
+| MessagePack data type (`INSERT`) | ClickHouse data type | MessagePack data type (`SELECT`) |
+|--------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------|----------------------------------|
+| `uint N`, `positive fixint` | [UIntN](/docs/en/sql-reference/data-types/int-uint.md) | `uint N` |
+| `int N`, `negative fixint` | [IntN](/docs/en/sql-reference/data-types/int-uint.md) | `int N` |
+| `bool` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `uint 8` |
+| `fixstr`, `str 8`, `str 16`, `str 32`, `bin 8`, `bin 16`, `bin 32` | [String](/docs/en/sql-reference/data-types/string.md) | `bin 8`, `bin 16`, `bin 32` |
+| `fixstr`, `str 8`, `str 16`, `str 32`, `bin 8`, `bin 16`, `bin 32` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `bin 8`, `bin 16`, `bin 32` |
+| `float 32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `float 32` |
+| `float 64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `float 64` |
+| `uint 16` | [Date](/docs/en/sql-reference/data-types/date.md) | `uint 16` |
+| `int 32` | [Date32](/docs/en/sql-reference/data-types/date32.md) | `int 32` |
+| `uint 32` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `uint 32` |
+| `uint 64` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `uint 64` |
+| `fixarray`, `array 16`, `array 32` | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) | `fixarray`, `array 16`, `array 32` |
+| `fixmap`, `map 16`, `map 32` | [Map](/docs/en/sql-reference/data-types/map.md) | `fixmap`, `map 16`, `map 32` |
+| `uint 32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `uint 32` |
+| `bin 8` | [String](/docs/en/sql-reference/data-types/string.md) | `bin 8` |
+| `int 8` | [Enum8](/docs/en/sql-reference/data-types/enum.md) | `int 8` |
+| `bin 8` | [(U)Int128/(U)Int256](/docs/en/sql-reference/data-types/int-uint.md) | `bin 8` |
+| `int 32` | [Decimal32](/docs/en/sql-reference/data-types/decimal.md) | `int 32` |
+| `int 64` | [Decimal64](/docs/en/sql-reference/data-types/decimal.md) | `int 64` |
+| `bin 8` | [Decimal128/Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `bin 8` |

Example:

diff --git a/docs/en/interfaces/grpc.md b/docs/en/interfaces/grpc.md
index 807663be646..3087ad20eac 100644
--- a/docs/en/interfaces/grpc.md
+++ b/docs/en/interfaces/grpc.md
@@ -10,14 +10,14 @@ sidebar_label: gRPC Interface

ClickHouse supports the [gRPC](https://grpc.io/) interface. It is an open source remote procedure call system that uses HTTP/2 and [Protocol Buffers](https://en.wikipedia.org/wiki/Protocol_Buffers). The implementation of gRPC in ClickHouse supports:

-- SSL;
-- authentication;
-- sessions;
-- compression;
-- parallel queries through the same channel;
-- cancellation of queries;
-- getting progress and logs;
-- external tables.
+- SSL;
+- authentication;
+- sessions;
+- compression;
+- parallel queries through the same channel;
+- cancellation of queries;
+- getting progress and logs;
+- external tables.

The specification of the interface is described in [clickhouse_grpc.proto](https://github.com/ClickHouse/ClickHouse/blob/master/src/Server/grpc_protos/clickhouse_grpc.proto).

@@ -64,15 +64,15 @@ Or you can use a built-in Python client. It is placed in [utils/grpc-client/clic

The client supports the following arguments:

-- `--help` – Shows a help message and exits.
-- `--host HOST, -h HOST` – A server name. Default value: `localhost`. You can use IPv4 or IPv6 addresses also.
-- `--port PORT` – A port to connect to. This port should be enabled in the ClickHouse server configuration (see `grpc_port`). Default value: `9100`.
-- `--user USER_NAME, -u USER_NAME` – A user name. Default value: `default`.
-- `--password PASSWORD` – A password. Default value: empty string.
-- `--query QUERY, -q QUERY` – A query to process when using non-interactive mode.
-- `--database DATABASE, -d DATABASE` – A default database. If not specified, the current database set in the server settings is used (`default` by default).
-- `--format OUTPUT_FORMAT, -f OUTPUT_FORMAT` – A result output [format](formats.md). Default value for interactive mode: `PrettyCompact`.
-- `--debug` – Enables showing debug information.
+- `--help` – Shows a help message and exits.
+- `--host HOST, -h HOST` – A server name. Default value: `localhost`. You can also use IPv4 or IPv6 addresses.
+- `--port PORT` – A port to connect to. This port should be enabled in the ClickHouse server configuration (see `grpc_port`). Default value: `9100`.
+- `--user USER_NAME, -u USER_NAME` – A user name. Default value: `default`.
+- `--password PASSWORD` – A password. Default value: empty string.
+- `--query QUERY, -q QUERY` – A query to process when using non-interactive mode.
+- `--database DATABASE, -d DATABASE` – A default database. If not specified, the current database set in the server settings is used (`default` by default).
+- `--format OUTPUT_FORMAT, -f OUTPUT_FORMAT` – A result output [format](formats.md). Default value for interactive mode: `PrettyCompact`.
+- `--debug` – Enables showing debug information.
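For example, a one-off query in non-interactive mode might look like the following sketch (the script name, port, and query are illustrative assumptions, not part of the patch):

```bash
# Assumes the client script from utils/grpc-client has been made executable
# and grpc_port (9100 here) is enabled in the server configuration.
./clickhouse-grpc-client.py -h localhost --port 9100 -u default -q "SELECT version()"
```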
To run the client in interactive mode, call it without the `--query` argument.

diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md
index 9af6df0c87d..3a7f6d4d854 100644
--- a/docs/en/interfaces/http.md
+++ b/docs/en/interfaces/http.md
@@ -293,11 +293,11 @@ X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_ro

Possible header fields:

-- `read_rows` — Number of rows read.
-- `read_bytes` — Volume of data read in bytes.
-- `total_rows_to_read` — Total number of rows to be read.
-- `written_rows` — Number of rows written.
-- `written_bytes` — Volume of data written in bytes.
+- `read_rows` — Number of rows read.
+- `read_bytes` — Volume of data read in bytes.
+- `total_rows_to_read` — Total number of rows to be read.
+- `written_rows` — Number of rows written.
+- `written_bytes` — Volume of data written in bytes.

Running requests do not stop automatically if the HTTP connection is lost. Parsing and data formatting are performed on the server-side, and using the network might be ineffective. The optional ‘query_id’ parameter can be passed as the query ID (any string). For more information, see the section “Settings, replace_running_query”.

@@ -309,6 +309,7 @@ The HTTP interface allows passing external data (external temporary tables) for

## Response Buffering {#response-buffering}

You can enable response buffering on the server-side. The `buffer_size` and `wait_end_of_query` URL parameters are provided for this purpose.
+The settings `http_response_buffer_size` and `http_wait_end_of_query` can also be used.

`buffer_size` determines the number of bytes in the result to buffer in the server memory. If a result body is larger than this threshold, the buffer is written to the HTTP channel, and the remaining data is sent directly to the HTTP channel.

@@ -332,6 +333,35 @@ You can create a query with parameters and pass values for them from the corresp

$ curl -sS "<address>?param_id=2&param_phrase=test" -d "SELECT * FROM table WHERE int_column = {id:UInt8} and string_column = {phrase:String}"
```

+### Tabs in URL Parameters
+
+Query parameters are parsed from the "escaped" format. This has some benefits, such as the possibility to unambiguously parse nulls as `\N`. This means the tab character should be encoded as `\t` (or `\` and a tab). For example, the following contains an actual tab between `abc` and `123` and the input string is split into two values:
+
+```bash
+curl -sS "http://localhost:8123" -d "SELECT splitByChar('\t', 'abc 123')"
+```
+
+```response
+['abc','123']
+```
+
+However, if you try to encode an actual tab using `%09` in a URL parameter, it won't get parsed properly:
+
+```bash
+curl -sS "http://localhost:8123?param_arg1=abc%09123" -d "SELECT splitByChar('\t', {arg1:String})"
+Code: 457. DB::Exception: Value abc 123 cannot be parsed as String for query parameter 'arg1' because it isn't parsed completely: only 3 of 7 bytes was parsed: abc. (BAD_QUERY_PARAMETER) (version 23.4.1.869 (official build))
+```
+
+If you are using URL parameters, you will need to encode the `\t` as `%5C%09`. For example:
+
+```bash
+curl -sS "http://localhost:8123?param_arg1=abc%5C%09123" -d "SELECT splitByChar('\t', {arg1:String})"
+```
+
+```response
+['abc','123']
+```
+
## Predefined HTTP Interface {#predefined_http_interface}

ClickHouse supports specific queries through the HTTP interface. For example, you can write data to a table as follows:

@@ -344,7 +374,7 @@ ClickHouse also supports Predefined HTTP Interface which can help you more easil

Example:

-- First of all, add this section to server configuration file:
+- First of all, add this section to the server configuration file:

@@ -363,7 +393,7 @@ Example:

```

-- You can now request the URL directly for data in the Prometheus format:
+- You can now request the URL directly for data in the Prometheus format:

@@ -418,22 +448,22 @@ As you can see from the example if `http_handlers` is configured in the config.x

Now `rule` can configure `method`, `headers`, `url`, `handler`:

- `method` is responsible for matching the method part of the HTTP request. `method` fully conforms to the definition of [method](https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods) in the HTTP protocol. It is an optional configuration. If it is not defined in the configuration file, it does not match the method portion of the HTTP request.

-- `url` is responsible for matching the URL part of the HTTP request. It is compatible with [RE2](https://github.com/google/re2)’s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the URL portion of the HTTP request.
+- `url` is responsible for matching the URL part of the HTTP request. It is compatible with [RE2](https://github.com/google/re2)’s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the URL portion of the HTTP request.

-- `headers` are responsible for matching the header part of the HTTP request. It is compatible with RE2’s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the header portion of the HTTP request.
+- `headers` are responsible for matching the header part of the HTTP request. It is compatible with RE2’s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the header portion of the HTTP request.
-- `handler` contains the main processing part. Now `handler` can configure `type`, `status`, `content_type`, `response_content`, `query`, `query_param_name`.
+- `handler` contains the main processing part. Now `handler` can configure `type`, `status`, `content_type`, `response_content`, `query`, `query_param_name`.

`type` currently supports three types: [predefined_query_handler](#predefined_query_handler), [dynamic_query_handler](#dynamic_query_handler), [static](#static).

-  - `query` — use with `predefined_query_handler` type, executes query when the handler is called.
+  - `query` — use with `predefined_query_handler` type, executes query when the handler is called.

-  - `query_param_name` — use with `dynamic_query_handler` type, extracts and executes the value corresponding to the `query_param_name` value in HTTP request parameters.
+  - `query_param_name` — use with `dynamic_query_handler` type, extracts and executes the value corresponding to the `query_param_name` value in HTTP request parameters.

-  - `status` — use with `static` type, response status code.
+  - `status` — use with `static` type, response status code.

-  - `content_type` — use with any type, response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).
+  - `content_type` — use with any type, response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).

-  - `response_content` — use with `static` type, response content sent to client, when using the prefix ‘file://’ or ‘config://’, find the content from the file or configuration sends to client.
+  - `response_content` — use with `static` type, response content sent to the client; when using the prefix ‘file://’ or ‘config://’, the content is read from the file or configuration and sent to the client.

Next are the configuration methods for different `type`.

@@ -445,7 +475,7 @@ Next are the configuration methods for different `type`.

The following example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_final_threads` settings, then queries the system table to check whether these settings were set successfully.

-:::warning
+:::note
To keep the default `handlers` such as `query`, `play`, `ping`, add the `<defaults/>` rule.
:::

@@ -476,7 +506,7 @@ $ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:

max_final_threads 2
```

-:::warning
+:::note
A single `predefined_query_handler` supports only one `query` of an insert type.
:::

diff --git a/docs/en/interfaces/jdbc.md b/docs/en/interfaces/jdbc.md
index b2ff2829af9..2b68316cc3d 100644
--- a/docs/en/interfaces/jdbc.md
+++ b/docs/en/interfaces/jdbc.md
@@ -8,6 +8,6 @@ sidebar_label: JDBC Driver

Use the [official JDBC driver](https://github.com/ClickHouse/clickhouse-jdbc) (and Java client) to access ClickHouse from your Java applications.
-- Third-party drivers: - - [ClickHouse-Native-JDBC](https://github.com/housepower/ClickHouse-Native-JDBC) - - [clickhouse4j](https://github.com/blynkkk/clickhouse4j) +- Third-party drivers: + - [ClickHouse-Native-JDBC](https://github.com/housepower/ClickHouse-Native-JDBC) + - [clickhouse4j](https://github.com/blynkkk/clickhouse4j) diff --git a/docs/en/interfaces/mysql.md b/docs/en/interfaces/mysql.md index c2d6038125b..fab3ba42758 100644 --- a/docs/en/interfaces/mysql.md +++ b/docs/en/interfaces/mysql.md @@ -57,9 +57,9 @@ If user password is specified using [SHA256](../operations/settings/settings-use Restrictions: -- prepared queries are not supported +- prepared queries are not supported -- some data types are sent as strings +- some data types are sent as strings To cancel a long query use `KILL QUERY connection_id` statement (it is replaced with `KILL QUERY WHERE query_id = connection_id` while proceeding). For example: diff --git a/docs/en/interfaces/overview.md b/docs/en/interfaces/overview.md index e5fa503e8fc..8f16dcf5f83 100644 --- a/docs/en/interfaces/overview.md +++ b/docs/en/interfaces/overview.md @@ -6,23 +6,23 @@ keywords: [clickhouse, network, interfaces, http, tcp, grpc, command-line, clien description: ClickHouse provides three network interfaces --- -# Interfaces +# Drivers and Interfaces ClickHouse provides three network interfaces (they can be optionally wrapped in TLS for additional security): -- [HTTP](http.md), which is documented and easy to use directly. -- [Native TCP](../interfaces/tcp.md), which has less overhead. -- [gRPC](grpc.md). +- [HTTP](http.md), which is documented and easy to use directly. +- [Native TCP](../interfaces/tcp.md), which has less overhead. +- [gRPC](grpc.md). In most cases it is recommended to use an appropriate tool or library instead of interacting with those directly. The following are officially supported by ClickHouse: -- [Command-line client](../interfaces/cli.md) -- [JDBC driver](../interfaces/jdbc.md) -- [ODBC driver](../interfaces/odbc.md) -- [C++ client library](../interfaces/cpp.md) +- [Command-line client](../interfaces/cli.md) +- [JDBC driver](../interfaces/jdbc.md) +- [ODBC driver](../interfaces/odbc.md) +- [C++ client library](../interfaces/cpp.md) There are also a wide range of third-party libraries for working with ClickHouse: -- [Client libraries](../interfaces/third-party/client-libraries.md) -- [Integrations](../interfaces/third-party/integrations.md) -- [Visual interfaces](../interfaces/third-party/gui.md) +- [Client libraries](../interfaces/third-party/client-libraries.md) +- [Integrations](../interfaces/third-party/integrations.md) +- [Visual interfaces](../interfaces/third-party/gui.md) diff --git a/docs/en/interfaces/postgresql.md b/docs/en/interfaces/postgresql.md index 9ff83559787..1146274b012 100644 --- a/docs/en/interfaces/postgresql.md +++ b/docs/en/interfaces/postgresql.md @@ -8,7 +8,7 @@ sidebar_label: PostgreSQL Interface ClickHouse supports the PostgreSQL wire protocol, which allows you to use Postgres clients to connect to ClickHouse. In a sense, ClickHouse can pretend to be a PostgreSQL instance - allowing you to connect a PostgreSQL client application to ClickHouse that is not already directly supported by ClickHouse (for example, Amazon Redshift). -To enable the PostgreSQL wire protocol, add the [postgresql_port](../operations/server-configuration-parameters/settings#server_configuration_parameters-postgresql_port) setting to your server's configuration file. 
For example, you could define the port in a new XML file in your `config.d` folder: +To enable the PostgreSQL wire protocol, add the [postgresql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-postgresql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d` folder: ```xml @@ -54,7 +54,7 @@ default=> And that's it! You now have a PostgreSQL client connected to ClickHouse, and all commands and queries are executed on ClickHouse. -:::caution +:::note The PostgreSQL protocol currently only supports plain-text passwords. ::: diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md index 25bdb0c36a3..c448d0aee47 100644 --- a/docs/en/interfaces/schema-inference.md +++ b/docs/en/interfaces/schema-inference.md @@ -117,7 +117,7 @@ clickhouse-local --file='hobbies.jsonl' --table='hobbies' --query='SELECT * FROM 4 47 Brayan ['movies','skydiving'] ``` -# Using structure from insertion table {#using-structure-from-insertion-table} +## Using structure from insertion table {#using-structure-from-insertion-table} When table functions `file/s3/url/hdfs` are used to insert data into a table, there is an option to use the structure from the insertion table instead of extracting it from the data. @@ -222,7 +222,7 @@ INSERT INTO hobbies4 SELECT id, empty(hobbies) ? NULL : hobbies[1] FROM file(hob In this case, there are some operations performed on the column `hobbies` in the `SELECT` query to insert it into the table, so ClickHouse cannot use the structure from the insertion table, and schema inference will be used. -# Schema inference cache {#schema-inference-cache} +## Schema inference cache {#schema-inference-cache} For most input formats schema inference reads some data to determine its structure and this process can take some time. To prevent inferring the same schema every time ClickHouse read the data from the same file, the inferred schema is cached and when accessing the same file again, ClickHouse will use the schema from the cache. @@ -326,14 +326,14 @@ SELECT count() FROM system.schema_inference_cache WHERE storage='S3' └─────────┘ ``` -# Text formats {#text-formats} +## Text formats {#text-formats} For text formats, ClickHouse reads the data row by row, extracts column values according to the format, and then uses some recursive parsers and heuristics to determine the type for each value. The maximum number of rows read from the data in schema inference is controlled by the setting `input_format_max_rows_to_read_for_schema_inference` with default value 25000. By default, all inferred types are [Nullable](../sql-reference/data-types/nullable.md), but you can change this by setting `schema_inference_make_columns_nullable` (see examples in the [settings](#settings-for-text-formats) section). -## JSON formats {#json-formats} +### JSON formats {#json-formats} In JSON formats ClickHouse parses values according to the JSON specification and then tries to find the most appropriate data type for them. @@ -464,9 +464,9 @@ most likely this column contains only Nulls or empty Arrays/Maps. ... ``` -### JSON settings {#json-settings} +#### JSON settings {#json-settings} -#### input_format_json_read_objects_as_strings +##### input_format_json_read_objects_as_strings Enabling this setting allows reading nested JSON objects as strings. This setting can be used to read nested JSON objects without using JSON object type. 
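As a rough sketch of the effect of this setting (the sample payload is an assumption for illustration), schema inference can be probed with `clickhouse-local`:

```bash
# With the setting enabled, the nested "obj" value is inferred as a String
# column instead of a nested structure.
clickhouse-local -q "DESC format(JSONEachRow, '{\"id\" : 1, \"obj\" : {\"a\" : 2}}') SETTINGS input_format_json_read_objects_as_strings = 1"
```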
@@ -486,7 +486,7 @@ DESC format(JSONEachRow, $$ └──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -#### input_format_json_try_infer_numbers_from_strings +##### input_format_json_try_infer_numbers_from_strings Enabling this setting allows inferring numbers from string values. @@ -507,7 +507,7 @@ DESC format(JSONEachRow, $$ └───────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -#### input_format_json_read_numbers_as_strings +##### input_format_json_read_numbers_as_strings Enabling this setting allows reading numeric values as strings. @@ -528,7 +528,7 @@ DESC format(JSONEachRow, $$ └───────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -#### input_format_json_read_bools_as_numbers +##### input_format_json_read_bools_as_numbers Enabling this setting allows reading Bool values as numbers. @@ -549,7 +549,7 @@ DESC format(JSONEachRow, $$ └───────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -## CSV {#csv} +### CSV {#csv} In CSV format ClickHouse extracts column values from the row according to delimiters. ClickHouse expects all types except numbers and strings to be enclosed in double quotes. If the value is in double quotes, ClickHouse tries to parse the data inside quotes using the recursive parser and then tries to find the most appropriate data type for it. If the value is not in double quotes, ClickHouse tries to parse it as a number, @@ -726,7 +726,7 @@ $$) └──────────────┴───────────────┘ ``` -## TSV/TSKV {#tsv-tskv} +### TSV/TSKV {#tsv-tskv} In TSV/TSKV formats ClickHouse extracts column value from the row according to tabular delimiters and then parses extracted value using the recursive parser to determine the most appropriate type. If the type cannot be determined, ClickHouse treats this value as String. @@ -1019,7 +1019,7 @@ DESC format(TSV, '[1,2,3] 42.42 Hello World!') └──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -## CustomSeparated {#custom-separated} +### CustomSeparated {#custom-separated} In CustomSeparated format ClickHouse first extracts all column values from the row according to specified delimiters and then tries to infer the data type for each value according to escaping rule. @@ -1080,7 +1080,7 @@ $$) └────────┴───────────────┴────────────┘ ``` -## Template {#template} +### Template {#template} In Template format ClickHouse first extracts all column values from the row according to the specified template and then tries to infer the data type for each value according to its escaping rule. @@ -1120,7 +1120,7 @@ $$) └──────────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -## Regexp {#regexp} +### Regexp {#regexp} Similar to Template, in Regexp format ClickHouse first extracts all column values from the row according to specified regular expression and then tries to infer data type for each value according to the specified escaping rule. 
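A minimal sketch of this behavior (the sample line and regular expression are assumptions for illustration):

```bash
# Each subpattern becomes one column; under the Escaped rule, value_1 should
# be inferred as an integer and value_2 as a string.
clickhouse-local -q "DESC format(Regexp, 'Line: value_1=2, value_2=some_string') SETTINGS format_regexp = 'Line: value_1=(.*), value_2=(.*)', format_regexp_escaping_rule = 'Escaped'"
```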
@@ -1142,9 +1142,9 @@ Line: value_1=2, value_2="Some string 2", value_3="[4, 5, NULL]"$$)
└──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```

-## Settings for text formats {settings-for-text-formats}
+### Settings for text formats {#settings-for-text-formats}

-### input_format_max_rows_to_read_for_schema_inference
+#### input_format_max_rows_to_read_for_schema_inference

This setting controls the maximum number of rows to be read during schema inference.
The more rows are read, the more time is spent on schema inference, but the greater the chance to

@@ -1152,7 +1152,7 @@ correctly determine the types (especially when the data contains a lot of nulls)

Default value: `25000`.

-### column_names_for_schema_inference
+#### column_names_for_schema_inference

The list of column names to use in schema inference for formats without explicit column names. Specified names will be used instead of default `c1,c2,c3,...`. The format: `column1,column2,column3,...`.

@@ -1169,7 +1169,7 @@ DESC format(TSV, 'Hello, World! 42 [1, 2, 3]') settings column_names_for_schema_
└──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```

-### schema_inference_hints
+#### schema_inference_hints

The list of column names and types to use in schema inference instead of automatically determined types. The format: 'column_name1 column_type1, column_name2 column_type2, ...'.
This setting can be used to specify the types of columns that could not be determined automatically or for optimizing the schema.

@@ -1189,7 +1189,7 @@ DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : nul
└─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```

-### schema_inference_make_columns_nullable
+#### schema_inference_make_columns_nullable

Controls making inferred types `Nullable` in schema inference for formats without information about nullability.
If the setting is enabled, all inferred types will be `Nullable`; if disabled, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference.

@@ -1232,7 +1232,7 @@ DESC format(JSONEachRow, $$
└─────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```

-### input_format_try_infer_integers
+#### input_format_try_infer_integers

If enabled, ClickHouse will try to infer integers instead of floats in schema inference for text formats.
If all numbers in the column from sample data are integers, the result type will be `Int64`; if at least one number is a float, the result type will be `Float64`.

@@ -1289,7 +1289,7 @@ DESC format(JSONEachRow, $$
└────────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```

-### input_format_try_infer_datetimes
+#### input_format_try_infer_datetimes

If enabled, ClickHouse will try to infer type `DateTime64` from string fields in schema inference for text formats.
If all fields from a column in sample data were successfully parsed as datetimes, the result type will be `DateTime64(9)`,

@@ -1337,7 +1337,7 @@ DESC format(JSONEachRow, $$

Note: Parsing datetimes during schema inference respects the setting [date_time_input_format](/docs/en/operations/settings/settings-formats.md#date_time_input_format)

-### input_format_try_infer_dates
+#### input_format_try_infer_dates

If enabled, ClickHouse will try to infer type `Date` from string fields in schema inference for text formats.
If all fields from a column in sample data were successfully parsed as dates, the result type will be `Date`,

@@ -1383,14 +1383,14 @@ DESC format(JSONEachRow, $$
└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```

-# Self describing formats {#self-describing-formats}
+## Self describing formats {#self-describing-formats}

Self-describing formats contain information about the structure of the data in the data itself: it can be some header with a description, a binary type tree, or some kind of table.
To automatically infer a schema from files in such formats, ClickHouse reads a part of the data containing
information about the types and converts it into a schema of the ClickHouse table.

-## Formats with -WithNamesAndTypes suffix {#formats-with-names-and-types}
+### Formats with -WithNamesAndTypes suffix {#formats-with-names-and-types}

ClickHouse supports some text formats with the suffix -WithNamesAndTypes. This suffix means that the data contains two additional rows with column names and types before the actual data.
During schema inference for such formats, ClickHouse reads the first two rows and extracts column names and types.

@@ -1412,7 +1412,7 @@ $$)
└──────┴──────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```

-## JSON formats with metadata {#json-with-metadata}
+### JSON formats with metadata {#json-with-metadata}

Some JSON input formats ([JSON](formats.md#json), [JSONCompact](formats.md#json-compact), [JSONColumnsWithMetadata](formats.md#jsoncolumnswithmetadata)) contain metadata with column names and types.
In schema inference for such formats, ClickHouse reads this metadata.
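For instance, the following hedged sketch (the payload is illustrative) shows inference driven purely by the embedded `meta` block:

```bash
# The column name and type come from "meta"; the rows in "data" are not
# needed to determine the schema.
clickhouse-local -q "DESC format(JSON, '{\"meta\": [{\"name\": \"num\", \"type\": \"UInt32\"}], \"data\": [{\"num\": 42}], \"rows\": 1}')"
```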
@@ -1465,7 +1465,7 @@ $$)
└──────┴──────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```

-## Avro {#avro}
+### Avro {#avro}

In Avro format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches:

@@ -1473,6 +1473,7 @@ In Avro format ClickHouse reads its schema from the data and converts it to Clic
|------------------------------------|--------------------------------------------------------------------------------|
| `boolean` | [Bool](../sql-reference/data-types/boolean.md) |
| `int` | [Int32](../sql-reference/data-types/int-uint.md) |
+| `int (date)` \* | [Date32](../sql-reference/data-types/date32.md) |
| `long` | [Int64](../sql-reference/data-types/int-uint.md) |
| `float` | [Float32](../sql-reference/data-types/float.md) |
| `double` | [Float64](../sql-reference/data-types/float.md) |
@@ -1482,10 +1483,14 @@ In Avro format ClickHouse reads its schema from the data and converts it to Clic
| `array(T)` | [Array(T)](../sql-reference/data-types/array.md) |
| `union(null, T)`, `union(T, null)` | [Nullable(T)](../sql-reference/data-types/nullable.md) |
| `null` | [Nullable(Nothing)](../sql-reference/data-types/special-data-types/nothing.md) |
+| `string (uuid)` \* | [UUID](../sql-reference/data-types/uuid.md) |
+| `binary (decimal)` \* | [Decimal(P, S)](../sql-reference/data-types/decimal.md) |
+
+\* [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types)

Other Avro types are not supported.

-## Parquet {#parquet}
+### Parquet {#parquet}

In Parquet format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches:

@@ -1513,7 +1518,7 @@ In Parquet format ClickHouse reads its schema from the data and converts it to C

Other Parquet types are not supported. By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`.

-## Arrow {#arrow}
+### Arrow {#arrow}

In Arrow format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches:

@@ -1541,7 +1546,7 @@ In Arrow format ClickHouse reads its schema from the data and converts it to Cli

Other Arrow types are not supported. By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`.

-## ORC {#orc}
+### ORC {#orc}

In ORC format ClickHouse reads its schema from the data and converts it to ClickHouse schema using the following type matches:

@@ -1564,17 +1569,17 @@ In ORC format ClickHouse reads its schema from the data and converts it to Click

Other ORC types are not supported. By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`.

-## Native {#native}
+### Native {#native}

Native format is used inside ClickHouse and contains the schema in the data.
In schema inference, ClickHouse reads the schema from the data without any transformations.

-# Formats with external schema {#formats-with-external-schema}
+## Formats with external schema {#formats-with-external-schema}

Such formats require a schema describing the data in a separate file in a specific schema language.
To automatically infer a schema from files in such formats, ClickHouse reads the external schema from a separate file and transforms it to a ClickHouse table schema.
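As an illustrative sketch (the file and message names are assumptions), the external schema file, not the data, drives the inferred structure:

```bash
# format_schema uses the 'path/to/schema.proto:MessageName' syntax; the
# resulting table structure is derived from the Protobuf message definition.
clickhouse-local -q "DESC file('data.binary', 'Protobuf') SETTINGS format_schema = 'schemas/message.proto:Message'"
```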
-# Protobuf {#protobuf} +### Protobuf {#protobuf} In schema inference for Protobuf format ClickHouse uses the following type matches: @@ -1592,7 +1597,7 @@ In schema inference for Protobuf format ClickHouse uses the following type match | `repeated T` | [Array(T)](../sql-reference/data-types/array.md) | | `message`, `group` | [Tuple](../sql-reference/data-types/tuple.md) | -# CapnProto {#capnproto} +### CapnProto {#capnproto} In schema inference for CapnProto format ClickHouse uses the following type matches: @@ -1615,13 +1620,13 @@ In schema inference for CapnProto format ClickHouse uses the following type matc | `struct` | [Tuple](../sql-reference/data-types/tuple.md) | | `union(T, Void)`, `union(Void, T)` | [Nullable(T)](../sql-reference/data-types/nullable.md) | -# Strong-typed binary formats {#strong-typed-binary-formats} +## Strong-typed binary formats {#strong-typed-binary-formats} In such formats, each serialized value contains information about its type (and possibly about its name), but there is no information about the whole table. In schema inference for such formats, ClickHouse reads data row by row (up to `input_format_max_rows_to_read_for_schema_inference` rows) and extracts the type (and possibly name) for each value from the data and then converts these types to ClickHouse types. -## MsgPack {msgpack} +### MsgPack {#msgpack} In MsgPack format there is no delimiter between rows, to use schema inference for this format you should specify the number of columns in the table using the setting `input_format_msgpack_number_of_columns`. ClickHouse uses the following type matches: @@ -1641,7 +1646,7 @@ using the setting `input_format_msgpack_number_of_columns`. ClickHouse uses the By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`. -## BSONEachRow {#bsoneachrow} +### BSONEachRow {#bsoneachrow} In BSONEachRow each row of data is presented as a BSON document. In schema inference ClickHouse reads BSON documents one by one and extracts values, names, and types from the data and then transforms these types to ClickHouse types using the following type matches: @@ -1661,11 +1666,11 @@ values, names, and types from the data and then transforms these types to ClickH By default, all inferred types are inside `Nullable`, but it can be changed using the setting `schema_inference_make_columns_nullable`. -# Formats with constant schema {#formats-with-constant-schema} +## Formats with constant schema {#formats-with-constant-schema} Data in such formats always have the same schema. -## LineAsString {#line-as-string} +### LineAsString {#line-as-string} In this format, ClickHouse reads the whole line from the data into a single column with `String` data type. The inferred type for this format is always `String` and the column name is `line`. @@ -1680,7 +1685,7 @@ DESC format(LineAsString, 'Hello\nworld!') └──────┴────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -## JSONAsString {#json-as-string} +### JSONAsString {#json-as-string} In this format, ClickHouse reads the whole JSON object from the data into a single column with `String` data type. The inferred type for this format is always `String` and the column name is `json`. 
@@ -1695,7 +1700,7 @@ DESC format(JSONAsString, '{"x" : 42, "y" : "Hello, World!"}') └──────┴────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -## JSONAsObject {#json-as-object} +### JSONAsObject {#json-as-object} In this format, ClickHouse reads the whole JSON object from the data into a single column with `Object('json')` data type. Inferred type for this format is always `String` and the column name is `json`. diff --git a/docs/en/interfaces/third-party/client-libraries.md b/docs/en/interfaces/third-party/client-libraries.md index 4114e47e982..f7603994163 100644 --- a/docs/en/interfaces/third-party/client-libraries.md +++ b/docs/en/interfaces/third-party/client-libraries.md @@ -6,71 +6,72 @@ sidebar_label: Client Libraries # Client Libraries from Third-party Developers -:::warning +:::note ClickHouse Inc does **not** maintain the libraries listed below and hasn’t done any extensive testing to ensure their quality. ::: -- Python - - [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm) - - [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver) - - [clickhouse-client](https://github.com/yurial/clickhouse-client) - - [aiochclient](https://github.com/maximdanilchenko/aiochclient) - - [asynch](https://github.com/long2ice/asynch) -- PHP - - [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse) - - [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client) - - [bozerkins/clickhouse-client](https://packagist.org/packages/bozerkins/clickhouse-client) - - [simpod/clickhouse-client](https://packagist.org/packages/simpod/clickhouse-client) - - [seva-code/php-click-house-client](https://packagist.org/packages/seva-code/php-click-house-client) - - [SeasClick C++ client](https://github.com/SeasX/SeasClick) - - [one-ck](https://github.com/lizhichao/one-ck) - - [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel) - - [kolya7k ClickHouse PHP extension](https://github.com//kolya7k/clickhouse-php) -- Go - - [clickhouse](https://github.com/kshvakov/clickhouse/) - - [go-clickhouse](https://github.com/roistat/go-clickhouse) - - [chconn](https://github.com/vahid-sohrabloo/chconn) - - [mailrugo-clickhouse](https://github.com/mailru/go-clickhouse) - - [golang-clickhouse](https://github.com/leprosus/golang-clickhouse) - - [uptrace/go-clickhouse](https://clickhouse.uptrace.dev/) -- Swift - - [ClickHouseNIO](https://github.com/patrick-zippenfenig/ClickHouseNIO) - - [ClickHouseVapor ORM](https://github.com/patrick-zippenfenig/ClickHouseVapor) -- NodeJs - - [clickhouse (NodeJs)](https://github.com/TimonKK/clickhouse) - - [node-clickhouse](https://github.com/apla/node-clickhouse) - - [nestjs-clickhouse](https://github.com/depyronick/nestjs-clickhouse) - - [clickhouse-client](https://github.com/depyronick/clickhouse-client) - - [node-clickhouse-orm](https://github.com/zimv/node-clickhouse-orm) -- Perl - - [perl-DBD-ClickHouse](https://github.com/elcamlost/perl-DBD-ClickHouse) - - [HTTP-ClickHouse](https://metacpan.org/release/HTTP-ClickHouse) - - [AnyEvent-ClickHouse](https://metacpan.org/release/AnyEvent-ClickHouse) -- Ruby - - [ClickHouse (Ruby)](https://github.com/shlima/click_house) - - [clickhouse-activerecord](https://github.com/PNixx/clickhouse-activerecord) -- Rust - - [clickhouse.rs](https://github.com/loyd/clickhouse.rs) - - [clickhouse-rs](https://github.com/suharev7/clickhouse-rs) - - 
[Klickhouse](https://github.com/Protryon/klickhouse) -- R - - [RClickHouse](https://github.com/IMSMWU/RClickHouse) -- Java - - [clickhouse-client-java](https://github.com/VirtusAI/clickhouse-client-java) - - [clickhouse-client](https://github.com/Ecwid/clickhouse-client) -- Scala - - [clickhouse-scala-client](https://github.com/crobox/clickhouse-scala-client) -- Kotlin - - [AORM](https://github.com/TanVD/AORM) -- C# - - [Octonica.ClickHouseClient](https://github.com/Octonica/ClickHouseClient) - - [ClickHouse.Ado](https://github.com/killwort/ClickHouse-Net) - - [ClickHouse.Client](https://github.com/DarkWanderer/ClickHouse.Client) - - [ClickHouse.Net](https://github.com/ilyabreev/ClickHouse.Net) -- Elixir - - [clickhousex](https://github.com/appodeal/clickhousex/) - - [pillar](https://github.com/sofakingworld/pillar) -- Nim - - [nim-clickhouse](https://github.com/leonardoce/nim-clickhouse) -- Haskell - - [hdbc-clickhouse](https://github.com/zaneli/hdbc-clickhouse) +- Python + - [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm) + - [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver) + - [clickhouse-client](https://github.com/yurial/clickhouse-client) + - [aiochclient](https://github.com/maximdanilchenko/aiochclient) + - [asynch](https://github.com/long2ice/asynch) +- PHP + - [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse) + - [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client) + - [bozerkins/clickhouse-client](https://packagist.org/packages/bozerkins/clickhouse-client) + - [simpod/clickhouse-client](https://packagist.org/packages/simpod/clickhouse-client) + - [seva-code/php-click-house-client](https://packagist.org/packages/seva-code/php-click-house-client) + - [SeasClick C++ client](https://github.com/SeasX/SeasClick) + - [one-ck](https://github.com/lizhichao/one-ck) + - [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel) + - [kolya7k ClickHouse PHP extension](https://github.com//kolya7k/clickhouse-php) + - [hyvor/clickhouse-php](https://github.com/hyvor/clickhouse-php) +- Go + - [clickhouse](https://github.com/kshvakov/clickhouse/) + - [go-clickhouse](https://github.com/roistat/go-clickhouse) + - [chconn](https://github.com/vahid-sohrabloo/chconn) + - [mailrugo-clickhouse](https://github.com/mailru/go-clickhouse) + - [golang-clickhouse](https://github.com/leprosus/golang-clickhouse) + - [uptrace/go-clickhouse](https://clickhouse.uptrace.dev/) +- Swift + - [ClickHouseNIO](https://github.com/patrick-zippenfenig/ClickHouseNIO) + - [ClickHouseVapor ORM](https://github.com/patrick-zippenfenig/ClickHouseVapor) +- NodeJs + - [clickhouse (NodeJs)](https://github.com/TimonKK/clickhouse) + - [node-clickhouse](https://github.com/apla/node-clickhouse) + - [nestjs-clickhouse](https://github.com/depyronick/nestjs-clickhouse) + - [clickhouse-client](https://github.com/depyronick/clickhouse-client) + - [node-clickhouse-orm](https://github.com/zimv/node-clickhouse-orm) +- Perl + - [perl-DBD-ClickHouse](https://github.com/elcamlost/perl-DBD-ClickHouse) + - [HTTP-ClickHouse](https://metacpan.org/release/HTTP-ClickHouse) + - [AnyEvent-ClickHouse](https://metacpan.org/release/AnyEvent-ClickHouse) +- Ruby + - [ClickHouse (Ruby)](https://github.com/shlima/click_house) + - [clickhouse-activerecord](https://github.com/PNixx/clickhouse-activerecord) +- Rust + - [clickhouse.rs](https://github.com/loyd/clickhouse.rs) + - 
[clickhouse-rs](https://github.com/suharev7/clickhouse-rs) + - [Klickhouse](https://github.com/Protryon/klickhouse) +- R + - [RClickHouse](https://github.com/IMSMWU/RClickHouse) +- Java + - [clickhouse-client-java](https://github.com/VirtusAI/clickhouse-client-java) + - [clickhouse-client](https://github.com/Ecwid/clickhouse-client) +- Scala + - [clickhouse-scala-client](https://github.com/crobox/clickhouse-scala-client) +- Kotlin + - [AORM](https://github.com/TanVD/AORM) +- C# + - [Octonica.ClickHouseClient](https://github.com/Octonica/ClickHouseClient) + - [ClickHouse.Ado](https://github.com/killwort/ClickHouse-Net) + - [ClickHouse.Client](https://github.com/DarkWanderer/ClickHouse.Client) + - [ClickHouse.Net](https://github.com/ilyabreev/ClickHouse.Net) +- Elixir + - [clickhousex](https://github.com/appodeal/clickhousex/) + - [pillar](https://github.com/sofakingworld/pillar) +- Nim + - [nim-clickhouse](https://github.com/leonardoce/nim-clickhouse) +- Haskell + - [hdbc-clickhouse](https://github.com/zaneli/hdbc-clickhouse) diff --git a/docs/en/interfaces/third-party/gui.md b/docs/en/interfaces/third-party/gui.md index cba6240788a..900764b8128 100644 --- a/docs/en/interfaces/third-party/gui.md +++ b/docs/en/interfaces/third-party/gui.md @@ -14,11 +14,11 @@ Web interface for ClickHouse in the [Tabix](https://github.com/tabixio/tabix) pr Features: -- Works with ClickHouse directly from the browser, without the need to install additional software. -- Query editor with syntax highlighting. -- Auto-completion of commands. -- Tools for graphical analysis of query execution. -- Colour scheme options. +- Works with ClickHouse directly from the browser, without the need to install additional software. +- Query editor with syntax highlighting. +- Auto-completion of commands. +- Tools for graphical analysis of query execution. +- Colour scheme options. [Tabix documentation](https://tabix.io/doc/). @@ -28,21 +28,21 @@ Features: Features: -- Query builder with syntax highlighting. View the response in a table or JSON view. -- Export query results as CSV or JSON. -- List of processes with descriptions. Write mode. Ability to stop (`KILL`) a process. -- Database graph. Shows all tables and their columns with additional information. -- A quick view of the column size. -- Server configuration. +- Query builder with syntax highlighting. View the response in a table or JSON view. +- Export query results as CSV or JSON. +- List of processes with descriptions. Write mode. Ability to stop (`KILL`) a process. +- Database graph. Shows all tables and their columns with additional information. +- A quick view of the column size. +- Server configuration. The following features are planned for development: -- Database management. -- User management. -- Real-time data analysis. -- Cluster monitoring. -- Cluster management. -- Monitoring replicated and Kafka tables. +- Database management. +- User management. +- Real-time data analysis. +- Cluster monitoring. +- Cluster management. +- Monitoring replicated and Kafka tables. ### LightHouse {#lighthouse} @@ -50,9 +50,9 @@ The following features are planned for development: Features: -- Table list with filtering and metadata. -- Table preview with filtering and sorting. -- Read-only queries execution. +- Table list with filtering and metadata. +- Table preview with filtering and sorting. +- Read-only queries execution. 
### Redash {#redash} @@ -62,9 +62,9 @@ Supports for multiple data sources including ClickHouse, Redash can join results Features: -- Powerful editor of queries. -- Database explorer. -- Visualization tools, that allow you to represent data in different forms. +- Powerful editor of queries. +- Database explorer. +- Visualization tools, that allow you to represent data in different forms. ### Grafana {#grafana} @@ -92,10 +92,10 @@ Features: Features: -- Query development with syntax highlight and autocompletion. -- Table list with filters and metadata search. -- Table data preview. -- Full-text search. +- Query development with syntax highlight and autocompletion. +- Table list with filters and metadata search. +- Table data preview. +- Full-text search. By default, DBeaver does not connect using a session (the CLI for example does). If you require session support (for example to set settings for your session), edit the driver connection properties and set `session_id` to a random string (it uses the http connection under the hood). Then you can use any setting from the query window. @@ -105,10 +105,10 @@ By default, DBeaver does not connect using a session (the CLI for example does). Features: -- Autocompletion. -- Syntax highlighting for the queries and data output. -- Pager support for the data output. -- Custom PostgreSQL-like commands. +- Autocompletion. +- Syntax highlighting for the queries and data output. +- Pager support for the data output. +- Custom PostgreSQL-like commands. ### clickhouse-flamegraph {#clickhouse-flamegraph} @@ -132,15 +132,15 @@ Features: Features: -- Support query history (pagination, clear all, etc.) -- Support selected sql clauses query -- Support terminating query -- Support table management (metadata, delete, preview) -- Support database management (delete, create) -- Support custom query -- Support multiple data sources management(connection test, monitoring) -- Support monitor (processor, connection, query) -- Support migrate data +- Support query history (pagination, clear all, etc.) +- Support selected sql clauses query +- Support terminating query +- Support table management (metadata, delete, preview) +- Support database management (delete, create) +- Support custom query +- Support multiple data sources management(connection test, monitoring) +- Support monitor (processor, connection, query) +- Support migrate data ### Bytebase {#bytebase} @@ -148,13 +148,13 @@ Features: Features: -- Schema review between developers and DBAs. -- Database-as-Code, version control the schema in VCS such GitLab and trigger the deployment upon code commit. -- Streamlined deployment with per-environment policy. -- Full migration history. -- Schema drift detection. -- Backup and restore. -- RBAC. +- Schema review between developers and DBAs. +- Database-as-Code, version control the schema in VCS such GitLab and trigger the deployment upon code commit. +- Streamlined deployment with per-environment policy. +- Full migration history. +- Schema drift detection. +- Backup and restore. +- RBAC. ### Zeppelin-Interpreter-for-ClickHouse {#zeppelin-interpreter-for-clickhouse} @@ -166,9 +166,9 @@ Features: Features: -- An online SQL editor which can run your SQL code without any installing. -- You can observe all processes and mutations. For those unfinished processes, you can kill them in ui. -- The Metrics contains Cluster Analysis,Data Analysis,Query Analysis. +- An online SQL editor which can run your SQL code without any installing. 
+- You can observe all processes and mutations. For those unfinished processes, you can kill them in ui. +- The Metrics contains Cluster Analysis,Data Analysis,Query Analysis. ### ClickVisual {#clickvisual} @@ -218,12 +218,12 @@ Features: Features: -- Very fast code completion. -- ClickHouse syntax highlighting. -- Support for features specific to ClickHouse, for example, nested columns, table engines. -- Data Editor. -- Refactorings. -- Search and Navigation. +- Very fast code completion. +- ClickHouse syntax highlighting. +- Support for features specific to ClickHouse, for example, nested columns, table engines. +- Data Editor. +- Refactorings. +- Search and Navigation. ### Yandex DataLens {#yandex-datalens} @@ -231,15 +231,15 @@ Features: Features: -- Wide range of available visualizations, from simple bar charts to complex dashboards. -- Dashboards could be made publicly available. -- Support for multiple data sources including ClickHouse. -- Storage for materialized data based on ClickHouse. +- Wide range of available visualizations, from simple bar charts to complex dashboards. +- Dashboards could be made publicly available. +- Support for multiple data sources including ClickHouse. +- Storage for materialized data based on ClickHouse. DataLens is [available for free](https://cloud.yandex.com/docs/datalens/pricing) for low-load projects, even for commercial use. -- [DataLens documentation](https://cloud.yandex.com/docs/datalens/). -- [Tutorial](https://cloud.yandex.com/docs/solutions/datalens/data-from-ch-visualization) on visualizing data from a ClickHouse database. +- [DataLens documentation](https://cloud.yandex.com/docs/datalens/). +- [Tutorial](https://cloud.yandex.com/docs/solutions/datalens/data-from-ch-visualization) on visualizing data from a ClickHouse database. ### Holistics Software {#holistics-software} @@ -247,11 +247,11 @@ DataLens is [available for free](https://cloud.yandex.com/docs/datalens/pricing) Features: -- Automated email, Slack and Google Sheet schedules of reports. -- SQL editor with visualizations, version control, auto-completion, reusable query components and dynamic filters. -- Embedded analytics of reports and dashboards via iframe. -- Data preparation and ETL capabilities. -- SQL data modelling support for relational mapping of data. +- Automated email, Slack and Google Sheet schedules of reports. +- SQL editor with visualizations, version control, auto-completion, reusable query components and dynamic filters. +- Embedded analytics of reports and dashboards via iframe. +- Data preparation and ETL capabilities. +- SQL data modelling support for relational mapping of data. ### Looker {#looker} @@ -260,9 +260,9 @@ to integrate data with other applications. Features: -- Easy and agile development using LookML, a language which supports curated +- Easy and agile development using LookML, a language which supports curated [Data Modeling](https://looker.com/platform/data-modeling) to support report writers and end-users. -- Powerful workflow integration via Looker’s [Data Actions](https://looker.com/platform/actions). +- Powerful workflow integration via Looker’s [Data Actions](https://looker.com/platform/actions). [How to configure ClickHouse in Looker.](https://docs.looker.com/setup-and-management/database-config/clickhouse) @@ -272,12 +272,12 @@ Features: Features: -- Business users-friendly reports builder. -- Powerful report parameters for SQL filtering and report-specific query customizations. 
-- Can connect to ClickHouse both with a native TCP/IP endpoint and a HTTP(S) interface (2 different drivers). -- It is possible to use all power of ClickHouse SQL dialect in dimensions/measures definitions. -- [Web API](https://www.seektable.com/help/web-api-integration) for automated reports generation. -- Supports reports development flow with account data [backup/restore](https://www.seektable.com/help/self-hosted-backup-restore); data models (cubes) / reports configuration is a human-readable XML and can be stored under version control system. +- Business users-friendly reports builder. +- Powerful report parameters for SQL filtering and report-specific query customizations. +- Can connect to ClickHouse both with a native TCP/IP endpoint and a HTTP(S) interface (2 different drivers). +- It is possible to use all power of ClickHouse SQL dialect in dimensions/measures definitions. +- [Web API](https://www.seektable.com/help/web-api-integration) for automated reports generation. +- Supports reports development flow with account data [backup/restore](https://www.seektable.com/help/self-hosted-backup-restore); data models (cubes) / reports configuration is a human-readable XML and can be stored under version control system. SeekTable is [free](https://www.seektable.com/help/cloud-pricing) for personal/individual usage. diff --git a/docs/en/interfaces/third-party/index.md b/docs/en/interfaces/third-party/index.md index ad5ed0650a5..adb673d1ff8 100644 --- a/docs/en/interfaces/third-party/index.md +++ b/docs/en/interfaces/third-party/index.md @@ -8,10 +8,10 @@ sidebar_position: 24 This is a collection of links to third-party tools that provide some sort of interface to ClickHouse. It can be either visual interface, command-line interface or an API: -- [Client libraries](../../interfaces/third-party/client-libraries.md) -- [Integrations](../../interfaces/third-party/integrations.md) -- [GUI](../../interfaces/third-party/gui.md) -- [Proxies](../../interfaces/third-party/proxy.md) +- [Client libraries](../../interfaces/third-party/client-libraries.md) +- [Integrations](../../interfaces/third-party/integrations.md) +- [GUI](../../interfaces/third-party/gui.md) +- [Proxies](../../interfaces/third-party/proxy.md) :::note Generic tools that support common API like [ODBC](../../interfaces/odbc.md) or [JDBC](../../interfaces/jdbc.md) usually can work with ClickHouse as well, but are not listed here because there are way too many of them. diff --git a/docs/en/interfaces/third-party/integrations.md b/docs/en/interfaces/third-party/integrations.md index 90a4f088be7..3e1b1e84f5d 100644 --- a/docs/en/interfaces/third-party/integrations.md +++ b/docs/en/interfaces/third-party/integrations.md @@ -6,110 +6,110 @@ sidebar_label: Integrations # Integration Libraries from Third-party Developers -:::warning Disclaimer +:::note Disclaimer ClickHouse, Inc. does **not** maintain the tools and libraries listed below and haven’t done extensive testing to ensure their quality. 
::: ## Infrastructure Products {#infrastructure-products} -- Relational database management systems - - [MySQL](https://www.mysql.com) - - [mysql2ch](https://github.com/long2ice/mysql2ch) - - [ProxySQL](https://github.com/sysown/proxysql/wiki/ClickHouse-Support) - - [clickhouse-mysql-data-reader](https://github.com/Altinity/clickhouse-mysql-data-reader) - - [horgh-replicator](https://github.com/larsnovikov/horgh-replicator) - - [PostgreSQL](https://www.postgresql.org) - - [clickhousedb_fdw](https://github.com/Percona-Lab/clickhousedb_fdw) - - [infi.clickhouse_fdw](https://github.com/Infinidat/infi.clickhouse_fdw) (uses [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)) - - [pg2ch](https://github.com/mkabilov/pg2ch) - - [clickhouse_fdw](https://github.com/adjust/clickhouse_fdw) - - [MSSQL](https://en.wikipedia.org/wiki/Microsoft_SQL_Server) - - [ClickHouseMigrator](https://github.com/zlzforever/ClickHouseMigrator) -- Message queues - - [Kafka](https://kafka.apache.org) - - [clickhouse_sinker](https://github.com/housepower/clickhouse_sinker) (uses [Go client](https://github.com/ClickHouse/clickhouse-go/)) - - [stream-loader-clickhouse](https://github.com/adform/stream-loader) -- Batch processing - - [Spark](https://spark.apache.org) - - [spark-clickhouse-connector](https://github.com/housepower/spark-clickhouse-connector) -- Stream processing - - [Flink](https://flink.apache.org) - - [flink-clickhouse-sink](https://github.com/ivi-ru/flink-clickhouse-sink) -- Object storages - - [S3](https://en.wikipedia.org/wiki/Amazon_S3) - - [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup) -- Container orchestration - - [Kubernetes](https://kubernetes.io) - - [clickhouse-operator](https://github.com/Altinity/clickhouse-operator) -- Configuration management - - [puppet](https://puppet.com) - - [innogames/clickhouse](https://forge.puppet.com/innogames/clickhouse) - - [mfedotov/clickhouse](https://forge.puppet.com/mfedotov/clickhouse) -- Monitoring - - [Graphite](https://graphiteapp.org) - - [graphouse](https://github.com/ClickHouse/graphouse) - - [carbon-clickhouse](https://github.com/lomik/carbon-clickhouse) - - [graphite-clickhouse](https://github.com/lomik/graphite-clickhouse) - - [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer) - optimizes staled partitions in [\*GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md#graphitemergetree) if rules from [rollup configuration](../../engines/table-engines/mergetree-family/graphitemergetree.md#rollup-configuration) could be applied - - [Grafana](https://grafana.com/) - - [clickhouse-grafana](https://github.com/Vertamedia/clickhouse-grafana) - - [Prometheus](https://prometheus.io/) - - [clickhouse_exporter](https://github.com/f1yegor/clickhouse_exporter) - - [PromHouse](https://github.com/Percona-Lab/PromHouse) - - [clickhouse_exporter](https://github.com/hot-wifi/clickhouse_exporter) (uses [Go client](https://github.com/kshvakov/clickhouse/)) - - [Nagios](https://www.nagios.org/) - - [check_clickhouse](https://github.com/exogroup/check_clickhouse/) - - [check_clickhouse.py](https://github.com/innogames/igmonplugins/blob/master/src/check_clickhouse.py) - - [Zabbix](https://www.zabbix.com) - - [clickhouse-zabbix-template](https://github.com/Altinity/clickhouse-zabbix-template) - - [Sematext](https://sematext.com/) - - [clickhouse integration](https://github.com/sematext/sematext-agent-integrations/tree/master/clickhouse) -- Logging - - 
[rsyslog](https://www.rsyslog.com/) - - [omclickhouse](https://www.rsyslog.com/doc/master/configuration/modules/omclickhouse.html) - - [fluentd](https://www.fluentd.org) - - [loghouse](https://github.com/flant/loghouse) (for [Kubernetes](https://kubernetes.io)) - - [logagent](https://www.sematext.com/logagent) - - [logagent output-plugin-clickhouse](https://sematext.com/docs/logagent/output-plugin-clickhouse/) -- Geo - - [MaxMind](https://dev.maxmind.com/geoip/) - - [clickhouse-maxmind-geoip](https://github.com/AlexeyKupershtokh/clickhouse-maxmind-geoip) -- AutoML - - [MindsDB](https://mindsdb.com/) - - [MindsDB](https://github.com/mindsdb/mindsdb) - Predictive AI layer for ClickHouse database. +- Relational database management systems + - [MySQL](https://www.mysql.com) + - [mysql2ch](https://github.com/long2ice/mysql2ch) + - [ProxySQL](https://github.com/sysown/proxysql/wiki/ClickHouse-Support) + - [clickhouse-mysql-data-reader](https://github.com/Altinity/clickhouse-mysql-data-reader) + - [horgh-replicator](https://github.com/larsnovikov/horgh-replicator) + - [PostgreSQL](https://www.postgresql.org) + - [clickhousedb_fdw](https://github.com/Percona-Lab/clickhousedb_fdw) + - [infi.clickhouse_fdw](https://github.com/Infinidat/infi.clickhouse_fdw) (uses [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)) + - [pg2ch](https://github.com/mkabilov/pg2ch) + - [clickhouse_fdw](https://github.com/adjust/clickhouse_fdw) + - [MSSQL](https://en.wikipedia.org/wiki/Microsoft_SQL_Server) + - [ClickHouseMigrator](https://github.com/zlzforever/ClickHouseMigrator) +- Message queues + - [Kafka](https://kafka.apache.org) + - [clickhouse_sinker](https://github.com/housepower/clickhouse_sinker) (uses [Go client](https://github.com/ClickHouse/clickhouse-go/)) + - [stream-loader-clickhouse](https://github.com/adform/stream-loader) +- Batch processing + - [Spark](https://spark.apache.org) + - [spark-clickhouse-connector](https://github.com/housepower/spark-clickhouse-connector) +- Stream processing + - [Flink](https://flink.apache.org) + - [flink-clickhouse-sink](https://github.com/ivi-ru/flink-clickhouse-sink) +- Object storages + - [S3](https://en.wikipedia.org/wiki/Amazon_S3) + - [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup) +- Container orchestration + - [Kubernetes](https://kubernetes.io) + - [clickhouse-operator](https://github.com/Altinity/clickhouse-operator) +- Configuration management + - [puppet](https://puppet.com) + - [innogames/clickhouse](https://forge.puppet.com/innogames/clickhouse) + - [mfedotov/clickhouse](https://forge.puppet.com/mfedotov/clickhouse) +- Monitoring + - [Graphite](https://graphiteapp.org) + - [graphouse](https://github.com/ClickHouse/graphouse) + - [carbon-clickhouse](https://github.com/lomik/carbon-clickhouse) + - [graphite-clickhouse](https://github.com/lomik/graphite-clickhouse) + - [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer) - optimizes stale partitions in [\*GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md#graphitemergetree) if rules from [rollup configuration](../../engines/table-engines/mergetree-family/graphitemergetree.md#rollup-configuration) can be applied + - [Grafana](https://grafana.com/) + - [clickhouse-grafana](https://github.com/Vertamedia/clickhouse-grafana) + - [Prometheus](https://prometheus.io/) + - [clickhouse_exporter](https://github.com/f1yegor/clickhouse_exporter) + - [PromHouse](https://github.com/Percona-Lab/PromHouse) + -
[clickhouse_exporter](https://github.com/hot-wifi/clickhouse_exporter) (uses [Go client](https://github.com/kshvakov/clickhouse/)) + - [Nagios](https://www.nagios.org/) + - [check_clickhouse](https://github.com/exogroup/check_clickhouse/) + - [check_clickhouse.py](https://github.com/innogames/igmonplugins/blob/master/src/check_clickhouse.py) + - [Zabbix](https://www.zabbix.com) + - [clickhouse-zabbix-template](https://github.com/Altinity/clickhouse-zabbix-template) + - [Sematext](https://sematext.com/) + - [clickhouse integration](https://github.com/sematext/sematext-agent-integrations/tree/master/clickhouse) +- Logging + - [rsyslog](https://www.rsyslog.com/) + - [omclickhouse](https://www.rsyslog.com/doc/master/configuration/modules/omclickhouse.html) + - [fluentd](https://www.fluentd.org) + - [loghouse](https://github.com/flant/loghouse) (for [Kubernetes](https://kubernetes.io)) + - [logagent](https://www.sematext.com/logagent) + - [logagent output-plugin-clickhouse](https://sematext.com/docs/logagent/output-plugin-clickhouse/) +- Geo + - [MaxMind](https://dev.maxmind.com/geoip/) + - [clickhouse-maxmind-geoip](https://github.com/AlexeyKupershtokh/clickhouse-maxmind-geoip) +- AutoML + - [MindsDB](https://mindsdb.com/) + - [MindsDB](https://github.com/mindsdb/mindsdb) - Predictive AI layer for ClickHouse database. ## Programming Language Ecosystems {#programming-language-ecosystems} -- Python - - [SQLAlchemy](https://www.sqlalchemy.org) - - [sqlalchemy-clickhouse](https://github.com/cloudflare/sqlalchemy-clickhouse) (uses [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)) - - [pandas](https://pandas.pydata.org) - - [pandahouse](https://github.com/kszucs/pandahouse) -- PHP - - [Doctrine](https://www.doctrine-project.org/) - - [dbal-clickhouse](https://packagist.org/packages/friendsofdoctrine/dbal-clickhouse) -- R - - [dplyr](https://db.rstudio.com/dplyr/) - - [RClickHouse](https://github.com/IMSMWU/RClickHouse) (uses [clickhouse-cpp](https://github.com/artpaul/clickhouse-cpp)) -- Java - - [Hadoop](http://hadoop.apache.org) - - [clickhouse-hdfs-loader](https://github.com/jaykelin/clickhouse-hdfs-loader) (uses [JDBC](../../sql-reference/table-functions/jdbc.md)) -- Scala - - [Akka](https://akka.io) - - [clickhouse-scala-client](https://github.com/crobox/clickhouse-scala-client) -- C# - - [ADO.NET](https://docs.microsoft.com/en-us/dotnet/framework/data/adonet/ado-net-overview) - - [ClickHouse.Ado](https://github.com/killwort/ClickHouse-Net) - - [ClickHouse.Client](https://github.com/DarkWanderer/ClickHouse.Client) - - [ClickHouse.Net](https://github.com/ilyabreev/ClickHouse.Net) - - [ClickHouse.Net.Migrations](https://github.com/ilyabreev/ClickHouse.Net.Migrations) - - [Linq To DB](https://github.com/linq2db/linq2db) -- Elixir - - [Ecto](https://github.com/elixir-ecto/ecto) - - [clickhouse_ecto](https://github.com/appodeal/clickhouse_ecto) -- Ruby - - [Ruby on Rails](https://rubyonrails.org/) - - [activecube](https://github.com/bitquery/activecube) - - [ActiveRecord](https://github.com/PNixx/clickhouse-activerecord) - - [GraphQL](https://github.com/graphql) - - [activecube-graphql](https://github.com/bitquery/activecube-graphql) +- Python + - [SQLAlchemy](https://www.sqlalchemy.org) + - [sqlalchemy-clickhouse](https://github.com/cloudflare/sqlalchemy-clickhouse) (uses [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)) + - [pandas](https://pandas.pydata.org) + - [pandahouse](https://github.com/kszucs/pandahouse) +- PHP + - 
[Doctrine](https://www.doctrine-project.org/) + - [dbal-clickhouse](https://packagist.org/packages/friendsofdoctrine/dbal-clickhouse) +- R + - [dplyr](https://db.rstudio.com/dplyr/) + - [RClickHouse](https://github.com/IMSMWU/RClickHouse) (uses [clickhouse-cpp](https://github.com/artpaul/clickhouse-cpp)) +- Java + - [Hadoop](http://hadoop.apache.org) + - [clickhouse-hdfs-loader](https://github.com/jaykelin/clickhouse-hdfs-loader) (uses [JDBC](../../sql-reference/table-functions/jdbc.md)) +- Scala + - [Akka](https://akka.io) + - [clickhouse-scala-client](https://github.com/crobox/clickhouse-scala-client) +- C# + - [ADO.NET](https://docs.microsoft.com/en-us/dotnet/framework/data/adonet/ado-net-overview) + - [ClickHouse.Ado](https://github.com/killwort/ClickHouse-Net) + - [ClickHouse.Client](https://github.com/DarkWanderer/ClickHouse.Client) + - [ClickHouse.Net](https://github.com/ilyabreev/ClickHouse.Net) + - [ClickHouse.Net.Migrations](https://github.com/ilyabreev/ClickHouse.Net.Migrations) + - [Linq To DB](https://github.com/linq2db/linq2db) +- Elixir + - [Ecto](https://github.com/elixir-ecto/ecto) + - [clickhouse_ecto](https://github.com/appodeal/clickhouse_ecto) +- Ruby + - [Ruby on Rails](https://rubyonrails.org/) + - [activecube](https://github.com/bitquery/activecube) + - [ActiveRecord](https://github.com/PNixx/clickhouse-activerecord) + - [GraphQL](https://github.com/graphql) + - [activecube-graphql](https://github.com/bitquery/activecube-graphql) diff --git a/docs/en/interfaces/third-party/proxy.md b/docs/en/interfaces/third-party/proxy.md index 4fd3f31f08b..43063ea4c1d 100644 --- a/docs/en/interfaces/third-party/proxy.md +++ b/docs/en/interfaces/third-party/proxy.md @@ -12,9 +12,9 @@ sidebar_label: Proxies Features: -- Per-user routing and response caching. -- Flexible limits. -- Automatic SSL certificate renewal. +- Per-user routing and response caching. +- Flexible limits. +- Automatic SSL certificate renewal. Implemented in Go. @@ -24,9 +24,9 @@ Implemented in Go. Features: -- In-memory and on-disk data buffering. -- Per-table routing. -- Load-balancing and health checking. +- In-memory and on-disk data buffering. +- Per-table routing. +- Load-balancing and health checking. Implemented in Go. @@ -36,8 +36,8 @@ Implemented in Go. Features: -- Group requests and send by threshold or interval. -- Multiple remote servers. -- Basic authentication. +- Group requests and send by threshold or interval. +- Multiple remote servers. +- Basic authentication. Implemented in Go. 
diff --git a/docs/en/operations/_category_.yml b/docs/en/operations/_category_.yml index 08849e7489d..352809f663b 100644 --- a/docs/en/operations/_category_.yml +++ b/docs/en/operations/_category_.yml @@ -2,7 +2,3 @@ position: 70 label: 'Operations' collapsible: true collapsed: true -link: - type: generated-index - title: Operations - slug: /en/operations diff --git a/docs/en/operations/_troubleshooting.md b/docs/en/operations/_troubleshooting.md index a5c07ed18bd..dbb0dad7976 100644 --- a/docs/en/operations/_troubleshooting.md +++ b/docs/en/operations/_troubleshooting.md @@ -1,17 +1,17 @@ [//]: # (This file is included in FAQ > Troubleshooting) -- [Installation](#troubleshooting-installation-errors) -- [Connecting to the server](#troubleshooting-accepts-no-connections) -- [Query processing](#troubleshooting-does-not-process-queries) -- [Efficiency of query processing](#troubleshooting-too-slow) +- [Installation](#troubleshooting-installation-errors) +- [Connecting to the server](#troubleshooting-accepts-no-connections) +- [Query processing](#troubleshooting-does-not-process-queries) +- [Efficiency of query processing](#troubleshooting-too-slow) ## Installation {#troubleshooting-installation-errors} ### You Cannot Get Deb Packages from ClickHouse Repository with Apt-get {#you-cannot-get-deb-packages-from-clickhouse-repository-with-apt-get} -- Check firewall settings. -- If you cannot access the repository for any reason, download packages as described in the [install guide](../getting-started/install.md) article and install them manually using the `sudo dpkg -i <packages>` command. You will also need the `tzdata` package. +- Check firewall settings. +- If you cannot access the repository for any reason, download packages as described in the [install guide](../getting-started/install.md) article and install them manually using the `sudo dpkg -i <packages>` command. You will also need the `tzdata` package. ### You Cannot Update Deb Packages from ClickHouse Repository with Apt-get {#you-cannot-update-deb-packages-from-clickhouse-repository-with-apt-get} @@ -73,8 +73,8 @@ After that follow the [install guide](../getting-started/install.md#from-rpm-pac Possible issues: -- The server is not running. -- Unexpected or wrong configuration parameters. +- The server is not running. +- Unexpected or wrong configuration parameters. ### Server Is Not Running {#server-is-not-running} @@ -98,8 +98,8 @@ The main log of `clickhouse-server` is in `/var/log/clickhouse-server/clickhouse If the server started successfully, you should see the strings: -- `<Information> Application: starting up.` — Server started. -- `<Information> Application: Ready for connections.` — Server is running and ready for connections. +- `<Information> Application: starting up.` — Server started. +- `<Information> Application: Ready for connections.` — Server is running and ready for connections. If `clickhouse-server` start failed with a configuration error, you should see the `<Error>` string with an error description. For example: @@ -149,30 +149,30 @@ This command starts the server as an interactive app with standard parameters of Check: -- Docker settings. +- Docker settings. If you run ClickHouse in Docker in an IPv6 network, make sure that `network=host` is set. -- Endpoint settings. +- Endpoint settings. Check [listen_host](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-listen_host) and [tcp_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) settings.
ClickHouse server accepts localhost connections only by default. -- HTTP protocol settings. +- HTTP protocol settings. Check protocol settings for the HTTP API. -- Secure connection settings. +- Secure connection settings. Check: - - The [tcp_port_secure](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) setting. - - Settings for [SSL certificates](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-openssl). + - The [tcp_port_secure](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) setting. + - Settings for [SSL certificates](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-openssl). Use proper parameters while connecting. For example, use the `port_secure` parameter with `clickhouse_client`. -- User settings. +- User settings. You might be using the wrong user name or password. diff --git a/docs/en/operations/access-rights.md b/docs/en/operations/access-rights.md deleted file mode 100644 index 4c4a06dbe1e..00000000000 --- a/docs/en/operations/access-rights.md +++ /dev/null @@ -1,152 +0,0 @@ ---- -slug: /en/operations/access-rights -sidebar_position: 48 -sidebar_label: Access Control and Account Management -title: Access Control and Account Management ---- - -ClickHouse supports access control management based on [RBAC](https://en.wikipedia.org/wiki/Role-based_access_control) approach. - -ClickHouse access entities: -- [User account](#user-account-management) -- [Role](#role-management) -- [Row Policy](#row-policy-management) -- [Settings Profile](#settings-profiles-management) -- [Quota](#quotas-management) - -You can configure access entities using: - -- SQL-driven workflow. - - You need to [enable](#enabling-access-control) this functionality. - -- Server [configuration files](../operations/configuration-files.md) `users.xml` and `config.xml`. - -We recommend using SQL-driven workflow. Both of the configuration methods work simultaneously, so if you use the server configuration files for managing accounts and access rights, you can smoothly switch to SQL-driven workflow. - -:::warning -You can’t manage the same access entity by both configuration methods simultaneously. -::: - -To see all users, roles, profiles, etc. and all their grants use [SHOW ACCESS](../sql-reference/statements/show.md#show-access-statement) statement. - -## Usage {#access-control-usage} - -By default, the ClickHouse server provides the `default` user account which is not allowed using SQL-driven access control and account management but has all the rights and permissions. The `default` user account is used in any cases when the username is not defined, for example, at login from client or in distributed queries. In distributed query processing a default user account is used, if the configuration of the server or cluster does not specify the [user and password](../engines/table-engines/special/distributed.md) properties. - -If you just started using ClickHouse, consider the following scenario: - -1. [Enable](#enabling-access-control) SQL-driven access control and account management for the `default` user. -2. Log in to the `default` user account and create all the required users. Don’t forget to create an administrator account (`GRANT ALL ON *.* TO admin_user_account WITH GRANT OPTION`). -3. 
[Restrict permissions](../operations/settings/permissions-for-queries.md#permissions_for_queries) for the `default` user and disable SQL-driven access control and account management for it. - -### Properties of Current Solution {#access-control-properties} - -- You can grant permissions for databases and tables even if they do not exist. -- If a table was deleted, all the privileges that correspond to this table are not revoked. This means that even if you create a new table with the same name later, all the privileges remain valid. To revoke privileges corresponding to the deleted table, you need to execute, for example, the `REVOKE ALL PRIVILEGES ON db.table FROM ALL` query. -- There are no lifetime settings for privileges. - -## User Account {#user-account-management} - -A user account is an access entity that allows to authorize someone in ClickHouse. A user account contains: - -- Identification information. -- [Privileges](../sql-reference/statements/grant.md#grant-privileges) that define a scope of queries the user can execute. -- Hosts allowed to connect to the ClickHouse server. -- Assigned and default roles. -- Settings with their constraints applied by default at user login. -- Assigned settings profiles. - -Privileges can be granted to a user account by the [GRANT](../sql-reference/statements/grant.md) query or by assigning [roles](#role-management). To revoke privileges from a user, ClickHouse provides the [REVOKE](../sql-reference/statements/revoke.md) query. To list privileges for a user, use the [SHOW GRANTS](../sql-reference/statements/show.md#show-grants-statement) statement. - -Management queries: - -- [CREATE USER](../sql-reference/statements/create/user.md) -- [ALTER USER](../sql-reference/statements/alter/user.md#alter-user-statement) -- [DROP USER](../sql-reference/statements/drop.md) -- [SHOW CREATE USER](../sql-reference/statements/show.md#show-create-user-statement) -- [SHOW USERS](../sql-reference/statements/show.md#show-users-statement) - -### Settings Applying {#access-control-settings-applying} - -Settings can be configured differently: for a user account, in its granted roles and in settings profiles. At user login, if a setting is configured for different access entities, the value and constraints of this setting are applied as follows (from higher to lower priority): - -1. User account settings. -2. The settings of default roles of the user account. If a setting is configured in some roles, then order of the setting application is undefined. -3. The settings from settings profiles assigned to a user or to its default roles. If a setting is configured in some profiles, then order of setting application is undefined. -4. Settings applied to all the server by default or from the [default profile](../operations/server-configuration-parameters/settings.md#default-profile). - -## Role {#role-management} - -Role is a container for access entities that can be granted to a user account. 
- -Role contains: - -- [Privileges](../sql-reference/statements/grant.md#grant-privileges) -- Settings and constraints -- List of assigned roles - -Management queries: - -- [CREATE ROLE](../sql-reference/statements/create/role.md) -- [ALTER ROLE](../sql-reference/statements/alter/role.md#alter-role-statement) -- [DROP ROLE](../sql-reference/statements/drop.md) -- [SET ROLE](../sql-reference/statements/set-role.md) -- [SET DEFAULT ROLE](../sql-reference/statements/set-role.md#set-default-role-statement) -- [SHOW CREATE ROLE](../sql-reference/statements/show.md#show-create-role-statement) -- [SHOW ROLES](../sql-reference/statements/show.md#show-roles-statement) - -Privileges can be granted to a role by the [GRANT](../sql-reference/statements/grant.md) query. To revoke privileges from a role ClickHouse provides the [REVOKE](../sql-reference/statements/revoke.md) query. - -## Row Policy {#row-policy-management} - -Row policy is a filter that defines which of the rows are available to a user or a role. Row policy contains filters for one particular table, as well as a list of roles and/or users which should use this row policy. - -:::warning -Row policies makes sense only for users with readonly access. If user can modify table or copy partitions between tables, it defeats the restrictions of row policies. -::: - -Management queries: - -- [CREATE ROW POLICY](../sql-reference/statements/create/row-policy.md) -- [ALTER ROW POLICY](../sql-reference/statements/alter/row-policy.md#alter-row-policy-statement) -- [DROP ROW POLICY](../sql-reference/statements/drop.md#drop-row-policy-statement) -- [SHOW CREATE ROW POLICY](../sql-reference/statements/show.md#show-create-row-policy-statement) -- [SHOW POLICIES](../sql-reference/statements/show.md#show-policies-statement) - -## Settings Profile {#settings-profiles-management} - -Settings profile is a collection of [settings](../operations/settings/index.md). Settings profile contains settings and constraints, as well as a list of roles and/or users to which this profile is applied. - -Management queries: - -- [CREATE SETTINGS PROFILE](../sql-reference/statements/create/settings-profile.md#create-settings-profile-statement) -- [ALTER SETTINGS PROFILE](../sql-reference/statements/alter/settings-profile.md#alter-settings-profile-statement) -- [DROP SETTINGS PROFILE](../sql-reference/statements/drop.md#drop-settings-profile-statement) -- [SHOW CREATE SETTINGS PROFILE](../sql-reference/statements/show.md#show-create-settings-profile-statement) -- [SHOW PROFILES](../sql-reference/statements/show.md#show-profiles-statement) - -## Quota {#quotas-management} - -Quota limits resource usage. See [Quotas](../operations/quotas.md). - -Quota contains a set of limits for some durations, as well as a list of roles and/or users which should use this quota. - -Management queries: - -- [CREATE QUOTA](../sql-reference/statements/create/quota.md) -- [ALTER QUOTA](../sql-reference/statements/alter/quota.md#alter-quota-statement) -- [DROP QUOTA](../sql-reference/statements/drop.md#drop-quota-statement) -- [SHOW CREATE QUOTA](../sql-reference/statements/show.md#show-create-quota-statement) -- [SHOW QUOTA](../sql-reference/statements/show.md#show-quota-statement) -- [SHOW QUOTAS](../sql-reference/statements/show.md#show-quotas-statement) - -## Enabling SQL-driven Access Control and Account Management {#enabling-access-control} - -- Setup a directory for configurations storage. 
- - ClickHouse stores access entity configurations in the folder set in the [access_control_path](../operations/server-configuration-parameters/settings.md#access_control_path) server configuration parameter. - -- Enable SQL-driven access control and account management for at least one user account. - - By default, SQL-driven access control and account management is disabled for all users. You need to configure at least one user in the `users.xml` configuration file and set the value of the [access_management](../operations/settings/settings-users.md#access_management-user-setting) setting to 1. diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index f1a5649cd4c..6da61833c12 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -1,5 +1,6 @@ --- slug: /en/operations/backup +description: In order to effectively mitigate possible human errors, you should carefully prepare a strategy for backing up and restoring your data. --- # Backup and Restore @@ -29,7 +30,7 @@ slug: /en/operations/backup ``` :::note ALL -`ALL` is only applicable to the `RESTORE` command. +`ALL` is only applicable to the `RESTORE` command prior to version 23.4 of ClickHouse. ::: ## Background @@ -213,7 +214,7 @@ To write backups to an S3 bucket you need three pieces of information: for example `Abc+123` :::note -Creating an S3 bucket is covered in [Use S3 Object Storage as a ClickHouse disk](/docs/en/integrations/data-ingestion/s3/configuring-s3-for-clickhouse-use.md), just come back to this doc after saving the policy, there is no need to configure ClickHouse to use the S3 bucket. +Creating an S3 bucket is covered in [Use S3 Object Storage as a ClickHouse disk](/docs/en/integrations/data-ingestion/s3/index.md#configuring-s3-for-clickhouse-use); just come back to this doc after saving the policy, as there is no need to configure ClickHouse to use the S3 bucket. ::: The destination for a backup will be specified like this: @@ -330,7 +331,7 @@ It is also possible to `BACKUP`/`RESTORE` to S3 by configuring an S3 disk in the
- <type>s3</type> + <type>s3_plain</type>
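With a disk of this type configured, a backup can target it by disk name. As a minimal sketch, assuming the `BACKUP`/`RESTORE` syntax shown earlier in this file (the disk name `s3_plain` and the backup name `cloud_backup` are illustrative):

```sql
-- Write a backup of a table to the S3-backed disk configured above
-- (the disk name is assumed to match the storage configuration).
BACKUP TABLE data TO Disk('s3_plain', 'cloud_backup');

-- Restore into a new table to verify that the backup is usable.
RESTORE TABLE data AS data_restored FROM Disk('s3_plain', 'cloud_backup');
```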
@@ -382,3 +383,19 @@ Data can be restored from backup using the `ALTER TABLE ... ATTACH PARTITION ... For more information about queries related to partition manipulations, see the [ALTER documentation](../sql-reference/statements/alter/partition.md#alter_manipulations-with-partitions). A third-party tool is available to automate this approach: [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup). + +## Settings to disallow concurrent backup/restore + +To disallow concurrent backup/restore, you can use these settings respectively. + +```xml +<clickhouse> +    <backups> +        <allow_concurrent_backups>false</allow_concurrent_backups> +        <allow_concurrent_restores>false</allow_concurrent_restores> +    </backups> +</clickhouse> +``` + +The default value for both is true, so by default concurrent backup/restores are allowed. +When these settings are false on a cluster, only one backup/restore is allowed to run on a cluster at a time. \ No newline at end of file diff --git a/docs/en/operations/caches.md b/docs/en/operations/caches.md index 0f9156048c4..5bc16b6f51d 100644 --- a/docs/en/operations/caches.md +++ b/docs/en/operations/caches.md @@ -3,6 +3,7 @@ slug: /en/operations/caches sidebar_position: 65 sidebar_label: Caches title: "Cache Types" +description: When performing queries, ClickHouse uses different caches. --- When performing queries, ClickHouse uses different caches. @@ -22,6 +23,6 @@ Additional cache types: - [Dictionaries](../sql-reference/dictionaries/index.md) data cache. - Schema inference cache. - [Filesystem cache](storing-data.md) over S3, Azure, Local and other disks. -- [(Experimental) Query cache](query-cache.md). +- [Query cache](query-cache.md). To drop one of the caches, use [SYSTEM DROP ... CACHE](../sql-reference/statements/system.md#drop-mark-cache) statements. diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md deleted file mode 100644 index 10bad586a54..00000000000 --- a/docs/en/operations/clickhouse-keeper.md +++ /dev/null @@ -1,378 +0,0 @@ ---- -slug: /en/operations/clickhouse-keeper -sidebar_position: 66 -sidebar_label: ClickHouse Keeper ---- - -# ClickHouse Keeper -import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_automated.md'; - -<SelfManaged /> - -ClickHouse Keeper provides the coordination system for data [replication](../engines/table-engines/mergetree-family/replication.md) and [distributed DDL](../sql-reference/distributed-ddl.md) queries execution. ClickHouse Keeper is compatible with ZooKeeper. - -## Implementation details {#implementation-details} - -ZooKeeper is one of the first well-known open-source coordination systems. It's implemented in Java, and has quite a simple and powerful data model. ZooKeeper's coordination algorithm, ZooKeeper Atomic Broadcast (ZAB), doesn't provide linearizability guarantees for reads, because each ZooKeeper node serves reads locally. Unlike ZooKeeper ClickHouse Keeper is written in C++ and uses the [RAFT algorithm](https://raft.github.io/) [implementation](https://github.com/eBay/NuRaft). This algorithm allows linearizability for reads and writes, and has several open-source implementations in different languages. - -By default, ClickHouse Keeper provides the same guarantees as ZooKeeper (linearizable writes, non-linearizable reads). It has a compatible client-server protocol, so any standard ZooKeeper client can be used to interact with ClickHouse Keeper. Snapshots and logs have an incompatible format with ZooKeeper, but the `clickhouse-keeper-converter` tool enables the conversion of ZooKeeper data to ClickHouse Keeper snapshots.
The interserver protocol in ClickHouse Keeper is also incompatible with ZooKeeper so a mixed ZooKeeper / ClickHouse Keeper cluster is impossible. - -ClickHouse Keeper supports Access Control Lists (ACLs) the same way as [ZooKeeper](https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) does. ClickHouse Keeper supports the same set of permissions and has the identical built-in schemes: `world`, `auth` and `digest`. The digest authentication scheme uses the pair `username:password`, the password is encoded in Base64. - -:::note -External integrations are not supported. -::: - -## Configuration {#configuration} - -ClickHouse Keeper can be used as a standalone replacement for ZooKeeper or as an internal part of the ClickHouse server. In both cases the configuration is almost the same `.xml` file. The main ClickHouse Keeper configuration tag is `<keeper_server>`. Keeper configuration has the following parameters: - -- `tcp_port` — Port for a client to connect (default for ZooKeeper is `2181`). -- `tcp_port_secure` — Secure port for an SSL connection between client and keeper-server. -- `server_id` — Unique server id, each participant of the ClickHouse Keeper cluster must have a unique number (1, 2, 3, and so on). -- `log_storage_path` — Path to coordination logs, just like ZooKeeper it is best to store logs on non-busy nodes. -- `snapshot_storage_path` — Path to coordination snapshots. - -Other common parameters are inherited from the ClickHouse server config (`listen_host`, `logger`, and so on). - -Internal coordination settings are located in the `<keeper_server>.<coordination_settings>` section: - -- `operation_timeout_ms` — Timeout for a single client operation (ms) (default: 10000). -- `min_session_timeout_ms` — Min timeout for client session (ms) (default: 10000). -- `session_timeout_ms` — Max timeout for client session (ms) (default: 100000). -- `dead_session_check_period_ms` — How often ClickHouse Keeper checks for dead sessions and removes them (ms) (default: 500). -- `heart_beat_interval_ms` — How often a ClickHouse Keeper leader will send heartbeats to followers (ms) (default: 500). -- `election_timeout_lower_bound_ms` — If the follower does not receive a heartbeat from the leader in this interval, then it can initiate leader election (default: 1000). Must be less than or equal to `election_timeout_upper_bound_ms`. Ideally they shouldn't be equal. -- `election_timeout_upper_bound_ms` — If the follower does not receive a heartbeat from the leader in this interval, then it must initiate leader election (default: 2000). -- `rotate_log_storage_interval` — How many log records to store in a single file (default: 100000). -- `reserved_log_items` — How many coordination log records to store before compaction (default: 100000). -- `snapshot_distance` — How often ClickHouse Keeper will create new snapshots (in the number of records in logs) (default: 100000). -- `snapshots_to_keep` — How many snapshots to keep (default: 3). -- `stale_log_gap` — Threshold when leader considers follower as stale and sends the snapshot to it instead of logs (default: 10000). -- `fresh_log_gap` — When node became fresh (default: 200). -- `max_requests_batch_size` - Max size of batch in requests count before it will be sent to RAFT (default: 100). -- `force_sync` — Call `fsync` on each write to coordination log (default: true). -- `quorum_reads` — Execute read requests as writes through whole RAFT consensus with similar speed (default: false).
-- `raft_logs_level` — Text logging level about coordination (trace, debug, and so on) (default: system default). -- `auto_forwarding` — Allow to forward write requests from followers to the leader (default: true). -- `shutdown_timeout` — Wait to finish internal connections and shutdown (ms) (default: 5000). -- `startup_timeout` — If the server doesn't connect to other quorum participants in the specified timeout it will terminate (ms) (default: 30000). -- `four_letter_word_white_list` — White list of 4lw commands (default: `conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld`). - -Quorum configuration is located in the `<keeper_server>.<raft_configuration>` section and contain servers description. - -The only parameter for the whole quorum is `secure`, which enables encrypted connection for communication between quorum participants. The parameter can be set `true` if SSL connection is required for internal communication between nodes, or left unspecified otherwise. - -The main parameters for each `<server>` are: - -- `id` — Server identifier in a quorum. -- `hostname` — Hostname where this server is placed. -- `port` — Port where this server listens for connections. - -:::note -In the case of a change in the topology of your ClickHouse Keeper cluster (e.g., replacing a server), please make sure to keep the mapping of `server_id` to `hostname` consistent and avoid shuffling or reusing an existing `server_id` for different servers (e.g., it can happen if your rely on automation scripts to deploy ClickHouse Keeper) -::: - -Examples of configuration for quorum with three nodes can be found in [integration tests](https://github.com/ClickHouse/ClickHouse/tree/master/tests/integration) with `test_keeper_` prefix. Example configuration for server #1: - -```xml -<keeper_server> -    <tcp_port>2181</tcp_port> -    <server_id>1</server_id> -    <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path> -    <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path> - -    <coordination_settings> -        <operation_timeout_ms>10000</operation_timeout_ms> -        <session_timeout_ms>30000</session_timeout_ms> -        <raft_logs_level>trace</raft_logs_level> -    </coordination_settings> - -    <raft_configuration> -        <server> -            <id>1</id> -            <hostname>zoo1</hostname> -            <port>9444</port> -        </server> -        <server> -            <id>2</id> -            <hostname>zoo2</hostname> -            <port>9444</port> -        </server> -        <server> -            <id>3</id> -            <hostname>zoo3</hostname> -            <port>9444</port> -        </server> -    </raft_configuration> -</keeper_server> -``` - -## How to run {#how-to-run} - -ClickHouse Keeper is bundled into the ClickHouse server package, just add configuration of `<keeper_server>` and start ClickHouse server as always. If you want to run standalone ClickHouse Keeper you can start it in a similar way with: - -```bash -clickhouse-keeper --config /etc/your_path_to_config/config.xml -``` - -If you don't have the symlink (`clickhouse-keeper`) you can create it or specify `keeper` as an argument to `clickhouse`: - -```bash -clickhouse keeper --config /etc/your_path_to_config/config.xml -``` - -## Four Letter Word Commands {#four-letter-word-commands} - -ClickHouse Keeper also provides 4lw commands which are almost the same with Zookeeper. Each command is composed of four letters such as `mntr`, `stat` etc. There are some more interesting commands: `stat` gives some general information about the server and connected clients, while `srvr` and `cons` give extended details on server and connections respectively. - -The 4lw commands has a white list configuration `four_letter_word_white_list` which has default value `conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld`. - -You can issue the commands to ClickHouse Keeper via telnet or nc, at the client port. - -``` -echo mntr | nc localhost 9181 -``` - -Bellow is the detailed 4lw commands: - -- `ruok`: Tests if server is running in a non-error state. The server will respond with `imok` if it is running. Otherwise it will not respond at all.
A response of `imok` does not necessarily indicate that the server has joined the quorum, just that the server process is active and bound to the specified client port. Use "stat" for details on state wrt quorum and client connection information. - -``` -imok -``` - -- `mntr`: Outputs a list of variables that could be used for monitoring the health of the cluster. - -``` -zk_version v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7 -zk_avg_latency 0 -zk_max_latency 0 -zk_min_latency 0 -zk_packets_received 68 -zk_packets_sent 68 -zk_num_alive_connections 1 -zk_outstanding_requests 0 -zk_server_state leader -zk_znode_count 4 -zk_watch_count 1 -zk_ephemerals_count 0 -zk_approximate_data_size 723 -zk_open_file_descriptor_count 310 -zk_max_file_descriptor_count 10240 -zk_followers 0 -zk_synced_followers 0 -``` - -- `srvr`: Lists full details for the server. - -``` -ClickHouse Keeper version: v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7 -Latency min/avg/max: 0/0/0 -Received: 2 -Sent : 2 -Connections: 1 -Outstanding: 0 -Zxid: 34 -Mode: leader -Node count: 4 -``` - -- `stat`: Lists brief details for the server and connected clients. - -``` -ClickHouse Keeper version: v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7 -Clients: - 192.168.1.1:52852(recved=0,sent=0) - 192.168.1.1:52042(recved=24,sent=48) -Latency min/avg/max: 0/0/0 -Received: 4 -Sent : 4 -Connections: 1 -Outstanding: 0 -Zxid: 36 -Mode: leader -Node count: 4 -``` - -- `srst`: Reset server statistics. The command will affect the result of `srvr`, `mntr` and `stat`. - -``` -Server stats reset. -``` - -- `conf`: Print details about serving configuration. - -``` -server_id=1 -tcp_port=2181 -four_letter_word_white_list=* -log_storage_path=./coordination/logs -snapshot_storage_path=./coordination/snapshots -max_requests_batch_size=100 -session_timeout_ms=30000 -operation_timeout_ms=10000 -dead_session_check_period_ms=500 -heart_beat_interval_ms=500 -election_timeout_lower_bound_ms=1000 -election_timeout_upper_bound_ms=2000 -reserved_log_items=1000000000000000 -snapshot_distance=10000 -auto_forwarding=true -shutdown_timeout=5000 -startup_timeout=240000 -raft_logs_level=information -snapshots_to_keep=3 -rotate_log_storage_interval=100000 -stale_log_gap=10000 -fresh_log_gap=200 -max_requests_batch_size=100 -quorum_reads=false -force_sync=false -compress_logs=true -compress_snapshots_with_zstd_format=true -configuration_change_tries_count=20 -``` - -- `cons`: List full connection/session details for all clients connected to this server. Includes information on numbers of packets received/sent, session id, operation latencies, last operation performed, etc... - -``` - 192.168.1.1:52163(recved=0,sent=0,sid=0xffffffffffffffff,lop=NA,est=1636454787393,to=30000,lzxid=0xffffffffffffffff,lresp=0,llat=0,minlat=0,avglat=0,maxlat=0) - 192.168.1.1:52042(recved=9,sent=18,sid=0x0000000000000001,lop=List,est=1636454739887,to=30000,lcxid=0x0000000000000005,lzxid=0x0000000000000005,lresp=1636454739892,llat=0,minlat=0,avglat=0,maxlat=0) -``` - -- `crst`: Reset connection/session statistics for all connections. - -``` -Connection stats reset. 
-``` - -- `envi`: Print details about serving environment - -``` -Environment: -clickhouse.keeper.version=v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7 -host.name=ZBMAC-C02D4054M.local -os.name=Darwin -os.arch=x86_64 -os.version=19.6.0 -cpu.count=12 -user.name=root -user.home=/Users/JackyWoo/ -user.dir=/Users/JackyWoo/project/jd/clickhouse/cmake-build-debug/programs/ -user.tmp=/var/folders/b4/smbq5mfj7578f2jzwn602tt40000gn/T/ -``` - - -- `dirs`: Shows the total size of snapshot and log files in bytes - -``` -snapshot_dir_size: 0 -log_dir_size: 3875 -``` - -- `isro`: Tests if server is running in read-only mode. The server will respond with "ro" if in read-only mode or "rw" if not in read-only mode. - -``` -rw -``` - -- `wchs`: Lists brief information on watches for the server. - -``` -1 connections watching 1 paths -Total watches:1 -``` - -- `wchc`: Lists detailed information on watches for the server, by session. This outputs a list of sessions (connections) with associated watches (paths). Note, depending on the number of watches this operation may be expensive (ie impact server performance), use it carefully. - -``` -0x0000000000000001 - /clickhouse/task_queue/ddl -``` - -- `wchp`: Lists detailed information on watches for the server, by path. This outputs a list of paths (znodes) with associated sessions. Note, depending on the number of watches this operation may be expensive (i. e. impact server performance), use it carefully. - -``` -/clickhouse/task_queue/ddl - 0x0000000000000001 -``` - -- `dump`: Lists the outstanding sessions and ephemeral nodes. This only works on the leader. - -``` -Sessions dump (2): -0x0000000000000001 -0x0000000000000002 -Sessions with Ephemerals (1): -0x0000000000000001 - /clickhouse/task_queue/ddl -``` - -- `csnp`: Schedule a snapshot creation task. Return the last committed log index of the scheduled snapshot if success or `Failed to schedule snapshot creation task.` if failed. Note that `lgif` command can help you determine whether the snapshot is done. - -``` -100 -``` - -- `lgif`: Keeper log information. `first_log_idx` : my first log index in log store; `first_log_term` : my first log term; `last_log_idx` : my last log index in log store; `last_log_term` : my last log term; `last_committed_log_idx` : my last committed log index in state machine; `leader_committed_log_idx` : leader's committed log index from my perspective; `target_committed_log_idx` : target log index should be committed to; `last_snapshot_idx` : the largest committed log index in last snapshot. - -``` -first_log_idx 1 -first_log_term 1 -last_log_idx 101 -last_log_term 1 -last_committed_log_idx 100 -leader_committed_log_idx 101 -target_committed_log_idx 101 -last_snapshot_idx 50 -``` - -- `rqld`: Request to become new leader. Return `Sent leadership request to leader.` if request sent or `Failed to send leadership request to leader.` if request not sent. Note that if node is already leader the outcome is same as the request is sent. - -``` -Sent leadership request to leader. -``` - -## Migration from ZooKeeper {#migration-from-zookeeper} - -Seamlessly migration from ZooKeeper to ClickHouse Keeper is impossible you have to stop your ZooKeeper cluster, convert data and start ClickHouse Keeper. `clickhouse-keeper-converter` tool allows converting ZooKeeper logs and snapshots to ClickHouse Keeper snapshot. It works only with ZooKeeper > 3.4. Steps for migration: - -1. Stop all ZooKeeper nodes. - -2. 
Optional, but recommended: find ZooKeeper leader node, start and stop it again. It will force ZooKeeper to create a consistent snapshot. - -3. Run `clickhouse-keeper-converter` on a leader, for example: - -```bash -clickhouse-keeper-converter --zookeeper-logs-dir /var/lib/zookeeper/version-2 --zookeeper-snapshots-dir /var/lib/zookeeper/version-2 --output-dir /path/to/clickhouse/keeper/snapshots -``` - -4. Copy snapshot to ClickHouse server nodes with a configured `keeper` or start ClickHouse Keeper instead of ZooKeeper. The snapshot must persist on all nodes, otherwise, empty nodes can be faster and one of them can become a leader. - - - -## Recovering after losing quorum - -Because ClickHouse Keeper uses Raft it can tolerate certain amount of node crashes depending on the cluster size. \ -E.g. for a 3-node cluster, it will continue working correctly if only 1 node crashes. - -Cluster configuration can be dynamically configured but there are some limitations. Reconfiguration relies on Raft also -so to add/remove a node from the cluster you need to have a quorum. If you lose too many nodes in your cluster at the same time without any chance -of starting them again, Raft will stop working and not allow you to reconfigure your cluster using the conventional way. - -Nevertheless, ClickHouse Keeper has a recovery mode which allows you to forcefully reconfigure your cluster with only 1 node. -This should be done only as your last resort if you cannot start your nodes again, or start a new instance on the same endpoint. - -Important things to note before continuing: -- Make sure that the failed nodes cannot connect to the cluster again. -- Do not start any of the new nodes until it's specified in the steps. - -After making sure that the above things are true, you need to do following: -1. Pick a single Keeper node to be your new leader. Be aware that the data of that node will be used for the entire cluster so we recommend to use a node with the most up to date state. -2. Before doing anything else, make a backup of the `log_storage_path` and `snapshot_storage_path` folders of the picked node. -3. Reconfigure the cluster on all of the nodes you want to use. -4. Send the four letter command `rcvr` to the node you picked which will move the node to the recovery mode OR stop Keeper instance on the picked node and start it again with the `--force-recovery` argument. -5. One by one, start Keeper instances on the new nodes making sure that `mntr` returns `follower` for the `zk_server_state` before starting the next one. -6. While in the recovery mode, the leader node will return error message for `mntr` command until it achieves quorum with the new nodes and refuse any requests from the client and the followers. -7. After quorum is achieved, the leader node will return to the normal mode of operation, accepting all the requests using Raft - verify with `mntr` which should return `leader` for the `zk_server_state`. diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index 2e241ec1980..b3583e156ad 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -14,10 +14,10 @@ All XML files should have the same root element, usually ``. As for Some settings specified in the main configuration file can be overridden in other configuration files: -- The `replace` or `remove` attributes can be specified for the elements of these configuration files. 
-- If neither is specified, it combines the contents of elements recursively, replacing values of duplicate children. -- If `replace` is specified, it replaces the entire element with the specified one. -- If `remove` is specified, it deletes the element. +- The `replace` or `remove` attributes can be specified for the elements of these configuration files. +- If neither is specified, it combines the contents of elements recursively, replacing values of duplicate children. +- If `replace` is specified, it replaces the entire element with the specified one. +- If `remove` is specified, it deletes the element. You can also declare attributes as coming from environment variables by using `from_env="VARIABLE_NAME"`: diff --git a/docs/en/operations/external-authenticators/kerberos.md b/docs/en/operations/external-authenticators/kerberos.md index 95944e96194..27dc23c9792 100644 --- a/docs/en/operations/external-authenticators/kerberos.md +++ b/docs/en/operations/external-authenticators/kerberos.md @@ -59,11 +59,11 @@ With filtering by realm: ``` -:::warning +:::note You can define only one `kerberos` section. The presence of multiple `kerberos` sections will force ClickHouse to disable Kerberos authentication. ::: -:::warning +:::note `principal` and `realm` sections cannot be specified at the same time. The presence of both `principal` and `realm` sections will force ClickHouse to disable Kerberos authentication. ::: @@ -103,7 +103,7 @@ Example (goes into `users.xml`):
``` -:::warning +:::note Note that Kerberos authentication cannot be used alongside with any other authentication mechanism. The presence of any other sections like `password` alongside `kerberos` will force ClickHouse to shutdown. ::: @@ -113,7 +113,7 @@ Note, that now, once user `my_user` uses `kerberos`, Kerberos must be enabled in ### Enabling Kerberos using SQL {#enabling-kerberos-using-sql} -When [SQL-driven Access Control and Account Management](../access-rights.md#access-control) is enabled in ClickHouse, users identified by Kerberos can also be created using SQL statements. +When [SQL-driven Access Control and Account Management](/docs/en/guides/sre/user-management/index.md#access-control) is enabled in ClickHouse, users identified by Kerberos can also be created using SQL statements. ```sql CREATE USER my_user IDENTIFIED WITH kerberos REALM 'EXAMPLE.COM' diff --git a/docs/en/operations/external-authenticators/ldap.md b/docs/en/operations/external-authenticators/ldap.md index eba560f6ea5..ee2336e9378 100644 --- a/docs/en/operations/external-authenticators/ldap.md +++ b/docs/en/operations/external-authenticators/ldap.md @@ -112,7 +112,7 @@ At each login attempt, ClickHouse tries to "bind" to the specified DN defined by Note, that user `my_user` refers to `my_ldap_server`. This LDAP server must be configured in the main `config.xml` file as described previously. -When SQL-driven [Access Control and Account Management](../access-rights.md#access-control) is enabled, users that are authenticated by LDAP servers can also be created using the [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement) statement. +When SQL-driven [Access Control and Account Management](/docs/en/guides/sre/user-management/index.md#access-control) is enabled, users that are authenticated by LDAP servers can also be created using the [CREATE USER](/docs/en/sql-reference/statements/create/user.md#create-user-statement) statement. Query: @@ -120,11 +120,11 @@ Query: CREATE USER my_user IDENTIFIED WITH ldap SERVER 'my_ldap_server'; ``` -## LDAP Exernal User Directory {#ldap-external-user-directory} +## LDAP External User Directory {#ldap-external-user-directory} In addition to the locally defined users, a remote LDAP server can be used as a source of user definitions. To achieve this, specify previously defined LDAP server name (see [LDAP Server Definition](#ldap-server-definition)) in the `ldap` section inside the `users_directories` section of the `config.xml` file. -At each login attempt, ClickHouse tries to find the user definition locally and authenticate it as usual. If the user is not defined, ClickHouse will assume the definition exists in the external LDAP directory and will try to "bind" to the specified DN at the LDAP server using the provided credentials. If successful, the user will be considered existing and authenticated. The user will be assigned roles from the list specified in the `roles` section. Additionally, LDAP "search" can be performed and results can be transformed and treated as role names and then be assigned to the user if the `role_mapping` section is also configured. All this implies that the SQL-driven [Access Control and Account Management](../access-rights.md#access-control) is enabled and roles are created using the [CREATE ROLE](../../sql-reference/statements/create/role.md#create-role-statement) statement. +At each login attempt, ClickHouse tries to find the user definition locally and authenticate it as usual. 
If the user is not defined, ClickHouse will assume the definition exists in the external LDAP directory and will try to "bind" to the specified DN at the LDAP server using the provided credentials. If successful, the user will be considered existing and authenticated. The user will be assigned roles from the list specified in the `roles` section. Additionally, LDAP "search" can be performed and results can be transformed and treated as role names and then be assigned to the user if the `role_mapping` section is also configured. All this implies that the SQL-driven [Access Control and Account Management](/docs/en/guides/sre/user-management/index.md#access-control) is enabled and roles are created using the [CREATE ROLE](/docs/en/sql-reference/statements/create/role.md#create-role-statement) statement. **Example** @@ -173,7 +173,7 @@ Note that `my_ldap_server` referred in the `ldap` section inside the `user_direc - `roles` — Section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server. - If no roles are specified here or assigned during role mapping (below), user will not be able to perform any actions after authentication. - `role_mapping` — Section with LDAP search parameters and mapping rules. - - When a user authenticates, while still bound to LDAP, an LDAP search is performed using `search_filter` and the name of the logged-in user. For each entry found during that search, the value of the specified attribute is extracted. For each attribute value that has the specified prefix, the prefix is removed, and the rest of the value becomes the name of a local role defined in ClickHouse, which is expected to be created beforehand by the [CREATE ROLE](../../sql-reference/statements/create/role.md#create-role-statement) statement. + - When a user authenticates, while still bound to LDAP, an LDAP search is performed using `search_filter` and the name of the logged-in user. For each entry found during that search, the value of the specified attribute is extracted. For each attribute value that has the specified prefix, the prefix is removed, and the rest of the value becomes the name of a local role defined in ClickHouse, which is expected to be created beforehand by the [CREATE ROLE](/docs/en/sql-reference/statements/create/role.md#create-role-statement) statement. - There can be multiple `role_mapping` sections defined inside the same `ldap` section. All of them will be applied. - `base_dn` — Template used to construct the base DN for the LDAP search. - The resulting DN will be constructed by replacing all `{user_name}`, `{bind_dn}`, and `{user_dn}` substrings of the template with the actual user name, bind DN, and user DN during each LDAP search. diff --git a/docs/en/operations/monitoring.md b/docs/en/operations/monitoring.md index 2b3c4bdbbdf..ebf981690a9 100644 --- a/docs/en/operations/monitoring.md +++ b/docs/en/operations/monitoring.md @@ -2,6 +2,7 @@ slug: /en/operations/monitoring sidebar_position: 45 sidebar_label: Monitoring +description: You can monitor the utilization of hardware resources and also ClickHouse server metrics. --- # Monitoring @@ -11,15 +12,15 @@ import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_automated.md You can monitor: -- Utilization of hardware resources. -- ClickHouse server metrics. +- Utilization of hardware resources. +- ClickHouse server metrics. 
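Both kinds of data can also be inspected directly from SQL through the system tables referenced in this section. A quick sketch (the metric and event names below are common built-ins, used here as examples):

```sql
-- Point-in-time metrics, e.g. the number of queries executing right now.
SELECT metric, value FROM system.metrics WHERE metric = 'Query';

-- Cumulative event counters, e.g. the total number of SELECT queries served.
SELECT event, value FROM system.events WHERE event = 'SelectQuery';
```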
 ## Resource Utilization {#resource-utilization}
 
 ClickHouse also monitors the state of hardware resources by itself such as:
 
-- Load and temperature on processors.
-- Utilization of storage system, RAM and network.
+- Load and temperature on processors.
+- Utilization of storage system, RAM and network.
 
 This data is collected in the `system.asynchronous_metric_log` table.
 
@@ -31,8 +32,8 @@ To track server events use server logs. See the [logger](../operations/server-co
 
 ClickHouse collects:
 
-- Different metrics of how the server uses computational resources.
-- Common statistics on query processing.
+- Different metrics of how the server uses computational resources.
+- Common statistics on query processing.
 
 You can find metrics in the [system.metrics](../operations/system-tables/metrics.md#system_tables-metrics), [system.events](../operations/system-tables/events.md#system_tables-events), and [system.asynchronous_metrics](../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) tables.
diff --git a/docs/en/operations/named-collections.md b/docs/en/operations/named-collections.md
index f3cfa4a5372..a521a369721 100644
--- a/docs/en/operations/named-collections.md
+++ b/docs/en/operations/named-collections.md
@@ -2,34 +2,115 @@ slug: /en/operations/named-collections
 sidebar_position: 69
 sidebar_label: "Named collections"
+title: "Named collections"
 ---
 
-# Storing details for connecting to external sources in configuration files
+Named collections provide a way to store collections of key-value pairs to be
+used to configure integrations with external sources. You can use named collections with
+dictionaries, tables, table functions, and object storage.
 
-Details for connecting to external sources (dictionaries, tables, table functions) can be saved
-in configuration files and thus simplify the creation of objects and hide credentials
-from users with only SQL access.
+Named collections can be configured with DDL or in configuration files and are applied
+when ClickHouse starts. They simplify the creation of objects and the hiding of credentials
+from users without administrative access.
 
-Parameters can be set in XML `<format>CSV</format>` and overridden in SQL `, format = 'TSV'`.
-The parameters in SQL can be overridden using format `key` = `value`: `compression_method = 'gzip'`.
+The keys in a named collection must match the parameter names of the corresponding
+function, table engine, database, etc. In the examples below the parameter list is
+linked to for each type.
 
-Named collections are stored in the `config.xml` file of the ClickHouse server in the `<named_collections>` section and are applied when ClickHouse starts.
+Parameters set in a named collection can be overridden in SQL; this is shown in the examples
+below.
 
-Example of configuration:
-```xml
-$ cat /etc/clickhouse-server/config.d/named_collections.xml
+## Storing named collections in the system database
+
+### DDL example
+
+```sql
+CREATE NAMED COLLECTION name AS
+key_1 = 'value',
+key_2 = 'value2',
+url = 'https://connection.url/'
+```
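+
+Once created, a DDL named collection can be inspected in the `system.named_collections` table (a quick check; this assumes a version recent enough to have this system table):
+
+```sql
+-- Shows each collection together with its key-value pairs
+SELECT name, collection
+FROM system.named_collections;
+```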
+
+### Permissions to create named collections with DDL
+
+To manage named collections with DDL a user must have the `named_collection_control` privilege. This can be assigned by adding a file to `/etc/clickhouse-server/users.d/`. The example gives the user `default` both the `access_management` and `named_collection_control` privileges:
+
+```xml title='/etc/clickhouse-server/users.d/user_default.xml'
+<clickhouse>
+    <users>
+        <default replace="true">
+            <password_sha256_hex>65e84be33532fb784c48129675f9eff3a682b27168c0ea744b2cf58ee02337c5</password_sha256_hex>
+            <access_management>1</access_management>
+            <named_collection_control>1</named_collection_control>
+        </default>
+    </users>
+</clickhouse>
+```
+
+:::tip
+In the above example the `password_sha256_hex` value is the hexadecimal representation of the SHA256 hash of the password. This configuration for the user `default` has the attribute `replace=true` because the default configuration has a plain-text `password` set, and it is not possible to have both plain-text and SHA256-hex passwords set for a user.
+:::
+
+## Storing named collections in configuration files
+
+### XML example
+
+```xml title='/etc/clickhouse-server/config.d/named_collections.xml'
 <clickhouse>
     <named_collections>
-        ...
+        <name>
+            <key_1>value</key_1>
+            <key_2>value_2</key_2>
+            <url>https://connection.url/</url>
+        </name>
     </named_collections>
 </clickhouse>
 ```
-## Named collections for accessing S3.
+## Modifying named collections
+
+Named collections that are created with DDL queries can be altered or dropped with DDL. Named collections created with XML files can be managed by editing or deleting the corresponding XML.
+
+### Alter a DDL named collection
+
+Change or add the keys `key1` and `key3` of the collection `collection2`:
+```sql
+ALTER NAMED COLLECTION collection2 SET key1=4, key3='value3'
+```
+
+Remove the key `key2` from `collection2`:
+```sql
+ALTER NAMED COLLECTION collection2 DELETE key2
+```
+
+Change or add the key `key1` and delete the key `key3` of the collection `collection2`:
+```sql
+ALTER NAMED COLLECTION collection2 SET key1=4, DELETE key3
+```
+
+### Drop the DDL named collection `collection2`
+```sql
+DROP NAMED COLLECTION collection2
+```
+
+## Named collections for accessing S3
 
 For the description of parameters, see [s3 Table Function](../sql-reference/table-functions/s3.md).
 
-Example of configuration:
+### DDL example
+
+```sql
+CREATE NAMED COLLECTION s3_mydata AS
+access_key_id = 'AKIAIOSFODNN7EXAMPLE',
+secret_access_key = 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY',
+format = 'CSV',
+url = 'https://s3.us-east-1.amazonaws.com/yourbucket/mydata/'
+```
+
+### XML example
+
 ```xml
@@ -43,23 +124,23 @@ Example of configuration:
 ```
 
-### Example of using named collections with the s3 function
+### s3() function and S3 table named collection examples
+
+Both of the following examples use the same named collection `s3_mydata`:
+
+#### s3() function
 
 ```sql
 INSERT INTO FUNCTION s3(s3_mydata, filename = 'test_file.tsv.gz', format = 'TSV', structure = 'number UInt64', compression_method = 'gzip')
 SELECT * FROM numbers(10000);
-
-SELECT count()
-FROM s3(s3_mydata, filename = 'test_file.tsv.gz')
-
-┌─count()─┐
-│   10000 │
-└─────────┘
-1 rows in set. Elapsed: 0.279 sec. Processed 10.00 thousand rows, 90.00 KB (35.78 thousand rows/s., 322.02 KB/s.)
 ```
 
-### Example of using named collections with an S3 table
+:::tip
+The first argument to the `s3()` function above is the name of the collection, `s3_mydata`. Without named collections, the access key ID, secret, format, and URL would all be passed in every call to the `s3()` function.
+:::
+
+#### S3 table
 
 ```sql
 CREATE TABLE s3_engine_table (number Int64)
@@ -78,7 +159,22 @@ SELECT * FROM s3_engine_table LIMIT 3;
 
 For the description of parameters, see [mysql](../sql-reference/table-functions/mysql.md).
-Example of configuration: +### DDL example + +```sql +CREATE NAMED COLLECTION mymysql AS +user = 'myuser', +password = 'mypass', +host = '127.0.0.1', +port = 3306, +database = 'test', +connection_pool_size = 8, +on_duplicate_clause = 1, +replace_query = 1 +``` + +### XML example + ```xml @@ -96,7 +192,11 @@ Example of configuration: ``` -### Example of using named collections with the mysql function +### mysql() function, MySQL table, MySQL database, and Dictionary named collection examples + +The four following examples use the same named collection `mymysql`: + +#### mysql() function ```sql SELECT count() FROM mysql(mymysql, table = 'test'); @@ -105,8 +205,11 @@ SELECT count() FROM mysql(mymysql, table = 'test'); │ 3 │ └─────────┘ ``` +:::note +The named collection does not specify the `table` parameter, so it is specified in the function call as `table = 'test'`. +::: -### Example of using named collections with an MySQL table +#### MySQL table ```sql CREATE TABLE mytable(A Int64) ENGINE = MySQL(mymysql, table = 'test', connection_pool_size=3, replace_query=0); @@ -117,7 +220,11 @@ SELECT count() FROM mytable; └─────────┘ ``` -### Example of using named collections with database with engine MySQL +:::note +The DDL overrides the named collection setting for connection_pool_size. +::: + +#### MySQL database ```sql CREATE DATABASE mydatabase ENGINE = MySQL(mymysql); @@ -130,7 +237,7 @@ SHOW TABLES FROM mydatabase; └────────┘ ``` -### Example of using named collections with a dictionary with source MySQL +#### MySQL Dictionary ```sql CREATE DICTIONARY dict (A Int64, B String) @@ -150,6 +257,17 @@ SELECT dictGet('dict', 'B', 2); The description of parameters see [postgresql](../sql-reference/table-functions/postgresql.md). +```sql +CREATE NAMED COLLECTION mypg AS +user = 'pguser', +password = 'jw8s0F4', +host = '127.0.0.1', +port = 5432, +database = 'test', +schema = 'test_schema', +connection_pool_size = 8 +``` + Example of configuration: ```xml @@ -229,12 +347,22 @@ SELECT dictGet('dict', 'b', 2); └─────────────────────────┘ ``` -## Named collections for accessing remote ClickHouse database +## Named collections for accessing a remote ClickHouse database The description of parameters see [remote](../sql-reference/table-functions/remote.md/#parameters). Example of configuration: +```sql +CREATE NAMED COLLECTION remote1 AS +host = 'remote_host', +port = 9000, +database = 'system', +user = 'foo', +password = 'secret', +secure = 1 +``` + ```xml @@ -244,10 +372,12 @@ Example of configuration: system foo secret + 1 ``` +`secure` is not needed for connection because of `remoteSecure`, but it can be used for dictionaries. ### Example of using named collections with the `remote`/`remoteSecure` functions @@ -284,3 +414,4 @@ SELECT dictGet('dict', 'b', 1); │ a │ └─────────────────────────┘ ``` + diff --git a/docs/en/operations/opentelemetry.md b/docs/en/operations/opentelemetry.md index 1de5a09db0c..70f64d08ba3 100644 --- a/docs/en/operations/opentelemetry.md +++ b/docs/en/operations/opentelemetry.md @@ -7,7 +7,7 @@ title: "[experimental] Tracing ClickHouse with OpenTelemetry" [OpenTelemetry](https://opentelemetry.io/) is an open standard for collecting traces and metrics from the distributed application. ClickHouse has some support for OpenTelemetry. -:::warning +:::note This is an experimental feature that will change in backwards-incompatible ways in future releases. 
:::
@@ -61,3 +61,7 @@ FROM system.opentelemetry_span_log
 ```
 
 In case of any errors, the part of the log data for which the error has occurred will be silently lost. Check the server log for error messages if the data does not arrive.
+
+## Related Content
+
+- Blog: [Building an Observability Solution with ClickHouse - Part 2 - Traces](https://clickhouse.com/blog/storing-traces-and-spans-open-telemetry-in-clickhouse)
diff --git a/docs/en/operations/optimizing-performance/index.md b/docs/en/operations/optimizing-performance/index.md
index e25f3b4adb7..83e9430ed27 100644
--- a/docs/en/operations/optimizing-performance/index.md
+++ b/docs/en/operations/optimizing-performance/index.md
@@ -6,4 +6,4 @@ sidebar_position: 52
 
 # Optimizing Performance
 
-- [Sampling query profiler](../../operations/optimizing-performance/sampling-query-profiler.md)
+- [Sampling query profiler](../../operations/optimizing-performance/sampling-query-profiler.md)
diff --git a/docs/en/operations/optimizing-performance/sampling-query-profiler.md b/docs/en/operations/optimizing-performance/sampling-query-profiler.md
index 72eb655101f..f5d0e5d6aed 100644
--- a/docs/en/operations/optimizing-performance/sampling-query-profiler.md
+++ b/docs/en/operations/optimizing-performance/sampling-query-profiler.md
@@ -7,17 +7,29 @@ import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.m
 
 # Sampling Query Profiler
 
-<SelfManaged />
-
 ClickHouse runs a sampling profiler that allows analyzing query execution. Using the profiler you can find source code routines that are used most frequently during query execution. You can trace CPU time and wall-clock time spent, including idle time.
 
-To use profiler:
+The query profiler is automatically enabled in ClickHouse Cloud, and you can run a sample query as follows:
 
-- Setup the [trace_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) section of the server configuration.
+``` sql
+SELECT
+    count(),
+    arrayStringConcat(arrayMap(x -> concat(demangle(addressToSymbol(x)), '\n    ', addressToLine(x)), trace), '\n') AS sym
+FROM system.trace_log
+WHERE (query_id = 'ebca3574-ad0a-400a-9cbc-dca382f5998c') AND (event_date = today())
+GROUP BY trace
+ORDER BY count() DESC
+LIMIT 10
+SETTINGS allow_introspection_functions = 1
+```
+
+In self-managed deployments, to use the query profiler:
+
+- Set up the [trace_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) section of the server configuration.
 
 This section configures the [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table containing the results of the profiler's operation. It is configured by default. Remember that data in this table is valid only for a running server. After the server restart, ClickHouse does not clean up the table and all the stored virtual memory addresses may become invalid.
 
-- Setup the [query_profiler_cpu_time_period_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns) or [query_profiler_real_time_period_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns) settings. Both settings can be used simultaneously.
+- Set up the [query_profiler_cpu_time_period_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns) or [query_profiler_real_time_period_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns) settings. Both settings can be used simultaneously.
 
 These settings allow you to configure profiler timers.
As these are the session settings, you can get different sampling frequency for the whole server, individual users or user profiles, for your interactive session, and for each individual query. @@ -25,13 +37,13 @@ The default sampling frequency is one sample per second and both CPU and real ti To analyze the `trace_log` system table: -- Install the `clickhouse-common-static-dbg` package. See [Install from DEB Packages](../../getting-started/install.md#install-from-deb-packages). +- Install the `clickhouse-common-static-dbg` package. See [Install from DEB Packages](../../getting-started/install.md#install-from-deb-packages). -- Allow introspection functions by the [allow_introspection_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting. +- Allow introspection functions by the [allow_introspection_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting. For security reasons, introspection functions are disabled by default. -- Use the `addressToLine`, `addressToLineWithInlines`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md) to get function names and their positions in ClickHouse code. To get a profile for some query, you need to aggregate data from the `trace_log` table. You can aggregate data by individual functions or by the whole stack traces. +- Use the `addressToLine`, `addressToLineWithInlines`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md) to get function names and their positions in ClickHouse code. To get a profile for some query, you need to aggregate data from the `trace_log` table. You can aggregate data by individual functions or by the whole stack traces. If you need to visualize `trace_log` info, try [flamegraph](../../interfaces/third-party/gui.md#clickhouse-flamegraph-clickhouse-flamegraph) and [speedscope](https://github.com/laplab/clickhouse-speedscope). @@ -39,14 +51,14 @@ If you need to visualize `trace_log` info, try [flamegraph](../../interfaces/thi In this example we: -- Filtering `trace_log` data by a query identifier and the current date. +- Filtering `trace_log` data by a query identifier and the current date. -- Aggregating by stack trace. +- Aggregating by stack trace. -- Using introspection functions, we will get a report of: +- Using introspection functions, we will get a report of: - - Names of symbols and corresponding source code functions. - - Source code locations of these functions. + - Names of symbols and corresponding source code functions. + - Source code locations of these functions. diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md index 1a486de7904..bfa51650cd8 100644 --- a/docs/en/operations/query-cache.md +++ b/docs/en/operations/query-cache.md @@ -1,10 +1,10 @@ --- slug: /en/operations/query-cache sidebar_position: 65 -sidebar_label: Query Cache [experimental] +sidebar_label: Query Cache --- -# Query Cache [experimental] +# Query Cache The query cache allows to compute `SELECT` queries just once and to serve further executions of the same query directly from the cache. Depending on the type of the queries, this can dramatically reduce latency and resource consumption of the ClickHouse server. @@ -29,21 +29,10 @@ Transactionally inconsistent caching is traditionally provided by client tools o the same caching logic and configuration is often duplicated. 
 With ClickHouse's query cache, the caching logic moves to the server side. This reduces maintenance effort and avoids redundancy.
 
-:::warning
-The query cache is an experimental feature that should not be used in production. There are known cases (e.g. in distributed query
-processing) where wrong results are returned.
-:::
-
 ## Configuration Settings and Usage
 
-As long as the result cache is experimental it must be activated using the following configuration setting:
-
-```sql
-SET allow_experimental_query_cache = true;
-```
-
-Afterwards, setting [use_query_cache](settings/settings.md#use-query-cache) can be used to control whether a specific query or all queries
-of the current session should utilize the query cache. For example, the first execution of query
+Setting [use_query_cache](settings/settings.md#use-query-cache) can be used to control whether a specific query or all queries of the
+current session should utilize the query cache. For example, the first execution of query
 
 ```sql
 SELECT some_expensive_calculation(column_1, column_2)
@@ -85,8 +74,35 @@ make the matching more natural, all query-level settings related to the query ca
 
 If the query was aborted due to an exception or user cancellation, no entry is written into the query cache.
 
-The size of the query cache, the maximum number of cache entries and the maximum size of cache entries (in bytes and in records) can
-be configured using different [server configuration options](server-configuration-parameters/settings.md#server_configuration_parameters_query-cache).
+The size of the query cache in bytes, the maximum number of cache entries and the maximum size of individual cache entries (in bytes and in
+records) can be configured using different [server configuration options](server-configuration-parameters/settings.md#server_configuration_parameters_query-cache).
+
+It is also possible to limit the cache usage of individual users using [settings profiles](settings/settings-profiles.md) and [settings
+constraints](settings/constraints-on-settings.md). More specifically, you can restrict the maximum amount of memory (in bytes) a user may
+allocate in the query cache and the maximum number of stored query results. For that, first provide configurations
+[query_cache_max_size_in_bytes](settings/settings.md#query-cache-max-size-in-bytes) and
+[query_cache_max_entries](settings/settings.md#query-cache-size-max-items) in a user profile in `users.xml`, then make both settings
+readonly:
+
+``` xml
+<profiles>
+    <default>
+        <!-- The maximum cache size in bytes for user/profile 'default' -->
+        <query_cache_max_size_in_bytes>10000</query_cache_max_size_in_bytes>
+        <!-- The maximum number of query results stored in the cache for user/profile 'default' -->
+        <query_cache_max_entries>100</query_cache_max_entries>
+        <!-- Make both settings read-only so the user cannot change them -->
+        <constraints>
+            <query_cache_max_size_in_bytes>
+                <readonly/>
+            </query_cache_max_size_in_bytes>
+            <query_cache_max_entries>
+                <readonly/>
+            </query_cache_max_entries>
+        </constraints>
+    </default>
+</profiles>
+```
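+
+With such a profile in place, an attempt by the affected user to change either setting is rejected (a sketch; the exact error code depends on the ClickHouse version):
+
+```sql
+-- Fails with a constraint violation because the profile marks the setting read-only
+SET query_cache_max_size_in_bytes = 20000;
+```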
 
 To define how long a query must run at least such that its result can be cached, you can use setting
 [query_cache_min_query_duration](settings/settings.md#query-cache-min-query-duration). For example, the result of query
@@ -103,6 +119,20 @@ cached - for that use setting [query_cache_min_query_runs](settings/settings.md#
 
 Entries in the query cache become stale after a certain time period (time-to-live). By default, this period is 60 seconds but a different
 value can be specified at session, profile or query level using setting [query_cache_ttl](settings/settings.md#query-cache-ttl).
 
+Entries in the query cache are compressed by default. This reduces the overall memory consumption at the cost of slower writes into / reads
+from the query cache. To disable compression, use setting [query_cache_compress_entries](settings/settings.md#query-cache-compress-entries).
+
+ClickHouse reads table data in blocks of [max_block_size](settings/settings.md#settings-max_block_size) rows. Due to filtering, aggregation,
+etc., result blocks are typically much smaller than 'max_block_size' but there are also cases where they are much bigger. Setting
+[query_cache_squash_partial_results](settings/settings.md#query-cache-squash-partial-results) (enabled by default) controls if result blocks
+are squashed (if they are tiny) or split (if they are large) into blocks of 'max_block_size' size before insertion into the query result
+cache. This reduces performance of writes into the query cache but improves compression rate of cache entries and provides more natural
+block granularity when query results are later served from the query cache.
+
+As a result, the query cache stores multiple (partial) result blocks for each query. While this behavior is a good default, it can be
+suppressed using setting [query_cache_squash_partial_results](settings/settings.md#query-cache-squash-partial-results).
+
 Also, results of queries with non-deterministic functions such as `rand()` and `now()` are not cached. This can be overruled using setting
 [query_cache_store_results_of_queries_with_nondeterministic_functions](settings/settings.md#query-cache-store-results-of-queries-with-nondeterministic-functions).
@@ -110,3 +140,7 @@ Finally, entries in the query cache are not shared between users due to security
 row policy on a table by running the same query as another user B for whom no such policy exists. However, if necessary, cache entries can
 be marked accessible by other users (i.e. shared) by supplying setting [query_cache_share_between_users](settings/settings.md#query-cache-share-between-users).
+
+## Related Content
+
+- Blog: [Introducing the ClickHouse Query Cache](https://clickhouse.com/blog/introduction-to-the-clickhouse-query-cache-and-design)
diff --git a/docs/en/operations/quotas.md b/docs/en/operations/quotas.md
index f1f3ca78802..2f0cdec0983 100644
--- a/docs/en/operations/quotas.md
+++ b/docs/en/operations/quotas.md
@@ -12,8 +12,8 @@ The system also has a feature for limiting the complexity of a single query. See
 
 In contrast to query complexity restrictions, quotas:
 
-- Place restrictions on a set of queries that can be run over a period of time, instead of limiting a single query.
-- Account for resources spent on all remote servers for distributed query processing.
+- Place restrictions on a set of queries that can be run over a period of time, instead of limiting a single query.
+- Account for resources spent on all remote servers for distributed query processing.
 
 Let’s look at the section of the ‘users.xml’ file that defines quotas.
diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index 17d03dfa4ec..7ebf387f2ed 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -2,6 +2,7 @@ slug: /en/operations/server-configuration-parameters/settings
 sidebar_position: 57
 sidebar_label: Server Settings
+description: This section contains descriptions of server settings that cannot be changed at the session or query level.
 ---
 
 # Server Settings
@@ -24,7 +25,7 @@ Default value: 3600.
 
 Data compression settings for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md)-engine tables.
 
-:::warning
+:::note
 Don’t use it if you have just started using ClickHouse.
 :::
@@ -44,17 +45,17 @@ Configuration template:
 
 `<case>` fields:
 
-- `min_part_size` – The minimum size of a data part.
-- `min_part_size_ratio` – The ratio of the data part size to the table size.
-- `method` – Compression method. Acceptable values: `lz4`, `lz4hc`, `zstd`,`deflate_qpl`.
-- `level` – Compression level. See [Codecs](../../sql-reference/statements/create/table.md#create-query-general-purpose-codecs).
+- `min_part_size` – The minimum size of a data part.
+- `min_part_size_ratio` – The ratio of the data part size to the table size.
+- `method` – Compression method. Acceptable values: `lz4`, `lz4hc`, `zstd`,`deflate_qpl`.
+- `level` – Compression level. See [Codecs](../../sql-reference/statements/create/table.md#create-query-general-purpose-codecs).
 
 You can configure multiple `<case>` sections.
 
 Actions when conditions are met:
 
-- If a data part matches a condition set, ClickHouse uses the specified compression method.
-- If a data part matches multiple condition sets, ClickHouse uses the first matched condition set.
+- If a data part matches a condition set, ClickHouse uses the specified compression method.
+- If a data part matches multiple condition sets, ClickHouse uses the first matched condition set.
 
 If no conditions are met for a data part, ClickHouse uses the `lz4` compression.
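+
+One way to verify which method was actually chosen is to inspect the parts on disk (a sketch; the `default_compression_codec` column is only available in recent versions):
+
+```sql
+-- Codec applied to the most recently written active parts
+SELECT table, name, default_compression_codec
+FROM system.parts
+WHERE active
+ORDER BY modification_time DESC
+LIMIT 5;
+```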
@@ -164,7 +165,7 @@ List of prefixes for [custom settings](../../operations/settings/index.md#custom
 
 **See Also**
 
-- [Custom settings](../../operations/settings/index.md#custom_settings)
+- [Custom settings](../../operations/settings/index.md#custom_settings)
 
 ## core_dump {#server_configuration_parameters-core_dump}
 
 Configures soft limit for core dump file size.
 
 Possible values:
 
-- Positive integer.
+- Positive integer.
 
 Default value: `1073741824` (1 GB).
@@ -207,7 +208,7 @@ Default value: `3600` (1 hour).
 
 ## database_catalog_unused_dir_rm_timeout_sec {#database_catalog_unused_dir_rm_timeout_sec}
 
 Parameter of a task that cleans up garbage from `store/` directory.
-If some subdirectory is not used by clickhouse-server and it was previousely "hidden"
+If some subdirectory is not used by clickhouse-server and it was previously "hidden"
 (see [database_catalog_unused_dir_hide_timeout_sec](../../operations/server-configuration-parameters/settings.md#database_catalog_unused_dir_hide_timeout_sec))
 and this directory was not modified for last `database_catalog_unused_dir_rm_timeout_sec` seconds, the task will remove this directory.
@@ -256,6 +257,7 @@ The path to the table in ZooKeeper.
 
 ``` xml
 <default_replica_path>/clickhouse/tables/{uuid}/{shard}</default_replica_path>
 ```
+
 ## default_replica_name {#default_replica_name}
 
 The replica name in ZooKeeper.
@@ -272,10 +274,10 @@ The path to the config file for dictionaries.
 
 Path:
 
-- Specify the absolute path or the path relative to the server config file.
-- The path can contain wildcards \* and ?.
+- Specify the absolute path or the path relative to the server config file.
+- The path can contain wildcards \* and ?.
 
-See also “[Dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md)”.
+See also “[Dictionaries](../../sql-reference/dictionaries/index.md)”.
 
 **Example**
@@ -289,8 +291,8 @@ The path to the config file for executable user defined functions.
 
 Path:
 
-- Specify the absolute path or the path relative to the server config file.
-- The path can contain wildcards \* and ?.
+- Specify the absolute path or the path relative to the server config file.
+- The path can contain wildcards \* and ?.
 
 See also “[Executable User Defined Functions](../../sql-reference/functions/index.md#executable-user-defined-functions)”.
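+
+Once configured, an executable user defined function is called from SQL like any built-in function (a sketch; `test_function_python` is a hypothetical name taken from such a configuration file):
+
+```sql
+-- Invokes the externally defined executable with a single argument
+SELECT test_function_python(toUInt64(2));
+```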
@@ -333,15 +335,15 @@ Sending data to [Graphite](https://github.com/graphite-project). Settings: -- host – The Graphite server. -- port – The port on the Graphite server. -- interval – The interval for sending, in seconds. -- timeout – The timeout for sending data, in seconds. -- root_path – Prefix for keys. -- metrics – Sending data from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table. -- events – Sending deltas data accumulated for the time period from the [system.events](../../operations/system-tables/events.md#system_tables-events) table. -- events_cumulative – Sending cumulative data from the [system.events](../../operations/system-tables/events.md#system_tables-events) table. -- asynchronous_metrics – Sending data from the [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table. +- host – The Graphite server. +- port – The port on the Graphite server. +- interval – The interval for sending, in seconds. +- timeout – The timeout for sending data, in seconds. +- root_path – Prefix for keys. +- metrics – Sending data from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table. +- events – Sending deltas data accumulated for the time period from the [system.events](../../operations/system-tables/events.md#system_tables-events) table. +- events_cumulative – Sending cumulative data from the [system.events](../../operations/system-tables/events.md#system_tables-events) table. +- asynchronous_metrics – Sending data from the [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table. You can configure multiple `` clauses. For instance, you can use this for sending different data at different intervals. @@ -417,6 +419,7 @@ Opens `https://tabix.io/` when accessing `http://localhost: http_port`.
 <http_server_default_response>
     <![CDATA[<html ng-app="SMI2"><body><div ui-view="" class="content-ui"></div><script src="https://loader.tabix.io/master.js"></script></body></html>]]>
 </http_server_default_response>
 ```
+
 ## hsts_max_age {#hsts-max-age}
 
 Expiry time for HSTS in seconds. The default value of 0 means ClickHouse disables HSTS. If you set a positive number, HSTS will be enabled and the max-age will be the number you set.
@@ -513,10 +516,10 @@ These credentials are common for replication via `HTTP` and `HTTPS`.
 
 The section contains the following parameters:
 
-- `user` — Username.
-- `password` — Password.
-- `allow_empty` — If `true`, then other replicas are allowed to connect without authentication even if credentials are set. If `false`, then connections without authentication are refused. Default value: `false`.
-- `old` — Contains old `user` and `password` used during credential rotation. Several `old` sections can be specified.
+- `user` — Username.
+- `password` — Password.
+- `allow_empty` — If `true`, then other replicas are allowed to connect without authentication even if credentials are set. If `false`, then connections without authentication are refused. Default value: `false`.
+- `old` — Contains old `user` and `password` used during credential rotation. Several `old` sections can be specified.
 
 **Credentials Rotation**
@@ -581,12 +584,12 @@ Backlog (queue size of pending connections) of the listen socket.
 
 Default value: `4096` (as in linux [5.4+](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=19f92a030ca6d772ab44b22ee6a01378a8cb32d4)).
 
 Usually this value does not need to be changed, since:
-- default value is large enough,
-- and for accepting client's connections server has separate thread.
+- the default value is large enough,
+- and the server has a separate thread for accepting client connections.
 
 So even if you have `TcpExtListenOverflows` (from `nstat`) non zero and this counter grows for ClickHouse server it does not mean that this value needs to be increased, since:
-- usually if 4096 is not enough it shows some internal ClickHouse scaling issue, so it is better to report an issue.
-- and it does not mean that the server can handle more connections later (and even if it could, by that moment clients may be gone or disconnected).
+- usually if 4096 is not enough it indicates some internal ClickHouse scaling issue, so it is better to report an issue.
+- and it does not mean that the server can handle more connections later (and even if it could, by that moment clients may be gone or disconnected).
 
 Examples:
@@ -600,13 +603,13 @@ Logging settings.
 
 Keys:
 
-- `level` – Logging level. Acceptable values: `trace`, `debug`, `information`, `warning`, `error`.
-- `log` – The log file. Contains all the entries according to `level`.
-- `errorlog` – Error log file.
-- `size` – Size of the file. Applies to `log` and `errorlog`. Once the file reaches `size`, ClickHouse archives and renames it, and creates a new log file in its place.
-- `count` – The number of archived log files that ClickHouse stores.
+- `level` – Logging level. Acceptable values: `trace`, `debug`, `information`, `warning`, `error`.
+- `log` – The log file. Contains all the entries according to `level`.
+- `errorlog` – Error log file.
+- `size` – Size of the file. Applies to `log` and `errorlog`. Once the file reaches `size`, ClickHouse archives and renames it, and creates a new log file in its place.
+- `count` – The number of archived log files that ClickHouse stores.
+- `console` – Send `log` and `errorlog` to the console instead of file. To enable, set to `1` or `true`. +- `stream_compress` – Compress `log` and `errorlog` with `lz4` stream compression. To enable, set to `1` or `true`. **Example** @@ -646,12 +649,12 @@ Writing to the syslog is also supported. Config example: Keys for syslog: -- use_syslog — Required setting if you want to write to the syslog. -- address — The host\[:port\] of syslogd. If omitted, the local daemon is used. -- hostname — Optional. The name of the host that logs are sent from. -- facility — [The syslog facility keyword](https://en.wikipedia.org/wiki/Syslog#Facility) in uppercase letters with the “LOG_” prefix: (`LOG_USER`, `LOG_DAEMON`, `LOG_LOCAL3`, and so on). +- use_syslog — Required setting if you want to write to the syslog. +- address — The host\[:port\] of syslogd. If omitted, the local daemon is used. +- hostname — Optional. The name of the host that logs are sent from. +- facility — [The syslog facility keyword](https://en.wikipedia.org/wiki/Syslog#Facility) in uppercase letters with the “LOG_” prefix: (`LOG_USER`, `LOG_DAEMON`, `LOG_LOCAL3`, and so on). Default value: `LOG_USER` if `address` is specified, `LOG_DAEMON` otherwise. -- format – Message format. Possible values: `bsd` and `syslog.` +- format – Message format. Possible values: `bsd` and `syslog.` ## send_crash_reports {#server_configuration_parameters-send_crash_reports} @@ -662,13 +665,13 @@ The server will need access to the public Internet via IPv4 (at the time of writ Keys: -- `enabled` – Boolean flag to enable the feature, `false` by default. Set to `true` to allow sending crash reports. -- `endpoint` – You can override the Sentry endpoint URL for sending crash reports. It can be either a separate Sentry account or your self-hosted Sentry instance. Use the [Sentry DSN](https://docs.sentry.io/error-reporting/quickstart/?platform=native#configure-the-sdk) syntax. -- `anonymize` - Avoid attaching the server hostname to the crash report. -- `http_proxy` - Configure HTTP proxy for sending crash reports. -- `debug` - Sets the Sentry client into debug mode. -- `tmp_path` - Filesystem path for temporary crash report state. -- `environment` - An arbitrary name of an environment in which the ClickHouse server is running. It will be mentioned in each crash report. The default value is `test` or `prod` depending on the version of ClickHouse. +- `enabled` – Boolean flag to enable the feature, `false` by default. Set to `true` to allow sending crash reports. +- `endpoint` – You can override the Sentry endpoint URL for sending crash reports. It can be either a separate Sentry account or your self-hosted Sentry instance. Use the [Sentry DSN](https://docs.sentry.io/error-reporting/quickstart/?platform=native#configure-the-sdk) syntax. +- `anonymize` - Avoid attaching the server hostname to the crash report. +- `http_proxy` - Configure HTTP proxy for sending crash reports. +- `debug` - Sets the Sentry client into debug mode. +- `tmp_path` - Filesystem path for temporary crash report state. +- `environment` - An arbitrary name of an environment in which the ClickHouse server is running. It will be mentioned in each crash report. The default value is `test` or `prod` depending on the version of ClickHouse. **Recommended way to use** @@ -710,8 +713,8 @@ Limits total RAM usage by the ClickHouse server. Possible values: -- Positive integer. -- 0 — Auto. +- Positive integer. +- 0 — Auto. Default value: `0`. 
@@ -721,8 +724,8 @@ The default `max_server_memory_usage` value is calculated as `memory_amount * ma **See also** -- [max_memory_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage) -- [max_server_memory_usage_to_ram_ratio](#max_server_memory_usage_to_ram_ratio) +- [max_memory_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage) +- [max_server_memory_usage_to_ram_ratio](#max_server_memory_usage_to_ram_ratio) ## max_server_memory_usage_to_ram_ratio {#max_server_memory_usage_to_ram_ratio} @@ -730,8 +733,8 @@ Defines the fraction of total physical RAM amount, available to the ClickHouse s Possible values: -- Positive double. -- 0 — The ClickHouse server can use all available RAM. +- Positive double. +- 0 — The ClickHouse server can use all available RAM. Default value: `0.9`. @@ -747,30 +750,30 @@ On hosts with low RAM and swap, you possibly need setting `max_server_memory_usa **See Also** -- [max_server_memory_usage](#max_server_memory_usage) +- [max_server_memory_usage](#max_server_memory_usage) ## concurrent_threads_soft_limit_num {#concurrent_threads_soft_limit_num} The maximum number of query processing threads, excluding threads for retrieving data from remote servers, allowed to run all queries. This is not a hard limit. In case if the limit is reached the query will still get at least one thread to run. Query can upscale to desired number of threads during execution if more threads become available. Possible values: -- Positive integer. -- 0 — No limit. +- Positive integer. +- 0 — No limit. Default value: `0`. **See Also** -- [Concurrency Control](/docs/en/development/architecture.md#concurrency-control) +- [Concurrency Control](/docs/en/development/architecture.md#concurrency-control) ## concurrent_threads_soft_limit_ratio_to_cores {#concurrent_threads_soft_limit_ratio_to_cores} The maximum number of query processing threads as multiple of number of logical cores. -More details: [concurrent_threads_soft_limit_num](#concurrent-threads-soft-limit-num). +More details: [concurrent_threads_soft_limit_num](#concurrent_threads_soft_limit_num). Possible values: -- Positive integer. -- 0 — No limit. +- Positive integer. +- 0 — No limit. Default value: `0`. @@ -791,8 +794,8 @@ These settings can be modified at runtime and will take effect immediately. Quer Possible values: -- Positive integer. -- 0 — No limit. +- Positive integer. +- 0 — No limit. Default value: `100`. @@ -812,8 +815,8 @@ These settings can be modified at runtime and will take effect immediately. Quer Possible values: -- Positive integer. -- 0 — No limit. +- Positive integer. +- 0 — No limit. Default value: `0`. @@ -833,8 +836,8 @@ These settings can be modified at runtime and will take effect immediately. Quer Possible values: -- Positive integer. -- 0 — No limit. +- Positive integer. +- 0 — No limit. Default value: `0`. @@ -850,8 +853,8 @@ The maximum number of simultaneously processed queries related to MergeTree tabl Possible values: -- Positive integer. -- 0 — No limit. +- Positive integer. +- 0 — No limit. Default value: `0`. @@ -871,8 +874,8 @@ Modifying the setting for one query or user does not affect other queries. Possible values: -- Positive integer. -- 0 — No limit. +- Positive integer. +- 0 — No limit. Default value: `0`. @@ -884,7 +887,7 @@ Default value: `0`. 
 **See Also**
 
-- [max_concurrent_queries](#max-concurrent-queries)
+- [max_concurrent_queries](#max-concurrent-queries)
 
 ## max_connections {#max-connections}
@@ -914,9 +917,9 @@ We recommend using this option in macOS since the `getrlimit()` function returns
 
 Restriction on deleting tables.
 
-If the size of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table exceeds `max_table_size_to_drop` (in bytes), you can’t delete it using a DROP query.
+If the size of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table exceeds `max_table_size_to_drop` (in bytes), you can’t delete it using a [DROP](../../sql-reference/statements/drop.md) query or [TRUNCATE](../../sql-reference/statements/truncate.md) query.
 
-If you still need to delete the table without restarting the ClickHouse server, create the `<clickhouse-path>/flags/force_drop_table` file and run the DROP query.
+This setting does not require a restart of the ClickHouse server to apply. Another way to disable the restriction is to create the `<clickhouse-path>/flags/force_drop_table` file.
 
 Default value: 50 GB.
@@ -928,13 +931,35 @@ The value 0 means that you can delete all tables without any restrictions.
 <max_table_size_to_drop>0</max_table_size_to_drop>
 ```
 
+## max_partition_size_to_drop {#max-partition-size-to-drop}
+
+Restriction on dropping partitions.
+
+If the size of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table exceeds `max_partition_size_to_drop` (in bytes), you can’t drop a partition using a [DROP PARTITION](../../sql-reference/statements/alter/partition.md#drop-partitionpart) query.
+
+This setting does not require a restart of the ClickHouse server to apply. Another way to disable the restriction is to create the `<clickhouse-path>/flags/force_drop_table` file.
+
+Default value: 50 GB.
+
+The value 0 means that you can drop partitions without any restrictions.
+
+:::note
+This limitation does not restrict `DROP TABLE` or `TRUNCATE TABLE`; see [max_table_size_to_drop](#max-table-size-to-drop).
+:::
+
+**Example**
+
+``` xml
+<max_partition_size_to_drop>0</max_partition_size_to_drop>
+```
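+
+With the limits above in place, oversized drops are refused with an exception (a sketch; `big_table` is a hypothetical table exceeding the configured limits, and the exact error code varies by version):
+
+```sql
+-- Refused if the table is larger than max_table_size_to_drop
+DROP TABLE big_table;
+
+-- Refused if the partition is larger than max_partition_size_to_drop
+ALTER TABLE big_table DROP PARTITION '2023-01';
+```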
 
 ## max_thread_pool_size {#max-thread-pool-size}
 
 ClickHouse uses threads from the Global Thread pool to process queries. If there is no idle thread to process a query, then a new thread is created in the pool. `max_thread_pool_size` limits the maximum number of threads in the pool.
 
 Possible values:
 
-- Positive integer.
+- Positive integer.
 
 Default value: `10000`.
 
 **Example**
 
 ``` xml
 <max_thread_pool_size>12000</max_thread_pool_size>
 ```
 
 ## max_thread_pool_free_size {#max-thread-pool-free-size}
 
 If the number of **idle** threads in the Global Thread pool is greater than `max_thread_pool_free_size`, ClickHouse will release resources occupied by idling threads and decrease the pool size. Threads can be created again if necessary.
 
 Possible values:
 
-- Positive integer.
+- Positive integer.
 
 Default value: `1000`.
 
 **Example**
 
 ``` xml
 <max_thread_pool_free_size>1200</max_thread_pool_free_size>
 ```
 
 ## thread_pool_queue_size {#thread-pool-queue-size}
 
 The maximum number of jobs that can be scheduled on the Global Thread pool. Increasing queue size leads to larger memory usage. It is recommended to keep this value equal to `max_thread_pool_size`.
 
 Possible values:
 
-- Positive integer.
+- Positive integer.
+- 0 — No limit.
 
 Default value: `10000`.
 
 **Example**
 
 ``` xml
 <thread_pool_queue_size>12000</thread_pool_queue_size>
 ```
 
-## background_pool_size {#background_pool_size}
+## max_io_thread_pool_size {#max-io-thread-pool-size}
 
-Sets the number of threads performing background merges and mutations for tables with MergeTree engines. This setting is also could be applied at server startup from the `default` profile configuration for backward compatibility at the ClickHouse server start. You can only increase the number of threads at runtime. To lower the number of threads you have to restart the server. By adjusting this setting, you manage CPU and disk load. Smaller pool size utilizes less CPU and disk resources, but background processes advance slower which might eventually impact query performance.
-
-Before changing it, please also take a look at related MergeTree settings, such as `number_of_free_entries_in_pool_to_lower_max_size_of_merge` and `number_of_free_entries_in_pool_to_execute_mutation`.
+ClickHouse uses threads from the IO Thread pool to do some IO operations (e.g. to interact with S3). `max_io_thread_pool_size` limits the maximum number of threads in the pool.
 
 Possible values:
 
-- Any positive integer.
+- Positive integer.
+
+Default value: `100`.
+
+## max_io_thread_pool_free_size {#max-io-thread-pool-free-size}
+
+If the number of **idle** threads in the IO Thread pool exceeds `max_io_thread_pool_free_size`, ClickHouse will release resources occupied by idling threads and decrease the pool size. Threads can be created again if necessary.
+
+Possible values:
+
+- Positive integer.
+
+Default value: `0`.
+
+## io_thread_pool_queue_size {#io-thread-pool-queue-size}
+
+The maximum number of jobs that can be scheduled on the IO Thread pool.
+
+Possible values:
+
+- Positive integer.
+- 0 — No limit.
+
+Default value: `10000`.
+
+## max_backups_io_thread_pool_size {#max-backups-io-thread-pool-size}
+
+ClickHouse uses threads from the Backups IO Thread pool to do S3 backup IO operations. `max_backups_io_thread_pool_size` limits the maximum number of threads in the pool.
+
+Possible values:
+
+- Positive integer.
+
+Default value: `1000`.
+
+## max_backups_io_thread_pool_free_size {#max-backups-io-thread-pool-free-size}
+
+If the number of **idle** threads in the Backups IO Thread pool exceeds `max_backups_io_thread_pool_free_size`, ClickHouse will release resources occupied by idling threads and decrease the pool size. Threads can be created again if necessary.
+
+Possible values:
+
+- Positive integer.
+- Zero.
+
+Default value: `0`.
+
+## backups_io_thread_pool_queue_size {#backups-io-thread-pool-queue-size}
+
+The maximum number of jobs that can be scheduled on the Backups IO Thread pool. It is recommended to keep this queue unlimited due to the current S3 backup logic.
+
+Possible values:
+
+- Positive integer.
+- 0 — No limit.
+
+Default value: `0`.
+
+## background_pool_size {#background_pool_size}
+
+Sets the number of threads performing background merges and mutations for tables with MergeTree engines. This setting can also be applied at server startup from the `default` profile configuration for backward compatibility. You can only increase the number of threads at runtime. To lower the number of threads you have to restart the server. By adjusting this setting, you manage CPU and disk load. Smaller pool size utilizes less CPU and disk resources, but background processes advance slower which might eventually impact query performance.
+
+Before changing it, please also take a look at related MergeTree settings, such as [number_of_free_entries_in_pool_to_lower_max_size_of_merge](../../operations/settings/merge-tree-settings.md#number-of-free-entries-in-pool-to-lower-max-size-of-merge) and [number_of_free_entries_in_pool_to_execute_mutation](../../operations/settings/merge-tree-settings.md#number-of-free-entries-in-pool-to-execute-mutation).
+
+Possible values:
+
+- Any positive integer.
 
 Default value: 16.
@@ -996,13 +1085,13 @@ Default value: 16.
 
 ## background_merges_mutations_concurrency_ratio {#background_merges_mutations_concurrency_ratio}
 
-Sets a ratio between the number of threads and the number of background merges and mutations that can be executed concurrently. For example if the ratio equals to 2 and
-`background_pool_size` is set to 16 then ClickHouse can execute 32 background merges concurrently. This is possible, because background operation could be suspended and postponed. This is needed to give small merges more execution priority. You can only increase this ratio at runtime. To lower it you have to restart the server.
+Sets a ratio between the number of threads and the number of background merges and mutations that can be executed concurrently. For example, if the ratio equals 2 and
+`background_pool_size` is set to 16 then ClickHouse can execute 32 background merges concurrently. This is possible, because background operations could be suspended and postponed. This is needed to give small merges more execution priority. You can only increase this ratio at runtime. To lower it you have to restart the server.
+As with the `background_pool_size` setting, `background_merges_mutations_concurrency_ratio` could be applied from the `default` profile for backward compatibility.
 
 Possible values:
 
-- Any positive integer.
+- Any positive integer.
 
 Default value: 2.
 
 **Example**
 
 ```xml
 <background_merges_mutations_concurrency_ratio>3</background_merges_mutations_concurrency_ratio>
 ```
 
+## merges_mutations_memory_usage_soft_limit {#merges_mutations_memory_usage_soft_limit}
+
+Sets the limit on how much RAM is allowed to be used for performing merge and mutation operations.
+Zero means unlimited.
+If ClickHouse reaches this limit, it won't schedule any new background merge or mutation operations but will continue to execute already scheduled tasks.
+
+Possible values:
+
+- Any positive integer.
+
+**Example**
+
+```xml
+<merges_mutations_memory_usage_soft_limit>0</merges_mutations_memory_usage_soft_limit>
+```
+
+## merges_mutations_memory_usage_to_ram_ratio {#merges_mutations_memory_usage_to_ram_ratio}
+
+The default `merges_mutations_memory_usage_soft_limit` value is calculated as `memory_amount * merges_mutations_memory_usage_to_ram_ratio`.
+
+Default value: `0.5`.
+
+**See also**
+
+- [max_memory_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage)
+- [merges_mutations_memory_usage_soft_limit](#merges_mutations_memory_usage_soft_limit)
+
 ## background_merges_mutations_scheduling_policy {#background_merges_mutations_scheduling_policy}
 
 Algorithm used to select next merge or mutation to be executed by background thread pool. Policy may be changed at runtime without server restart.
 Could be applied from the `default` profile for backward compatibility.
 
 Possible values:
 
-- "round_robin" — Every concurrent merge and mutation is executed in round-robin order to ensure starvation-free operation. Smaller merges are completed faster than bigger ones just because they have fewer blocks to merge.
-- "shortest_task_first" — Always execute smaller merge or mutation. Merges and mutations are assigned priorities based on their resulting size. Merges with smaller sizes are strictly preferred over bigger ones. This policy ensures the fastest possible merge of small parts but can lead to indefinite starvation of big merges in partitions heavily overloaded by INSERTs.
+- "round_robin" — Every concurrent merge and mutation is executed in round-robin order to ensure starvation-free operation. Smaller merges are completed faster than bigger ones just because they have fewer blocks to merge.
+- "shortest_task_first" — Always execute smaller merge or mutation. Merges and mutations are assigned priorities based on their resulting size. Merges with smaller sizes are strictly preferred over bigger ones.
This policy ensures the fastest possible merge of small parts but can lead to indefinite starvation of big merges in partitions heavily overloaded by INSERTs. Default value: "round_robin". @@ -1036,7 +1152,7 @@ Sets the number of threads performing background moves for tables with MergeTree Possible values: -- Any positive integer. +- Any positive integer. Default value: 8. @@ -1048,11 +1164,11 @@ Default value: 8. ## background_fetches_pool_size {#background_fetches_pool_size} -Sets the number of threads performing background fetches for tables with ReplicatedMergeTree engines. Could be increased at runtime and could be applied at server startup from the `default` profile for backward compatibility. +Sets the number of threads performing background fetches for tables with ReplicatedMergeTree engines. Could be increased at runtime. Possible values: -- Any positive integer. +- Any positive integer. Default value: 8. @@ -1064,11 +1180,11 @@ Default value: 8. ## background_common_pool_size {#background_common_pool_size} -Sets the number of threads performing background non-specialized operations like cleaning the filesystem etc. for tables with MergeTree engines. Could be increased at runtime and could be applied at server startup from the `default` profile for backward compatibility. +Sets the number of threads performing background non-specialized operations like cleaning the filesystem etc. for tables with MergeTree engines. Could be increased at runtime. Possible values: -- Any positive integer. +- Any positive integer. Default value: 8. @@ -1078,6 +1194,25 @@ Default value: 8. 36 ``` +## background_buffer_flush_schedule_pool_size {#background_buffer_flush_schedule_pool_size} + +Sets the number of threads performing background flush in [Buffer](../../engines/table-engines/special/buffer.md)-engine tables. + +Possible values: + +- Any positive integer. + +Default value: 16. + +## background_schedule_pool_size {#background_schedule_pool_size} + +Sets the number of threads performing background tasks for [replicated](../../engines/table-engines/mergetree-family/replication.md) tables, [Kafka](../../engines/table-engines/integrations/kafka.md) streaming, [DNS cache updates](../../operations/server-configuration-parameters/settings.md/#server-settings-dns-cache-update-period). + +Possible values: + +- Any positive integer. + +Default value: 128. ## merge_tree {#server_configuration_parameters-merge_tree} @@ -1147,26 +1282,26 @@ Support for SSL is provided by the `libpoco` library. The available configuratio Keys for server/client settings: -- privateKeyFile – The path to the file with the secret key of the PEM certificate. The file may contain a key and certificate at the same time. -- certificateFile – The path to the client/server certificate file in PEM format. You can omit it if `privateKeyFile` contains the certificate. -- caConfig (default: none) – The path to the file or directory that contains trusted CA certificates. If this points to a file, it must be in PEM format and can contain several CA certificates. If this points to a directory, it must contain one .pem file per CA certificate. The filenames are looked up by the CA subject name hash value. Details can be found in the man page of [SSL_CTX_load_verify_locations](https://www.openssl.org/docs/man3.0/man3/SSL_CTX_load_verify_locations.html). -- verificationMode (default: relaxed) – The method for checking the node’s certificates. 
Details are in the description of the [Context](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h) class. Possible values: `none`, `relaxed`, `strict`, `once`. -- verificationDepth (default: 9) – The maximum length of the verification chain. Verification will fail if the certificate chain length exceeds the set value. -- loadDefaultCAFile (default: true) – Wether built-in CA certificates for OpenSSL will be used. ClickHouse assumes that builtin CA certificates are in the file `/etc/ssl/cert.pem` (resp. the directory `/etc/ssl/certs`) or in file (resp. directory) specified by the environment variable `SSL_CERT_FILE` (resp. `SSL_CERT_DIR`). -- cipherList (default: `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`) - Supported OpenSSL encryptions. -- cacheSessions (default: false) – Enables or disables caching sessions. Must be used in combination with `sessionIdContext`. Acceptable values: `true`, `false`. -- sessionIdContext (default: `${application.name}`) – A unique set of random characters that the server appends to each generated identifier. The length of the string must not exceed `SSL_MAX_SSL_SESSION_ID_LENGTH`. This parameter is always recommended since it helps avoid problems both if the server caches the session and if the client requested caching. Default value: `${application.name}`. -- sessionCacheSize (default: [1024\*20](https://github.com/ClickHouse/boringssl/blob/master/include/openssl/ssl.h#L1978)) – The maximum number of sessions that the server caches. A value of 0 means unlimited sessions. -- sessionTimeout (default: [2h](https://github.com/ClickHouse/boringssl/blob/master/include/openssl/ssl.h#L1926)) – Time for caching the session on the server. -- extendedVerification (default: false) – If enabled, verify that the certificate CN or SAN matches the peer hostname. -- requireTLSv1 (default: false) – Require a TLSv1 connection. Acceptable values: `true`, `false`. -- requireTLSv1_1 (default: false) – Require a TLSv1.1 connection. Acceptable values: `true`, `false`. -- requireTLSv1_2 (default: false) – Require a TLSv1.2 connection. Acceptable values: `true`, `false`. -- fips (default: false) – Activates OpenSSL FIPS mode. Supported if the library’s OpenSSL version supports FIPS. -- privateKeyPassphraseHandler (default: `KeyConsoleHandler`)– Class (PrivateKeyPassphraseHandler subclass) that requests the passphrase for accessing the private key. For example: ``, `KeyFileHandler`, `test`, ``. -- invalidCertificateHandler (default: `ConsoleCertificateHandler`) – Class (a subclass of CertificateHandler) for verifying invalid certificates. For example: ` ConsoleCertificateHandler ` . -- disableProtocols (default: "") – Protocols that are not allowed to use. -- preferServerCiphers (default: false) – Preferred server ciphers on the client. +- privateKeyFile – The path to the file with the secret key of the PEM certificate. The file may contain a key and certificate at the same time. +- certificateFile – The path to the client/server certificate file in PEM format. You can omit it if `privateKeyFile` contains the certificate. +- caConfig (default: none) – The path to the file or directory that contains trusted CA certificates. If this points to a file, it must be in PEM format and can contain several CA certificates. If this points to a directory, it must contain one .pem file per CA certificate. The filenames are looked up by the CA subject name hash value. 
Details can be found in the man page of [SSL_CTX_load_verify_locations](https://www.openssl.org/docs/man3.0/man3/SSL_CTX_load_verify_locations.html). +- verificationMode (default: relaxed) – The method for checking the node’s certificates. Details are in the description of the [Context](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h) class. Possible values: `none`, `relaxed`, `strict`, `once`. +- verificationDepth (default: 9) – The maximum length of the verification chain. Verification will fail if the certificate chain length exceeds the set value. +- loadDefaultCAFile (default: true) – Whether built-in CA certificates for OpenSSL will be used. ClickHouse assumes that builtin CA certificates are in the file `/etc/ssl/cert.pem` (resp. the directory `/etc/ssl/certs`) or in the file (resp. directory) specified by the environment variable `SSL_CERT_FILE` (resp. `SSL_CERT_DIR`). +- cipherList (default: `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`) – Supported OpenSSL encryptions. +- cacheSessions (default: false) – Enables or disables caching sessions. Must be used in combination with `sessionIdContext`. Acceptable values: `true`, `false`. +- sessionIdContext (default: `${application.name}`) – A unique set of random characters that the server appends to each generated identifier. The length of the string must not exceed `SSL_MAX_SSL_SESSION_ID_LENGTH`. This parameter is always recommended since it helps avoid problems both if the server caches the session and if the client requested caching. Default value: `${application.name}`. +- sessionCacheSize (default: [1024\*20](https://github.com/ClickHouse/boringssl/blob/master/include/openssl/ssl.h#L1978)) – The maximum number of sessions that the server caches. A value of 0 means unlimited sessions. +- sessionTimeout (default: [2h](https://github.com/ClickHouse/boringssl/blob/master/include/openssl/ssl.h#L1926)) – Time for caching the session on the server. +- extendedVerification (default: false) – If enabled, verify that the certificate CN or SAN matches the peer hostname. +- requireTLSv1 (default: false) – Require a TLSv1 connection. Acceptable values: `true`, `false`. +- requireTLSv1_1 (default: false) – Require a TLSv1.1 connection. Acceptable values: `true`, `false`. +- requireTLSv1_2 (default: false) – Require a TLSv1.2 connection. Acceptable values: `true`, `false`. +- fips (default: false) – Activates OpenSSL FIPS mode. Supported if the library’s OpenSSL version supports FIPS. +- privateKeyPassphraseHandler (default: `KeyConsoleHandler`) – Class (PrivateKeyPassphraseHandler subclass) that requests the passphrase for accessing the private key. For example: `<privateKeyPassphraseHandler>`, `<name>KeyFileHandler</name>`, `<options><password>test</password></options>`, `</privateKeyPassphraseHandler>`. +- invalidCertificateHandler (default: `ConsoleCertificateHandler`) – Class (a subclass of CertificateHandler) for verifying invalid certificates. For example: `ConsoleCertificateHandler`. +- disableProtocols (default: "") – Protocols that are not allowed to be used. +- preferServerCiphers (default: false) – Preferred server ciphers on the client. **Example of settings:** @@ -1206,12 +1341,14 @@ Queries are logged in the [system.part_log](../../operations/system-tables/part_ Use the following parameters to configure logging: -- `database` – Name of the database. -- `table` – Name of the system table. -- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined.
-- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` defined. -- `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. -- `storage_policy` – Name of storage policy to use for the table (optional) +- `database` - Name of the database. +- `table` - Name of the system table. +- `partition_by` - [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. +- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. +- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined. +- `flush_interval_milliseconds` - Interval for flushing data from the buffer in memory to the table. +- `storage_policy` - Name of storage policy to use for the table (optional). +- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree#settings) that control the behavior of the MergeTree (optional). **Example** @@ -1238,28 +1375,40 @@ The trailing slash is mandatory. /var/lib/clickhouse/ ``` -## prometheus {#server_configuration_parameters-prometheus} +## Prometheus {#server_configuration_parameters-prometheus} Exposing metrics data for scraping from [Prometheus](https://prometheus.io). Settings: -- `endpoint` – HTTP endpoint for scraping metrics by prometheus server. Start from ‘/’. -- `port` – Port for `endpoint`. -- `metrics` – Flag that sets to expose metrics from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table. -- `events` – Flag that sets to expose metrics from the [system.events](../../operations/system-tables/events.md#system_tables-events) table. -- `asynchronous_metrics` – Flag that sets to expose current metrics values from the [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table. +- `endpoint` – HTTP endpoint for scraping metrics by the Prometheus server. Must start with ‘/’. +- `port` – Port for `endpoint`. +- `metrics` – Flag to expose metrics from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table. +- `events` – Flag to expose metrics from the [system.events](../../operations/system-tables/events.md#system_tables-events) table. +- `asynchronous_metrics` – Flag to expose current metric values from the [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table. **Example**

``` xml
-<prometheus>
+<clickhouse>
+    <listen_host>0.0.0.0</listen_host>
+    <http_port>8123</http_port>
+    <tcp_port>9000</tcp_port>
+
+    <prometheus>
         <endpoint>/metrics</endpoint>
-        <port>8001</port>
+        <port>9363</port>
         <metrics>true</metrics>
         <events>true</events>
         <asynchronous_metrics>true</asynchronous_metrics>
-</prometheus>
+    </prometheus>
+</clickhouse>
```

+
+Check (replace `127.0.0.1` with the IP address or hostname of your ClickHouse server):
+```bash
+curl 127.0.0.1:9363/metrics
+```

## query_log {#server_configuration_parameters-query-log} @@ -1270,12 +1419,14 @@ Queries are logged in the [system.query_log](../../operations/system-tables/quer Use the following parameters to configure logging: -- `database` – Name of the database. -- `table` – Name of the system table the queries will be logged in.
-- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. -- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` defined. -- `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. -- `storage_policy` – Name of storage policy to use for the table (optional) +- `database` - Name of the database. +- `table` - Name of the system table the queries will be logged in. +- `partition_by` - [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. +- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. +- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined. +- `flush_interval_milliseconds` - Interval for flushing data from the buffer in memory to the table. +- `storage_policy` - Name of storage policy to use for the table (optional). +- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree#settings) that control the behavior of the MergeTree (optional). If the table does not exist, ClickHouse will create it. If the structure of the query log changed when the ClickHouse server was updated, the table with the old structure is renamed, and a new table is created automatically. @@ -1296,25 +1447,25 @@ If the table does not exist, ClickHouse will create it. If the structure of the The following settings are available: -- `size`: The maximum cache size in bytes. 0 means the query cache is disabled. Default value: `1073741824` (1 GiB). -- `max_entries`: The maximum number of `SELECT` query results stored in the cache. Default value: `1024`. -- `max_entry_size`: The maximum size in bytes `SELECT` query results may have to be saved in the cache. Default value: `1048576` (1 MiB). -- `max_entry_rows`: The maximum number of rows `SELECT` query results may have to be saved in the cache. Default value: `30000000` (30 mil). +- `max_size_in_bytes`: The maximum cache size in bytes. 0 means the query cache is disabled. Default value: `1073741824` (1 GiB). +- `max_entries`: The maximum number of `SELECT` query results stored in the cache. Default value: `1024`. +- `max_entry_size_in_bytes`: The maximum size in bytes that `SELECT` query results may have in order to be saved in the cache. Default value: `1048576` (1 MiB). +- `max_entry_size_in_rows`: The maximum number of rows that `SELECT` query results may have in order to be saved in the cache. Default value: `30000000` (30 million). Changed settings take effect immediately. -:::warning -Data for the query cache is allocated in DRAM. If memory is scarce, make sure to set a small value for `size` or disable the query cache altogether. +:::note +Data for the query cache is allocated in DRAM. If memory is scarce, make sure to set a small value for `max_size_in_bytes` or disable the query cache altogether.
::: **Example**

```xml
 <query_cache>
-    <size>1073741824</size>
+    <max_size_in_bytes>1073741824</max_size_in_bytes>
     <max_entries>1024</max_entries>
-    <max_entry_size>1048576</max_entry_size>
-    <max_entry_rows>30000000</max_entry_rows>
+    <max_entry_size_in_bytes>1048576</max_entry_size_in_bytes>
+    <max_entry_size_in_rows>30000000</max_entry_size_in_rows>
 </query_cache>
```

@@ -1326,12 +1477,14 @@ Queries are logged in the [system.query_thread_log](../../operations/system-tabl Use the following parameters to configure logging: -- `database` – Name of the database. -- `table` – Name of the system table the queries will be logged in. -- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. -- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` defined. -- `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. -- `storage_policy` – Name of storage policy to use for the table (optional) +- `database` - Name of the database. +- `table` - Name of the system table the queries will be logged in. +- `partition_by` - [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. +- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. +- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined. +- `flush_interval_milliseconds` - Interval for flushing data from the buffer in memory to the table. +- `storage_policy` - Name of storage policy to use for the table (optional). +- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree#settings) that control the behavior of the MergeTree (optional). If the table does not exist, ClickHouse will create it. If the structure of the query thread log changed when the ClickHouse server was updated, the table with the old structure is renamed, and a new table is created automatically. @@ -1354,12 +1507,14 @@ Queries are logged in the [system.query_views_log](../../operations/system-table Use the following parameters to configure logging: -- `database` – Name of the database. -- `table` – Name of the system table the queries will be logged in. -- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. -- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` defined. -- `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. -- `storage_policy` – Name of storage policy to use for the table (optional) +- `database` - Name of the database. +- `table` - Name of the system table the queries will be logged in. +- `partition_by` - [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. +- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined.
+- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined. +- `flush_interval_milliseconds` - Interval for flushing data from the buffer in memory to the table. +- `storage_policy` - Name of storage policy to use for the table (optional). +- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree#settings) that control the behavior of the MergeTree (optional). If the table does not exist, ClickHouse will create it. If the structure of the query views log changed when the ClickHouse server was updated, the table with the old structure is renamed, and a new table is created automatically. @@ -1380,13 +1535,15 @@ Settings for the [text_log](../../operations/system-tables/text_log.md#system_ta Parameters: -- `level` — Maximum Message Level (by default `Trace`) which will be stored in a table. -- `database` — Database name. -- `table` — Table name. -- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. -- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` defined. -- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. -- `storage_policy` – Name of storage policy to use for the table (optional) +- `level` - Maximum message level (by default `Trace`) that will be stored in the table. +- `database` - Database name. +- `table` - Table name. +- `partition_by` - [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. +- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. +- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined. +- `flush_interval_milliseconds` - Interval for flushing data from the buffer in memory to the table. +- `storage_policy` - Name of storage policy to use for the table (optional). +- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree#settings) that control the behavior of the MergeTree (optional). **Example** ```xml @@ -1409,12 +1566,14 @@ Settings for the [trace_log](../../operations/system-tables/trace_log.md#system_ Parameters: -- `database` — Database for storing a table. -- `table` — Table name. -- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. -- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/index.md) for a system table. Can't be used if `partition_by` defined. -- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. -- `storage_policy` – Name of storage policy to use for the table (optional) +- `database` - Database for storing a table. +- `table` - Table name.
+- `partition_by` - [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. +- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. +- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/index.md) for a system table. Can't be used if `partition_by` or `order_by` defined. +- `flush_interval_milliseconds` - Interval for flushing data from the buffer in memory to the table. +- `storage_policy` - Name of storage policy to use for the table (optional). +- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree#settings) that control the behavior of the MergeTree (optional). The default server configuration file `config.xml` contains the following settings section: @@ -1471,7 +1630,7 @@ For the value of the `incl` attribute, see the section “[Configuration files]( **See Also** -- [skip_unavailable_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards) +- [skip_unavailable_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards) ## timezone {#server_configuration_parameters-timezone} @@ -1645,11 +1804,11 @@ Default value: `0`. **See also** -- [max_temporary_data_on_disk_size_for_user](../../operations/settings/query-complexity.md#settings_max_temporary_data_on_disk_size_for_user) -- [max_temporary_data_on_disk_size_for_query](../../operations/settings/query-complexity.md#settings_max_temporary_data_on_disk_size_for_query) -- [tmp_path](#tmp-path) -- [tmp_policy](#tmp-policy) -- [max_server_memory_usage](#max_server_memory_usage) +- [max_temporary_data_on_disk_size_for_user](../../operations/settings/query-complexity.md#settings_max_temporary_data_on_disk_size_for_user) +- [max_temporary_data_on_disk_size_for_query](../../operations/settings/query-complexity.md#settings_max_temporary_data_on_disk_size_for_query) +- [tmp_path](#tmp-path) +- [tmp_policy](#tmp-policy) +- [max_server_memory_usage](#max_server_memory_usage) ## uncompressed_cache_size {#server-settings-uncompressed_cache_size} @@ -1699,10 +1858,10 @@ The directory with user defined files. Used for SQL user defined functions [SQL Path to the file that contains: -- User configurations. -- Access rights. -- Settings profiles. -- Quota settings. +- User configurations. +- Access rights. +- Settings profiles. +- Quota settings. **Example** @@ -1718,7 +1877,7 @@ ClickHouse uses ZooKeeper for storing metadata of replicas when using replicated This section contains the following parameters: -- `node` — ZooKeeper endpoint. You can set multiple endpoints. +- `node` — ZooKeeper endpoint. You can set multiple endpoints. 
For example: @@ -1761,9 +1920,9 @@ This section contains the following parameters: **See Also** -- [Replication](../../engines/table-engines/mergetree-family/replication.md) -- [ZooKeeper Programmer’s Guide](http://zookeeper.apache.org/doc/current/zookeeperProgrammers.html) -- [Optional secured communication between ClickHouse and Zookeeper](../ssl-zookeeper.md#secured-communication-with-zookeeper) +- [Replication](../../engines/table-engines/mergetree-family/replication.md) +- [ZooKeeper Programmer’s Guide](http://zookeeper.apache.org/doc/current/zookeeperProgrammers.html) +- [Optional secured communication between ClickHouse and Zookeeper](../ssl-zookeeper.md#secured-communication-with-zookeeper) ## use_minimalistic_part_header_in_zookeeper {#server-settings-use_minimalistic_part_header_in_zookeeper} @@ -1771,18 +1930,18 @@ Storage method for data part headers in ZooKeeper. This setting only applies to the `MergeTree` family. It can be specified: -- Globally in the [merge_tree](#server_configuration_parameters-merge_tree) section of the `config.xml` file. +- Globally in the [merge_tree](#server_configuration_parameters-merge_tree) section of the `config.xml` file. ClickHouse uses the setting for all the tables on the server. You can change the setting at any time. Existing tables change their behaviour when the setting changes. -- For each table. +- For each table. When creating a table, specify the corresponding [engine setting](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table). The behaviour of an existing table with this setting does not change, even if the global setting changes. **Possible values** -- 0 — Functionality is turned off. -- 1 — Functionality is turned on. +- 0 — Functionality is turned off. +- 1 — Functionality is turned on. If `use_minimalistic_part_header_in_zookeeper = 1`, then [replicated](../../engines/table-engines/mergetree-family/replication.md) tables store the headers of the data parts compactly using a single `znode`. If the table contains many columns, this storage method significantly reduces the volume of the data stored in Zookeeper. @@ -1810,13 +1969,23 @@ The update is performed asynchronously, in a separate system thread. **See also** -- [background_schedule_pool_size](../../operations/settings/settings.md#background_schedule_pool_size) +- [background_schedule_pool_size](../../operations/settings/settings.md#background_schedule_pool_size) ## distributed_ddl {#server-settings-distributed_ddl} Manage executing [distributed ddl queries](../../sql-reference/distributed-ddl.md) (CREATE, DROP, ALTER, RENAME) on cluster. Works only if [ZooKeeper](#server-settings_zookeeper) is enabled. +The configurable settings within `<distributed_ddl>` include: + +- **path**: the path in Keeper for the `task_queue` for DDL queries +- **profile**: the profile used to execute the DDL queries +- **pool_size**: how many `ON CLUSTER` queries can be run simultaneously +- **max_tasks_in_queue**: the maximum number of tasks that can be in the queue. Default is 1,000 +- **task_max_lifetime**: delete a node if its age is greater than this value. Default is `7 * 24 * 60 * 60` (a week in seconds) +- **cleanup_delay_period**: cleaning starts after a new node event is received, and only if the last cleaning happened at least `cleanup_delay_period` seconds earlier. Default is 60 seconds + + **Example** ```xml @@ -1853,14 +2022,14 @@ Default value: `/var/lib/clickhouse/access/`.
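For illustration, this path can be overridden in `config.xml`; a minimal sketch (the directory shown is hypothetical):

```xml
<!-- Hypothetical override; the default is /var/lib/clickhouse/access/ -->
<access_control_path>/data/clickhouse/access/</access_control_path>
```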
**See also** -- [Access Control and Account Management](../../operations/access-rights.md#access-control) +- [Access Control and Account Management](../../guides/sre/user-management/index.md#access-control) ## user_directories {#user_directories} Section of the configuration file that contains settings: -- Path to configuration file with predefined users. -- Path to folder where users created by SQL commands are stored. -- ZooKeeper node path where users created by SQL commands are stored and replicated (experimental). +- Path to configuration file with predefined users. +- Path to folder where users created by SQL commands are stored. +- ZooKeeper node path where users created by SQL commands are stored and replicated (experimental). If this section is specified, the path from [users_config](../../operations/server-configuration-parameters/settings.md#users-config) and [access_control_path](../../operations/server-configuration-parameters/settings.md#access_control_path) won't be used. @@ -1895,8 +2064,8 @@ Users, roles, row policies, quotas, and profiles can be also stored in ZooKeeper You can also define sections `memory` — means storing information only in memory, without writing to disk, and `ldap` — means storing information on an LDAP server. To add an LDAP server as a remote user directory of users that are not defined locally, define a single `ldap` section with a following parameters: -- `server` — one of LDAP server names defined in `ldap_servers` config section. This parameter is mandatory and cannot be empty. -- `roles` — section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server. If no roles are specified, user will not be able to perform any actions after authentication. If any of the listed roles is not defined locally at the time of authentication, the authentication attempt will fail as if the provided password was incorrect. +- `server` — one of LDAP server names defined in `ldap_servers` config section. This parameter is mandatory and cannot be empty. +- `roles` — section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server. If no roles are specified, the user will not be able to perform any actions after authentication. If any of the listed roles is not defined locally at the time of authentication, the authentication attempt will fail as if the provided password was incorrect. **Example** @@ -1916,7 +2085,7 @@ Sets the memory size (in bytes) for a stack trace at every peak allocation step. Possible values: -- Positive integer.
+- Positive integer. Default value: `10000`. + +## display_secrets_in_show_and_select {#display_secrets_in_show_and_select} + +Enables or disables showing secrets in `SHOW` and `SELECT` queries for tables, databases, +table functions, and dictionaries. + +A user wishing to see secrets must also have the +[`format_display_secrets_in_show_and_select`](../settings/formats#format_display_secrets_in_show_and_select) format setting +turned on and the +[`displaySecretsInShowAndSelect`](../../sql-reference/statements/grant#grant-display-secrets) privilege. + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 0. diff --git a/docs/en/operations/settings/constraints-on-settings.md b/docs/en/operations/settings/constraints-on-settings.md index 83ef46053a4..1895a79cd3e 100644 --- a/docs/en/operations/settings/constraints-on-settings.md +++ b/docs/en/operations/settings/constraints-on-settings.md @@ -40,7 +40,7 @@ If the user tries to violate the constraints an exception is thrown and the sett There are supported few types of constraints: `min`, `max`, `readonly` (with alias `const`) and `changeable_in_readonly`. The `min` and `max` constraints specify upper and lower boundaries for a numeric setting and can be used in combination. The `readonly` or `const` constraint specifies that the user cannot change the corresponding setting at all. The `changeable_in_readonly` constraint type allows user to change the setting within `min`/`max` range even if `readonly` setting is set to 1, otherwise settings are not allow to be changed in `readonly=1` mode. Note that `changeable_in_readonly` is supported only if `settings_constraints_replace_previous` is enabled:

``` xml
 <access_control_improvements>
-  <settings_constraints_replace_previous>true</settings_constraints_replace_previous>
+  <settings_constraints_replace_previous>true</settings_constraints_replace_previous>
 </access_control_improvements>
```

diff --git a/docs/en/operations/settings/index.md b/docs/en/operations/settings/index.md index fae282c861f..eb1d5db5676 100644 --- a/docs/en/operations/settings/index.md +++ b/docs/en/operations/settings/index.md @@ -22,9 +22,9 @@ The order of priority for defining a setting is: 3. Query settings - - When starting the ClickHouse console client in non-interactive mode, set the startup parameter `--setting=value`. - - When using the HTTP API, pass CGI parameters (`URL?setting_1=value&setting_2=value...`). - - Define settings in the [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select-query) clause of the SELECT query. The setting value is applied only to that query and is reset to the default or previous value after the query is executed. + - When starting the ClickHouse console client in non-interactive mode, set the startup parameter `--setting=value`. + - When using the HTTP API, pass CGI parameters (`URL?setting_1=value&setting_2=value...`). + - Define settings in the [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select-query) clause of the SELECT query. The setting value is applied only to that query and is reset to the default or previous value after the query is executed. View the [Settings](./settings.md) page for a description of the ClickHouse settings.
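For illustration, here is how a query-level setting could be supplied through each of the three channels listed above; a minimal sketch, assuming a local server with default ports (`max_threads` is just an example setting):

```bash
# 1. Console client in non-interactive mode: the setting is a startup parameter.
clickhouse-client --max_threads=2 --query "SELECT count() FROM numbers(1000000)"

# 2. HTTP API: the setting is passed as a CGI parameter.
curl "http://localhost:8123/?max_threads=2" --data-binary "SELECT count() FROM numbers(1000000)"

# 3. SETTINGS clause of the SELECT query itself; the value applies to this query only.
clickhouse-client --query "SELECT count() FROM numbers(1000000) SETTINGS max_threads = 2"
```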
@@ -93,4 +93,4 @@ SELECT getSetting('custom_a'); **See Also** -- [Server Configuration Settings](../../operations/server-configuration-parameters/settings.md) +- [Server Configuration Settings](../../operations/server-configuration-parameters/settings.md) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 5bc174727ad..0b1207ee7b6 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -13,7 +13,7 @@ If the number of broken parts in a single partition exceeds the `max_suspicious_ Possible values: -- Any positive integer. +- Any positive integer. Default value: 100. @@ -54,7 +54,7 @@ If the number of active parts in a single partition exceeds the `parts_to_throw_ Possible values: -- Any positive integer. +- Any positive integer. Default value: 300. @@ -69,7 +69,7 @@ If the number of active parts in a single partition exceeds the `parts_to_delay_ Possible values: -- Any positive integer. +- Any positive integer. Default value: 150. @@ -81,7 +81,7 @@ If the number of inactive parts in a single partition more than the `inactive_pa Possible values: -- Any positive integer. +- Any positive integer. Default value: 0 (unlimited). @@ -91,7 +91,7 @@ If the number of inactive parts in a single partition in the table at least that Possible values: -- Any positive integer. +- Any positive integer. Default value: 0 (unlimited). @@ -101,7 +101,7 @@ The value in seconds, which is used to calculate the `INSERT` delay, if the numb Possible values: -- Any positive integer. +- Any positive integer. Default value: 1. @@ -127,7 +127,7 @@ If the total number of active parts in all partitions of a table exceeds the `ma Possible values: -- Any positive integer. +- Any positive integer. Default value: 100000. @@ -146,8 +146,8 @@ The number of most recently inserted blocks for which ClickHouse Keeper stores h Possible values: -- Any positive integer. -- 0 (disable deduplication) +- Any positive integer. +- 0 (disable deduplication) Default value: 100. @@ -161,8 +161,8 @@ The number of the most recently inserted blocks in the non-replicated [MergeTree Possible values: -- Any positive integer. -- 0 (disable deduplication). +- Any positive integer. +- 0 (disable deduplication). Default value: 0. @@ -174,7 +174,7 @@ The number of seconds after which the hash sums of the inserted blocks are remov Possible values: -- Any positive integer. +- Any positive integer. Default value: 604800 (1 week). @@ -188,8 +188,8 @@ The number of most recently async inserted blocks for which ClickHouse Keeper st Possible values: -- Any positive integer. -- 0 (disable deduplication for async_inserts) +- Any positive integer. +- 0 (disable deduplication for async_inserts) Default value: 10000. @@ -203,7 +203,7 @@ The number of seconds after which the hash sums of the async inserts are removed Possible values: -- Any positive integer. +- Any positive integer. Default value: 604800 (1 week). @@ -229,7 +229,7 @@ The minimum interval (in milliseconds) to update the `use_async_block_ids_cache` Possible values: -- Any positive integer. +- Any positive integer. Default value: 100. @@ -241,7 +241,7 @@ How many records may be in the ClickHouse Keeper log if there is inactive replic Possible values: -- Any positive integer. +- Any positive integer. Default value: 1000 @@ -251,7 +251,7 @@ Keep about this number of last records in ZooKeeper log, even if they are obsole Possible values: -- Any positive integer. 
+- Any positive integer. Default value: 10 @@ -261,7 +261,7 @@ If the time passed since a replication log (ClickHouse Keeper or ZooKeeper) entr Possible values: -- Any positive integer. +- Any positive integer. Default value: 3600 @@ -271,7 +271,7 @@ If the sum of the size of parts exceeds this threshold and the time since a repl Possible values: -- Any positive integer. +- Any positive integer. Default value: 10,737,418,240 @@ -281,7 +281,7 @@ When this setting has a value greater than zero, only a single replica starts th Possible values: -- Any positive integer. +- Any positive integer. Default value: 0 (seconds) @@ -289,13 +289,13 @@ Default value: 0 (seconds) When this setting has a value greater than zero, only a single replica starts the merge immediately if the merged part is on shared storage and `allow_remote_fs_zero_copy_replication` is enabled. -:::warning Zero-copy replication is not ready for production +:::note Zero-copy replication is not ready for production Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. ::: Possible values: -- Any positive integer. +- Any positive integer. Default value: 10800 @@ -305,7 +305,7 @@ Recompression works slow in most cases, so we don't start merge with recompressi Possible values: -- Any positive integer. +- Any positive integer. Default value: 7200 @@ -315,7 +315,7 @@ If true, this replica never merges parts and always downloads merged parts from Possible values: -- true, false +- true, false Default value: false @@ -325,7 +325,7 @@ Max broken parts, if more - deny automatic deletion. Possible values: -- Any positive integer. +- Any positive integer. Default value: 100 @@ -336,7 +336,7 @@ Max size of all broken parts, if more - deny automatic deletion. Possible values: -- Any positive integer. +- Any positive integer. Default value: 1,073,741,824 @@ -346,7 +346,7 @@ Do not apply ALTER if number of files for modification(deletion, addition) is gr Possible values: -- Any positive integer. +- Any positive integer. Default value: 75 @@ -356,7 +356,7 @@ Do not apply ALTER, if the number of files for deletion is greater than this set Possible values: -- Any positive integer. +- Any positive integer. Default value: 50 @@ -366,7 +366,7 @@ If the ratio of wrong parts to total number of parts is less than this - allow t Possible values: -- Float, 0.0 - 1.0 +- Float, 0.0 - 1.0 Default value: 0.5 @@ -376,7 +376,7 @@ Limit parallel fetches from endpoint (actually pool size). Possible values: -- Any positive integer. +- Any positive integer. Default value: 15 @@ -386,7 +386,7 @@ HTTP connection timeout for part fetch requests. Inherited from default profile Possible values: -- Any positive integer. +- Any positive integer. Default value: Inherited from default profile `http_connection_timeout` if not set explicitly. @@ -396,7 +396,7 @@ If true, replicated tables replicas on this node will try to acquire leadership. Possible values: -- true, false +- true, false Default value: true @@ -406,7 +406,7 @@ ZooKeeper session expiration check period, in seconds. Possible values: -- Any positive integer. +- Any positive integer. Default value: 60 @@ -416,7 +416,7 @@ Do not remove old local parts when repairing lost replica. Possible values: -- true, false +- true, false Default value: true @@ -426,8 +426,8 @@ HTTP connection timeout (in seconds) for part fetch requests. Inherited from def Possible values: -- Any positive integer. -- 0 - Use value of `http_connection_timeout`.
+- Any positive integer. +- 0 - Use value of `http_connection_timeout`. Default value: 0. @@ -437,8 +437,8 @@ HTTP send timeout (in seconds) for part fetch requests. Inherited from default p Possible values: -- Any positive integer. -- 0 - Use value of `http_send_timeout`. +- Any positive integer. +- 0 - Use value of `http_send_timeout`. Default value: 0. @@ -448,8 +448,8 @@ HTTP receive timeout (in seconds) for fetch part requests. Inherited from defaul Possible values: -- Any positive integer. -- 0 - Use value of `http_receive_timeout`. +- Any positive integer. +- 0 - Use value of `http_receive_timeout`. Default value: 0. @@ -463,8 +463,8 @@ The setting isn't followed perfectly accurately. Possible values: -- Positive integer. -- 0 — Unlimited. +- Positive integer. +- 0 — Unlimited. Default value: `0`. @@ -482,8 +482,8 @@ The setting isn't followed perfectly accurately. Possible values: -- Positive integer. -- 0 — Unlimited. +- Positive integer. +- 0 — Unlimited. Default value: `0`. @@ -497,7 +497,7 @@ The time (in seconds) of storing inactive parts to protect against data loss dur Possible values: -- Any positive integer. +- Any positive integer. Default value: 480. @@ -520,7 +520,7 @@ The maximum total parts size (in bytes) to be merged into one part, if there are Possible values: -- Any positive integer. +- Any positive integer. Default value: 161061273600 (150 GB). @@ -534,7 +534,7 @@ The maximum total part size (in bytes) to be merged into one part, with the mini Possible values: -- Any positive integer. +- Any positive integer. Default value: 1048576 (1 MB) @@ -547,19 +547,45 @@ The number of rows that are read from the merged parts into memory. Possible values: -- Any positive integer. +- Any positive integer. Default value: 8192 Merge reads rows from parts in blocks of `merge_max_block_size` rows, then merges and writes the result into a new part. The read block is placed in RAM, so `merge_max_block_size` affects the size of the RAM required for the merge. Thus, merges can consume a large amount of RAM for tables with very wide rows (if the average row size is 100kb, then when merging 10 parts, (100kb * 10 * 8192) = ~ 8GB of RAM). By decreasing `merge_max_block_size`, you can reduce the amount of RAM required for a merge but slow down a merge. +## number_of_free_entries_in_pool_to_lower_max_size_of_merge {#number-of-free-entries-in-pool-to-lower-max-size-of-merge} + +When there are fewer than the specified number of free entries in the pool (or replicated queue), start to lower the maximum size of merges to process (or to put in the queue). +This allows small merges to proceed instead of the pool being filled with long-running merges. + +Possible values: + +- Any positive integer. + +Default value: 8 + +## number_of_free_entries_in_pool_to_execute_mutation {#number-of-free-entries-in-pool-to-execute-mutation} + +When there are fewer than the specified number of free entries in the pool, do not execute part mutations. +This is to leave free threads for regular merges and avoid "Too many parts" errors. + +Possible values: + +- Any positive integer. + +Default value: 20 + +**Usage** + +The value of the `number_of_free_entries_in_pool_to_execute_mutation` setting should be less than the value of [background_pool_size](/docs/en/operations/server-configuration-parameters/settings#background_pool_size) * [background_merges_mutations_concurrency_ratio](/docs/en/operations/server-configuration-parameters/settings#background_merges_mutations_concurrency_ratio). Otherwise, ClickHouse throws an exception.
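For illustration, both thresholds are MergeTree-level settings and could be tuned for an existing table; a minimal sketch using `ALTER TABLE ... MODIFY SETTING` (the table name and values are hypothetical, not recommendations):

```sql
-- Leave more pool entries free before large merges and mutations are scheduled.
ALTER TABLE example_table
    MODIFY SETTING
        number_of_free_entries_in_pool_to_lower_max_size_of_merge = 8,
        number_of_free_entries_in_pool_to_execute_mutation = 10;
```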
+ ## max_part_loading_threads {#max-part-loading-threads} The maximum number of threads that read parts when ClickHouse starts. Possible values: -- Any positive integer. +- Any positive integer. Default value: auto (number of CPU cores). @@ -573,7 +599,7 @@ The setting value specified when the table is created can be overridden via quer Possible values: -- Any positive integer. +- Any positive integer. Default value: -1 (unlimited). @@ -583,7 +609,7 @@ Merge parts if every part in the range is older than the value of `min_age_to_fo Possible values: -- Positive integer. +- Positive integer. Default value: 0 — Disabled. @@ -593,7 +619,7 @@ Whether `min_age_to_force_merge_seconds` should be applied only on the entire pa Possible values: -- true, false +- true, false Default value: false @@ -603,8 +629,8 @@ Enables to allow floating-point number as a partition key. Possible values: -- 0 — Floating-point partition key not allowed. -- 1 — Floating-point partition key allowed. +- 0 — Floating-point partition key not allowed. +- 1 — Floating-point partition key allowed. Default value: `0`. @@ -614,8 +640,8 @@ Enables the check at table creation, that the data type of a column for sampling Possible values: -- true — The check is enabled. -- false — The check is disabled at table creation. +- true — The check is enabled. +- false — The check is disabled at table creation. Default value: `true`. @@ -627,8 +653,8 @@ Sets minimal amount of bytes to enable balancing when distributing new big parts Possible values: -- Positive integer. -- 0 — Balancing is disabled. +- Positive integer. +- 0 — Balancing is disabled. Default value: `0`. @@ -644,8 +670,8 @@ The setting is applicable to `MergeTree` tables with enabled [data replication]( Possible values: -- 0 — Parts are removed. -- 1 — Parts are detached. +- 0 — Parts are removed. +- 1 — Parts are detached. Default value: `0`. @@ -655,7 +681,7 @@ Sets the interval in seconds for ClickHouse to execute the cleanup of old tempor Possible values: -- Any positive integer. +- Any positive integer. Default value: `60` seconds. @@ -665,7 +691,7 @@ Sets the interval in seconds for ClickHouse to execute the cleanup of old parts, Possible values: -- Any positive integer. +- Any positive integer. Default value: `1` second. @@ -675,8 +701,8 @@ Max number of concurrently executed queries related to the MergeTree table. Quer Possible values: -- Positive integer. -- 0 — No limit. +- Positive integer. +- 0 — No limit. Default value: `0` (no limit). @@ -692,8 +718,8 @@ The minimal number of marks read by the query for applying the [max_concurrent_q Possible values: -- Positive integer. -- 0 — Disabled (`max_concurrent_queries` limit applied to no queries). +- Positive integer. +- 0 — Disabled (`max_concurrent_queries` limit applied to no queries). Default value: `0` (limit never applied). @@ -805,3 +831,13 @@ You can see which parts of `s` were stored using the sparse serialization: │ s │ Sparse │ └────────┴────────────────────┘ ``` + +## clean_deleted_rows + +Enable/disable automatic deletion of rows flagged as `is_deleted` when performing `OPTIMIZE ... FINAL` on a table using the ReplacingMergeTree engine. When disabled, the `CLEANUP` keyword has to be added to `OPTIMIZE ... FINAL` to get the same behaviour. + +Possible values: + +- `Always` or `Never`.
+ +Default value: `Never` \ No newline at end of file diff --git a/docs/en/operations/settings/permissions-for-queries.md b/docs/en/operations/settings/permissions-for-queries.md index c565de9b21a..9e9c564d426 100644 --- a/docs/en/operations/settings/permissions-for-queries.md +++ b/docs/en/operations/settings/permissions-for-queries.md @@ -21,8 +21,8 @@ Restricts permissions for read data, write data, and change settings queries. When set to 1, allows: -- All types of read queries (like SELECT and equivalent queries). -- Queries that modify only session context (like USE). +- All types of read queries (like SELECT and equivalent queries). +- Queries that modify only session context (like USE). When set to 2, allows the above plus: - SET and CREATE TEMPORARY TABLE @@ -33,9 +33,9 @@ When set to 2, allows the above plus: Possible values: -- 0 — Read, Write, and Change settings queries are allowed. -- 1 — Only Read data queries are allowed. -- 2 — Read data and Change settings queries are allowed. +- 0 — Read, Write, and Change settings queries are allowed. +- 1 — Only Read data queries are allowed. +- 2 — Read data and Change settings queries are allowed. Default value: 0 @@ -54,8 +54,8 @@ Allows or denies [DDL](https://en.wikipedia.org/wiki/Data_definition_language) q Possible values: -- 0 — DDL queries are not allowed. -- 1 — DDL queries are allowed. +- 0 — DDL queries are not allowed. +- 1 — DDL queries are allowed. Default value: 1 diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md index 7a6b2340d29..163ed5d5826 100644 --- a/docs/en/operations/settings/query-complexity.md +++ b/docs/en/operations/settings/query-complexity.md @@ -26,7 +26,7 @@ It can take one of two values: `throw` or `break`. Restrictions on aggregation ( The maximum amount of RAM to use for running a query on a single server. -In the default configuration file, the maximum is 10 GB. +The default setting is unlimited (set to `0`). The setting does not consider the volume of available memory or the total volume of memory on the machine. The restriction applies to a single query within a single server. @@ -101,8 +101,8 @@ Enables or disables execution of `GROUP BY` clauses in external memory. See [GRO Possible values: -- Maximum volume of RAM (in bytes) that can be used by the single [GROUP BY](../../sql-reference/statements/select/group-by.md#select-group-by-clause) operation. -- 0 — `GROUP BY` in external memory disabled. +- Maximum volume of RAM (in bytes) that can be used by the single [GROUP BY](../../sql-reference/statements/select/group-by.md#select-group-by-clause) operation. +- 0 — `GROUP BY` in external memory disabled. Default value: 0. @@ -257,8 +257,8 @@ ClickHouse can proceed with different actions when the limit is reached. Use the Possible values: -- Positive integer. -- 0 — Unlimited number of rows. +- Positive integer. +- 0 — Unlimited number of rows. Default value: 0. @@ -274,8 +274,8 @@ ClickHouse can proceed with different actions when the limit is reached. Use [jo Possible values: -- Positive integer. -- 0 — Memory control is disabled. +- Positive integer. +- 0 — Memory control is disabled. Default value: 0. @@ -283,27 +283,27 @@ Default value: 0. 
Defines what action ClickHouse performs when any of the following join limits is reached: -- [max_bytes_in_join](#settings-max_bytes_in_join) -- [max_rows_in_join](#settings-max_rows_in_join) +- [max_bytes_in_join](#settings-max_bytes_in_join) +- [max_rows_in_join](#settings-max_rows_in_join) Possible values: -- `THROW` — ClickHouse throws an exception and breaks operation. -- `BREAK` — ClickHouse breaks operation and does not throw an exception. +- `THROW` — ClickHouse throws an exception and breaks operation. +- `BREAK` — ClickHouse breaks operation and does not throw an exception. Default value: `THROW`. **See Also** -- [JOIN clause](../../sql-reference/statements/select/join.md#select-join) -- [Join table engine](../../engines/table-engines/special/join.md) +- [JOIN clause](../../sql-reference/statements/select/join.md#select-join) +- [Join table engine](../../engines/table-engines/special/join.md) ## max_partitions_per_insert_block {#max-partitions-per-insert-block} Limits the maximum number of partitions in a single inserted block. -- Positive integer. -- 0 — Unlimited number of partitions. +- Positive integer. +- 0 — Unlimited number of partitions. Default value: 100. diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 3580d83f704..3b87b829c92 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -7,6 +7,23 @@ toc_max_heading_level: 2 # Format settings {#format-settings} +## format_display_secrets_in_show_and_select {#format_display_secrets_in_show_and_select} + +Enables or disables showing secrets in `SHOW` and `SELECT` queries for tables, databases, +table functions, and dictionaries. + +A user wishing to see secrets must also have the +[`display_secrets_in_show_and_select`](../server-configuration-parameters/settings#display_secrets_in_show_and_select) server setting +turned on and the +[`displaySecretsInShowAndSelect`](../../sql-reference/statements/grant#grant-display-secrets) privilege. + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 0. + ## input_format_skip_unknown_fields {#input_format_skip_unknown_fields} Enables or disables skipping insertion of extra data. @@ -24,8 +41,8 @@ Supported formats: Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 1. @@ -52,8 +69,8 @@ Supported formats: Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 1. @@ -72,8 +89,8 @@ Supported formats: Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 1. @@ -87,8 +104,8 @@ When this option is enabled, extended table metadata are sent from server to cli Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 1. @@ -103,8 +120,8 @@ For complex default expressions `input_format_defaults_for_omitted_fields` must Possible values: -- 0 — Inserting `NULL` into a not nullable column causes an exception. -- 1 — `NULL` fields are initialized with default column values. +- 0 — Inserting `NULL` into a not nullable column causes an exception. +- 1 — `NULL` fields are initialized with default column values. Default value: `1`. @@ -142,7 +159,7 @@ y Nullable(String) z IPv4 ``` -:::warning +:::note If the `schema_inference_hints` is not formatted properly, or if there is a typo or a wrong datatype, etc... the whole schema_inference_hints will be ignored.
::: @@ -179,11 +196,11 @@ The setting does not apply to [date and time functions](../../sql-reference/func Possible values: -- `'best_effort'` — Enables extended parsing. +- `'best_effort'` — Enables extended parsing. ClickHouse can parse the basic `YYYY-MM-DD HH:MM:SS` format and all [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) date and time formats. For example, `'2018-06-08T01:02:03.000Z'`. -- `'basic'` — Use basic parser. +- `'basic'` — Use basic parser. ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `2019-08-20 10:18:56` or `2019-08-20`. @@ -191,8 +208,8 @@ Default value: `'basic'`. See also: -- [DateTime data type.](../../sql-reference/data-types/datetime.md) -- [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md) +- [DateTime data type.](../../sql-reference/data-types/datetime.md) +- [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md) ## date_time_output_format {#date_time_output_format} @@ -200,15 +217,15 @@ Allows choosing different output formats of the text representation of date and Possible values: -- `simple` - Simple output format. +- `simple` - Simple output format. ClickHouse output date and time `YYYY-MM-DD hh:mm:ss` format. For example, `2019-08-20 10:18:56`. The calculation is performed according to the data type's time zone (if present) or server time zone. -- `iso` - ISO output format. +- `iso` - ISO output format. ClickHouse output date and time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ` format. For example, `2019-08-20T10:18:56Z`. Note that output is in UTC (`Z` means UTC). -- `unix_timestamp` - Unix timestamp output format. +- `unix_timestamp` - Unix timestamp output format. ClickHouse output date and time in [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) format. For example `1566285536`. @@ -216,8 +233,8 @@ Default value: `simple`. See also: -- [DateTime data type.](../../sql-reference/data-types/datetime.md) -- [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md) +- [DateTime data type.](../../sql-reference/data-types/datetime.md) +- [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md) ## input_format_ipv4_default_on_conversion_error {#input_format_ipv4_default_on_conversion_error} @@ -298,8 +315,8 @@ By default, when inserting data into a `Distributed` table with more than one sh Possible values: -- 0 — Insertion is rejected if there are multiple shards and no distributed key is given. -- 1 — Insertion is done randomly among all available shards when no distributed key is given. +- 0 — Insertion is rejected if there are multiple shards and no distributed key is given. +- 1 — Insertion is done randomly among all available shards when no distributed key is given. Default value: `0`. @@ -311,18 +328,18 @@ Enables or disables the insertion of JSON data with nested objects. Supported formats: -- [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) +- [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 0. See also: -- [Usage of Nested Structures](../../interfaces/formats.md/#jsoneachrow-nested) with the `JSONEachRow` format. +- [Usage of Nested Structures](../../interfaces/formats.md/#jsoneachrow-nested) with the `JSONEachRow` format. 
## input_format_json_read_bools_as_numbers {#input_format_json_read_bools_as_numbers} @@ -373,8 +390,8 @@ Such integers are enclosed in quotes by default. This behavior is compatible wit Possible values: -- 0 — Integers are output without quotes. -- 1 — Integers are enclosed in quotes. +- 0 — Integers are output without quotes. +- 1 — Integers are enclosed in quotes. Default value: 1. @@ -390,8 +407,8 @@ Enables `+nan`, `-nan`, `+inf`, `-inf` outputs in [JSON](../../interfaces/format Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 0. @@ -526,8 +543,8 @@ Enables the ability to output all rows as a JSON array in the [JSONEachRow](../. Possible values: -- 1 — ClickHouse outputs all rows as an array, each row in the `JSONEachRow` format. -- 0 — ClickHouse outputs each row separately in the `JSONEachRow` format. +- 1 — ClickHouse outputs all rows as an array, each row in the `JSONEachRow` format. +- 0 — ClickHouse outputs each row separately in the `JSONEachRow` format. Default value: `0`. @@ -594,8 +611,8 @@ When enabled, always treat enum values as enum ids for TSV input format. It's re Possible values: -- 0 — Enum values are parsed as values or as enum IDs. -- 1 — Enum values are parsed only as enum IDs. +- 0 — Enum values are parsed as values or as enum IDs. +- 1 — Enum values are parsed only as enum IDs. Default value: 0. @@ -743,8 +760,8 @@ When enabled, always treat enum values as enum ids for CSV input format. It's re Possible values: -- 0 — Enum values are parsed as values or as enum IDs. -- 1 — Enum values are parsed only as enum IDs. +- 0 — Enum values are parsed as values or as enum IDs. +- 1 — Enum values are parsed only as enum IDs. Default value: 0. @@ -873,11 +890,11 @@ Enables or disables the full SQL parser if the fast stream parser can’t parse Possible values: -- 0 — Disabled. +- 0 — Disabled. In this case, you must provide formatted data. See the [Formats](../../interfaces/formats.md) section. -- 1 — Enabled. +- 1 — Enabled. In this case, you can use an SQL expression as a value, but data insertion is much slower this way. If you insert only formatted data, then ClickHouse behaves as if the setting value is 0. @@ -923,8 +940,8 @@ Enables or disables template deduction for SQL expressions in [Values](../../int Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 1. @@ -934,9 +951,9 @@ For the following query: INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (upper('Values')), ... ``` -- If `input_format_values_interpret_expressions=1` and `format_values_deduce_templates_of_expressions=0`, expressions are interpreted separately for each row (this is very slow for large number of rows). -- If `input_format_values_interpret_expressions=0` and `format_values_deduce_templates_of_expressions=1`, expressions in the first, second and third rows are parsed using template `lower(String)` and interpreted together, expression in the forth row is parsed with another template (`upper(String)`). -- If `input_format_values_interpret_expressions=1` and `format_values_deduce_templates_of_expressions=1`, the same as in previous case, but also allows fallback to interpreting expressions separately if it’s not possible to deduce template. +- If `input_format_values_interpret_expressions=1` and `format_values_deduce_templates_of_expressions=0`, expressions are interpreted separately for each row (this is very slow for large number of rows). 
+- If `input_format_values_interpret_expressions=0` and `format_values_deduce_templates_of_expressions=1`, expressions in the first, second, and third rows are parsed using the template `lower(String)` and interpreted together; the expression in the fourth row is parsed with another template (`upper(String)`). +- If `input_format_values_interpret_expressions=1` and `format_values_deduce_templates_of_expressions=1`, the same as in the previous case, but also allows falling back to interpreting expressions separately if it’s not possible to deduce a template. ### input_format_values_accurate_types_of_literals {#input_format_values_accurate_types_of_literals} @@ -950,11 +967,11 @@ This setting is used only when `input_format_values_deduce_templates_of_expressi Possible values: -- 0 — Disabled. +- 0 — Disabled. In this case, ClickHouse may use a more general type for some literals (e.g., `Float64` or `Int64` instead of `UInt64` for `42`), but it may cause overflow and precision issues. -- 1 — Enabled. +- 1 — Enabled. In this case, ClickHouse checks the actual type of literal and uses an expression template of the corresponding type. In some cases, it may significantly slow down expression evaluation in `Values`. @@ -964,12 +981,12 @@ Default value: 1. ### input_format_arrow_import_nested {#input_format_arrow_import_nested} -Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Arrow](../../interfaces/formats.md/#data_types-matching-arrow) input format. +Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Arrow](../../interfaces/formats.md/#data_types-matching-arrow) input format. Possible values: -- 0 — Data can not be inserted into `Nested` columns as an array of structs. -- 1 — Data can be inserted into `Nested` columns as an array of structs. +- 0 — Data can not be inserted into `Nested` columns as an array of structs. +- 1 — Data can be inserted into `Nested` columns as an array of structs. Default value: `0`. @@ -997,8 +1014,8 @@ Allows to convert the [LowCardinality](../../sql-reference/data-types/lowcardina Possible values: -- 0 — The `LowCardinality` type is not converted to the `DICTIONARY` type. -- 1 — The `LowCardinality` type is converted to the `DICTIONARY` type. +- 0 — The `LowCardinality` type is not converted to the `DICTIONARY` type. +- 1 — The `LowCardinality` type is converted to the `DICTIONARY` type. Default value: `0`. @@ -1014,16 +1031,22 @@ Use Arrow FIXED_SIZE_BINARY type instead of Binary/String for FixedString column Enabled by default. +### output_format_arrow_compression_method {#output_format_arrow_compression_method} + +Compression method used in output Arrow format. Supported codecs: `lz4_frame`, `zstd`, `none` (uncompressed) + +Default value: `none`. + ## ORC format settings {#orc-format-settings} ### input_format_orc_import_nested {#input_format_orc_import_nested} -Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [ORC](../../interfaces/formats.md/#data-format-orc) input format. +Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [ORC](../../interfaces/formats.md/#data-format-orc) input format.
Possible values: -- 0 — Data can not be inserted into `Nested` columns as an array of structs. -- 1 — Data can be inserted into `Nested` columns as an array of structs. +- 0 — Data can not be inserted into `Nested` columns as an array of structs. +- 1 — Data can be inserted into `Nested` columns as an array of structs. Default value: `0`. @@ -1057,16 +1080,22 @@ Use ORC String type instead of Binary for String columns. Disabled by default. +### output_format_orc_compression_method {#output_format_orc_compression_method} + +Compression method used in output ORC format. Supported codecs: `lz4`, `snappy`, `zlib`, `zstd`, `none` (uncompressed) + +Default value: `none`. + ## Parquet format settings {#parquet-format-settings} ### input_format_parquet_import_nested {#input_format_parquet_import_nested} -Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Parquet](../../interfaces/formats.md/#data-format-parquet) input format. +Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Parquet](../../interfaces/formats.md/#data-format-parquet) input format. Possible values: -- 0 — Data can not be inserted into `Nested` columns as an array of structs. -- 1 — Data can be inserted into `Nested` columns as an array of structs. +- 0 — Data can not be inserted into `Nested` columns as an array of structs. +- 1 — Data can be inserted into `Nested` columns as an array of structs. Default value: `0`. @@ -1112,6 +1141,12 @@ The version of Parquet format used in output format. Supported versions: `1.0`, Default value: `2.latest`. +### output_format_parquet_compression_method {#output_format_parquet_compression_method} + +Compression method used in output Parquet format. Supported codecs: `snappy`, `lz4`, `brotli`, `zstd`, `gzip`, `none` (uncompressed) + +Default value: `lz4`. + ## Hive format settings {#hive-format-settings} ### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter} @@ -1174,8 +1209,8 @@ Enables using fields that are not specified in [Avro](../../interfaces/formats.m Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 0. @@ -1193,9 +1228,9 @@ Type: string Possible values: -- `null` — No compression -- `deflate` — Compress with Deflate (zlib) -- `snappy` — Compress with [Snappy](https://google.github.io/snappy/) +- `null` — No compression +- `deflate` — Compress with Deflate (zlib) +- `snappy` — Compress with [Snappy](https://google.github.io/snappy/) Default value: `snappy` (if available) or `deflate`. @@ -1242,8 +1277,8 @@ Limits the width of value displayed in [Pretty](../../interfaces/formats.md/#pre Possible values: -- Positive integer. -- 0 — The value is cut completely. +- Positive integer. +- 0 — The value is cut completely. Default value: `10000` symbols. @@ -1318,8 +1353,8 @@ Adds row numbers to output in the [Pretty](../../interfaces/formats.md/#pretty) Possible values: -- 0 — Output without row numbers. -- 1 — Output with row numbers. +- 0 — Output without row numbers. +- 1 — Output with row numbers. Default value: `0`. @@ -1363,12 +1398,12 @@ Sets the field escaping rule for [CustomSeparated](../../interfaces/formats.md/# Possible values: -- `'Escaped'` — Similarly to [TSV](../../interfaces/formats.md/#tabseparated). 
-- `'Quoted'` — Similarly to [Values](../../interfaces/formats.md/#data-format-values). -- `'CSV'` — Similarly to [CSV](../../interfaces/formats.md/#csv). -- `'JSON'` — Similarly to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow). -- `'XML'` — Similarly to [XML](../../interfaces/formats.md/#xml). -- `'Raw'` — Extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](../../interfaces/formats.md/#tabseparatedraw). +- `'Escaped'` — Similarly to [TSV](../../interfaces/formats.md/#tabseparated). +- `'Quoted'` — Similarly to [Values](../../interfaces/formats.md/#data-format-values). +- `'CSV'` — Similarly to [CSV](../../interfaces/formats.md/#csv). +- `'JSON'` — Similarly to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow). +- `'XML'` — Similarly to [XML](../../interfaces/formats.md/#xml). +- `'Raw'` — Extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](../../interfaces/formats.md/#tabseparatedraw). Default value: `'Escaped'`. @@ -1416,12 +1451,12 @@ Field escaping rule. Possible values: -- `'Escaped'` — Similarly to [TSV](../../interfaces/formats.md/#tabseparated). -- `'Quoted'` — Similarly to [Values](../../interfaces/formats.md/#data-format-values). -- `'CSV'` — Similarly to [CSV](../../interfaces/formats.md/#csv). -- `'JSON'` — Similarly to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow). -- `'XML'` — Similarly to [XML](../../interfaces/formats.md/#xml). -- `'Raw'` — Extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](../../interfaces/formats.md/#tabseparatedraw). +- `'Escaped'` — Similarly to [TSV](../../interfaces/formats.md/#tabseparated). +- `'Quoted'` — Similarly to [Values](../../interfaces/formats.md/#data-format-values). +- `'CSV'` — Similarly to [CSV](../../interfaces/formats.md/#csv). +- `'JSON'` — Similarly to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow). +- `'XML'` — Similarly to [XML](../../interfaces/formats.md/#xml). +- `'Raw'` — Extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](../../interfaces/formats.md/#tabseparatedraw). Default value: `Raw`. @@ -1439,9 +1474,9 @@ Determines how to map ClickHouse `Enum` data type and [CapnProto](../../interfac Possible values: -- `'by_values'` — Values in enums should be the same, names can be different. -- `'by_names'` — Names in enums should be the same, values can be different. -- `'by_name_case_insensitive'` — Names in enums should be the same case-insensitive, values can be different. +- `'by_values'` — Values in enums should be the same, names can be different. +- `'by_names'` — Names in enums should be the same, values can be different. +- `'by_name_case_insensitive'` — Names in enums should be the same case-insensitive, values can be different. Default value: `'by_values'`. @@ -1474,7 +1509,7 @@ Default value: `65505`. The name of table that will be used in the output INSERT statement. -Default value: `'table''`. +Default value: `table`. ### output_format_sql_insert_include_column_names {#output_format_sql_insert_include_column_names} @@ -1514,4 +1549,12 @@ Disabled by default. The maximum allowed size for String in RowBinary format. It prevents allocating large amount of memory in case of corrupted data. 0 means there is no limit. -Default value: `1GiB` +Default value: `1GiB`. 
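+**Example**
+
+A usage sketch for this limit; the setting name `format_binary_max_string_size` is an assumption here, since the section heading falls outside this hunk:
+
+```sql
+-- cap RowBinary strings at 1 MiB so corrupted input fails fast instead of over-allocating
+SET format_binary_max_string_size = 1048576;
+-- an INSERT ... FORMAT RowBinary now rejects strings longer than 1 MiB
+```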
+ +## Native format settings {#native-format-settings} + +### input_format_native_allow_types_conversion {#input_format_native_allow_types_conversion} + +Allow type conversion in the Native input format between the columns of the input data and the requested columns. + +Enabled by default. diff --git a/docs/en/operations/settings/settings-profiles.md b/docs/en/operations/settings/settings-profiles.md index 4527152583f..2f39a75453c 100644 --- a/docs/en/operations/settings/settings-profiles.md +++ b/docs/en/operations/settings/settings-profiles.md @@ -9,7 +9,7 @@ sidebar_label: Settings Profiles A settings profile is a collection of settings grouped under the same name. :::note -ClickHouse also supports [SQL-driven workflow](../../operations/access-rights.md#access-control) for managing settings profiles. We recommend using it. +ClickHouse also supports [SQL-driven workflow](../../guides/sre/user-management/index.md#access-control) for managing settings profiles. We recommend using it. ::: The profile can have any name. You can specify the same profile for different users. The most important thing you can write in the settings profile is `readonly=1`, which ensures read-only access. diff --git a/docs/en/operations/settings/settings-users.md b/docs/en/operations/settings/settings-users.md index b55d64fc4f7..1f41eafd02e 100644 --- a/docs/en/operations/settings/settings-users.md +++ b/docs/en/operations/settings/settings-users.md @@ -9,7 +9,7 @@ sidebar_label: User Settings The `users` section of the `user.xml` configuration file contains user settings. :::note -ClickHouse also supports [SQL-driven workflow](../../operations/access-rights.md#access-control) for managing users. We recommend using it. +ClickHouse also supports [SQL-driven workflow](../../guides/sre/user-management/index.md#access-control) for managing users. We recommend using it. ::: Structure of the `users` section: @@ -38,6 +38,10 @@ + <grants> + <query>GRANT SELECT ON system.*</query> + </grants> + @@ -47,13 +51,13 @@ Password can be specified in plaintext or in SHA256 (hex format). -- To assign a password in plaintext (**not recommended**), place it in a `password` element. +- To assign a password in plaintext (**not recommended**), place it in a `password` element. For example, `qwerty`. The password can be left blank.
-- To assign a password using its SHA256 hash, place it in a `password_sha256_hex` element. +- To assign a password using its SHA256 hash, place it in a `password_sha256_hex` element. For example, `65e84be33532fb784c48129675f9eff3a682b27168c0ea744b2cf58ee02337c5`. @@ -65,7 +69,7 @@ Password can be specified in plaintext or in SHA256 (hex format). -- For compatibility with MySQL clients, password can be specified in double SHA1 hash. Place it in `password_double_sha1_hex` element. +- For compatibility with MySQL clients, a password can be specified as a double SHA1 hash. Place it in a `password_double_sha1_hex` element. For example, `08b4a0f1de6ad37da17359e592c8d74788a83eb0`. @@ -77,32 +81,54 @@ Password can be specified in plaintext or in SHA256 (hex format). ### access_management {#access_management-user-setting} -This setting enables or disables using of SQL-driven [access control and account management](../../operations/access-rights.md#access-control) for the user. +This setting enables or disables the use of SQL-driven [access control and account management](../../guides/sre/user-management/index.md#access-control) for the user. Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 0. +### grants {#grants-user-setting} + +This setting allows granting any rights to the selected user. +Each element of the list should be a `GRANT` query without any grantees specified. + +Example: + +```xml +<user1> + <grants> + <query>GRANT SHOW ON *.*</query> + <query>GRANT CREATE ON *.* WITH GRANT OPTION</query> + <query>GRANT SELECT ON system.*</query> + </grants> +</user1> +``` + +This setting can't be specified together with the +`dictionaries`, `access_management`, `named_collection_control`, `show_named_collections_secrets`, +and `allow_databases` settings. + + ### user_name/networks {#user-namenetworks} List of networks from which the user can connect to the ClickHouse server. Each element of the list can have one of the following forms: -- `<ip>` — IP address or network mask. +- `<ip>` — IP address or network mask. Examples: `213.180.204.3`, `10.0.0.1/8`, `10.0.0.1/255.255.255.0`, `2a02:6b8::3`, `2a02:6b8::3/64`, `2a02:6b8::3/ffff:ffff:ffff:ffff::`. -- `<host>` — Hostname. +- `<host>` — Hostname. Example: `example01.host.ru`. To check access, a DNS query is performed, and all returned IP addresses are compared to the peer address. -- `<host_regexp>` — Regular expression for hostnames. +- `<host_regexp>` — Regular expression for hostnames. Example, `^example\d\d-\d\d-\d\.host\.ru$` @@ -118,7 +144,7 @@ To open access for user from any network, specify: <ip>::/0</ip> ``` -:::warning +:::note It’s insecure to open access from any network unless you have a firewall properly configured or the server is not directly connected to Internet. ::: diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index f960d2df98e..4f3b4e43358 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -40,6 +40,39 @@ SETTINGS additional_table_filters = (('table_1', 'x != 2')) └───┴──────┘ ``` +## additional_result_filter + +An additional filter expression to apply to the result of a `SELECT` query. +This setting is not applied to any subquery. + +Default value: `''`.
+ +**Example** + +``` sql +insert into table_1 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); +select * from table_1; +``` +```response +┌─x─┬─y────┐ +│ 1 │ a │ +│ 2 │ bb │ +│ 3 │ ccc │ +│ 4 │ dddd │ +└───┴──────┘ +``` +```sql +SELECT * +FROM table_1 +SETTINGS additional_result_filter = 'x != 2' +``` +```response +┌─x─┬─y────┐ +│ 1 │ a │ +│ 3 │ ccc │ +│ 4 │ dddd │ +└───┴──────┘ +``` + ## allow_nondeterministic_mutations {#allow_nondeterministic_mutations} User-level setting that allows mutations on replicated tables to make use of non-deterministic functions such as `dictGet`. @@ -71,17 +104,17 @@ ClickHouse applies this setting when the query contains the product of distribut Restrictions: -- Only applied for IN and JOIN subqueries. -- Only if the FROM section uses a distributed table containing more than one shard. -- If the subquery concerns a distributed table containing more than one shard. -- Not used for a table-valued [remote](../../sql-reference/table-functions/remote.md) function. +- Only applied for IN and JOIN subqueries. +- Only if the FROM section uses a distributed table containing more than one shard. +- If the subquery concerns a distributed table containing more than one shard. +- Not used for a table-valued [remote](../../sql-reference/table-functions/remote.md) function. Possible values: -- `deny` — Default value. Prohibits using these types of subqueries (returns the “Double-distributed in/JOIN subqueries is denied” exception). -- `local` — Replaces the database and table in the subquery with local ones for the destination server (shard), leaving the normal `IN`/`JOIN.` -- `global` — Replaces the `IN`/`JOIN` query with `GLOBAL IN`/`GLOBAL JOIN.` -- `allow` — Allows the use of these types of subqueries. +- `deny` — Default value. Prohibits using these types of subqueries (returns the “Double-distributed in/JOIN subqueries is denied” exception). +- `local` — Replaces the database and table in the subquery with local ones for the destination server (shard), leaving the normal `IN`/`JOIN`. +- `global` — Replaces the `IN`/`JOIN` query with `GLOBAL IN`/`GLOBAL JOIN`. +- `allow` — Allows the use of these types of subqueries. ## prefer_global_in_and_join {#prefer-global-in-and-join} @@ -89,8 +122,8 @@ Enables the replacement of `IN`/`JOIN` operators with `GLOBAL IN`/`GLOBAL JOIN`. Possible values: -- 0 — Disabled. `IN`/`JOIN` operators are not replaced with `GLOBAL IN`/`GLOBAL JOIN`. -- 1 — Enabled. `IN`/`JOIN` operators are replaced with `GLOBAL IN`/`GLOBAL JOIN`. +- 0 — Disabled. `IN`/`JOIN` operators are not replaced with `GLOBAL IN`/`GLOBAL JOIN`. +- 1 — Enabled. `IN`/`JOIN` operators are replaced with `GLOBAL IN`/`GLOBAL JOIN`. Default value: `0`. @@ -104,7 +137,7 @@ Another use case of `prefer_global_in_and_join` is accessing tables created by **See also:** -- [Distributed subqueries](../../sql-reference/operators/in.md/#select-distributed-subqueries) for more information on how to use `GLOBAL IN`/`GLOBAL JOIN` +- [Distributed subqueries](../../sql-reference/operators/in.md/#select-distributed-subqueries) for more information on how to use `GLOBAL IN`/`GLOBAL JOIN` ## enable_optimize_predicate_expression {#enable-optimize-predicate-expression} @@ -114,8 +147,8 @@ Predicate pushdown may significantly reduce network traffic for distributed quer Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 1. @@ -162,8 +195,8 @@ Use data skipping indexes during query execution. Possible values: -- 0 — Disabled.
+- 1 — Enabled. Default value: 1. @@ -277,14 +310,14 @@ Sets the safety threshold for data volume generated by function [range](../../sq Possible values: -- Positive integer. +- Positive integer. Default value: `500,000,000`. **See Also** -- [max_block_size](#setting-max_block_size) -- [min_insert_block_size_rows](#min-insert-block-size-rows) +- [max_block_size](#setting-max_block_size) +- [min_insert_block_size_rows](#min-insert-block-size-rows) ## enable_http_compression {#settings-enable_http_compression} @@ -294,8 +327,8 @@ For more information, read the [HTTP interface description](../../interfaces/htt Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 0. @@ -315,8 +348,8 @@ For more information, read the [HTTP interface description](../../interfaces/htt Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 0. @@ -326,7 +359,7 @@ Sets the maximum URI length of an HTTP request. Possible values: -- Positive integer. +- Positive integer. Default value: 1048576. @@ -336,7 +369,7 @@ Sets the maximum number of addresses generated from patterns for the [remote](.. Possible values: -- Positive integer. +- Positive integer. Default value: `1000`. @@ -346,7 +379,7 @@ Sets the maximum number of addresses generated from patterns for external storag Possible values: -- Positive integer. +- Positive integer. Default value: `1000`. @@ -358,8 +391,8 @@ For more information, read the [HTTP interface description](../../interfaces/htt Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 0. @@ -369,8 +402,8 @@ Limits the maximum number of HTTP GET redirect hops for [URL](../../engines/tabl Possible values: -- Any positive integer number of hops. -- 0 — No hops allowed. +- Any positive integer number of hops. +- 0 — No hops allowed. Default value: 0. @@ -383,8 +416,8 @@ This setting is applicable to [INSERT ... SELECT](../../sql-reference/statements Possible values: -- 0 — Inserting `NULL` into a not nullable column causes an exception. -- 1 — Default column value is inserted instead of `NULL`. +- 0 — Inserting `NULL` into a not nullable column causes an exception. +- 1 — Default column value is inserted instead of `NULL`. Default value: `1`. @@ -394,10 +427,10 @@ Sets default strictness for [JOIN clauses](../../sql-reference/statements/select Possible values: -- `ALL` — If the right table has several matching rows, ClickHouse creates a [Cartesian product](https://en.wikipedia.org/wiki/Cartesian_product) from matching rows. This is the normal `JOIN` behaviour from standard SQL. -- `ANY` — If the right table has several matching rows, only the first one found is joined. If the right table has only one matching row, the results of `ANY` and `ALL` are the same. -- `ASOF` — For joining sequences with an uncertain match. -- `Empty string` — If `ALL` or `ANY` is not specified in the query, ClickHouse throws an exception. +- `ALL` — If the right table has several matching rows, ClickHouse creates a [Cartesian product](https://en.wikipedia.org/wiki/Cartesian_product) from matching rows. This is the normal `JOIN` behaviour from standard SQL. +- `ANY` — If the right table has several matching rows, only the first one found is joined. If the right table has only one matching row, the results of `ANY` and `ALL` are the same. +- `ASOF` — For joining sequences with an uncertain match. +- `Empty string` — If `ALL` or `ANY` is not specified in the query, ClickHouse throws an exception. 
Default value: `ALL`. @@ -419,6 +452,8 @@ Possible values: The first phase of a grace join reads the right table and splits it into N buckets depending on the hash value of key columns (initially, N is `grace_hash_join_initial_buckets`). This is done in a way to ensure that each bucket can be processed independently. Rows from the first bucket are added to an in-memory hash table while the others are saved to disk. If the hash table grows beyond the memory limit (e.g., as set by [`max_bytes_in_join`](/docs/en/operations/settings/query-complexity.md/#settings-max_bytes_in_join)), the number of buckets is increased and the assigned bucket for each row. Any rows which don’t belong to the current bucket are flushed and reassigned. + Supports `INNER/LEFT/RIGHT/FULL ALL/ANY JOIN`. + - hash [Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN ON` section. @@ -460,22 +495,22 @@ Possible values: Changes the behaviour of join operations with `ANY` strictness. -:::warning +:::note This setting applies only for `JOIN` operations with [Join](../../engines/table-engines/special/join.md) engine tables. ::: Possible values: -- 0 — If the right table has more than one matching row, only the first one found is joined. -- 1 — If the right table has more than one matching row, only the last one found is joined. +- 0 — If the right table has more than one matching row, only the first one found is joined. +- 1 — If the right table has more than one matching row, only the last one found is joined. Default value: 0. See also: -- [JOIN clause](../../sql-reference/statements/select/join.md/#select-join) -- [Join table engine](../../engines/table-engines/special/join.md) -- [join_default_strictness](#settings-join_default_strictness) +- [JOIN clause](../../sql-reference/statements/select/join.md/#select-join) +- [Join table engine](../../engines/table-engines/special/join.md) +- [join_default_strictness](#settings-join_default_strictness) ## join_use_nulls {#join_use_nulls} @@ -483,8 +518,8 @@ Sets the type of [JOIN](../../sql-reference/statements/select/join.md) behaviour Possible values: -- 0 — The empty cells are filled with the default value of the corresponding field type. -- 1 — `JOIN` behaves the same way as in standard SQL. The type of the corresponding field is converted to [Nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable), and empty cells are filled with [NULL](../../sql-reference/syntax.md). +- 0 — The empty cells are filled with the default value of the corresponding field type. +- 1 — `JOIN` behaves the same way as in standard SQL. The type of the corresponding field is converted to [Nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable), and empty cells are filled with [NULL](../../sql-reference/syntax.md). Default value: 0. @@ -496,14 +531,14 @@ Columns for these keys are filled with either default value or `NULL` in corresp Possible values: -- 0 — The default value for the aggregation key type is used to produce missing values. -- 1 — ClickHouse executes `GROUP BY` the same way as the SQL standard says. The types of aggregation keys are converted to [Nullable](/docs/en/sql-reference/data-types/nullable.md/#data_type-nullable). Columns for corresponding aggregation keys are filled with [NULL](/docs/en/sql-reference/syntax.md) for rows that didn't use it. 
+- 0 — The default value for the aggregation key type is used to produce missing values. +- 1 — ClickHouse executes `GROUP BY` the same way as the SQL standard says. The types of aggregation keys are converted to [Nullable](/docs/en/sql-reference/data-types/nullable.md/#data_type-nullable). Columns for corresponding aggregation keys are filled with [NULL](/docs/en/sql-reference/syntax.md) for rows that didn't use it. Default value: 0. See also: -- [GROUP BY clause](/docs/en/sql-reference/statements/select/group-by.md) +- [GROUP BY clause](/docs/en/sql-reference/statements/select/group-by.md) ## partial_merge_join_optimizations {#partial_merge_join_optimizations} @@ -513,8 +548,8 @@ By default, this setting enables improvements that could lead to wrong results. Possible values: -- 0 — Optimizations disabled. -- 1 — Optimizations enabled. +- 0 — Optimizations disabled. +- 1 — Optimizations enabled. Default value: 1. @@ -530,7 +565,7 @@ ClickHouse server: Possible values: -- Any positive integer. Recommended range of values: \[1000, 100000\]. +- Any positive integer. Recommended range of values: \[1000, 100000\]. Default value: 65536. @@ -542,7 +577,7 @@ The bigger the value of the setting, the more RAM is used and the less disk I/O Possible values: -- Any positive integer, starting from 2. +- Any positive integer, starting from 2. Default value: 64. @@ -550,30 +585,41 @@ Default value: 64. Enables legacy ClickHouse server behaviour in `ANY INNER|LEFT JOIN` operations. -:::warning +:::note Use this setting only for backward compatibility if your use cases depend on legacy `JOIN` behaviour. ::: When the legacy behaviour is enabled: -- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are not equal because ClickHouse uses the logic with many-to-one left-to-right table keys mapping. -- Results of `ANY INNER JOIN` operations contain all rows from the left table like the `SEMI LEFT JOIN` operations do. +- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are not equal because ClickHouse uses the logic with many-to-one left-to-right table keys mapping. +- Results of `ANY INNER JOIN` operations contain all rows from the left table like the `SEMI LEFT JOIN` operations do. When the legacy behaviour is disabled: -- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are equal because ClickHouse uses the logic which provides one-to-many keys mapping in `ANY RIGHT JOIN` operations. -- Results of `ANY INNER JOIN` operations contain one row per key from both the left and right tables. +- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are equal because ClickHouse uses the logic which provides one-to-many keys mapping in `ANY RIGHT JOIN` operations. +- Results of `ANY INNER JOIN` operations contain one row per key from both the left and right tables. Possible values: -- 0 — Legacy behaviour is disabled. -- 1 — Legacy behaviour is enabled. +- 0 — Legacy behaviour is disabled. +- 1 — Legacy behaviour is enabled. Default value: 0. See also: -- [JOIN strictness](../../sql-reference/statements/select/join.md/#join-settings) + +## max_rows_in_set_to_optimize_join + +Maximum size of the set used to filter joined tables by each other's row sets before joining. + +Possible values: + +- 0 — Disabled. +- Any positive integer. + +Default value: 100000.
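+**Example**
+
+A hedged tuning sketch (`t1` and `t2` are hypothetical tables joined on `key`):
+
+```sql
+-- allow a larger row-set filter before the join, or set 0 to disable the optimization
+SET max_rows_in_set_to_optimize_join = 1000000;
+SELECT count() FROM t1 INNER JOIN t2 ON t1.key = t2.key;
+```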
## temporary_files_codec {#temporary_files_codec} @@ -581,8 +627,8 @@ Sets compression codec for temporary files used in sorting and joining operation Possible values: -- LZ4 — [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) compression is applied. -- NONE — No compression is applied. +- LZ4 — [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) compression is applied. +- NONE — No compression is applied. Default value: LZ4. @@ -606,7 +652,7 @@ If the number of rows to be read from a file of a [MergeTree](../../engines/tabl Possible values: -- Positive integer. +- Positive integer. Default value: `163840`. @@ -616,7 +662,7 @@ The minimum number of lines to read from one file before the [MergeTree](../../e Possible values: -- Positive integer. +- Positive integer. Default value: `163840`. @@ -626,7 +672,7 @@ If the number of bytes to read from one file of a [MergeTree](../../engines/tabl Possible value: -- Positive integer. +- Positive integer. Default value: `251658240`. @@ -636,7 +682,7 @@ The minimum number of bytes to read from one file before [MergeTree](../../engin Possible values: -- Positive integer. +- Positive integer. Default value: `251658240`. @@ -646,7 +692,7 @@ If the distance between two data blocks to be read in one file is less than `mer Possible values: -- Any positive integer. +- Any positive integer. Default value: 0. @@ -656,7 +702,7 @@ If the distance between two data blocks to be read in one file is less than `mer Possible values: -- Any positive integer. +- Any positive integer. Default value: 0. @@ -666,7 +712,7 @@ When searching for data, ClickHouse checks the data marks in the index file. If Possible values: -- Any positive even integer. +- Any positive even integer. Default value: 8. @@ -678,7 +724,7 @@ The cache of uncompressed blocks stores data extracted for queries. ClickHouse u Possible values: -- Any positive integer. +- Any positive integer. Default value: 128 ✕ 8192. @@ -690,7 +736,7 @@ The cache of uncompressed blocks stores data extracted for queries. ClickHouse u Possible values: -- Any positive integer. +- Any positive integer. Default value: 2013265920. @@ -702,8 +748,8 @@ ClickHouse uses this setting when reading data from tables. If the total storage Possible values: -- 0 — Direct I/O is disabled. -- Positive integer. +- 0 — Direct I/O is disabled. +- Positive integer. Default value: 0. @@ -713,14 +759,14 @@ Sets the method of data compression that is used for communication between serve Possible values: -- `LZ4` — sets LZ4 compression method. -- `ZSTD` — sets ZSTD compression method. +- `LZ4` — sets LZ4 compression method. +- `ZSTD` — sets ZSTD compression method. Default value: `LZ4`. **See Also** -- [network_zstd_compression_level](#network_zstd_compression_level) +- [network_zstd_compression_level](#network_zstd_compression_level) ## network_zstd_compression_level {#network_zstd_compression_level} @@ -728,7 +774,7 @@ Adjusts the level of ZSTD compression. Used only when [network_compression_metho Possible values: -- Positive integer from 1 to 15. +- Positive integer from 1 to 15. Default value: `1`. 
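+**Example**
+
+A minimal sketch combining the two settings above:
+
+```sql
+-- trade CPU for bandwidth on client-server and inter-server traffic
+SET network_compression_method = 'ZSTD';
+SET network_zstd_compression_level = 3;
+```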
@@ -756,8 +802,8 @@ Only the queries with the following type will get to the log: - `QUERY_FINISH` - `EXCEPTION_WHILE_PROCESSING` -- Type: milliseconds -- Default value: 0 (any query) +- Type: milliseconds +- Default value: 0 (any query) ## log_queries_min_type {#settings-log-queries-min-type} @@ -785,8 +831,8 @@ Query threads log into the [system.query_thread_log](../../operations/system-tab Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: `1`. @@ -814,8 +860,8 @@ Allows to log formatted queries to the [system.query_log](../../operations/syste Possible values: -- 0 — Formatted queries are not logged in the system table. -- 1 — Formatted queries are logged in the system table. +- 0 — Formatted queries are not logged in the system table. +- 1 — Formatted queries are logged in the system table. Default value: `0`. @@ -827,7 +873,7 @@ It can be used to improve the readability of server logs. Additionally, it helps Possible values: -- Any string no longer than [max_query_size](#settings-max_query_size). If the max_query_size is exceeded, the server throws an exception. +- Any string no longer than [max_query_size](#settings-max_query_size). If the max_query_size is exceeded, the server throws an exception. Default value: empty string. @@ -857,8 +903,8 @@ Write time that processor spent during execution/waiting for data to `system.pro See also: -- [`system.processors_profile_log`](../../operations/system-tables/processors_profile_log.md#system-processors_profile_log) -- [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline) +- [`system.processors_profile_log`](../../operations/system-tables/processors_profile_log.md) +- [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline) ## max_insert_block_size {#settings-max_insert_block_size} @@ -878,8 +924,8 @@ Sets the minimum number of rows in the block that can be inserted into a table b Possible values: -- Positive integer. -- 0 — Squashing disabled. +- Positive integer. +- 0 — Squashing disabled. Default value: 1048576. @@ -889,8 +935,8 @@ Sets the minimum number of bytes in the block which can be inserted into a table Possible values: -- Positive integer. -- 0 — Squashing disabled. +- Positive integer. +- 0 — Squashing disabled. Default value: 268435456. @@ -902,8 +948,8 @@ Sets the time in seconds. If a replica's lag is greater than or equal to the set Possible values: -- Positive integer. -- 0 — Replica lags are not checked. +- Positive integer. +- 0 — Replica lags are not checked. To prevent the use of any replica with a non-zero lag, set this parameter to 1. @@ -930,8 +976,8 @@ The maximum number of threads to execute the `INSERT SELECT` query. Possible values: -- 0 (or 1) — `INSERT SELECT` no parallel execution. -- Positive integer. Bigger than 1. +- 0 (or 1) — `INSERT SELECT` is executed without parallelism. +- Positive integer greater than 1. Default value: 0. @@ -942,7 +988,7 @@ Higher values will lead to higher memory usage. The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). Specifying a smaller block size generally leads to slightly reduced compression ratio, the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced. -:::warning +:::note This is an expert-level setting, and you shouldn't change it if you're just getting started with ClickHouse.
::: @@ -960,16 +1006,16 @@ We are writing a UInt32-type column (4 bytes per value). When writing 8192 rows, We are writing a URL column with the String type (average size of 60 bytes per value). When writing 8192 rows, the average will be slightly less than 500 KB of data. Since this is more than 65,536, a compressed block will be formed for each mark. In this case, when reading data from the disk in the range of a single mark, extra data won’t be decompressed. -:::warning +:::note This is an expert-level setting, and you shouldn't change it if you're just getting started with ClickHouse. ::: ## max_query_size {#settings-max_query_size} -The maximum part of a query that can be taken to RAM for parsing with the SQL parser. -The INSERT query also contains data for INSERT that is processed by a separate stream parser (that consumes O(1) RAM), which is not included in this restriction. +The maximum number of bytes of a query string parsed by the SQL parser. +Data in the VALUES clause of INSERT queries is processed by a separate stream parser (that consumes O(1) RAM) and is not affected by this restriction. -Default value: 256 KiB. +Default value: 262144 (= 256 KiB). ## max_parser_depth {#max_parser_depth} @@ -977,8 +1023,8 @@ Limits maximum recursion depth in the recursive descent parser. Allows controlli Possible values: -- Positive integer. -- 0 — Recursion depth is unlimited. +- Positive integer. +- 0 — Recursion depth is unlimited. Default value: 1000. @@ -988,6 +1034,16 @@ The interval in microseconds for checking whether request execution has been can Default value: 100,000 (checks for cancelling and sends the progress ten times per second). +## idle_connection_timeout {#idle_connection_timeout} + +Timeout to close idle TCP connections after the specified number of seconds. + +Possible values: + +- Positive integer (0 — close connections immediately). + +Default value: 3600. + ## connect_timeout, receive_timeout, send_timeout {#connect-timeout-receive-timeout-send-timeout} Timeouts in seconds on the socket used for communicating with the client. @@ -1028,8 +1084,8 @@ If the value is exceeded, the server throws an exception. Possible values: -- Positive integer. -- 0 — Unlimited depth. +- Positive integer. +- 0 — Unlimited depth. Default value: `5`. @@ -1041,8 +1097,8 @@ The setting isn't followed perfectly accurately. Possible values: -- Positive integer. -- 0 — Unlimited. +- Positive integer. +- 0 — Unlimited. Default value: `0`. @@ -1062,8 +1118,8 @@ The setting isn't followed perfectly accurately. Possible values: -- Positive integer. -- 0 — Unlimited. +- Positive integer. +- 0 — Unlimited. Default value: `0`. @@ -1080,7 +1136,13 @@ Could be used for throttling speed when replicating the data to add or replace n The timeout in milliseconds for connecting to a remote server for a Distributed table engine, if the ‘shard’ and ‘replica’ sections are used in the cluster definition. If unsuccessful, several attempts are made to connect to various replicas. -Default value: 50. +Default value: 1000. + +## connect_timeout_with_failover_secure_ms + +Connection timeout for selecting the first healthy replica (for secure connections). + +Default value: 1000. ## connection_pool_max_wait_ms {#connection-pool-max-wait-ms} @@ -1125,6 +1187,36 @@ Disable limit on kafka_num_consumers that depends on the number of available CPU Default value: false. +## postgresql_connection_pool_size {#postgresql-connection-pool-size} + +Connection pool size for PostgreSQL table engine and database engine.
+ +Default value: 16. + +## postgresql_connection_pool_wait_timeout {#postgresql-connection-pool-wait-timeout} + +Connection pool push/pop timeout on empty pool for PostgreSQL table engine and database engine. By default, it will block on an empty pool. + +Default value: 5000. + +## postgresql_connection_pool_auto_close_connection {#postgresql-connection-pool-auto-close-connection} + +Close connection before returning connection to the pool. + +Default value: true. + +## odbc_bridge_connection_pool_size {#odbc-bridge-connection-pool-size} + +Connection pool size for each connection settings string in ODBC bridge. + +Default value: 16. + +## odbc_bridge_use_connection_pooling {#odbc-bridge-use-connection-pooling} + +Use connection pooling in ODBC bridge. If set to false, a new connection is created every time. + +Default value: true. + ## use_uncompressed_cache {#setting-use_uncompressed_cache} Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 0 (disabled). @@ -1174,15 +1266,15 @@ Specifies the algorithm of replicas selection that is used for distributed query ClickHouse supports the following algorithms of choosing replicas: -- [Random](#load_balancing-random) (by default) -- [Nearest hostname](#load_balancing-nearest_hostname) -- [In order](#load_balancing-in_order) -- [First or random](#load_balancing-first_or_random) -- [Round robin](#load_balancing-round_robin) +- [Random](#load_balancing-random) (by default) +- [Nearest hostname](#load_balancing-nearest_hostname) +- [In order](#load_balancing-in_order) +- [First or random](#load_balancing-first_or_random) +- [Round robin](#load_balancing-round_robin) See also: -- [distributed_replica_max_ignored_errors](#settings-distributed_replica_max_ignored_errors) +- [distributed_replica_max_ignored_errors](#settings-distributed_replica_max_ignored_errors) ### Random (by Default) {#load_balancing-random} @@ -1242,13 +1334,15 @@ Enables/disables preferable using the localhost replica when processing distribu Possible values: -- 1 — ClickHouse always sends a query to the localhost replica if it exists. -- 0 — ClickHouse uses the balancing strategy specified by the [load_balancing](#settings-load_balancing) setting. +- 1 — ClickHouse always sends a query to the localhost replica if it exists. +- 0 — ClickHouse uses the balancing strategy specified by the [load_balancing](#settings-load_balancing) setting. Default value: 1. -:::warning -Disable this setting if you use [max_parallel_replicas](#settings-max_parallel_replicas). +:::note +Disable this setting if you use [max_parallel_replicas](#settings-max_parallel_replicas) without [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key). +If [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) is set, disable this setting only if it's used on a cluster with multiple shards containing multiple replicas. +If it's used on a cluster with a single shard and multiple replicas, disabling this setting will have negative effects. ::: ## totals_mode {#totals-mode} @@ -1267,22 +1361,59 @@ The maximum number of replicas for each shard when executing a query. Possible values: -- Positive integer. +- Positive integer. Default value: `1`. **Additional Info** -This setting is useful for replicated tables with a sampling key. A query may be processed faster if it is executed on several servers in parallel. But the query performance may degrade in the following cases: +This option will produce different results depending on the settings used.
+ +:::note +This setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md/#max_parallel_replica-subqueries) for more details. +::: + +### Parallel processing using `SAMPLE` key + +A query may be processed faster if it is executed on several servers in parallel. But the query performance may degrade in the following cases: - The position of the sampling key in the partitioning key does not allow efficient range scans. - Adding a sampling key to the table makes filtering by other columns less efficient. - The sampling key is an expression that is expensive to calculate. - The cluster latency distribution has a long tail, so that querying more servers increases the query overall latency. -:::warning -This setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md/#max_parallel_replica-subqueries) for more details. -::: +### Parallel processing using [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) + +This setting is useful for any replicated table. + +## parallel_replicas_custom_key {#settings-parallel_replicas_custom_key} + +An arbitrary integer expression that can be used to split work between replicas for a specific table. +The value can be any integer expression. +A query may be processed faster if it is executed on several servers in parallel, but this depends on the [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) +and [parallel_replicas_custom_key_filter_type](#settings-parallel_replicas_custom_key_filter_type) settings used. + +Simple expressions using primary keys are preferred. + +If the setting is used on a cluster that consists of a single shard with multiple replicas, those replicas will be converted into virtual shards. +Otherwise, it will behave the same as for the `SAMPLE` key: it will use multiple replicas of each shard. + +## parallel_replicas_custom_key_filter_type {#settings-parallel_replicas_custom_key_filter_type} + +How to use the `parallel_replicas_custom_key` expression for splitting work between replicas. + +Possible values: + +- `default` — Use the default implementation using modulo operation on the `parallel_replicas_custom_key`. +- `range` — Split the entire value space of the expression into ranges. This type of filtering is useful if values of `parallel_replicas_custom_key` are uniformly spread across the entire integer space, e.g. hash values. + +Default value: `default`. + +## allow_experimental_parallel_reading_from_replicas + +If true, ClickHouse will send a SELECT query to all replicas of a table (up to `max_parallel_replicas`). It will work for any kind of MergeTree table. + +Default value: `false`. ## compile_expressions {#compile-expressions} @@ -1307,14 +1438,14 @@ Enables or disables JIT-compilation of aggregate functions to native code. Enabl Possible values: -- 0 — Aggregation is done without JIT compilation. -- 1 — Aggregation is done using JIT compilation. +- 0 — Aggregation is done without JIT compilation. +- 1 — Aggregation is done using JIT compilation. Default value: `1`.
**See Also** - [min_count_to_compile_aggregate_expression](#min_count_to_compile_aggregate_expression) ## min_count_to_compile_aggregate_expression {#min_count_to_compile_aggregate_expression} @@ -1322,8 +1453,8 @@ The minimum number of identical aggregate expressions to start JIT-compilation. Possible values: -- Positive integer. -- 0 — Identical aggregate expressions are always JIT-compiled. +- Positive integer. +- 0 — Identical aggregate expressions are always JIT-compiled. Default value: `3`. @@ -1334,8 +1465,8 @@ and [enable_writes_to_query_cache](#enable-writes-to-query-cache) control in mor Possible values: -- 0 - Yes -- 1 - No +- 0 - Disabled +- 1 - Enabled Default value: `0`. @@ -1392,6 +1523,28 @@ Possible values: Default value: `0` +## query_cache_compress_entries {#query-cache-compress-entries} + +Compress entries in the [query cache](../query-cache.md). Lessens the memory consumption of the query cache at the cost of slower inserts into / reads from it. + +Possible values: + +- 0 - Disabled +- 1 - Enabled + +Default value: `1` + +## query_cache_squash_partial_results {#query-cache-squash-partial-results} + +Squash partial result blocks to blocks of size [max_block_size](#setting-max_block_size). Reduces performance of inserts into the [query cache](../query-cache.md) but improves the compressibility of cache entries (see [query_cache_compress_entries](#query-cache-compress-entries)). + +Possible values: + +- 0 - Disabled +- 1 - Enabled + +Default value: `1` + ## query_cache_ttl {#query-cache-ttl} After this time in seconds entries in the [query cache](../query-cache.md) become stale. @@ -1414,13 +1567,33 @@ Possible values: Default value: `0`. +## query_cache_max_size_in_bytes {#query-cache-max-size-in-bytes} + +The maximum amount of memory (in bytes) the current user may allocate in the [query cache](../query-cache.md). 0 means unlimited. + +Possible values: + +- Non-negative integer. + +Default value: 0 (no restriction). + +## query_cache_max_entries {#query-cache-max-entries} + +The maximum number of query results the current user may store in the [query cache](../query-cache.md). 0 means unlimited. + +Possible values: + +- Non-negative integer. + +Default value: 0 (no restriction). + ## insert_quorum {#settings-insert_quorum} Enables the quorum writes. -- If `insert_quorum < 2`, the quorum writes are disabled. -- If `insert_quorum >= 2`, the quorum writes are enabled. -- If `insert_quorum = 'auto'`, use majority number (`number_of_replicas / 2 + 1`) as quorum number. +- If `insert_quorum < 2`, the quorum writes are disabled. +- If `insert_quorum >= 2`, the quorum writes are enabled. +- If `insert_quorum = 'auto'`, use the majority number (`number_of_replicas / 2 + 1`) as the quorum number. Default value: 0 - disabled. @@ -1432,14 +1605,14 @@ When `insert_quorum_parallel` is disabled, all replicas in the quorum are consis ClickHouse generates an exception: -- If the number of available replicas at the time of the query is less than the `insert_quorum`.
+- When `insert_quorum_parallel` is disabled and an attempt to write data is made when the previous block has not yet been inserted in `insert_quorum` of replicas. This situation may occur if the user tries to perform another `INSERT` query to the same table before the previous one with `insert_quorum` is completed. See also: -- [insert_quorum_timeout](#settings-insert_quorum_timeout) -- [insert_quorum_parallel](#settings-insert_quorum_parallel) -- [select_sequential_consistency](#settings-select_sequential_consistency) +- [insert_quorum_timeout](#settings-insert_quorum_timeout) +- [insert_quorum_parallel](#settings-insert_quorum_parallel) +- [select_sequential_consistency](#settings-select_sequential_consistency) ## insert_quorum_timeout {#settings-insert_quorum_timeout} @@ -1449,9 +1622,9 @@ Default value: 600 000 milliseconds (ten minutes). See also: -- [insert_quorum](#settings-insert_quorum) -- [insert_quorum_parallel](#settings-insert_quorum_parallel) -- [select_sequential_consistency](#settings-select_sequential_consistency) +- [insert_quorum](#settings-insert_quorum) +- [insert_quorum_parallel](#settings-insert_quorum_parallel) +- [select_sequential_consistency](#settings-select_sequential_consistency) ## insert_quorum_parallel {#settings-insert_quorum_parallel} @@ -1459,16 +1632,16 @@ Enables or disables parallelism for quorum `INSERT` queries. If enabled, additio Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 1. See also: -- [insert_quorum](#settings-insert_quorum) -- [insert_quorum_timeout](#settings-insert_quorum_timeout) -- [select_sequential_consistency](#settings-select_sequential_consistency) +- [insert_quorum](#settings-insert_quorum) +- [insert_quorum_timeout](#settings-insert_quorum_timeout) +- [select_sequential_consistency](#settings-select_sequential_consistency) ## select_sequential_consistency {#settings-select_sequential_consistency} @@ -1476,8 +1649,8 @@ Enables or disables sequential consistency for `SELECT` queries. Requires `inser Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 0. @@ -1489,9 +1662,9 @@ When `insert_quorum_parallel` is enabled (the default), then `select_sequential_ See also: -- [insert_quorum](#settings-insert_quorum) -- [insert_quorum_timeout](#settings-insert_quorum_timeout) -- [insert_quorum_parallel](#settings-insert_quorum_parallel) +- [insert_quorum](#settings-insert_quorum) +- [insert_quorum_timeout](#settings-insert_quorum_timeout) +- [insert_quorum_parallel](#settings-insert_quorum_parallel) ## insert_deduplicate {#settings-insert-deduplicate} @@ -1499,8 +1672,8 @@ Enables or disables block deduplication of `INSERT` (for Replicated\* tables). Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 1. @@ -1509,20 +1682,21 @@ For the replicated tables by default the only 100 of the most recent blocks for For not replicated tables see [non_replicated_deduplication_window](merge-tree-settings.md/#non-replicated-deduplication-window). ## Asynchronous Insert settings + ### async_insert {#async-insert} -Enables or disables asynchronous inserts. This makes sense only for insertion over HTTP protocol. Note that deduplication isn't working for such inserts. +Enables or disables asynchronous inserts. Note that deduplication is disabled by default, see [async_insert_deduplicate](#async-insert-deduplicate). 
If enabled, the data is combined into batches before the insertion into tables, so it is possible to do small and frequent insertions into ClickHouse (up to 15000 queries per second) without buffer tables. -The data is inserted either after the [async_insert_max_data_size](#async-insert-max-data-size) is exceeded or after [async_insert_busy_timeout_ms](#async-insert-busy-timeout-ms) milliseconds since the first `INSERT` query. If the [async_insert_stale_timeout_ms](#async-insert-stale-timeout-ms) is set to a non-zero value, the data is inserted after `async_insert_stale_timeout_ms` milliseconds since the last query. +The data is inserted either after the [async_insert_max_data_size](#async-insert-max-data-size) is exceeded or after [async_insert_busy_timeout_ms](#async-insert-busy-timeout-ms) milliseconds since the first `INSERT` query. If the [async_insert_stale_timeout_ms](#async-insert-stale-timeout-ms) is set to a non-zero value, the data is inserted after `async_insert_stale_timeout_ms` milliseconds since the last query. Also the buffer will be flushed to disk if at least [async_insert_max_query_number](#async-insert-max-query-number) async insert queries per block were received. This last setting takes effect only if [async_insert_deduplicate](#async-insert-deduplicate) is enabled. If [wait_for_async_insert](#wait-for-async-insert) is enabled, every client will wait for the data to be processed and flushed to the table. Otherwise, the query would be processed almost instantly, even if the data is not inserted. Possible values: -- 0 — Insertions are made synchronously, one after another. -- 1 — Multiple asynchronous insertions enabled. +- 0 — Insertions are made synchronously, one after another. +- 1 — Multiple asynchronous insertions enabled. Default value: `0`. @@ -1532,8 +1706,8 @@ The maximum number of threads for background data parsing and insertion. Possible values: -- Positive integer. -- 0 — Asynchronous insertions are disabled. +- Positive integer. +- 0 — Asynchronous insertions are disabled. Default value: `16`. @@ -1543,8 +1717,8 @@ Enables or disables waiting for processing of asynchronous insertion. If enabled Possible values: -- 0 — Server returns `OK` even if the data is not yet inserted. -- 1 — Server returns `OK` only after the data is inserted. +- 0 — Server returns `OK` even if the data is not yet inserted. +- 1 — Server returns `OK` only after the data is inserted. Default value: `1`. @@ -1554,8 +1728,8 @@ The timeout in seconds for waiting for processing of asynchronous insertion. Possible values: -- Positive integer. -- 0 — Disabled. +- Positive integer. +- 0 — Disabled. Default value: [lock_acquire_timeout](#lock_acquire_timeout). @@ -1565,19 +1739,19 @@ The maximum size of the unparsed data in bytes collected per query before being Possible values: -- Positive integer. -- 0 — Asynchronous insertions are disabled. +- Positive integer. +- 0 — Asynchronous insertions are disabled. Default value: `100000`. ### async_insert_max_query_number {#async-insert-max-query-number} -The maximum number of insert queries per block before being inserted. This setting takes effect only if [async_insert_deduplicate](#settings-async-insert-deduplicate) is enabled. +The maximum number of insert queries per block before being inserted. This setting takes effect only if [async_insert_deduplicate](#async-insert-deduplicate) is enabled. Possible values: -- Positive integer. -- 0 — Asynchronous insertions are disabled. +- Positive integer. +- 0 — Asynchronous insertions are disabled. 
Default value: `450`. @@ -1587,8 +1761,8 @@ The maximum timeout in milliseconds since the first `INSERT` query before insert Possible values: -- Positive integer. -- 0 — Timeout disabled. +- Positive integer. +- 0 — Timeout disabled. Default value: `200`. @@ -1598,22 +1772,23 @@ The maximum timeout in milliseconds since the last `INSERT` query before dumping Possible values: -- Positive integer. -- 0 — Timeout disabled. +- Positive integer. +- 0 — Timeout disabled. Default value: `0`. -### async_insert_deduplicate {#settings-async-insert-deduplicate} + +### async_insert_deduplicate {#async-insert-deduplicate} Enables or disables insert deduplication of `ASYNC INSERT` (for Replicated\* tables). Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 1. -By default, async inserts are inserted into replicated tables by the `INSERT` statement enabling [async_isnert](#async-insert) are deduplicated (see [Data Replication](../../engines/table-engines/mergetree-family/replication.md)). +By default, async inserts made with an `INSERT` statement that enables [async_insert](#async-insert) are deduplicated in replicated tables (see [Data Replication](../../engines/table-engines/mergetree-family/replication.md)). For the replicated tables, by default, only 10000 of the most recent inserts for each partition are deduplicated (see [replicated_deduplication_window_for_async_inserts](merge-tree-settings.md/#replicated-deduplication-window-async-inserts), [replicated_deduplication_window_seconds_for_async_inserts](merge-tree-settings.md/#replicated-deduplication-window-seconds-async-inserts)). We recommend enabling the [async_block_ids_cache](merge-tree-settings.md/#use-async-block-ids-cache) to increase the efficiency of deduplication. This feature does not work for non-replicated tables. @@ -1644,7 +1819,7 @@ user can avoid the same inserted data being deduplicated. Possible values: -- Any string +- Any string Default value: empty string (disabled) @@ -1687,8 +1862,8 @@ The setting sets the maximum number of retries for ClickHouse Keeper (or ZooKeep Possible values: -- Positive integer. -- 0 — Retries are disabled +- Positive integer. +- 0 — Retries are disabled Default value: 0 @@ -1708,8 +1883,8 @@ Initial timeout(in milliseconds) to retry a failed Keeper request during INSERT Possible values: -- Positive integer. -- 0 — No timeout +- Positive integer. +- 0 — No timeout Default value: 100 @@ -1719,8 +1894,8 @@ Maximum timeout (in milliseconds) to retry a failed Keeper request during INSERT Possible values: -- Positive integer. -- 0 — Maximum timeout is not limited +- Positive integer. +- 0 — Maximum timeout is not limited Default value: 10000 @@ -1730,8 +1905,8 @@ Limits the data volume (in bytes) that is received or transmitted over the netwo Possible values: -- Positive integer. -- 0 — Data volume control is disabled. +- Positive integer. +- 0 — Data volume control is disabled. Default value: 0. @@ -1741,8 +1916,8 @@ Limits the speed of the data exchange over the network in bytes per second. This Possible values: -- Positive integer. -- 0 — Bandwidth control is disabled. +- Positive integer. +- 0 — Bandwidth control is disabled. Default value: 0. @@ -1752,8 +1927,8 @@ Limits the speed of the data exchange over the network in bytes per second. This Possible values: -- Positive integer. -- 0 — Control of the data speed is disabled. +- Positive integer. +- 0 — Control of the data speed is disabled. Default value: 0.
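+
+An illustrative sketch, assuming the per-query limits described above are `max_network_bytes` and `max_network_bandwidth` (`remote_table` is a hypothetical table):
+
+```sql
+-- Cap this query at roughly 10 MB/s and abort it if it moves more than ~1 GB.
+SELECT count() FROM remote_table
+SETTINGS max_network_bandwidth = 10000000, max_network_bytes = 1000000000;
+```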
@@ -1763,8 +1938,8 @@ Limits the speed that data is exchanged at over the network in bytes per second. Possible values: -- Positive integer. -- 0 — Control of the data speed is disabled. +- Positive integer. +- 0 — Control of the data speed is disabled. Default value: 0. @@ -1774,11 +1949,11 @@ Specifies which of the `uniq*` functions should be used to perform the [COUNT(DI Possible values: -- [uniq](../../sql-reference/aggregate-functions/reference/uniq.md/#agg_function-uniq) -- [uniqCombined](../../sql-reference/aggregate-functions/reference/uniqcombined.md/#agg_function-uniqcombined) -- [uniqCombined64](../../sql-reference/aggregate-functions/reference/uniqcombined64.md/#agg_function-uniqcombined64) -- [uniqHLL12](../../sql-reference/aggregate-functions/reference/uniqhll12.md/#agg_function-uniqhll12) -- [uniqExact](../../sql-reference/aggregate-functions/reference/uniqexact.md/#agg_function-uniqexact) +- [uniq](../../sql-reference/aggregate-functions/reference/uniq.md/#agg_function-uniq) +- [uniqCombined](../../sql-reference/aggregate-functions/reference/uniqcombined.md/#agg_function-uniqcombined) +- [uniqCombined64](../../sql-reference/aggregate-functions/reference/uniqcombined64.md/#agg_function-uniqcombined64) +- [uniqHLL12](../../sql-reference/aggregate-functions/reference/uniqhll12.md/#agg_function-uniqhll12) +- [uniqExact](../../sql-reference/aggregate-functions/reference/uniqexact.md/#agg_function-uniqexact) Default value: `uniqExact`. @@ -1788,25 +1963,25 @@ Enables or disables silently skipping of unavailable shards. Shard is considered unavailable if all its replicas are unavailable. A replica is unavailable in the following cases: -- ClickHouse can’t connect to replica for any reason. +- ClickHouse can’t connect to replica for any reason. When connecting to a replica, ClickHouse performs several attempts. If all these attempts fail, the replica is considered unavailable. -- Replica can’t be resolved through DNS. +- Replica can’t be resolved through DNS. If replica’s hostname can’t be resolved through DNS, it can indicate the following situations: - - Replica’s host has no DNS record. It can occur in systems with dynamic DNS, for example, [Kubernetes](https://kubernetes.io), where nodes can be unresolvable during downtime, and this is not an error. + - Replica’s host has no DNS record. It can occur in systems with dynamic DNS, for example, [Kubernetes](https://kubernetes.io), where nodes can be unresolvable during downtime, and this is not an error. - - Configuration error. ClickHouse configuration file contains a wrong hostname. + - Configuration error. ClickHouse configuration file contains a wrong hostname. Possible values: -- 1 — skipping enabled. +- 1 — skipping enabled. If a shard is unavailable, ClickHouse returns a result based on partial data and does not report node availability issues. -- 0 — skipping disabled. +- 0 — skipping disabled. If a shard is unavailable, ClickHouse throws an exception. @@ -1818,9 +1993,9 @@ Do not merge aggregation states from different servers for distributed query pro Possible values: -- `0` — Disabled (final query processing is done on the initiator node). -- `1` - Do not merge aggregation states from different servers for distributed query processing (query completelly processed on the shard, initiator only proxy the data), can be used in case it is for certain that there are different keys on different shards. 
-- `2` - Same as `1` but applies `ORDER BY` and `LIMIT` (it is not possible when the query processed completelly on the remote node, like for `distributed_group_by_no_merge=1`) on the initiator (can be used for queries with `ORDER BY` and/or `LIMIT`). +- `0` — Disabled (final query processing is done on the initiator node). +- `1` - Do not merge aggregation states from different servers for distributed query processing (the query is processed completely on the shard and the initiator only proxies the data); can be used when it is certain that there are different keys on different shards. +- `2` - Same as `1` but applies `ORDER BY` and `LIMIT` (this is not possible when the query is processed completely on the remote node, as with `distributed_group_by_no_merge=1`) on the initiator (can be used for queries with `ORDER BY` and/or `LIMIT`). Default value: `0` @@ -1858,28 +2033,28 @@ FORMAT PrettyCompactMonoBlock Enables or disables applying [LIMIT](#limit) on each shard separately. This allows avoiding: -- Sending extra rows over network; -- Processing rows behind the limit on the initiator. +- Sending extra rows over network; +- Processing rows behind the limit on the initiator. Starting from version 21.9 you cannot get inaccurate results anymore, since `distributed_push_down_limit` changes query execution only if at least one of the following conditions is met: -- [distributed_group_by_no_merge](#distributed-group-by-no-merge) > 0. -- Query **does not have** `GROUP BY`/`DISTINCT`/`LIMIT BY`, but it has `ORDER BY`/`LIMIT`. -- Query **has** `GROUP BY`/`DISTINCT`/`LIMIT BY` with `ORDER BY`/`LIMIT` and: - - [optimize_skip_unused_shards](#optimize-skip-unused-shards) is enabled. - - [optimize_distributed_group_by_sharding_key](#optimize-distributed-group-by-sharding-key) is enabled. +- [distributed_group_by_no_merge](#distributed-group-by-no-merge) > 0. +- Query **does not have** `GROUP BY`/`DISTINCT`/`LIMIT BY`, but it has `ORDER BY`/`LIMIT`. +- Query **has** `GROUP BY`/`DISTINCT`/`LIMIT BY` with `ORDER BY`/`LIMIT` and: + - [optimize_skip_unused_shards](#optimize-skip-unused-shards) is enabled. + - [optimize_distributed_group_by_sharding_key](#optimize-distributed-group-by-sharding-key) is enabled. Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: `1`. See also: -- [distributed_group_by_no_merge](#distributed-group-by-no-merge) -- [optimize_skip_unused_shards](#optimize-skip-unused-shards) -- [optimize_distributed_group_by_sharding_key](#optimize-distributed-group-by-sharding-key) +- [distributed_group_by_no_merge](#distributed-group-by-no-merge) +- [optimize_skip_unused_shards](#optimize-skip-unused-shards) +- [optimize_distributed_group_by_sharding_key](#optimize-distributed-group-by-sharding-key) ## optimize_skip_unused_shards_limit {#optimize-skip-unused-shards-limit} @@ -1895,8 +2070,8 @@ Enables or disables skipping of unused shards for [SELECT](../../sql-reference/s Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 0 @@ -1906,8 +2081,8 @@ Rewrite IN in query for remote shards to exclude values that does not belong to Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 1 (since it requires `optimize_skip_unused_shards` anyway, which is `0` by default) @@ -1917,8 +2092,8 @@ Allow nondeterministic (like `rand` or `dictGet`, since later has some caveats w Possible values: -- 0 — Disallowed. -- 1 — Allowed. +- 0 — Disallowed. +- 1 — Allowed.
Default value: 0 @@ -1928,9 +2103,9 @@ Controls [`optimize_skip_unused_shards`](#optimize-skip-unused-shards) (hence st Possible values: -- 0 — Disabled, `optimize_skip_unused_shards` works always. -- 1 — Enables `optimize_skip_unused_shards` only for the first level. -- 2 — Enables `optimize_skip_unused_shards` up to the second level. +- 0 — Disabled, `optimize_skip_unused_shards` works always. +- 1 — Enables `optimize_skip_unused_shards` only for the first level. +- 2 — Enables `optimize_skip_unused_shards` up to the second level. Default value: 0 @@ -1940,9 +2115,9 @@ Enables or disables query execution if [optimize_skip_unused_shards](#optimize-s Possible values: -- 0 — Disabled. ClickHouse does not throw an exception. -- 1 — Enabled. Query execution is disabled only if the table has a sharding key. -- 2 — Enabled. Query execution is disabled regardless of whether a sharding key is defined for the table. +- 0 — Disabled. ClickHouse does not throw an exception. +- 1 — Enabled. Query execution is disabled only if the table has a sharding key. +- 2 — Enabled. Query execution is disabled regardless of whether a sharding key is defined for the table. Default value: 0 @@ -1952,9 +2127,9 @@ Controls [`force_optimize_skip_unused_shards`](#force-optimize-skip-unused-shard Possible values: -- 0 - Disabled, `force_optimize_skip_unused_shards` works always. -- 1 — Enables `force_optimize_skip_unused_shards` only for the first level. -- 2 — Enables `force_optimize_skip_unused_shards` up to the second level. +- 0 - Disabled, `force_optimize_skip_unused_shards` works always. +- 1 — Enables `force_optimize_skip_unused_shards` only for the first level. +- 2 — Enables `force_optimize_skip_unused_shards` up to the second level. Default value: 0 @@ -1979,16 +2154,16 @@ The following types of queries are not supported (support for some of them may b Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 0 See also: -- [distributed_group_by_no_merge](#distributed-group-by-no-merge) -- [distributed_push_down_limit](#distributed-push-down-limit) -- [optimize_skip_unused_shards](#optimize-skip-unused-shards) +- [distributed_group_by_no_merge](#distributed-group-by-no-merge) +- [distributed_push_down_limit](#distributed-push-down-limit) +- [optimize_skip_unused_shards](#optimize-skip-unused-shards) :::note Right now it requires `optimize_skip_unused_shards` (the reason behind this is that one day it may be enabled by default, and it will work correctly only if data was inserted via Distributed table, i.e. data is distributed according to sharding_key). @@ -2002,8 +2177,8 @@ By default, `OPTIMIZE` returns successfully even if it didn’t do anything. Thi Possible values: -- 1 — Throwing an exception is enabled. -- 0 — Throwing an exception is disabled. +- 1 — Throwing an exception is enabled. +- 0 — Throwing an exception is disabled. Default value: 0. @@ -2017,8 +2192,8 @@ By default, `OPTIMIZE TABLE ... FINAL` query rewrites the one part even if there Possible values: -- 1 - Enable optimization. -- 0 - Disable optimization. +- 1 - Enable optimization. +- 0 - Disable optimization. Default value: 0. @@ -2028,19 +2203,19 @@ Enables or disables optimization by transforming some functions to reading subco These functions can be transformed: -- [length](../../sql-reference/functions/array-functions.md/#array_functions-length) to read the [size0](../../sql-reference/data-types/array.md/#array-size) subcolumn. 
-- [empty](../../sql-reference/functions/array-functions.md/#function-empty) to read the [size0](../../sql-reference/data-types/array.md/#array-size) subcolumn. -- [notEmpty](../../sql-reference/functions/array-functions.md/#function-notempty) to read the [size0](../../sql-reference/data-types/array.md/#array-size) subcolumn. -- [isNull](../../sql-reference/operators/index.md#operator-is-null) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn. -- [isNotNull](../../sql-reference/operators/index.md#is-not-null) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn. -- [count](../../sql-reference/aggregate-functions/reference/count.md) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn. -- [mapKeys](../../sql-reference/functions/tuple-map-functions.md/#mapkeys) to read the [keys](../../sql-reference/data-types/map.md/#map-subcolumns) subcolumn. -- [mapValues](../../sql-reference/functions/tuple-map-functions.md/#mapvalues) to read the [values](../../sql-reference/data-types/map.md/#map-subcolumns) subcolumn. +- [length](../../sql-reference/functions/array-functions.md/#array_functions-length) to read the [size0](../../sql-reference/data-types/array.md/#array-size) subcolumn. +- [empty](../../sql-reference/functions/array-functions.md/#function-empty) to read the [size0](../../sql-reference/data-types/array.md/#array-size) subcolumn. +- [notEmpty](../../sql-reference/functions/array-functions.md/#function-notempty) to read the [size0](../../sql-reference/data-types/array.md/#array-size) subcolumn. +- [isNull](../../sql-reference/operators/index.md#operator-is-null) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn. +- [isNotNull](../../sql-reference/operators/index.md#is-not-null) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn. +- [count](../../sql-reference/aggregate-functions/reference/count.md) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn. +- [mapKeys](../../sql-reference/functions/tuple-map-functions.md/#mapkeys) to read the [keys](../../sql-reference/data-types/map.md/#map-subcolumns) subcolumn. +- [mapValues](../../sql-reference/functions/tuple-map-functions.md/#mapvalues) to read the [values](../../sql-reference/data-types/map.md/#map-subcolumns) subcolumn. Possible values: -- 0 — Optimization disabled. -- 1 — Optimization enabled. +- 0 — Optimization disabled. +- 1 — Optimization enabled. Default value: `0`. @@ -2057,49 +2232,49 @@ Default value: `1`. See also: -- [optimize_functions_to_subcolumns](#optimize-functions-to-subcolumns) +- [optimize_functions_to_subcolumns](#optimize-functions-to-subcolumns) ## distributed_replica_error_half_life {#settings-distributed_replica_error_half_life} -- Type: seconds -- Default value: 60 seconds +- Type: seconds +- Default value: 60 seconds Controls how fast errors in distributed tables are zeroed. If a replica is unavailable for some time, accumulates 5 errors, and distributed_replica_error_half_life is set to 1 second, then the replica is considered normal 3 seconds after the last error. 
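+
+To make the decay arithmetic concrete, a small illustrative query using the numbers from the example above (5 accumulated errors, a 1-second half-life):
+
+```sql
+-- The effective error count halves every second: 5 -> 2.5 -> 1.25 -> 0.625,
+-- so it drops below 1 after 3 seconds and the replica counts as normal again.
+SELECT seconds, 5 * pow(0.5, seconds) AS effective_error_count
+FROM (SELECT arrayJoin([0, 1, 2, 3]) AS seconds);
+```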
See also: -- [load_balancing](#load_balancing-round_robin) -- [Table engine Distributed](../../engines/table-engines/special/distributed.md) -- [distributed_replica_error_cap](#settings-distributed_replica_error_cap) -- [distributed_replica_max_ignored_errors](#settings-distributed_replica_max_ignored_errors) +- [load_balancing](#load_balancing-round_robin) +- [Table engine Distributed](../../engines/table-engines/special/distributed.md) +- [distributed_replica_error_cap](#settings-distributed_replica_error_cap) +- [distributed_replica_max_ignored_errors](#settings-distributed_replica_max_ignored_errors) ## distributed_replica_error_cap {#settings-distributed_replica_error_cap} -- Type: unsigned int -- Default value: 1000 +- Type: unsigned int +- Default value: 1000 The error count of each replica is capped at this value, preventing a single replica from accumulating too many errors. See also: -- [load_balancing](#load_balancing-round_robin) -- [Table engine Distributed](../../engines/table-engines/special/distributed.md) -- [distributed_replica_error_half_life](#settings-distributed_replica_error_half_life) -- [distributed_replica_max_ignored_errors](#settings-distributed_replica_max_ignored_errors) +- [load_balancing](#load_balancing-round_robin) +- [Table engine Distributed](../../engines/table-engines/special/distributed.md) +- [distributed_replica_error_half_life](#settings-distributed_replica_error_half_life) +- [distributed_replica_max_ignored_errors](#settings-distributed_replica_max_ignored_errors) ## distributed_replica_max_ignored_errors {#settings-distributed_replica_max_ignored_errors} -- Type: unsigned int -- Default value: 0 +- Type: unsigned int +- Default value: 0 The number of errors that will be ignored while choosing replicas (according to `load_balancing` algorithm). See also: -- [load_balancing](#load_balancing-round_robin) -- [Table engine Distributed](../../engines/table-engines/special/distributed.md) -- [distributed_replica_error_cap](#settings-distributed_replica_error_cap) -- [distributed_replica_error_half_life](#settings-distributed_replica_error_half_life) +- [load_balancing](#load_balancing-round_robin) +- [Table engine Distributed](../../engines/table-engines/special/distributed.md) +- [distributed_replica_error_cap](#settings-distributed_replica_error_cap) +- [distributed_replica_error_half_life](#settings-distributed_replica_error_half_life) ## distributed_directory_monitor_sleep_time_ms {#distributed_directory_monitor_sleep_time_ms} @@ -2107,7 +2282,7 @@ Base interval for the [Distributed](../../engines/table-engines/special/distribu Possible values: -- A positive integer number of milliseconds. +- A positive integer number of milliseconds. Default value: 100 milliseconds. @@ -2117,7 +2292,7 @@ Maximum interval for the [Distributed](../../engines/table-engines/special/distr Possible values: -- A positive integer number of milliseconds. +- A positive integer number of milliseconds. Default value: 30000 milliseconds (30 seconds). @@ -2129,8 +2304,8 @@ When batch sending is enabled, the [Distributed](../../engines/table-engines/spe Possible values: -- 1 — Enabled. -- 0 — Disabled. +- 1 — Enabled. +- 0 — Disabled. Default value: 0. @@ -2144,8 +2319,8 @@ So installing this setting to `1` will disable batching for such batches (i.e. t Possible values: -- 1 — Enabled. -- 0 — Disabled. +- 1 — Enabled. +- 0 — Disabled. Default value: 0. @@ -2153,7 +2328,7 @@ Default value: 0. 
This setting also affects broken batches (that may appears because of abnormal server (machine) termination and no `fsync_after_insert`/`fsync_directories` for [Distributed](../../engines/table-engines/special/distributed.md) table engine). ::: -:::warning +:::note You should not rely on automatic batch splitting, since this may hurt performance. ::: @@ -2161,13 +2336,13 @@ You should not rely on automatic batch splitting, since this may hurt performanc Sets the priority ([nice](https://en.wikipedia.org/wiki/Nice_(Unix))) for threads that execute queries. The OS scheduler considers this priority when choosing the next thread to run on each available CPU core. -:::warning +:::note To use this setting, you need to set the `CAP_SYS_NICE` capability. The `clickhouse-server` package sets it up during installation. Some virtual environments do not allow you to set the `CAP_SYS_NICE` capability. In this case, `clickhouse-server` shows a message about it at the start. ::: Possible values: -- You can set values in the range `[-20, 19]`. +- You can set values in the range `[-20, 19]`. Lower values mean higher priority. Threads with low `nice` priority values are executed more frequently than threads with high values. High values are preferable for long-running non-interactive queries because it allows them to quickly give up resources in favour of short interactive queries when they arrive. @@ -2179,14 +2354,14 @@ Sets the period for a real clock timer of the [query profiler](../../operations/ Possible values: -- Positive integer number, in nanoseconds. +- Positive integer number, in nanoseconds. Recommended values: - 10000000 (100 times a second) nanoseconds and less for single queries. - 1000000000 (once a second) for cluster-wide profiling. -- 0 for turning off the timer. +- 0 for turning off the timer. Type: [UInt64](../../sql-reference/data-types/int-uint.md). @@ -2194,7 +2369,7 @@ Default value: 1000000000 nanoseconds (once a second). See also: -- System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log) +- System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log) ## query_profiler_cpu_time_period_ns {#query_profiler_cpu_time_period_ns} @@ -2202,14 +2377,14 @@ Sets the period for a CPU clock timer of the [query profiler](../../operations/o Possible values: -- A positive integer number of nanoseconds. +- A positive integer number of nanoseconds. Recommended values: - 10000000 (100 times a second) nanoseconds and more for single queries. - 1000000000 (once a second) for cluster-wide profiling. -- 0 for turning off the timer. +- 0 for turning off the timer. Type: [UInt64](../../sql-reference/data-types/int-uint.md). @@ -2217,7 +2392,7 @@ Default value: 1000000000 nanoseconds. See also: -- System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log) +- System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log) ## memory_profiler_step {#memory_profiler_step} @@ -2225,9 +2400,9 @@ Sets the step of memory profiler. Whenever query memory usage becomes larger tha Possible values: -- A positive integer number of bytes. +- A positive integer number of bytes. -- 0 for turning off the memory profiler. +- 0 for turning off the memory profiler. Default value: 4,194,304 bytes (4 MiB). @@ -2237,9 +2412,9 @@ Sets the probability of collecting stacktraces at random allocations and dealloc Possible values: -- A positive floating-point number in the range [0..1]. 
+- A positive floating-point number in the range [0..1]. -- 0.0 for turning off the memory sampling. +- 0.0 for turning off the memory sampling. Default value: 0.0. @@ -2249,8 +2424,8 @@ Enables or disables collecting stacktraces on each update of profile events alon Possible values: -- 1 — Tracing of profile events enabled. -- 0 — Tracing of profile events disabled. +- 1 — Tracing of profile events enabled. +- 0 — Tracing of profile events disabled. Default value: 0. @@ -2260,15 +2435,15 @@ Enables or disables [introspections functions](../../sql-reference/functions/int Possible values: -- 1 — Introspection functions enabled. -- 0 — Introspection functions disabled. +- 1 — Introspection functions enabled. +- 0 — Introspection functions disabled. Default value: 0. **See Also** -- [Sampling Query Profiler](../../operations/optimizing-performance/sampling-query-profiler.md) -- System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log) +- [Sampling Query Profiler](../../operations/optimizing-performance/sampling-query-profiler.md) +- System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log) ## input_format_parallel_parsing {#input-format-parallel-parsing} @@ -2276,8 +2451,8 @@ Enables or disables order-preserving parallel parsing of data formats. Supported Possible values: -- 1 — Enabled. -- 0 — Disabled. +- 1 — Enabled. +- 0 — Disabled. Default value: `1`. @@ -2287,15 +2462,15 @@ Enables or disables parallel formatting of data formats. Supported only for [TSV Possible values: -- 1 — Enabled. -- 0 — Disabled. +- 1 — Enabled. +- 0 — Disabled. Default value: `1`. ## min_chunk_bytes_for_parallel_parsing {#min-chunk-bytes-for-parallel-parsing} -- Type: unsigned int -- Default value: 1 MiB +- Type: unsigned int +- Default value: 1 MiB The minimum chunk size in bytes, which each thread will parse in parallel. @@ -2305,7 +2480,7 @@ Sleep time for merge selecting when no part is selected. A lower setting trigger Possible values: -- Any positive integer. +- Any positive integer. Default value: `5000`. @@ -2317,9 +2492,9 @@ If we execute `INSERT INTO distributed_table_a SELECT ... FROM distributed_table Possible values: -- 0 — Disabled. -- 1 — `SELECT` will be executed on each shard from the underlying table of the distributed engine. -- 2 — `SELECT` and `INSERT` will be executed on each shard from/to the underlying table of the distributed engine. +- 0 — Disabled. +- 1 — `SELECT` will be executed on each shard from the underlying table of the distributed engine. +- 2 — `SELECT` and `INSERT` will be executed on each shard from/to the underlying table of the distributed engine. Default value: 0. @@ -2331,15 +2506,15 @@ By default, when inserting data into a `Distributed` table, the ClickHouse serve Possible values: -- 0 — Data is inserted in asynchronous mode. -- 1 — Data is inserted in synchronous mode. +- 0 — Data is inserted in asynchronous mode. +- 1 — Data is inserted in synchronous mode. Default value: `0`. 
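+
+A minimal sketch of the synchronous mode (`dist_table` stands for a hypothetical `Distributed`-engine table):
+
+```sql
+-- The statement returns only after the data has been sent to the shards.
+INSERT INTO dist_table SETTINGS insert_distributed_sync = 1 VALUES (1, 'a');
+```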
**See Also** -- [Distributed Table Engine](../../engines/table-engines/special/distributed.md/#distributed) -- [Managing Distributed Tables](../../sql-reference/statements/system.md/#query-language-system-distributed) +- [Distributed Table Engine](../../engines/table-engines/special/distributed.md/#distributed) +- [Managing Distributed Tables](../../sql-reference/statements/system.md/#query-language-system-distributed) ## insert_shard_id {#insert_shard_id} @@ -2355,8 +2530,8 @@ SELECT uniq(shard_num) FROM system.clusters WHERE cluster = 'requested_cluster'; Possible values: -- 0 — Disabled. -- Any number from `1` to `shards_num` of corresponding [Distributed](../../engines/table-engines/special/distributed.md/#distributed) table. +- 0 — Disabled. +- Any number from `1` to `shards_num` of corresponding [Distributed](../../engines/table-engines/special/distributed.md/#distributed) table. Default value: `0`. @@ -2394,8 +2569,8 @@ Uses compact format for storing blocks for async (`insert_distributed_sync`) INS Possible values: -- 0 — Uses `user[:password]@host:port#default_database` directory format. -- 1 — Uses `[shard{shard_index}[_replica{replica_index}]]` directory format. +- 0 — Uses `user[:password]@host:port#default_database` directory format. +- 1 — Uses `[shard{shard_index}[_replica{replica_index}]]` directory format. Default value: `1`. @@ -2406,43 +2581,19 @@ Default value: `1`. ## background_buffer_flush_schedule_pool_size {#background_buffer_flush_schedule_pool_size} -Sets the number of threads performing background flush in [Buffer](../../engines/table-engines/special/buffer.md)-engine tables. This setting is applied at the ClickHouse server start and can’t be changed in a user session. - -Possible values: - -- Any positive integer. - -Default value: 16. +That setting was moved to the [server configuration parameters](../../operations/server-configuration-parameters/settings.md/#background_buffer_flush_schedule_pool_size). ## background_move_pool_size {#background_move_pool_size} -Sets the number of threads performing background moves of data parts for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-multiple-volumes)-engine tables. This setting is applied at the ClickHouse server start and can’t be changed in a user session. - -Possible values: - -- Any positive integer. - -Default value: 8. +That setting was moved to the [server configuration parameters](../../operations/server-configuration-parameters/settings.md/#background_move_pool_size). ## background_schedule_pool_size {#background_schedule_pool_size} -Sets the number of threads performing background tasks for [replicated](../../engines/table-engines/mergetree-family/replication.md) tables, [Kafka](../../engines/table-engines/integrations/kafka.md) streaming, [DNS cache updates](../../operations/server-configuration-parameters/settings.md/#server-settings-dns-cache-update-period). This setting is applied at ClickHouse server start and can’t be changed in a user session. - -Possible values: - -- Any positive integer. - -Default value: 128. +That setting was moved to the [server configuration parameters](../../operations/server-configuration-parameters/settings.md/#background_schedule_pool_size). ## background_fetches_pool_size {#background_fetches_pool_size} -Sets the number of threads performing background fetches for [replicated](../../engines/table-engines/mergetree-family/replication.md) tables. 
This setting is applied at the ClickHouse server start and can’t be changed in a user session. For production usage with frequent small insertions or slow ZooKeeper cluster it is recommended to use default value. - -Possible values: - -- Any positive integer. - -Default value: 8. +That setting was moved to the [server configuration parameters](../../operations/server-configuration-parameters/settings.md/#background_fetches_pool_size). ## always_fetch_merged_part {#always_fetch_merged_part} @@ -2452,39 +2603,22 @@ When merging is prohibited, the replica never merges parts and always downloads Possible values: -- 0 — `Replicated*MergeTree`-engine tables merge data parts at the replica. -- 1 — `Replicated*MergeTree`-engine tables do not merge data parts at the replica. The tables download merged data parts from other replicas. +- 0 — `Replicated*MergeTree`-engine tables merge data parts at the replica. +- 1 — `Replicated*MergeTree`-engine tables do not merge data parts at the replica. The tables download merged data parts from other replicas. Default value: 0. **See Also** -- [Data Replication](../../engines/table-engines/mergetree-family/replication.md) +- [Data Replication](../../engines/table-engines/mergetree-family/replication.md) ## background_distributed_schedule_pool_size {#background_distributed_schedule_pool_size} -Sets the number of threads performing background tasks for [distributed](../../engines/table-engines/special/distributed.md) sends. This setting is applied at the ClickHouse server start and can’t be changed in a user session. - -Possible values: - -- Any positive integer. - -Default value: 16. +That setting was moved to the [server configuration parameters](../../operations/server-configuration-parameters/settings.md/#background_distributed_schedule_pool_size). ## background_message_broker_schedule_pool_size {#background_message_broker_schedule_pool_size} -Sets the number of threads performing background tasks for message streaming. This setting is applied at the ClickHouse server start and can’t be changed in a user session. - -Possible values: - -- Any positive integer. - -Default value: 16. - -**See Also** - -- [Kafka](../../engines/table-engines/integrations/kafka.md/#kafka) engine. -- [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md/#rabbitmq-engine) engine. +That setting was moved to the [server configuration parameters](../../operations/server-configuration-parameters/settings.md/#background_message_broker_schedule_pool_size). ## validate_polygons {#validate_polygons} @@ -2505,8 +2639,8 @@ By default, `NULL` values can’t be compared because `NULL` means undefined val Possible values: -- 0 — Comparison of `NULL` values in `IN` operator returns `false`. -- 1 — Comparison of `NULL` values in `IN` operator returns `true`. +- 0 — Comparison of `NULL` values in `IN` operator returns `false`. +- 1 — Comparison of `NULL` values in `IN` operator returns `true`. Default value: 0. @@ -2553,7 +2687,7 @@ Result: **See Also** -- [NULL Processing in IN Operators](../../sql-reference/operators/in.md/#in-null-processing) +- [NULL Processing in IN Operators](../../sql-reference/operators/in.md/#in-null-processing) ## low_cardinality_max_dictionary_size {#low_cardinality_max_dictionary_size} @@ -2561,7 +2695,7 @@ Sets a maximum size in rows of a shared global dictionary for the [LowCardinalit Possible values: -- Any positive integer. +- Any positive integer. Default value: 8192. 
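+
+An illustrative sketch (the table and column names are hypothetical):
+
+```sql
+CREATE TABLE lc_demo (s LowCardinality(String)) ENGINE = MergeTree ORDER BY s;
+-- Tighten the shared global dictionary limit for the current session.
+SET low_cardinality_max_dictionary_size = 4096;
+```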
@@ -2573,8 +2707,8 @@ By default, the ClickHouse server monitors the size of dictionaries and if a dic Possible values: -- 1 — Creating several dictionaries for the data part is prohibited. -- 0 — Creating several dictionaries for the data part is not prohibited. +- 1 — Creating several dictionaries for the data part is prohibited. +- 0 — Creating several dictionaries for the data part is not prohibited. Default value: 0. @@ -2588,8 +2722,8 @@ This setting is required mainly for third-party clients which do not support `Lo Possible values: -- 1 — Usage of `LowCardinality` is not restricted. -- 0 — Usage of `LowCardinality` is restricted. +- 1 — Usage of `LowCardinality` is not restricted. +- 0 — Usage of `LowCardinality` is restricted. Default value: 1. @@ -2599,16 +2733,16 @@ Allows or restricts using [LowCardinality](../../sql-reference/data-types/lowcar For small fixed values using of `LowCardinality` is usually inefficient, because ClickHouse stores a numeric index for each row. As a result: -- Disk space usage can rise. -- RAM consumption can be higher, depending on a dictionary size. -- Some functions can work slower due to extra coding/encoding operations. +- Disk space usage can rise. +- RAM consumption can be higher, depending on a dictionary size. +- Some functions can work slower due to extra coding/encoding operations. Merge times in [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md)-engine tables can grow due to all the reasons described above. Possible values: -- 1 — Usage of `LowCardinality` is not restricted. -- 0 — Usage of `LowCardinality` is restricted. +- 1 — Usage of `LowCardinality` is not restricted. +- 0 — Usage of `LowCardinality` is restricted. Default value: 0. @@ -2618,14 +2752,14 @@ Sets the minimum number of rows in the block which can be inserted into a table Possible values: -- Any positive integer. -- 0 — Squashing disabled. +- Any positive integer. +- 0 — Squashing disabled. Default value: 1048576. **See Also** -- [min_insert_block_size_rows](#min-insert-block-size-rows) +- [min_insert_block_size_rows](#min-insert-block-size-rows) ## min_insert_block_size_bytes_for_materialized_views {#min-insert-block-size-bytes-for-materialized-views} @@ -2633,14 +2767,14 @@ Sets the minimum number of bytes in the block which can be inserted into a table Possible values: -- Any positive integer. -- 0 — Squashing disabled. +- Any positive integer. +- 0 — Squashing disabled. Default value: 268435456. **See also** -- [min_insert_block_size_bytes](#min-insert-block-size-bytes) +- [min_insert_block_size_bytes](#min-insert-block-size-bytes) ## optimize_read_in_order {#optimize_read_in_order} @@ -2648,14 +2782,14 @@ Enables [ORDER BY](../../sql-reference/statements/select/order-by.md/#optimize_r Possible values: -- 0 — `ORDER BY` optimization is disabled. -- 1 — `ORDER BY` optimization is enabled. +- 0 — `ORDER BY` optimization is disabled. +- 1 — `ORDER BY` optimization is enabled. Default value: `1`. **See Also** -- [ORDER BY Clause](../../sql-reference/statements/select/order-by.md/#optimize_read_in_order) +- [ORDER BY Clause](../../sql-reference/statements/select/order-by.md/#optimize_read_in_order) ## optimize_aggregation_in_order {#optimize_aggregation_in_order} @@ -2663,14 +2797,14 @@ Enables [GROUP BY](../../sql-reference/statements/select/group-by.md) optimizati Possible values: -- 0 — `GROUP BY` optimization is disabled. -- 1 — `GROUP BY` optimization is enabled. +- 0 — `GROUP BY` optimization is disabled. +- 1 — `GROUP BY` optimization is enabled. 
Default value: `0`. **See Also** -- [GROUP BY optimization](../../sql-reference/statements/select/group-by.md/#aggregation-in-order) +- [GROUP BY optimization](../../sql-reference/statements/select/group-by.md/#aggregation-in-order) ## mutations_sync {#mutations_sync} @@ -2678,16 +2812,16 @@ Allows to execute `ALTER TABLE ... UPDATE|DELETE` queries ([mutations](../../sql Possible values: -- 0 - Mutations execute asynchronously. -- 1 - The query waits for all mutations to complete on the current server. -- 2 - The query waits for all mutations to complete on all replicas (if they exist). +- 0 - Mutations execute asynchronously. +- 1 - The query waits for all mutations to complete on the current server. +- 2 - The query waits for all mutations to complete on all replicas (if they exist). Default value: `0`. **See Also** -- [Synchronicity of ALTER Queries](../../sql-reference/statements/alter/index.md#synchronicity-of-alter-queries) -- [Mutations](../../sql-reference/statements/alter/index.md#mutations) +- [Synchronicity of ALTER Queries](../../sql-reference/statements/alter/index.md#synchronicity-of-alter-queries) +- [Mutations](../../sql-reference/statements/alter/index.md#mutations) ## ttl_only_drop_parts {#ttl_only_drop_parts} @@ -2701,15 +2835,15 @@ Dropping whole parts instead of partial cleaning TTL-d rows allows having shorte Possible values: -- 0 — The complete dropping of data parts is disabled. -- 1 — The complete dropping of data parts is enabled. +- 0 — The complete dropping of data parts is disabled. +- 1 — The complete dropping of data parts is enabled. Default value: `0`. **See Also** -- [CREATE TABLE query clauses and settings](../../engines/table-engines/mergetree-family/mergetree.md/#mergetree-query-clauses) (`merge_with_ttl_timeout` setting) -- [Table TTL](../../engines/table-engines/mergetree-family/mergetree.md/#mergetree-table-ttl) +- [CREATE TABLE query clauses and settings](../../engines/table-engines/mergetree-family/mergetree.md/#mergetree-query-clauses) (`merge_with_ttl_timeout` setting) +- [Table TTL](../../engines/table-engines/mergetree-family/mergetree.md/#mergetree-table-ttl) ## lock_acquire_timeout {#lock_acquire_timeout} @@ -2719,21 +2853,21 @@ Locking timeout is used to protect from deadlocks while executing read/write ope Possible values: -- Positive integer (in seconds). -- 0 — No locking timeout. +- Positive integer (in seconds). +- 0 — No locking timeout. Default value: `120` seconds. ## cast_keep_nullable {#cast_keep_nullable} -Enables or disables keeping of the `Nullable` data type in [CAST](../../sql-reference/functions/type-conversion-functions.md/#type_conversion_function-cast) operations. +Enables or disables keeping of the `Nullable` data type in [CAST](../../sql-reference/functions/type-conversion-functions.md/#castx-t) operations. When the setting is enabled and the argument of `CAST` function is `Nullable`, the result is also transformed to `Nullable` type. When the setting is disabled, the result always has the destination type exactly. Possible values: -- 0 — The `CAST` result has exactly the destination type specified. -- 1 — If the argument type is `Nullable`, the `CAST` result is transformed to `Nullable(DestinationDataType)`. +- 0 — The `CAST` result has exactly the destination type specified. +- 1 — If the argument type is `Nullable`, the `CAST` result is transformed to `Nullable(DestinationDataType)`. Default value: `0`. 
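+
+A minimal sketch of the difference:
+
+```sql
+SET cast_keep_nullable = 0;
+SELECT CAST(toNullable(1) AS Int32) AS x, toTypeName(x); -- Int32
+SET cast_keep_nullable = 1;
+SELECT CAST(toNullable(1) AS Int32) AS x, toTypeName(x); -- Nullable(Int32)
+```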
@@ -2771,7 +2905,7 @@ Result: **See Also** -- [CAST](../../sql-reference/functions/type-conversion-functions.md/#type_conversion_function-cast) function +- [CAST](../../sql-reference/functions/type-conversion-functions.md/#type_conversion_function-cast) function ## system_events_show_zero_values {#system_events_show_zero_values} @@ -2781,8 +2915,8 @@ Some monitoring systems require passing all the metrics values to them for each Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: `0`. @@ -2825,11 +2959,11 @@ Possible values: Default value: `0`. -:::warning +:::note Nullable primary key usually indicates bad design. It is forbidden in almost all main stream DBMS. The feature is mainly for [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) and is not heavily tested. Use with care. ::: -:::warning +:::note Do not enable this feature in version `<= 21.8`. It's not properly implemented and may lead to server crash. ::: @@ -2840,8 +2974,8 @@ It is implemented via query rewrite (similar to [count_distinct_implementation]( Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 0. @@ -2872,9 +3006,9 @@ Sets a mode for combining `SELECT` query results. The setting is only used when Possible values: -- `'DISTINCT'` — ClickHouse outputs rows as a result of combining queries removing duplicate rows. -- `'ALL'` — ClickHouse outputs all rows as a result of combining queries including duplicate rows. -- `''` — ClickHouse generates an exception when used with `UNION`. +- `'DISTINCT'` — ClickHouse outputs rows as a result of combining queries removing duplicate rows. +- `'ALL'` — ClickHouse outputs all rows as a result of combining queries including duplicate rows. +- `''` — ClickHouse generates an exception when used with `UNION`. Default value: `''`. @@ -2951,8 +3085,8 @@ Enables special logic to perform merges on replicas. Possible values: -- Positive integer (in seconds). -- 0 — Special merges logic is not used. Merges happen in the usual way on all the replicas. +- Positive integer (in seconds). +- 0 — Special merges logic is not used. Merges happen in the usual way on all the replicas. Default value: `0`. @@ -2966,12 +3100,12 @@ It can be useful when merges are CPU bounded not IO bounded (performing heavy da ## max_final_threads {#max-final-threads} -Sets the maximum number of parallel threads for the `SELECT` query data read phase with the [FINAL](../../sql-reference/statements/select/from.md/#select-from-final) modifier. +Sets the maximum number of parallel threads for the `SELECT` query data read phase with the [FINAL](../../sql-reference/statements/select/from.md#select-from-final) modifier. Possible values: -- Positive integer. -- 0 or 1 — Disabled. `SELECT` queries are executed in a single thread. +- Positive integer. +- 0 or 1 — Disabled. `SELECT` queries are executed in a single thread. Default value: `16`. @@ -2981,9 +3115,9 @@ Sets the probability that the ClickHouse can start a trace for executed queries Possible values: -- 0 — The trace for all executed queries is disabled (if no parent trace context is supplied). -- Positive floating-point number in the range [0..1]. For example, if the setting value is `0,5`, ClickHouse can start a trace on average for half of the queries. -- 1 — The trace for all executed queries is enabled. +- 0 — The trace for all executed queries is disabled (if no parent trace context is supplied). 
+- Positive floating-point number in the range [0..1]. For example, if the setting value is `0,5`, ClickHouse can start a trace on average for half of the queries. +- 1 — The trace for all executed queries is enabled. Default value: `0`. @@ -2993,8 +3127,8 @@ Enables or disables data transformation before the insertion, as if merge was do Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 1. @@ -3061,9 +3195,9 @@ Possible values: Default value: `0`. -## s3_truncate_on_insert +## s3_truncate_on_insert -Enables or disables truncate before inserts in s3 engine tables. If disabled, an exception will be thrown on insert attempts if an S3 object already exists. +Enables or disables truncate before inserts in s3 engine tables. If disabled, an exception will be thrown on insert attempts if an S3 object already exists. Possible values: - 0 — `INSERT` query appends new data to the end of the file. @@ -3071,9 +3205,9 @@ Possible values: Default value: `0`. -## hdfs_truncate_on_insert +## hdfs_truncate_on_insert -Enables or disables truncation before an insert in hdfs engine tables. If disabled, an exception will be thrown on an attempt to insert if a file in HDFS already exists. +Enables or disables truncation before an insert in hdfs engine tables. If disabled, an exception will be thrown on an attempt to insert if a file in HDFS already exists. Possible values: - 0 — `INSERT` query appends new data to the end of the file. @@ -3081,11 +3215,11 @@ Possible values: Default value: `0`. -## engine_file_allow_create_multiple_files +## engine_file_allow_create_multiple_files Enables or disables creating a new file on each insert in file engine tables if the format has the suffix (`JSON`, `ORC`, `Parquet`, etc.). If enabled, on each insert a new file will be created with a name following this pattern: -`data.Parquet` -> `data.1.Parquet` -> `data.2.Parquet`, etc. +`data.Parquet` -> `data.1.Parquet` -> `data.2.Parquet`, etc. Possible values: - 0 — `INSERT` query appends new data to the end of the file. @@ -3093,11 +3227,11 @@ Possible values: Default value: `0`. -## s3_create_new_file_on_insert +## s3_create_new_file_on_insert Enables or disables creating a new file on each insert in s3 engine tables. If enabled, on each insert a new S3 object will be created with the key, similar to this pattern: -initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc. +initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc. Possible values: - 0 — `INSERT` query appends new data to the end of the file. @@ -3109,7 +3243,7 @@ Default value: `0`. Enables or disables creating a new file on each insert in HDFS engine tables. If enabled, on each insert a new HDFS file will be created with the name, similar to this pattern: -initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc. +initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc. Possible values: - 0 — `INSERT` query appends new data to the end of the file. @@ -3117,25 +3251,14 @@ Possible values: Default value: `0`. -## allow_experimental_geo_types {#allow-experimental-geo-types} - -Allows working with experimental [geo data types](../../sql-reference/data-types/geo.md). - -Possible values: - -- 0 — Working with geo data types is disabled. -- 1 — Working with geo data types is enabled. - -Default value: `0`. 
- ## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously} Adds a modifier `SYNC` to all `DROP` and `DETACH` queries. Possible values: -- 0 — Queries will be executed with delay. -- 1 — Queries will be executed without delay. +- 0 — Queries will be executed with delay. +- 1 — Queries will be executed without delay. Default value: `0`. @@ -3145,8 +3268,8 @@ Sets the `SHOW TABLE` query display. Possible values: -- 0 — The query will be displayed without table UUID. -- 1 — The query will be displayed with table UUID. +- 0 — The query will be displayed without table UUID. +- 1 — The query will be displayed with table UUID. Default value: `0`. @@ -3156,8 +3279,8 @@ Allows creation of experimental [live views](../../sql-reference/statements/crea Possible values: -- 0 — Working with live views is disabled. -- 1 — Working with live views is enabled. +- 0 — Working with live views is disabled. +- 1 — Working with live views is enabled. Default value: `0`. @@ -3185,8 +3308,8 @@ HTTP connection timeout (in seconds). Possible values: -- Any positive integer. -- 0 - Disabled (infinite timeout). +- Any positive integer. +- 0 - Disabled (infinite timeout). Default value: 1. @@ -3196,8 +3319,8 @@ HTTP send timeout (in seconds). Possible values: -- Any positive integer. -- 0 - Disabled (infinite timeout). +- Any positive integer. +- 0 - Disabled (infinite timeout). Default value: 180. @@ -3207,8 +3330,8 @@ HTTP receive timeout (in seconds). Possible values: -- Any positive integer. -- 0 - Disabled (infinite timeout). +- Any positive integer. +- 0 - Disabled (infinite timeout). Default value: 180. @@ -3218,8 +3341,8 @@ Defines the level of detail for the [CHECK TABLE](../../sql-reference/statements Possible values: -- 0 — the query shows a check status for every individual data part of a table. -- 1 — the query shows the general table check status. +- 0 — the query shows a check status for every individual data part of a table. +- 1 — the query shows the general table check status. Default value: `0`. @@ -3273,8 +3396,8 @@ Sets the maximum number of rows to get from the query result. It adjusts the val Possible values: -- 0 — The number of rows is not limited. -- Positive integer. +- 0 — The number of rows is not limited. +- Positive integer. Default value: `0`. @@ -3284,8 +3407,8 @@ Sets the number of rows to skip before starting to return rows from the query. I Possible values: -- 0 — No rows are skipped . -- Positive integer. +- 0 — No rows are skipped . +- Positive integer. Default value: `0`. @@ -3321,8 +3444,8 @@ Enables to fuse aggregate functions with identical argument. It rewrites query c Possible values: -- 0 — Functions with identical argument are not fused. -- 1 — Functions with identical argument are fused. +- 0 — Functions with identical argument are not fused. +- 1 — Functions with identical argument are fused. Default value: `0`. @@ -3362,8 +3485,8 @@ Enables to create databases with [Replicated](../../engines/database-engines/rep Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: `0`. @@ -3373,8 +3496,8 @@ Sets how long initial DDL query should wait for Replicated database to precess p Possible values: -- Positive integer. -- 0 — Unlimited. +- Positive integer. +- 0 — Unlimited. Default value: `300`. @@ -3384,9 +3507,9 @@ Sets timeout for DDL query responses from all hosts in cluster. If a DDL request Possible values: -- Positive integer. -- 0 — Async mode. 
-- Negative integer — infinite timeout. +- Positive integer. +- 0 — Async mode. +- Negative integer — infinite timeout. Default value: `180`. @@ -3396,21 +3519,21 @@ Sets format of distributed DDL query result. Possible values: -- `throw` — Returns result set with query execution status for all hosts where query is finished. If query has failed on some hosts, then it will rethrow the first exception. If query is not finished yet on some hosts and [distributed_ddl_task_timeout](#distributed_ddl_task_timeout) exceeded, then it throws `TIMEOUT_EXCEEDED` exception. -- `none` — Is similar to throw, but distributed DDL query returns no result set. -- `null_status_on_timeout` — Returns `NULL` as execution status in some rows of result set instead of throwing `TIMEOUT_EXCEEDED` if query is not finished on the corresponding hosts. -- `never_throw` — Do not throw `TIMEOUT_EXCEEDED` and do not rethrow exceptions if query has failed on some hosts. +- `throw` — Returns result set with query execution status for all hosts where query is finished. If query has failed on some hosts, then it will rethrow the first exception. If query is not finished yet on some hosts and [distributed_ddl_task_timeout](#distributed_ddl_task_timeout) exceeded, then it throws `TIMEOUT_EXCEEDED` exception. +- `none` — Is similar to throw, but distributed DDL query returns no result set. +- `null_status_on_timeout` — Returns `NULL` as execution status in some rows of result set instead of throwing `TIMEOUT_EXCEEDED` if query is not finished on the corresponding hosts. +- `never_throw` — Do not throw `TIMEOUT_EXCEEDED` and do not rethrow exceptions if query has failed on some hosts. Default value: `throw`. ## flatten_nested {#flatten-nested} -Sets the data format of a [nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns. +Sets the data format of a [nested](../../sql-reference/data-types/nested-data-structures/index.md) columns. Possible values: -- 1 — Nested column is flattened to separate arrays. -- 0 — Nested column stays a single array of tuples. +- 1 — Nested column is flattened to separate arrays. +- 0 — Nested column stays a single array of tuples. Default value: `1`. @@ -3470,12 +3593,12 @@ SETTINGS index_granularity = 8192 │ ## external_table_functions_use_nulls {#external-table-functions-use-nulls} -Defines how [mysql](../../sql-reference/table-functions/mysql.md), [postgresql](../../sql-reference/table-functions/postgresql.md) and [odbc](../../sql-reference/table-functions/odbc.md)] table functions use Nullable columns. +Defines how [mysql](../../sql-reference/table-functions/mysql.md), [postgresql](../../sql-reference/table-functions/postgresql.md) and [odbc](../../sql-reference/table-functions/odbc.md) table functions use Nullable columns. Possible values: -- 0 — The table function explicitly uses Nullable columns. -- 1 — The table function implicitly uses Nullable columns. +- 0 — The table function explicitly uses Nullable columns. +- 1 — The table function implicitly uses Nullable columns. Default value: `1`. @@ -3483,25 +3606,25 @@ Default value: `1`. If the setting is set to `0`, the table function does not make Nullable columns and inserts default values instead of NULL. This is also applicable for NULL values inside arrays. 
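+
+A hypothetical sketch with the [mysql](../../sql-reference/table-functions/mysql.md) table function (connection parameters are placeholders):
+
+```sql
+-- With the setting at 0, NULLs from MySQL arrive as default values
+-- (0, '', and so on) instead of NULL.
+SELECT * FROM mysql('host:3306', 'db', 'table', 'user', 'password')
+SETTINGS external_table_functions_use_nulls = 0;
+```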
-## allow_experimental_projection_optimization {#allow-experimental-projection-optimization} +## optimize_use_projections {#optimize_use_projections} Enables or disables [projection](../../engines/table-engines/mergetree-family/mergetree.md/#projections) optimization when processing `SELECT` queries. Possible values: -- 0 — Projection optimization disabled. -- 1 — Projection optimization enabled. +- 0 — Projection optimization disabled. +- 1 — Projection optimization enabled. Default value: `1`. ## force_optimize_projection {#force-optimize-projection} -Enables or disables the obligatory use of [projections](../../engines/table-engines/mergetree-family/mergetree.md/#projections) in `SELECT` queries, when projection optimization is enabled (see [allow_experimental_projection_optimization](#allow-experimental-projection-optimization) setting). +Enables or disables the obligatory use of [projections](../../engines/table-engines/mergetree-family/mergetree.md/#projections) in `SELECT` queries, when projection optimization is enabled (see [optimize_use_projections](#optimize_use_projections) setting). Possible values: -- 0 — Projection optimization is not obligatory. -- 1 — Projection optimization is obligatory. +- 0 — Projection optimization is not obligatory. +- 1 — Projection optimization is obligatory. Default value: `0`. @@ -3511,9 +3634,9 @@ Allows to set up waiting for actions to be executed on replicas by [ALTER](../.. Possible values: -- 0 — Do not wait. -- 1 — Wait for own execution. -- 2 — Wait for everyone. +- 0 — Do not wait. +- 1 — Wait for own execution. +- 2 — Wait for everyone. Default value: `1`. @@ -3523,9 +3646,9 @@ Specifies how long (in seconds) to wait for inactive replicas to execute [ALTER] Possible values: -- 0 — Do not wait. -- Negative integer — Wait for unlimited time. -- Positive integer — The number of seconds to wait. +- 0 — Do not wait. +- Negative integer — Wait for unlimited time. +- Positive integer — The number of seconds to wait. Default value: `120` seconds. @@ -3535,7 +3658,7 @@ Sets the maximum number of matches for a single regular expression per row. Use Possible values: -- Positive integer. +- Positive integer. Default value: `1000`. @@ -3545,7 +3668,7 @@ Sets the maximum number of retries during a single HTTP read. Possible values: -- Positive integer. +- Positive integer. Default value: `1024`. @@ -3555,9 +3678,9 @@ Allows a user to write to [query_log](../../operations/system-tables/query_log.m Possible values: -- 0 — Queries are not logged in the system tables. -- Positive floating-point number in the range [0..1]. For example, if the setting value is `0.5`, about half of the queries are logged in the system tables. -- 1 — All queries are logged in the system tables. +- 0 — Queries are not logged in the system tables. +- Positive floating-point number in the range [0..1]. For example, if the setting value is `0.5`, about half of the queries are logged in the system tables. +- 1 — All queries are logged in the system tables. Default value: `1`. @@ -3567,9 +3690,9 @@ Allows calculating the [if](../../sql-reference/functions/conditional-functions. Possible values: -- `enable` — Enables short-circuit function evaluation for functions that are suitable for it (can throw an exception or computationally heavy). -- `force_enable` — Enables short-circuit function evaluation for all functions. -- `disable` — Disables short-circuit function evaluation. 
+- `enable` — Enables short-circuit function evaluation for functions that are suitable for it (can throw an exception or are computationally heavy).
+- `force_enable` — Enables short-circuit function evaluation for all functions.
+- `disable` — Disables short-circuit function evaluation.

Default value: `enable`.

@@ -3579,8 +3702,8 @@ Defines the maximum length for each regular expression in the [hyperscan multi-m

Possible values:

-- Positive integer.
-- 0 - The length is not limited.
+- Positive integer.
+- 0 — The length is not limited.

Default value: `0`.

@@ -3614,7 +3737,7 @@ Exception: Regexp length too large.

**See Also**

-- [max_hyperscan_regexp_total_length](#max-hyperscan-regexp-total-length)
+- [max_hyperscan_regexp_total_length](#max-hyperscan-regexp-total-length)

## max_hyperscan_regexp_total_length {#max-hyperscan-regexp-total-length}

@@ -3622,8 +3745,8 @@ Sets the maximum length total of all regular expressions in each [hyperscan mult

Possible values:

-- Positive integer.
-- 0 - The length is not limited.
+- Positive integer.
+- 0 — The length is not limited.

Default value: `0`.

@@ -3657,7 +3780,7 @@ Exception: Total regexp lengths too large.

**See Also**

-- [max_hyperscan_regexp_length](#max-hyperscan-regexp-length)
+- [max_hyperscan_regexp_length](#max-hyperscan-regexp-length)

## enable_positional_arguments {#enable-positional-arguments}

@@ -3665,8 +3788,8 @@ Enables or disables supporting positional arguments for [GROUP BY](../../sql-ref

Possible values:

-- 0 — Positional arguments aren't supported.
-- 1 — Positional arguments are supported: column numbers can use instead of column names.
+- 0 — Positional arguments aren't supported.
+- 1 — Positional arguments are supported: column numbers can be used instead of column names.

Default value: `1`.

@@ -3695,13 +3818,13 @@ Result:

## enable_extended_results_for_datetime_functions {#enable-extended-results-for-datetime-functions}

Enables or disables returning results of type:
-- `Date32` with extended range (compared to type `Date`) for functions [toStartOfYear](../../sql-reference/functions/date-time-functions.md/#tostartofyear), [toStartOfISOYear](../../sql-reference/functions/date-time-functions.md/#tostartofisoyear), [toStartOfQuarter](../../sql-reference/functions/date-time-functions.md/#tostartofquarter), [toStartOfMonth](../../sql-reference/functions/date-time-functions.md/#tostartofmonth), [toStartOfWeek](../../sql-reference/functions/date-time-functions.md/#tostartofweek), [toMonday](../../sql-reference/functions/date-time-functions.md/#tomonday) and [toLastDayOfMonth](../../sql-reference/functions/date-time-functions.md/#tolastdayofmonth).
-- `DateTime64` with extended range (compared to type `DateTime`) for functions [toStartOfDay](../../sql-reference/functions/date-time-functions.md/#tostartofday), [toStartOfHour](../../sql-reference/functions/date-time-functions.md/#tostartofhour), [toStartOfMinute](../../sql-reference/functions/date-time-functions.md/#tostartofminute), [toStartOfFiveMinutes](../../sql-reference/functions/date-time-functions.md/#tostartoffiveminutes), [toStartOfTenMinutes](../../sql-reference/functions/date-time-functions.md/#tostartoftenminutes), [toStartOfFifteenMinutes](../../sql-reference/functions/date-time-functions.md/#tostartoffifteenminutes) and [timeSlot](../../sql-reference/functions/date-time-functions.md/#timeslot).
+- `Date32` with extended range (compared to type `Date`) for functions [toStartOfYear](../../sql-reference/functions/date-time-functions.md/#tostartofyear), [toStartOfISOYear](../../sql-reference/functions/date-time-functions.md/#tostartofisoyear), [toStartOfQuarter](../../sql-reference/functions/date-time-functions.md/#tostartofquarter), [toStartOfMonth](../../sql-reference/functions/date-time-functions.md/#tostartofmonth), [toStartOfWeek](../../sql-reference/functions/date-time-functions.md/#tostartofweek), [toMonday](../../sql-reference/functions/date-time-functions.md/#tomonday) and [toLastDayOfMonth](../../sql-reference/functions/date-time-functions.md/#tolastdayofmonth). +- `DateTime64` with extended range (compared to type `DateTime`) for functions [toStartOfDay](../../sql-reference/functions/date-time-functions.md/#tostartofday), [toStartOfHour](../../sql-reference/functions/date-time-functions.md/#tostartofhour), [toStartOfMinute](../../sql-reference/functions/date-time-functions.md/#tostartofminute), [toStartOfFiveMinutes](../../sql-reference/functions/date-time-functions.md/#tostartoffiveminutes), [toStartOfTenMinutes](../../sql-reference/functions/date-time-functions.md/#tostartoftenminutes), [toStartOfFifteenMinutes](../../sql-reference/functions/date-time-functions.md/#tostartoffifteenminutes) and [timeSlot](../../sql-reference/functions/date-time-functions.md/#timeslot). Possible values: -- 0 — Functions return `Date` or `DateTime` for all types of arguments. -- 1 — Functions return `Date32` or `DateTime64` for `Date32` or `DateTime64` arguments and `Date` or `DateTime` otherwise. +- 0 — Functions return `Date` or `DateTime` for all types of arguments. +- 1 — Functions return `Date32` or `DateTime64` for `Date32` or `DateTime64` arguments and `Date` or `DateTime` otherwise. Default value: `0`. @@ -3713,31 +3836,31 @@ Works only for [*MergeTree](../../engines/table-engines/mergetree-family/index.m Possible values: -- 0 — Automatic `PREWHERE` optimization is disabled. -- 1 — Automatic `PREWHERE` optimization is enabled. +- 0 — Automatic `PREWHERE` optimization is disabled. +- 1 — Automatic `PREWHERE` optimization is enabled. Default value: `1`. ## optimize_move_to_prewhere_if_final {#optimize_move_to_prewhere_if_final} -Enables or disables automatic [PREWHERE](../../sql-reference/statements/select/prewhere.md) optimization in [SELECT](../../sql-reference/statements/select/index.md) queries with [FINAL](../../sql-reference/statements/select/from.md/#select-from-final) modifier. +Enables or disables automatic [PREWHERE](../../sql-reference/statements/select/prewhere.md) optimization in [SELECT](../../sql-reference/statements/select/index.md) queries with [FINAL](../../sql-reference/statements/select/from.md#select-from-final) modifier. Works only for [*MergeTree](../../engines/table-engines/mergetree-family/index.md) tables. Possible values: -- 0 — Automatic `PREWHERE` optimization in `SELECT` queries with `FINAL` modifier is disabled. -- 1 — Automatic `PREWHERE` optimization in `SELECT` queries with `FINAL` modifier is enabled. +- 0 — Automatic `PREWHERE` optimization in `SELECT` queries with `FINAL` modifier is disabled. +- 1 — Automatic `PREWHERE` optimization in `SELECT` queries with `FINAL` modifier is enabled. Default value: `0`. 
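+
+**Example**
+
+A minimal sketch of the intended effect (assuming a hypothetical `ReplacingMergeTree` table named `test`):
+
+```sql
+SET optimize_move_to_prewhere_if_final = 1;
+
+-- The `value > 100` condition may now be moved to PREWHERE automatically,
+-- even though the query uses the FINAL modifier.
+SELECT id, value FROM test FINAL WHERE value > 100;
+```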
**See Also** -- [optimize_move_to_prewhere](#optimize_move_to_prewhere) setting +- [optimize_move_to_prewhere](#optimize_move_to_prewhere) setting ## optimize_using_constraints -Use [constraints](../../sql-reference/statements/create/table#constraints) for query optimization. The default is `false`. +Use [constraints](../../sql-reference/statements/create/table.md#constraints) for query optimization. The default is `false`. Possible values: @@ -3745,7 +3868,7 @@ Possible values: ## optimize_append_index -Use [constraints](../../sql-reference/statements/create/table#constraints) in order to append index condition. The default is `false`. +Use [constraints](../../sql-reference/statements/create/table.md#constraints) in order to append index condition. The default is `false`. Possible values: @@ -3753,7 +3876,7 @@ Possible values: ## optimize_substitute_columns -Use [constraints](../../sql-reference/statements/create/table#constraints) for column substitution. The default is `false`. +Use [constraints](../../sql-reference/statements/create/table.md#constraints) for column substitution. The default is `false`. Possible values: @@ -3765,8 +3888,8 @@ Enables describing subcolumns for a [DESCRIBE](../../sql-reference/statements/de Possible values: -- 0 — Subcolumns are not included in `DESCRIBE` queries. -- 1 — Subcolumns are included in `DESCRIBE` queries. +- 0 — Subcolumns are not included in `DESCRIBE` queries. +- 1 — Subcolumns are included in `DESCRIBE` queries. Default value: `0`. @@ -3782,8 +3905,8 @@ Applicable to [ATTACH PARTITION|PART](../../sql-reference/statements/alter/parti Possible values: -- 0 — disable verbosity. -- 1 — enable verbosity. +- 0 — disable verbosity. +- 1 — enable verbosity. Default value: `0`. @@ -3816,8 +3939,8 @@ This is an experimental setting. Sets the minimum amount of memory for reading l Possible values: -- Positive integer. -- 0 — Big files read with only copying data from kernel to userspace. +- Positive integer. +- 0 — Big files read with only copying data from kernel to userspace. Default value: `0`. @@ -3827,8 +3950,8 @@ Enables or disables waiting unfinished queries when shutdown server. Possible values: -- 0 — Disabled. -- 1 — Enabled. The wait time equal shutdown_wait_unfinished config. +- 0 — Disabled. +- 1 — Enabled. The wait time equal shutdown_wait_unfinished config. Default value: 0. @@ -3940,8 +4063,8 @@ INSERT INTO FUNCTION null('foo String') VALUES ('bar') SETTINGS max_threads=1; Possible values: -- 0 — Disallow. -- 1 — Allow. +- 0 — Disallow. +- 1 — Allow. Default value: `0`. @@ -3951,7 +4074,7 @@ Use this setting only for backward compatibility if your use cases depend on old ## final {#final} -Automatically applies [FINAL](../../sql-reference/statements/select/from/#final-modifier) modifier to all tables in a query, to tables where [FINAL](../../sql-reference/statements/select/from/#final-modifier) is applicable, including joined tables and tables in sub-queries, and +Automatically applies [FINAL](../../sql-reference/statements/select/from.md#final-modifier) modifier to all tables in a query, to tables where [FINAL](../../sql-reference/statements/select/from.md#final-modifier) is applicable, including joined tables and tables in sub-queries, and distributed tables. Possible values: @@ -3997,7 +4120,7 @@ SELECT * FROM test; ## asterisk_include_materialized_columns {#asterisk_include_materialized_columns} -Include [MATERIALIZED](../../sql-reference/statements/create/table/#materialized) columns for wildcard query (`SELECT *`). 
+Include [MATERIALIZED](../../sql-reference/statements/create/table.md#materialized) columns for wildcard query (`SELECT *`).

Possible values:

@@ -4008,7 +4131,7 @@ Default value: `0`.

## asterisk_include_alias_columns {#asterisk_include_alias_columns}

-Include [ALIAS](../../sql-reference/statements/create/table/#alias) columns for wildcard query (`SELECT *`).
+Include [ALIAS](../../sql-reference/statements/create/table.md#alias) columns for wildcard query (`SELECT *`).

Possible values:

@@ -4016,3 +4139,123 @@ Possible values:
 - 1 - enabled

 Default value: `0`.
+
+## async_socket_for_remote {#async_socket_for_remote}
+
+Enables asynchronous read from socket while executing remote query.
+
+Enabled by default.
+
+## async_query_sending_for_remote {#async_query_sending_for_remote}
+
+Enables asynchronous connection creation and query sending while executing remote query.
+
+Enabled by default.
+
+## use_hedged_requests {#use_hedged_requests}
+
+Enables hedged requests logic for remote queries. It allows establishing many connections with different replicas for a query.
+A new connection is opened if the existing connection(s) with the replica(s) were not established within `hedged_connection_timeout`
+or if no data was received within `receive_data_timeout`. The query uses the first connection that sends a non-empty progress packet (or a data packet, if `allow_changing_replica_until_first_data_packet` is enabled);
+other connections are cancelled. Queries with `max_parallel_replicas > 1` are supported.
+
+Enabled by default.
+
+## hedged_connection_timeout {#hedged_connection_timeout}
+
+If a connection with a replica cannot be established within this timeout in hedged requests, the next replica is tried without cancelling the connection to the previous one.
+The timeout value is in milliseconds.
+
+Default value: `50`.
+
+## receive_data_timeout {#receive_data_timeout}
+
+This timeout is set when the query is sent to the replica in hedged requests. If the first packet of data is not received and no progress is made in query execution within this timeout,
+the next replica is tried, without cancelling the connection to the previous one.
+The timeout value is in milliseconds.
+
+Default value: `2000`.
+
+## allow_changing_replica_until_first_data_packet {#allow_changing_replica_until_first_data_packet}
+
+If it is enabled, a hedged request can start a new connection until the first data packet is received, even if some progress has already been made
+(but the progress hasn't been updated for `receive_data_timeout`); otherwise, changing the replica is disabled after the first time progress is made.
+
+## partial_result_on_first_cancel {#partial_result_on_first_cancel}
+
+When set to `true` and the user wants to interrupt a query (for example, using `Ctrl+C` on the client), the query continues execution only on the data that was already read from the table. Afterwards, it returns a partial result of the query for the part of the table that was read. To fully stop the execution of a query without a partial result, the user should send 2 cancel requests.
+
+**Example without setting on Ctrl+C**
+```sql
+SELECT sum(number) FROM numbers(10000000000)
+
+Cancelling query.
+Ok.
+Query was cancelled.
+
+0 rows in set. Elapsed: 1.334 sec. Processed 52.65 million rows, 421.23 MB (39.48 million rows/s., 315.85 MB/s.)
+```
+
+**Example with setting on Ctrl+C**
+```sql
+SELECT sum(number) FROM numbers(10000000000) SETTINGS partial_result_on_first_cancel=true
+
+┌──────sum(number)─┐
+│ 1355411451286266 │
+└──────────────────┘
+
+1 row in set. Elapsed: 1.331 sec. Processed 52.13 million rows, 417.05 MB (39.17 million rows/s., 313.33 MB/s.)
+```
+
+Possible values: `true`, `false`.
+
+Default value: `false`.
+
+## function_json_value_return_type_allow_nullable
+
+Controls whether the `JSON_VALUE` function is allowed to return `NULL` when the value does not exist.
+
+```sql
+SELECT JSON_VALUE('{"hello":"world"}', '$.b') settings function_json_value_return_type_allow_nullable=true;
+
+┌─JSON_VALUE('{"hello":"world"}', '$.b')─┐
+│ ᴺᵁᴸᴸ                                   │
+└────────────────────────────────────────┘
+
+1 row in set. Elapsed: 0.001 sec.
+```
+
+Possible values:
+
+- true — Allow.
+- false — Disallow.
+
+Default value: `false`.
+
+## function_json_value_return_type_allow_complex
+
+Controls whether the `JSON_VALUE` function is allowed to return a complex type (such as a struct, array, or map).
+
+```sql
+SELECT JSON_VALUE('{"hello":{"world":"!"}}', '$.hello') settings function_json_value_return_type_allow_complex=true
+
+┌─JSON_VALUE('{"hello":{"world":"!"}}', '$.hello')─┐
+│ {"world":"!"}                                    │
+└──────────────────────────────────────────────────┘
+
+1 row in set. Elapsed: 0.001 sec.
+```
+
+Possible values:
+
+- true — Allow.
+- false — Disallow.
+
+Default value: `false`.
+
+## zstd_window_log_max
+
+Allows you to select the maximum window log of ZSTD (it is not used for the MergeTree family).
+
+Type: Int64
+
+Default: 0
+
diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md
index 203fe4e42d2..5804ad8545b 100644
--- a/docs/en/operations/storing-data.md
+++ b/docs/en/operations/storing-data.md
@@ -45,11 +45,11 @@ Configuration markup:

Required parameters:

-- `endpoint` — HDFS endpoint URL in `path` format. Endpoint URL should contain a root path to store data.
+- `endpoint` — HDFS endpoint URL in `path` format. Endpoint URL should contain a root path to store data.

Optional parameters:

-- `min_bytes_for_seek` — The minimal number of bytes to use seek operation instead of sequential read. Default value: `1 Mb`.
+- `min_bytes_for_seek` — The minimal number of bytes to use seek operation instead of sequential read. Default value: `1 Mb`.

## Using Virtual File System for Data Encryption {#encrypted-virtual-file-system}

@@ -78,16 +78,16 @@ When writing the same file to `disk2`, it will actually be written to the physic

Required parameters:

-- `type` — `encrypted`. Otherwise the encrypted disk is not created.
-- `disk` — Type of disk for data storage.
-- `key` — The key for encryption and decryption. Type: [Uint64](/docs/en/sql-reference/data-types/int-uint.md). You can use `key_hex` parameter to encrypt in hexadecimal form.
+- `type` — `encrypted`. Otherwise the encrypted disk is not created.
+- `disk` — Type of disk for data storage.
+- `key` — The key for encryption and decryption. Type: [Uint64](/docs/en/sql-reference/data-types/int-uint.md). You can use the `key_hex` parameter to encode the key in hexadecimal form.
You can specify multiple keys using the `id` attribute (see example above).

Optional parameters:

-- `path` — Path to the location on the disk where the data will be saved. If not specified, the data will be saved in the root directory.
-- `current_key_id` — The key used for encryption. All the specified keys can be used for decryption, and you can always switch to another key while maintaining access to previously encrypted data.
-- `algorithm` — [Algorithm](/docs/en/sql-reference/statements/create/table.md/#create-query-encryption-codecs) for encryption. Possible values: `AES_128_CTR`, `AES_192_CTR` or `AES_256_CTR`. Default value: `AES_128_CTR`. The key length depends on the algorithm: `AES_128_CTR` — 16 bytes, `AES_192_CTR` — 24 bytes, `AES_256_CTR` — 32 bytes.
+- `path` — Path to the location on the disk where the data will be saved. If not specified, the data will be saved in the root directory.
+- `current_key_id` — The key used for encryption. All the specified keys can be used for decryption, and you can always switch to another key while maintaining access to previously encrypted data.
+- `algorithm` — [Algorithm](/docs/en/sql-reference/statements/create/table.md/#create-query-encryption-codecs) for encryption. Possible values: `AES_128_CTR`, `AES_192_CTR` or `AES_256_CTR`. Default value: `AES_128_CTR`. The key length depends on the algorithm: `AES_128_CTR` — 16 bytes, `AES_192_CTR` — 24 bytes, `AES_256_CTR` — 32 bytes.

Example of disk configuration:

@@ -135,11 +135,13 @@
             </cache>
         </disks>
-        <volumes>
-            <main>
-                <disk>cache</disk>
-            </main>
-        </volumes>
+        <policies>
+            <s3_cache>
+                <volumes>
+                    <main><disk>cache</disk></main>
+                </volumes>
+            </s3_cache>
+        </policies>
     </storage_configuration>
 </clickhouse>
 ```
@@ -159,16 +161,20 @@
             </s3>
         </disks>
-        <volumes>
-            <main>
-                <disk>s3</disk>
-            </main>
-        </volumes>
+        <policies>
+            <s3_cache>
+                <volumes>
+                    <main><disk>s3</disk></main>
+                </volumes>
+            </s3_cache>
+        </policies>
     </storage_configuration>
 </clickhouse>
``` -Cache **configuration settings**: +File Cache **disk configuration settings**: + +These settings should be defined in the disk configuration section. - `path` - path to the directory with cache. Default: None, this setting is obligatory. @@ -178,7 +184,7 @@ Cache **configuration settings**: - `enable_filesystem_query_cache_limit` - allow to limit the size of cache which is downloaded within each query (depends on user setting `max_query_cache_size`). Default: `false`. -- `enable_cache_hits_threshold` - a number, which defines how many times some data needs to be read before it will be cached. Default: `0`, e.g. the data is cached at the first attempt to read it. +- `enable_cache_hits_threshold` - number which defines how many times some data needs to be read before it will be cached. Default: `0`, e.g. the data is cached at the first attempt to read it. - `do_not_evict_index_and_mark_files` - do not evict small frequently used files according to cache policy. Default: `false`. This setting was added in version 22.8. If you used filesystem cache before this version, then it will not work on versions starting from 22.8 if this setting is set to `true`. If you want to use this setting, clear old cache created before version 22.8 before upgrading. @@ -186,21 +192,23 @@ Cache **configuration settings**: - `max_elements` - a limit for a number of cache files. Default: `1048576`. -Cache **query settings**: +File Cache **query/profile settings**: + +Some of these settings will disable cache features per query/profile that are enabled by default or in disk configuration settings. For example, you can enable cache in disk configuration and disable it per query/profile setting `enable_filesystem_cache` to `false`. Also setting `cache_on_write_operations` to `true` in disk configuration means that "write-though" cache is enabled. But if you need to disable this general setting per specific queries then setting `enable_filesystem_cache_on_write_operations` to `false` means that write operations cache will be disabled for a specific query/profile. - `enable_filesystem_cache` - allows to disable cache per query even if storage policy was configured with `cache` disk type. Default: `true`. - `read_from_filesystem_cache_if_exists_otherwise_bypass_cache` - allows to use cache in query only if it already exists, otherwise query data will not be written to local cache storage. Default: `false`. -- `enable_filesystem_cache_on_write_operations` - turn on `write-through` cache. This setting works only if setting `cache_on_write_operations` in cache configuration is turned on. +- `enable_filesystem_cache_on_write_operations` - turn on `write-through` cache. This setting works only if setting `cache_on_write_operations` in cache configuration is turned on. Default: `false`. -- `enable_filesystem_cache_log` - turn on logging to `system.filesystem_cache_log` table. Gives a detailed view of cache usage per query. Default: `false`. +- `enable_filesystem_cache_log` - turn on logging to `system.filesystem_cache_log` table. Gives a detailed view of cache usage per query. It can be turn on for specific queries or enabled in a profile. Default: `false`. - `max_query_cache_size` - a limit for the cache size, which can be written to local cache storage. Requires enabled `enable_filesystem_query_cache_limit` in cache configuration. Default: `false`. -- `skip_download_if_exceeds_query_cache` - allows to change the behaviour of setting `max_query_cache_size`. Default: `true`. 
If this setting is turned on and cache download limit during query was reached, no more cache will be downloaded to cache storage. If this setting is turned off and cache download limit during query was reached, cache will still be written by cost of evicting previously downloaded (within current query) data, e.g. second behaviour allows to preserve `last recentltly used` behaviour while keeping query cache limit.
+- `skip_download_if_exceeds_query_cache` - allows to change the behaviour of setting `max_query_cache_size`. Default: `true`. If this setting is turned on and the cache download limit during a query is reached, no more cache will be downloaded to cache storage. If this setting is turned off and the cache download limit during a query is reached, the cache will still be written at the cost of evicting previously downloaded (within the current query) data, i.e. the second behaviour preserves the `last recently used` behaviour while keeping the query cache limit.

-** Warning **
+**Warning**
Cache configuration settings and cache query settings correspond to the latest ClickHouse version, for earlier versions something might not be supported.

Cache **system tables**:

@@ -211,7 +219,7 @@ Cache **commands**:

-- `SYSTEM DROP FILESYSTEM CACHE (<cache_name>) (ON CLUSTER)`
+- `SYSTEM DROP FILESYSTEM CACHE (<cache_name>) (ON CLUSTER)` -- `ON CLUSTER` is only supported when no `<cache_name>` is provided

- `SHOW FILESYSTEM CACHES` -- show list of filesystem caches which were configured on the server. (For versions <= `22.8` the command is named `SHOW CACHES`)

@@ -227,10 +235,10 @@ Result:

└───────────┘
```

-- `DESCRIBE CACHE '<cache_name>'` - show cache configuration and some general statistics for a specific cache. Cache name can be taken from `SHOW CACHES` command. (For versions <= `22.8` the command is named `DESCRIBE CACHE`)
+- `DESCRIBE FILESYSTEM CACHE '<cache_name>'` - show cache configuration and some general statistics for a specific cache. Cache name can be taken from `SHOW FILESYSTEM CACHES` command. (For versions <= `22.8` the command is named `DESCRIBE CACHE`)

```sql
-DESCRIBE CACHE 's3_cache'
+DESCRIBE FILESYSTEM CACHE 's3_cache'
```

``` text

@@ -442,14 +450,14 @@ SETTINGS storage_policy='web';

Required parameters:

-- `type` — `web`. Otherwise the disk is not created.
-- `endpoint` — The endpoint URL in `path` format. Endpoint URL must contain a root path to store data, where they were uploaded.
+- `type` — `web`. Otherwise the disk is not created.
+- `endpoint` — The endpoint URL in `path` format. Endpoint URL must contain a root path to store data, where they were uploaded.

Optional parameters:

-- `min_bytes_for_seek` — The minimal number of bytes to use seek operation instead of sequential read. Default value: `1` Mb.
-- `remote_fs_read_backoff_threashold` — The maximum wait time when trying to read data for remote disk. Default value: `10000` seconds.
-- `remote_fs_read_backoff_max_tries` — The maximum number of attempts to read with backoff. Default value: `5`.
+- `min_bytes_for_seek` — The minimal number of bytes to use seek operation instead of sequential read. Default value: `1` Mb.
+- `remote_fs_read_backoff_threashold` — The maximum wait time when trying to read data for remote disk. Default value: `10000` seconds.
+- `remote_fs_read_backoff_max_tries` — The maximum number of attempts to read with backoff. Default value: `5`.
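+
+A minimal sketch of a `web` disk definition using these parameters (the endpoint URL is a placeholder):
+
+``` xml
+<clickhouse>
+    <storage_configuration>
+        <disks>
+            <web>
+                <type>web</type>
+                <endpoint>https://example.com/data/</endpoint>
+                <min_bytes_for_seek>1048576</min_bytes_for_seek>
+            </web>
+        </disks>
+    </storage_configuration>
+</clickhouse>
+```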
If a query fails with an exception `DB:Exception Unreachable URL`, then you can try to adjust the settings: [http_connection_timeout](/docs/en/operations/settings/settings.md/#http_connection_timeout), [http_receive_timeout](/docs/en/operations/settings/settings.md/#http_receive_timeout), [keep_alive_timeout](/docs/en/operations/server-configuration-parameters/settings.md/#keep-alive-timeout). @@ -467,6 +475,6 @@ Use [http_max_single_read_retries](/docs/en/operations/settings/settings.md/#htt Zero-copy replication is possible, but not recommended, with `S3` and `HDFS` disks. Zero-copy replication means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself. -:::warning Zero-copy replication is not ready for production +:::note Zero-copy replication is not ready for production Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. ::: diff --git a/docs/en/operations/system-tables/asynchronous_metric_log.md b/docs/en/operations/system-tables/asynchronous_metric_log.md index 8a2f25629f6..4290799b6bc 100644 --- a/docs/en/operations/system-tables/asynchronous_metric_log.md +++ b/docs/en/operations/system-tables/asynchronous_metric_log.md @@ -7,11 +7,11 @@ Contains the historical values for `system.asynchronous_metrics`, which are save Columns: -- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date. -- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time. -- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds resolution. -- `name` ([String](../../sql-reference/data-types/string.md)) — Metric name. -- `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value. +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time. +- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds resolution. +- `name` ([String](../../sql-reference/data-types/string.md)) — Metric name. +- `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value. **Example** diff --git a/docs/en/operations/system-tables/asynchronous_metrics.md b/docs/en/operations/system-tables/asynchronous_metrics.md index 551aa771ec9..f357341da67 100644 --- a/docs/en/operations/system-tables/asynchronous_metrics.md +++ b/docs/en/operations/system-tables/asynchronous_metrics.md @@ -7,9 +7,9 @@ Contains metrics that are calculated periodically in the background. For example Columns: -- `metric` ([String](../../sql-reference/data-types/string.md)) — Metric name. -- `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value. -- `description` ([String](../../sql-reference/data-types/string.md) - Metric description) +- `metric` ([String](../../sql-reference/data-types/string.md)) — Metric name. +- `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value. 
+- `description` ([String](../../sql-reference/data-types/string.md)) — Metric description.

**Example**

@@ -32,9 +32,592 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10
└─────────────────────────────────────────┴────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```

+## Metric descriptions
+
+
+### AsynchronousHeavyMetricsCalculationTimeSpent
+
+Time in seconds spent for calculation of asynchronous heavy (tables related) metrics (this is the overhead of asynchronous metrics).
+
+### AsynchronousHeavyMetricsUpdateInterval
+
+Heavy (tables related) metrics update interval.
+
+### AsynchronousMetricsCalculationTimeSpent
+
+Time in seconds spent for calculation of asynchronous metrics (this is the overhead of asynchronous metrics).
+
+### AsynchronousMetricsUpdateInterval
+
+Metrics update interval.
+
+### BlockActiveTime_*name*
+
+Time in seconds the block device had the IO requests queued. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt
+
+### BlockDiscardBytes_*name*
+
+Number of discarded bytes on the block device. These operations are relevant for SSD. Discard operations are not used by ClickHouse, but can be used by other processes on the system. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt
+
+### BlockDiscardMerges_*name*
+
+Number of discard operations requested from the block device and merged together by the OS IO scheduler. These operations are relevant for SSD. Discard operations are not used by ClickHouse, but can be used by other processes on the system. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt
+
+### BlockDiscardOps_*name*
+
+Number of discard operations requested from the block device. These operations are relevant for SSD. Discard operations are not used by ClickHouse, but can be used by other processes on the system. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt
+
+### BlockDiscardTime_*name*
+
+Time in seconds spent on discard operations requested from the block device, summed across all the operations. These operations are relevant for SSD. Discard operations are not used by ClickHouse, but can be used by other processes on the system. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt
+
+### BlockInFlightOps_*name*
+
+This value counts the number of I/O requests that have been issued to the device driver but have not yet completed. It does not include IO requests that are in the queue but not yet issued to the device driver. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt
+
+### BlockQueueTime_*name*
+
+This value counts the number of milliseconds that IO requests have waited on this block device. If there are multiple IO requests waiting, this value will increase as the product of the number of milliseconds times the number of requests waiting. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt
+
+### BlockReadBytes_*name*
+
+Number of bytes read from the block device. It can be lower than the number of bytes read from the filesystem due to the usage of the OS page cache, which saves IO. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt
+
+### BlockReadMerges_*name*
+
+Number of read operations requested from the block device and merged together by the OS IO scheduler. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt
+
+### BlockReadOps_*name*
+
+Number of read operations requested from the block device. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt
+
+### BlockReadTime_*name*
+
+Time in seconds spent on read operations requested from the block device, summed across all the operations. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt
+
+### BlockWriteBytes_*name*
+
+Number of bytes written to the block device. It can be lower than the number of bytes written to the filesystem due to the usage of the OS page cache, which saves IO. A write to the block device may happen later than the corresponding write to the filesystem due to write-through caching. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt
+
+### BlockWriteMerges_*name*
+
+Number of write operations requested from the block device and merged together by the OS IO scheduler. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt
+
+### BlockWriteOps_*name*
+
+Number of write operations requested from the block device. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt
+
+### BlockWriteTime_*name*
+
+Time in seconds spent on write operations requested from the block device, summed across all the operations. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt
+
+### CPUFrequencyMHz_*name*
+
+The current frequency of the CPU, in MHz. Most of the modern CPUs adjust the frequency dynamically for power saving and Turbo Boosting.
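+
+For metrics with a `_*name*` suffix, one instance is exported per device (or per CPU core for `_*N*` metrics). For example, a quick way to look at all per-core frequency values (the exact metric names and values depend on the host):
+
+```sql
+SELECT metric, value
+FROM system.asynchronous_metrics
+WHERE metric LIKE 'CPUFrequencyMHz%'
+ORDER BY metric;
+```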
+
+### CompiledExpressionCacheBytes
+
+Total bytes used for the cache of JIT-compiled code.
+
+### CompiledExpressionCacheCount
+
+Total entries in the cache of JIT-compiled code.
+
+### DiskAvailable_*name*
+
+Available bytes on the disk (virtual filesystem). Remote filesystems can show a large value like 16 EiB.
+
+### DiskTotal_*name*
+
+The total size in bytes of the disk (virtual filesystem). Remote filesystems can show a large value like 16 EiB.
+
+### DiskUnreserved_*name*
+
+Available bytes on the disk (virtual filesystem) without the reservations for merges, fetches, and moves. Remote filesystems can show a large value like 16 EiB.
+
+### DiskUsed_*name*
+
+Used bytes on the disk (virtual filesystem). Remote filesystems do not always provide this information.
+
+### FilesystemCacheBytes
+
+Total bytes in the `cache` virtual filesystem. This cache is held on disk.
+
+### FilesystemCacheFiles
+
+Total number of cached file segments in the `cache` virtual filesystem. This cache is held on disk.
+
+### FilesystemLogsPathAvailableBytes
+
+Available bytes on the volume where ClickHouse logs path is mounted. If this value approaches zero, you should tune the log rotation in the configuration file.
+
+### FilesystemLogsPathAvailableINodes
+
+The number of available inodes on the volume where ClickHouse logs path is mounted.
+
+### FilesystemLogsPathTotalBytes
+
+The size of the volume where ClickHouse logs path is mounted, in bytes. It's recommended to have at least 10 GB for logs.
+
+### FilesystemLogsPathTotalINodes
+
+The total number of inodes on the volume where ClickHouse logs path is mounted.
+
+### FilesystemLogsPathUsedBytes
+
+Used bytes on the volume where ClickHouse logs path is mounted.
+
+### FilesystemLogsPathUsedINodes
+
+The number of used inodes on the volume where ClickHouse logs path is mounted.
+
+### FilesystemMainPathAvailableBytes
+
+Available bytes on the volume where the main ClickHouse path is mounted.
+
+### FilesystemMainPathAvailableINodes
+
+The number of available inodes on the volume where the main ClickHouse path is mounted. If it is close to zero, it indicates a misconfiguration, and you will get 'no space left on device' even when the disk is not full.
+
+### FilesystemMainPathTotalBytes
+
+The size of the volume where the main ClickHouse path is mounted, in bytes.
+
+### FilesystemMainPathTotalINodes
+
+The total number of inodes on the volume where the main ClickHouse path is mounted. If it is less than 25 million, it indicates a misconfiguration.
+
+### FilesystemMainPathUsedBytes
+
+Used bytes on the volume where the main ClickHouse path is mounted.
+
+### FilesystemMainPathUsedINodes
+
+The number of used inodes on the volume where the main ClickHouse path is mounted. This value mostly corresponds to the number of files.
+
+### HTTPThreads
+
+Number of threads in the server of the HTTP interface (without TLS).
+
+### InterserverThreads
+
+Number of threads in the server of the replicas communication protocol (without TLS).
+
+### Jitter
+
+The difference in time between when the thread for calculation of the asynchronous metrics was scheduled to wake up and when it in fact woke up. A proxy indicator of overall system latency and responsiveness.
+
+### LoadAverage_*N*
+
+The whole system load, averaged with exponential smoothing over 1 minute. The load represents the number of threads across all the processes (the scheduling entities of the OS kernel) that are currently running on a CPU, waiting for IO, or ready to run but not being scheduled at this point of time. This number includes all the processes, not only clickhouse-server. The number can be greater than the number of CPU cores, if the system is overloaded, and many processes are ready to run but waiting for CPU or IO.
+
+### MMapCacheCells
+
+The number of files opened with `mmap` (mapped in memory). This is used for queries with the setting `local_filesystem_read_method` set to `mmap`. The files opened with `mmap` are kept in the cache to avoid costly TLB flushes.
+
+### MarkCacheBytes
+
+Total size of mark cache in bytes.
+
+### MarkCacheFiles
+
+Total number of mark files cached in the mark cache.
+
+### MaxPartCountForPartition
+
+Maximum number of parts per partition across all partitions of all tables of MergeTree family. Values larger than 300 indicate misconfiguration, overload, or massive data loading.
+
+### MemoryCode
+
+The amount of virtual memory mapped for the pages of machine code of the server process, in bytes.
+
+### MemoryDataAndStack
+
+The amount of virtual memory mapped for the use of stack and for the allocated memory, in bytes. It is unspecified whether it includes the per-thread stacks and most of the allocated memory, that is allocated with the 'mmap' system call. This metric exists only for completeness reasons. I recommend using the `MemoryResident` metric for monitoring.
+
+### MemoryResident
+
+The amount of physical memory used by the server process, in bytes.
+
+### MemoryShared
+
+The amount of memory used by the server process, that is also shared by other processes, in bytes. ClickHouse does not use shared memory, but some memory can be labeled by the OS as shared for its own reasons. This metric does not make a lot of sense to watch, and it exists only for completeness reasons.
+
+### MemoryVirtual
+
+The size of the virtual address space allocated by the server process, in bytes. The size of the virtual address space is usually much greater than the physical memory consumption, and should not be used as an estimate for the memory consumption. The large values of this metric are totally normal, and make only technical sense.
+
+### MySQLThreads
+
+Number of threads in the server of the MySQL compatibility protocol.
+
+### NetworkReceiveBytes_*name*
+
+Number of bytes received via the network interface. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server.
+
+### NetworkReceiveDrop_*name*
+
+Number of packets dropped while being received via the network interface. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server.
+
+### NetworkReceiveErrors_*name*
+
+Number of times an error happened while receiving via the network interface. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server.
+
+### NetworkReceivePackets_*name*
+
+Number of network packets received via the network interface. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server.
+
+### NetworkSendBytes_*name*
+
+Number of bytes sent via the network interface. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server.
+
+### NetworkSendDrop_*name*
+
+Number of times a packet was dropped while being sent via the network interface. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server.
+
+### NetworkSendErrors_*name*
+
+Number of times an error (e.g. TCP retransmit) happened while sending via the network interface. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server.
+
+### NetworkSendPackets_*name*
+
+Number of network packets sent via the network interface. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server.
+
+### NumberOfDatabases
+
+Total number of databases on the server.
+
+### NumberOfDetachedByUserParts
+
+The total number of parts detached from MergeTree tables by users with the `ALTER TABLE DETACH` query (as opposed to unexpected, broken or ignored parts). The server does not care about detached parts and they can be removed.
+
+### NumberOfDetachedParts
+
+The total number of parts detached from MergeTree tables. A part can be detached by a user with the `ALTER TABLE DETACH` query or by the server itself if the part is broken, unexpected or unneeded. The server does not care about detached parts and they can be removed.
+
+### NumberOfTables
+
+Total number of tables summed across the databases on the server, excluding the databases that cannot contain MergeTree tables. The excluded database engines are those that generate the set of tables on the fly, like `Lazy`, `MySQL`, `PostgreSQL`, `SQLite`.
+
+### OSContextSwitches
+
+The number of context switches that the system underwent on the host machine. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server.
+
+### OSGuestNiceTime
+
+The ratio of time spent running a virtual CPU for guest operating systems under the control of the Linux kernel, when a guest was set to a higher priority (See `man procfs`). This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. This metric is irrelevant for ClickHouse, but still exists for completeness. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores].
+
+### OSGuestNiceTimeCPU_*N*
+
+The ratio of time spent running a virtual CPU for guest operating systems under the control of the Linux kernel, when a guest was set to a higher priority (See `man procfs`). This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. This metric is irrelevant for ClickHouse, but still exists for completeness. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores].
+
+### OSGuestNiceTimeNormalized
+
+The value is similar to `OSGuestNiceTime` but divided by the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores. This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric.
+
+### OSGuestTime
+
+The ratio of time spent running a virtual CPU for guest operating systems under the control of the Linux kernel (See `man procfs`). This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server.
This metric is irrelevant for ClickHouse, but still exists for completeness. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSGuestTimeCPU_*N* + +The ratio of time spent running a virtual CPU for guest operating systems under the control of the Linux kernel (See `man procfs`). This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. This metric is irrelevant for ClickHouse, but still exists for completeness. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSGuestTimeNormalized + +The value is similar to `OSGuestTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores. This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric. + +### OSIOWaitTime + +The ratio of time the CPU core was not running the code but when the OS kernel did not run any other process on this CPU as the processes were waiting for IO. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSIOWaitTimeCPU_*N* + +The ratio of time the CPU core was not running the code but when the OS kernel did not run any other process on this CPU as the processes were waiting for IO. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSIOWaitTimeNormalized + +The value is similar to `OSIOWaitTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores. This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric. + +### OSIdleTime + +The ratio of time the CPU core was idle (not even ready to run a process waiting for IO) from the OS kernel standpoint. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. This does not include the time when the CPU was under-utilized due to the reasons internal to the CPU (memory loads, pipeline stalls, branch mispredictions, running another SMT core). The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSIdleTimeCPU_*N* + +The ratio of time the CPU core was idle (not even ready to run a process waiting for IO) from the OS kernel standpoint. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. This does not include the time when the CPU was under-utilized due to the reasons internal to the CPU (memory loads, pipeline stalls, branch mispredictions, running another SMT core). The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. 
+ +### OSIdleTimeNormalized + +The value is similar to `OSIdleTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores. This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric. + +### OSInterrupts + +The number of interrupts on the host machine. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### OSIrqTime + +The ratio of time spent for running hardware interrupt requests on the CPU. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. A high number of this metric may indicate hardware misconfiguration or a very high network load. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSIrqTimeCPU_*N* + +The ratio of time spent for running hardware interrupt requests on the CPU. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. A high number of this metric may indicate hardware misconfiguration or a very high network load. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSIrqTimeNormalized + +The value is similar to `OSIrqTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores. This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric. + +### OSMemoryAvailable + +The amount of memory available to be used by programs, in bytes. This is very similar to the `OSMemoryFreePlusCached` metric. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### OSMemoryBuffers + +The amount of memory used by OS kernel buffers, in bytes. This should be typically small, and large values may indicate a misconfiguration of the OS. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### OSMemoryCached + +The amount of memory used by the OS page cache, in bytes. Typically, almost all available memory is used by the OS page cache - high values of this metric are normal and expected. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### OSMemoryFreePlusCached + +The amount of free memory plus OS page cache memory on the host system, in bytes. This memory is available to be used by programs. The value should be very similar to `OSMemoryAvailable`. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### OSMemoryFreeWithoutCached + +The amount of free memory on the host system, in bytes. This does not include the memory used by the OS page cache memory, in bytes. The page cache memory is also available for usage by programs, so the value of this metric can be confusing. See the `OSMemoryAvailable` metric instead. For convenience we also provide the `OSMemoryFreePlusCached` metric, that should be somewhat similar to OSMemoryAvailable. See also https://www.linuxatemyram.com/. 
This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### OSMemoryTotal + +The total amount of memory on the host system, in bytes. + +### OSNiceTime + +The ratio of time the CPU core was running userspace code with higher priority. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSNiceTimeCPU_*N* + +The ratio of time the CPU core was running userspace code with higher priority. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSNiceTimeNormalized + +The value is similar to `OSNiceTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores. This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric. + +### OSOpenFiles + +The total number of opened files on the host machine. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### OSProcessesBlocked + +Number of threads blocked waiting for I/O to complete (`man procfs`). This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### OSProcessesCreated + +The number of processes created. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### OSProcessesRunning + +The number of runnable (running or ready to run) threads by the operating system. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### OSSoftIrqTime + +The ratio of time spent for running software interrupt requests on the CPU. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. A high number of this metric may indicate inefficient software running on the system. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSSoftIrqTimeCPU_*N* + +The ratio of time spent for running software interrupt requests on the CPU. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. A high number of this metric may indicate inefficient software running on the system. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSSoftIrqTimeNormalized + +The value is similar to `OSSoftIrqTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores. This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric. + +### OSStealTime + +The ratio of time spent in other operating systems by the CPU when running in a virtualized environment. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. 
Not every virtualized environments present this metric, and most of them don't. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSStealTimeCPU_*N* + +The ratio of time spent in other operating systems by the CPU when running in a virtualized environment. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Not every virtualized environments present this metric, and most of them don't. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSStealTimeNormalized + +The value is similar to `OSStealTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores. This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric. + +### OSSystemTime + +The ratio of time the CPU core was running OS kernel (system) code. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSSystemTimeCPU_*N* + +The ratio of time the CPU core was running OS kernel (system) code. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSSystemTimeNormalized + +The value is similar to `OSSystemTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores. This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric. + +### OSThreadsRunnable + +The total number of 'runnable' threads, as the OS kernel scheduler seeing it. + +### OSThreadsTotal + +The total number of threads, as the OS kernel scheduler seeing it. + +### OSUptime + +The uptime of the host server (the machine where ClickHouse is running), in seconds. + +### OSUserTime + +The ratio of time the CPU core was running userspace code. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. This includes also the time when the CPU was under-utilized due to the reasons internal to the CPU (memory loads, pipeline stalls, branch mispredictions, running another SMT core). The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSUserTimeCPU_*N* + +The ratio of time the CPU core was running userspace code. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. This includes also the time when the CPU was under-utilized due to the reasons internal to the CPU (memory loads, pipeline stalls, branch mispredictions, running another SMT core). The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. 
+ +### OSUserTimeNormalized + +The value is similar to `OSUserTime` but divided by the number of CPU cores so that it is measured in the [0..1] interval regardless of the number of cores. This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric. + +### PostgreSQLThreads + +Number of threads in the server of the PostgreSQL compatibility protocol. + +### ReplicasMaxAbsoluteDelay + +Maximum difference in seconds between the freshest replicated part and the freshest data part still to be replicated, across Replicated tables. A very high value indicates a replica with no data. + +### ReplicasMaxInsertsInQueue + +Maximum number of INSERT operations in the queue (still to be replicated) across Replicated tables. + +### ReplicasMaxMergesInQueue + +Maximum number of merge operations in the queue (still to be applied) across Replicated tables. + +### ReplicasMaxQueueSize + +Maximum queue size (in the number of operations like get, merge) across Replicated tables. + +### ReplicasMaxRelativeDelay + +Maximum difference between the replica delay and the delay of the most up-to-date replica of the same table, across Replicated tables. + +### ReplicasSumInsertsInQueue + +Sum of INSERT operations in the queue (still to be replicated) across Replicated tables. + +### ReplicasSumMergesInQueue + +Sum of merge operations in the queue (still to be applied) across Replicated tables. + +### ReplicasSumQueueSize + +Total queue size (in the number of operations like get, merge) across Replicated tables. + +### TCPThreads + +Number of threads in the server of the TCP protocol (without TLS). + +### Temperature_*N* + +The temperature of the corresponding device in ℃. A sensor can return an unrealistic value. Source: `/sys/class/thermal` + +### Temperature_*name* + +The temperature reported by the corresponding hardware monitor and the corresponding sensor in ℃. A sensor can return an unrealistic value. Source: `/sys/class/hwmon` + +### TotalBytesOfMergeTreeTables + +Total number of bytes (compressed, including data and indices) stored in all tables of the MergeTree family. + +### TotalPartsOfMergeTreeTables + +Total number of data parts in all tables of the MergeTree family. Numbers larger than 10 000 will negatively affect the server startup time and may indicate an unreasonable choice of partition key. + +### TotalRowsOfMergeTreeTables + +Total number of rows (records) stored in all tables of the MergeTree family. + +### UncompressedCacheBytes + +Total size of the uncompressed cache in bytes. The uncompressed cache does not usually improve performance and should be mostly avoided. + +### UncompressedCacheCells + +Total number of entries in the uncompressed cache. Each entry represents a decompressed block of data. The uncompressed cache does not usually improve performance and should be mostly avoided. + +### Uptime + +The server uptime in seconds. It includes the time spent on server initialization before accepting connections. + +### jemalloc.active + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + +### jemalloc.allocated + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + +### jemalloc.arenas.all.dirty_purged + +An internal metric of the low-level memory allocator (jemalloc).
See https://jemalloc.net/jemalloc.3.html + +### jemalloc.arenas.all.muzzy_purged + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + +### jemalloc.arenas.all.pactive + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + +### jemalloc.arenas.all.pdirty + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + +### jemalloc.arenas.all.pmuzzy + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + +### jemalloc.background_thread.num_runs + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + +### jemalloc.background_thread.num_threads + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + +### jemalloc.background_thread.run_intervals + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + +### jemalloc.epoch + +An internal incremental update number of the statistics of jemalloc (Jason Evans' memory allocator), used in all other `jemalloc` metrics. + +### jemalloc.mapped + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + +### jemalloc.metadata + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + +### jemalloc.metadata_thp + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + +### jemalloc.resident + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + +### jemalloc.retained + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + **See Also** -- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. -- [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics. -- [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that have occurred. -- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` and `system.events`. +- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. +- [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics. +- [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that have occurred. +- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` and `system.events`. diff --git a/docs/en/operations/system-tables/build_options.md b/docs/en/operations/system-tables/build_options.md new file mode 100644 index 00000000000..5225d0ff99d --- /dev/null +++ b/docs/en/operations/system-tables/build_options.md @@ -0,0 +1,27 @@ +--- +slug: /en/operations/system-tables/build_options +--- +# build_options + +Contains information about the ClickHouse server's build options. + +Columns: + +- `name` (String) — Name of the build option, e.g. `USE_ODBC` +- `value` (String) — Value of the build option, e.g. 
`1` + +**Example** + +``` sql +SELECT * FROM system.build_options LIMIT 5 +``` + +``` text +┌─name─────────────┬─value─┐ +│ USE_BROTLI       │ 1     │ +│ USE_BZIP2        │ 1     │ +│ USE_CAPNP        │ 1     │ +│ USE_CASSANDRA    │ 1     │ +│ USE_DATASKETCHES │ 1     │ +└──────────────────┴───────┘ +``` diff --git a/docs/en/operations/system-tables/clusters.md b/docs/en/operations/system-tables/clusters.md index 2c5e2699b4f..deb9a0aaeb3 100644 --- a/docs/en/operations/system-tables/clusters.md +++ b/docs/en/operations/system-tables/clusters.md @@ -7,19 +7,22 @@ Contains information about clusters available in the config file and the servers Columns: -- `cluster` ([String](../../sql-reference/data-types/string.md)) — The cluster name. -- `shard_num` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The shard number in the cluster, starting from 1. -- `shard_weight` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The relative weight of the shard when writing data. -- `replica_num` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The replica number in the shard, starting from 1. -- `host_name` ([String](../../sql-reference/data-types/string.md)) — The host name, as specified in the config. -- `host_address` ([String](../../sql-reference/data-types/string.md)) — The host IP address obtained from DNS. -- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The port to use for connecting to the server. -- `is_local` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the host is local. -- `user` ([String](../../sql-reference/data-types/string.md)) — The name of the user for connecting to the server. -- `default_database` ([String](../../sql-reference/data-types/string.md)) — The default database name. -- `errors_count` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of times this host failed to reach replica. -- `slowdowns_count` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of slowdowns that led to changing replica when establishing a connection with hedged requests. -- `estimated_recovery_time` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Seconds remaining until the replica error count is zeroed and it is considered to be back to normal. +- `cluster` ([String](../../sql-reference/data-types/string.md)) — The cluster name. +- `shard_num` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The shard number in the cluster, starting from 1. +- `shard_weight` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The relative weight of the shard when writing data. +- `replica_num` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The replica number in the shard, starting from 1. +- `host_name` ([String](../../sql-reference/data-types/string.md)) — The host name, as specified in the config. +- `host_address` ([String](../../sql-reference/data-types/string.md)) — The host IP address obtained from DNS. +- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The port to use for connecting to the server. +- `is_local` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the host is local. +- `user` ([String](../../sql-reference/data-types/string.md)) — The name of the user for connecting to the server. +- `default_database` ([String](../../sql-reference/data-types/string.md)) — The default database name. +- `errors_count` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of times this host failed to reach a replica.
+- `slowdowns_count` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of slowdowns that led to changing the replica when establishing a connection with hedged requests. +- `estimated_recovery_time` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Seconds remaining until the replica error count is zeroed and it is considered to be back to normal. +- `database_shard_name` ([String](../../sql-reference/data-types/string.md)) — The name of the `Replicated` database shard (for clusters that belong to a `Replicated` database). +- `database_replica_name` ([String](../../sql-reference/data-types/string.md)) — The name of the `Replicated` database replica (for clusters that belong to a `Replicated` database). +- `is_active` ([Nullable(UInt8)](../../sql-reference/data-types/int-uint.md)) — The status of the `Replicated` database replica (for clusters that belong to a `Replicated` database): 1 means "replica is online", 0 means "replica is offline", `NULL` means "unknown". **Example** @@ -47,6 +50,9 @@ default_database: errors_count: 0 slowdowns_count: 0 estimated_recovery_time: 0 +database_shard_name: +database_replica_name: +is_active: NULL Row 2: ────── @@ -63,10 +69,13 @@ default_database: errors_count: 0 slowdowns_count: 0 estimated_recovery_time: 0 +database_shard_name: +database_replica_name: +is_active: NULL ``` **See Also** -- [Table engine Distributed](../../engines/table-engines/special/distributed.md) -- [distributed_replica_error_cap setting](../../operations/settings/settings.md#settings-distributed_replica_error_cap) -- [distributed_replica_error_half_life setting](../../operations/settings/settings.md#settings-distributed_replica_error_half_life) +- [Table engine Distributed](../../engines/table-engines/special/distributed.md) +- [distributed_replica_error_cap setting](../../operations/settings/settings.md#settings-distributed_replica_error_cap) +- [distributed_replica_error_half_life setting](../../operations/settings/settings.md#settings-distributed_replica_error_half_life) diff --git a/docs/en/operations/system-tables/columns.md b/docs/en/operations/system-tables/columns.md index bdb35e24e37..ccdc2d8c742 100644 --- a/docs/en/operations/system-tables/columns.md +++ b/docs/en/operations/system-tables/columns.md @@ -11,27 +11,27 @@ Columns from [temporary tables](../../sql-reference/statements/create/table.md#t The `system.columns` table contains the following columns (the column type is shown in brackets): -- `database` ([String](../../sql-reference/data-types/string.md)) — Database name. -- `table` ([String](../../sql-reference/data-types/string.md)) — Table name. -- `name` ([String](../../sql-reference/data-types/string.md)) — Column name. -- `type` ([String](../../sql-reference/data-types/string.md)) — Column type. -- `position` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Ordinal position of a column in a table starting with 1. -- `default_kind` ([String](../../sql-reference/data-types/string.md)) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string if it is not defined. -- `default_expression` ([String](../../sql-reference/data-types/string.md)) — Expression for the default value, or an empty string if it is not defined. -- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of compressed data, in bytes. -- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of decompressed data, in bytes.
-- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of marks, in bytes. -- `comment` ([String](../../sql-reference/data-types/string.md)) — Comment on the column, or an empty string if it is not defined. -- `is_in_partition_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the partition expression. -- `is_in_sorting_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the sorting key expression. -- `is_in_primary_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the primary key expression. -- `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the sampling key expression. -- `compression_codec` ([String](../../sql-reference/data-types/string.md)) — Compression codec name. -- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse makes sense only for `FixedString` data type. Otherwise, the `NULL` value is returned. -- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bitness for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned. -- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The base of the number system is the accuracy of approximate numeric data, exact numeric data, integer data or monetary data. In ClickHouse it's 2 for integer types and 10 for `Decimal` types. Otherwise, the `NULL` value is returned. -- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse makes sense only for `Decimal` types. Otherwise, the `NULL` value is returned. -- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Decimal precision of `DateTime64` data type. For other data types, the `NULL` value is returned. +- `database` ([String](../../sql-reference/data-types/string.md)) — Database name. +- `table` ([String](../../sql-reference/data-types/string.md)) — Table name. +- `name` ([String](../../sql-reference/data-types/string.md)) — Column name. +- `type` ([String](../../sql-reference/data-types/string.md)) — Column type. +- `position` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Ordinal position of a column in a table starting with 1. +- `default_kind` ([String](../../sql-reference/data-types/string.md)) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string if it is not defined. +- `default_expression` ([String](../../sql-reference/data-types/string.md)) — Expression for the default value, or an empty string if it is not defined. +- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of compressed data, in bytes. 
+- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of decompressed data, in bytes. +- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of marks, in bytes. +- `comment` ([String](../../sql-reference/data-types/string.md)) — Comment on the column, or an empty string if it is not defined. +- `is_in_partition_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the partition expression. +- `is_in_sorting_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the sorting key expression. +- `is_in_primary_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the primary key expression. +- `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the sampling key expression. +- `compression_codec` ([String](../../sql-reference/data-types/string.md)) — Compression codec name. +- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse it makes sense only for the `FixedString` data type. Otherwise, the `NULL` value is returned. +- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is the bit width for integer types and the decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned. +- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The base of the number system in which the precision of approximate numeric data, exact numeric data, integer data, or monetary data is expressed. In ClickHouse it's 2 for integer types and 10 for `Decimal` types. Otherwise, the `NULL` value is returned. +- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it makes sense only for `Decimal` types. Otherwise, the `NULL` value is returned. +- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Decimal precision of the `DateTime64` data type. For other data types, the `NULL` value is returned. **Example** diff --git a/docs/en/operations/system-tables/contributors.md b/docs/en/operations/system-tables/contributors.md index b871bb20f2e..f00329e2dbe 100644 --- a/docs/en/operations/system-tables/contributors.md +++ b/docs/en/operations/system-tables/contributors.md @@ -7,7 +7,7 @@ Contains information about contributors. The order is random at query execution Columns: -- `name` (String) — Contributor (author) name from git log. +- `name` (String) — Contributor (author) name from git log. **Example** diff --git a/docs/en/operations/system-tables/crash-log.md b/docs/en/operations/system-tables/crash-log.md index a44b0db8e9b..4d015a513a2 100644 --- a/docs/en/operations/system-tables/crash-log.md +++ b/docs/en/operations/system-tables/crash-log.md @@ -7,17 +7,17 @@ Contains information about stack traces for fatal errors.
The table does not exi Columns: -- `event_date` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date of the event. -- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Time of the event. -- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the event with nanoseconds. -- `signal` ([Int32](../../sql-reference/data-types/int-uint.md)) — Signal number. -- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread ID. -- `query_id` ([String](../../sql-reference/data-types/string.md)) — Query ID. -- `trace` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Stack trace at the moment of crash. Each element is a virtual memory address inside ClickHouse server process. -- `trace_full` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Stack trace at the moment of crash. Each element contains a called method inside ClickHouse server process. -- `version` ([String](../../sql-reference/data-types/string.md)) — ClickHouse server version. -- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server revision. -- `build_id` ([String](../../sql-reference/data-types/string.md)) — BuildID that is generated by compiler. +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Date of the event. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Time of the event. +- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the event with nanosecond precision. +- `signal` ([Int32](../../sql-reference/data-types/int-uint.md)) — Signal number. +- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread ID. +- `query_id` ([String](../../sql-reference/data-types/string.md)) — Query ID. +- `trace` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Stack trace at the moment of the crash. Each element is a virtual memory address inside the ClickHouse server process. +- `trace_full` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Stack trace at the moment of the crash. Each element contains a called method inside the ClickHouse server process. +- `version` ([String](../../sql-reference/data-types/string.md)) — ClickHouse server version. +- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server revision. +- `build_id` ([String](../../sql-reference/data-types/string.md)) — BuildID generated by the compiler. **Example** @@ -46,6 +46,6 @@ build_id: ``` **See also** -- [trace_log](../../operations/system-tables/trace_log.md) system table +- [trace_log](../../operations/system-tables/trace_log.md) system table [Original article](https://clickhouse.com/docs/en/operations/system-tables/crash-log) diff --git a/docs/en/operations/system-tables/data_skipping_indices.md b/docs/en/operations/system-tables/data_skipping_indices.md index f1e233b33f7..188d94c50da 100644 --- a/docs/en/operations/system-tables/data_skipping_indices.md +++ b/docs/en/operations/system-tables/data_skipping_indices.md @@ -7,16 +7,16 @@ Contains information about existing data skipping indices in all the tables. Columns: -- `database` ([String](../../sql-reference/data-types/string.md)) — Database name. -- `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
-- `name` ([String](../../sql-reference/data-types/string.md)) — Index name. -- `type` ([String](../../sql-reference/data-types/string.md)) — Index type. -- `type_full` ([String](../../sql-reference/data-types/string.md)) — Index type expression from create statement. -- `expr` ([String](../../sql-reference/data-types/string.md)) — Expression for the index calculation. -- `granularity` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of granules in the block. -- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of compressed data, in bytes. -- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of decompressed data, in bytes. -- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of marks, in bytes. +- `database` ([String](../../sql-reference/data-types/string.md)) — Database name. +- `table` ([String](../../sql-reference/data-types/string.md)) — Table name. +- `name` ([String](../../sql-reference/data-types/string.md)) — Index name. +- `type` ([String](../../sql-reference/data-types/string.md)) — Index type. +- `type_full` ([String](../../sql-reference/data-types/string.md)) — Index type expression from the CREATE statement. +- `expr` ([String](../../sql-reference/data-types/string.md)) — Expression for the index calculation. +- `granularity` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of granules in the block. +- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of compressed data, in bytes. +- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of decompressed data, in bytes. +- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of marks, in bytes. **Example** diff --git a/docs/en/operations/system-tables/data_type_families.md b/docs/en/operations/system-tables/data_type_families.md index f914d5545d3..1392e977f60 100644 --- a/docs/en/operations/system-tables/data_type_families.md +++ b/docs/en/operations/system-tables/data_type_families.md @@ -7,9 +7,9 @@ Contains information about supported [data types](../../sql-reference/data-types Columns: -- `name` ([String](../../sql-reference/data-types/string.md)) — Data type name. -- `case_insensitive` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Property that shows whether you can use a data type name in a query in case insensitive manner or not. For example, `Date` and `date` are both valid. -- `alias_to` ([String](../../sql-reference/data-types/string.md)) — Data type name for which `name` is an alias. +- `name` ([String](../../sql-reference/data-types/string.md)) — Data type name. +- `case_insensitive` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Property that shows whether you can use a data type name in a query in a case-insensitive manner. For example, `Date` and `date` are both valid. +- `alias_to` ([String](../../sql-reference/data-types/string.md)) — Data type name for which `name` is an alias. **Example** @@ -34,4 +34,4 @@ SELECT * FROM system.data_type_families WHERE alias_to = 'String' **See Also** -- [Syntax](../../sql-reference/syntax.md) — Information about supported syntax. +- [Syntax](../../sql-reference/syntax.md) — Information about supported syntax.
diff --git a/docs/en/operations/system-tables/databases.md b/docs/en/operations/system-tables/databases.md index 8f0cc6e56d2..f3d3d388c36 100644 --- a/docs/en/operations/system-tables/databases.md +++ b/docs/en/operations/system-tables/databases.md @@ -7,13 +7,13 @@ Contains information about the databases that are available to the current user. Columns: -- `name` ([String](../../sql-reference/data-types/string.md)) — Database name. -- `engine` ([String](../../sql-reference/data-types/string.md)) — [Database engine](../../engines/database-engines/index.md). -- `data_path` ([String](../../sql-reference/data-types/string.md)) — Data path. -- `metadata_path` ([String](../../sql-reference/data-types/enum.md)) — Metadata path. -- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Database UUID. -- `comment` ([String](../../sql-reference/data-types/enum.md)) — Database comment. -- `engine_full` ([String](../../sql-reference/data-types/enum.md)) — Parameters of the database engine. +- `name` ([String](../../sql-reference/data-types/string.md)) — Database name. +- `engine` ([String](../../sql-reference/data-types/string.md)) — [Database engine](../../engines/database-engines/index.md). +- `data_path` ([String](../../sql-reference/data-types/string.md)) — Data path. +- `metadata_path` ([String](../../sql-reference/data-types/string.md)) — Metadata path. +- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Database UUID. +- `comment` ([String](../../sql-reference/data-types/string.md)) — Database comment. +- `engine_full` ([String](../../sql-reference/data-types/string.md)) — Parameters of the database engine. The `name` column from this system table is used for implementing the `SHOW DATABASES` query. diff --git a/docs/en/operations/system-tables/dictionaries.md b/docs/en/operations/system-tables/dictionaries.md index 4b256f0de97..8632581144c 100644 --- a/docs/en/operations/system-tables/dictionaries.md +++ b/docs/en/operations/system-tables/dictionaries.md @@ -3,40 +3,40 @@ slug: /en/operations/system-tables/dictionaries --- # dictionaries -Contains information about [dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md). +Contains information about [dictionaries](../../sql-reference/dictionaries/index.md). Columns: -- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database containing the dictionary created by DDL query. Empty string for other dictionaries. -- `name` ([String](../../sql-reference/data-types/string.md)) — [Dictionary name](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md). -- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Dictionary UUID. -- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Dictionary status. Possible values: - - `NOT_LOADED` — Dictionary was not loaded because it was not used. - - `LOADED` — Dictionary loaded successfully. - - `FAILED` — Unable to load the dictionary as a result of an error. - - `LOADING` — Dictionary is loading now. - - `LOADED_AND_RELOADING` — Dictionary is loaded successfully, and is being reloaded right now (frequent reasons: [SYSTEM RELOAD DICTIONARY](../../sql-reference/statements/system.md#query_language-system-reload-dictionary) query, timeout, dictionary config has changed). - - `FAILED_AND_RELOADING` — Could not load the dictionary as a result of an error and is loading now. -- `origin` ([String](../../sql-reference/data-types/string.md)) — Path to the configuration file that describes the dictionary.
-- `type` ([String](../../sql-reference/data-types/string.md)) — Type of a dictionary allocation. [Storing Dictionaries in Memory](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md). -- `key.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of [key names](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-key) provided by the dictionary. -- `key.types` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Corresponding array of [key types](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-key) provided by the dictionary. -- `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of [attribute names](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) provided by the dictionary. -- `attribute.types` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Corresponding array of [attribute types](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) provided by the dictionary. -- `bytes_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Amount of RAM allocated for the dictionary. -- `query_count` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of queries since the dictionary was loaded or since the last successful reboot. -- `hit_rate` ([Float64](../../sql-reference/data-types/float.md)) — For cache dictionaries, the percentage of uses for which the value was in the cache. -- `found_rate` ([Float64](../../sql-reference/data-types/float.md)) — The percentage of uses for which the value was found. -- `element_count` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of items stored in the dictionary. -- `load_factor` ([Float64](../../sql-reference/data-types/float.md)) — Percentage filled in the dictionary (for a hashed dictionary, the percentage filled in the hash table). -- `source` ([String](../../sql-reference/data-types/string.md)) — Text describing the [data source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) for the dictionary. -- `lifetime_min` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Minimum [lifetime](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds. -- `lifetime_max` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Maximum [lifetime](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds. -- `loading_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time for loading the dictionary. -- `last_successful_update_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — End time for loading or updating the dictionary. 
Helps to monitor some troubles with dictionary sources and investigate the causes. -- `loading_duration` ([Float32](../../sql-reference/data-types/float.md)) — Duration of a dictionary loading. -- `last_exception` ([String](../../sql-reference/data-types/string.md)) — Text of the error that occurs when creating or reloading the dictionary if the dictionary couldn’t be created. -- `comment` ([String](../../sql-reference/data-types/string.md)) — Text of the comment to dictionary. +- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database containing the dictionary created by DDL query. Empty string for other dictionaries. +- `name` ([String](../../sql-reference/data-types/string.md)) — [Dictionary name](../../sql-reference/dictionaries/index.md). +- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Dictionary UUID. +- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Dictionary status. Possible values: + - `NOT_LOADED` — Dictionary was not loaded because it was not used. + - `LOADED` — Dictionary loaded successfully. + - `FAILED` — Unable to load the dictionary as a result of an error. + - `LOADING` — Dictionary is loading now. + - `LOADED_AND_RELOADING` — Dictionary is loaded successfully, and is being reloaded right now (frequent reasons: [SYSTEM RELOAD DICTIONARY](../../sql-reference/statements/system.md#query_language-system-reload-dictionary) query, timeout, dictionary config has changed). + - `FAILED_AND_RELOADING` — Could not load the dictionary as a result of an error and is loading now. +- `origin` ([String](../../sql-reference/data-types/string.md)) — Path to the configuration file that describes the dictionary. +- `type` ([String](../../sql-reference/data-types/string.md)) — Type of a dictionary allocation. [Storing Dictionaries in Memory](../../sql-reference/dictionaries/index.md#storing-dictionaries-in-memory). +- `key.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of [key names](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields) provided by the dictionary. +- `key.types` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Corresponding array of [key types](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields) provided by the dictionary. +- `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of [attribute names](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields) provided by the dictionary. +- `attribute.types` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Corresponding array of [attribute types](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields) provided by the dictionary. +- `bytes_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Amount of RAM allocated for the dictionary. +- `query_count` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of queries since the dictionary was loaded or since the last successful reboot. +- `hit_rate` ([Float64](../../sql-reference/data-types/float.md)) — For cache dictionaries, the percentage of uses for which the value was in the cache.
+- `found_rate` ([Float64](../../sql-reference/data-types/float.md)) — The percentage of uses for which the value was found. +- `element_count` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of items stored in the dictionary. +- `load_factor` ([Float64](../../sql-reference/data-types/float.md)) — Percentage filled in the dictionary (for a hashed dictionary, the percentage filled in the hash table). +- `source` ([String](../../sql-reference/data-types/string.md)) — Text describing the [data source](../../sql-reference/dictionaries/index.md#dictionary-sources) for the dictionary. +- `lifetime_min` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Minimum [lifetime](../../sql-reference/dictionaries/index.md#dictionary-updates) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds. +- `lifetime_max` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Maximum [lifetime](../../sql-reference/dictionaries/index.md#dictionary-updates) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds. +- `loading_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time for loading the dictionary. +- `last_successful_update_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — End time for loading or updating the dictionary. Helps to monitor issues with dictionary sources and investigate the causes. +- `loading_duration` ([Float32](../../sql-reference/data-types/float.md)) — Duration of dictionary loading. +- `last_exception` ([String](../../sql-reference/data-types/string.md)) — Text of the error that occurs when creating or reloading the dictionary if the dictionary couldn’t be created. +- `comment` ([String](../../sql-reference/data-types/string.md)) — Text of the comment for the dictionary. **Example** diff --git a/docs/en/operations/system-tables/disks.md b/docs/en/operations/system-tables/disks.md index a079f3338d2..ed67e2a2416 100644 --- a/docs/en/operations/system-tables/disks.md +++ b/docs/en/operations/system-tables/disks.md @@ -7,12 +7,12 @@ Contains information about disks defined in the [server configuration](../../eng Columns: -- `name` ([String](../../sql-reference/data-types/string.md)) — Name of a disk in the server configuration. -- `path` ([String](../../sql-reference/data-types/string.md)) — Path to the mount point in the file system. -- `free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Free space on disk in bytes. -- `total_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Disk volume in bytes. -- `unreserved_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Free space which is not taken by reservations (`free_space` minus the size of reservations taken by merges, inserts, and other disk write operations currently running). -- `keep_free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Amount of disk space that should stay free on disk in bytes. Defined in the `keep_free_space_bytes` parameter of disk configuration. +- `name` ([String](../../sql-reference/data-types/string.md)) — Name of a disk in the server configuration. +- `path` ([String](../../sql-reference/data-types/string.md)) — Path to the mount point in the file system.
+- `free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Free space on disk in bytes. +- `total_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Disk volume in bytes. +- `unreserved_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Free space which is not taken by reservations (`free_space` minus the size of reservations taken by merges, inserts, and other disk write operations currently running). +- `keep_free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Amount of disk space that should stay free on disk in bytes. Defined in the `keep_free_space_bytes` parameter of disk configuration. **Example** diff --git a/docs/en/operations/system-tables/distributed_ddl_queue.md b/docs/en/operations/system-tables/distributed_ddl_queue.md index acc68372a4c..8cccf946621 100644 --- a/docs/en/operations/system-tables/distributed_ddl_queue.md +++ b/docs/en/operations/system-tables/distributed_ddl_queue.md @@ -7,18 +7,18 @@ Contains information about [distributed ddl queries (ON CLUSTER clause)](../../s Columns: -- `entry` ([String](../../sql-reference/data-types/string.md)) — Query id. -- `host_name` ([String](../../sql-reference/data-types/string.md)) — Hostname. -- `host_address` ([String](../../sql-reference/data-types/string.md)) — IP address that the Hostname resolves to. -- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Host Port. -- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Status of the query. -- `cluster` ([String](../../sql-reference/data-types/string.md)) — Cluster name. -- `query` ([String](../../sql-reference/data-types/string.md)) — Query executed. -- `initiator` ([String](../../sql-reference/data-types/string.md)) — Node that executed the query. -- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query start time. -- `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query finish time. -- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration of query execution (in milliseconds). -- `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — Exception code from [ClickHouse Keeper](../../operations/tips.md#zookeeper). +- `entry` ([String](../../sql-reference/data-types/string.md)) — Query ID. +- `host_name` ([String](../../sql-reference/data-types/string.md)) — Hostname. +- `host_address` ([String](../../sql-reference/data-types/string.md)) — IP address that the hostname resolves to. +- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Host port. +- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Status of the query. +- `cluster` ([String](../../sql-reference/data-types/string.md)) — Cluster name. +- `query` ([String](../../sql-reference/data-types/string.md)) — Query executed. +- `initiator` ([String](../../sql-reference/data-types/string.md)) — Node that executed the query. +- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query start time. +- `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query finish time. +- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration of query execution (in milliseconds). +- `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — Exception code from [ClickHouse Keeper](../../operations/tips.md#zookeeper).
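+ +As a sketch, assuming `Finished` is among the possible `status` values, the entries that have not yet completed on every host could be listed like this: + +``` sql +SELECT entry, host_name, status, query +FROM system.distributed_ddl_queue +WHERE status != 'Finished' +ORDER BY query_start_time +```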
**Example** diff --git a/docs/en/operations/system-tables/distribution_queue.md b/docs/en/operations/system-tables/distribution_queue.md index 8f461590ee1..0dee805c022 100644 --- a/docs/en/operations/system-tables/distribution_queue.md +++ b/docs/en/operations/system-tables/distribution_queue.md @@ -7,25 +7,25 @@ Contains information about local files that are in the queue to be sent to the s Columns: -- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database. +- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database. -- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table. +- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table. -- `data_path` ([String](../../sql-reference/data-types/string.md)) — Path to the folder with local files. +- `data_path` ([String](../../sql-reference/data-types/string.md)) — Path to the folder with local files. -- `is_blocked` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag indicates whether sending local files to the server is blocked. +- `is_blocked` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether sending local files to the server is blocked. -- `error_count` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of errors. +- `error_count` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of errors. -- `data_files` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of local files in a folder. +- `data_files` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of local files in a folder. -- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of compressed data in local files, in bytes. +- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of compressed data in local files, in bytes. -- `broken_data_files` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of files that has been marked as broken (due to an error). +- `broken_data_files` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of files that have been marked as broken (due to an error). -- `broken_data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of compressed data in broken files, in bytes. +- `broken_data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of compressed data in broken files, in bytes. -- `last_exception` ([String](../../sql-reference/data-types/string.md)) — Text message about the last error that occurred (if any). +- `last_exception` ([String](../../sql-reference/data-types/string.md)) — Text message about the last error that occurred (if any). **Example** @@ -48,4 +48,4 @@ last_exception: **See Also** -- [Distributed table engine](../../engines/table-engines/special/distributed.md) +- [Distributed table engine](../../engines/table-engines/special/distributed.md) diff --git a/docs/en/operations/system-tables/dropped_tables.md b/docs/en/operations/system-tables/dropped_tables.md new file mode 100644 index 00000000000..144c03109ac --- /dev/null +++ b/docs/en/operations/system-tables/dropped_tables.md @@ -0,0 +1,37 @@ +--- +slug: /en/operations/system-tables/dropped_tables +--- +# dropped_tables + +Contains information about tables for which DROP TABLE has been executed but whose data cleanup has not yet been performed.
+ +Columns: + +- `index` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Index in the marked_dropped_tables queue. +- `database` ([String](../../sql-reference/data-types/string.md)) — Database. +- `table` ([String](../../sql-reference/data-types/string.md)) — Table name. +- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Table UUID. +- `engine` ([String](../../sql-reference/data-types/string.md)) — Table engine name. +- `metadata_dropped_path` ([String](../../sql-reference/data-types/string.md)) — Path of the table's metadata file in the `metadata_dropped` directory. +- `table_dropped_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The time when the next attempt to remove the table's data is scheduled. Usually it's the time when the table was dropped plus `database_atomic_delay_before_drop_table_sec`. + +**Example** + +The following example shows how to get information about dropped tables. + +``` sql +SELECT * +FROM system.dropped_tables\G +``` + +``` text +Row 1: +────── +index: 0 +database: default +table: test +uuid: 03141bb2-e97a-4d7c-a172-95cc066bb3bd +engine: MergeTree +metadata_dropped_path: /data/ClickHouse/build/programs/data/metadata_dropped/default.test.03141bb2-e97a-4d7c-a172-95cc066bb3bd.sql +table_dropped_time: 2023-03-16 23:43:31 +``` diff --git a/docs/en/operations/system-tables/errors.md b/docs/en/operations/system-tables/errors.md index 48437f43b7d..01762962152 100644 --- a/docs/en/operations/system-tables/errors.md +++ b/docs/en/operations/system-tables/errors.md @@ -7,13 +7,13 @@ Contains error codes with the number of times they have been triggered. Columns: -- `name` ([String](../../sql-reference/data-types/string.md)) — name of the error (`errorCodeToName`). -- `code` ([Int32](../../sql-reference/data-types/int-uint.md)) — code number of the error. -- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — the number of times this error has been happened. -- `last_error_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — time when the last error happened. -- `last_error_message` ([String](../../sql-reference/data-types/string.md)) — message for the last error. -- `last_error_trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — A [stack trace](https://en.wikipedia.org/wiki/Stack_trace) which represents a list of physical addresses where the called methods are stored. -- `remote` ([UInt8](../../sql-reference/data-types/int-uint.md)) — remote exception (i.e. received during one of the distributed query).
**Example** diff --git a/docs/en/operations/system-tables/events.md b/docs/en/operations/system-tables/events.md index 68217a6daaf..ba5602ee292 100644 --- a/docs/en/operations/system-tables/events.md +++ b/docs/en/operations/system-tables/events.md @@ -7,9 +7,9 @@ Contains information about the number of events that have occurred in the system Columns: -- `event` ([String](../../sql-reference/data-types/string.md)) — Event name. -- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of events occurred. -- `description` ([String](../../sql-reference/data-types/string.md)) — Event description. +- `event` ([String](../../sql-reference/data-types/string.md)) — Event name. +- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of events occurred. +- `description` ([String](../../sql-reference/data-types/string.md)) — Event description. **Example** @@ -29,7 +29,7 @@ SELECT * FROM system.events LIMIT 5 **See Also** -- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics. -- [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics. -- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. -- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. +- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics. +- [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics. +- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. +- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. diff --git a/docs/en/operations/system-tables/functions.md b/docs/en/operations/system-tables/functions.md index cb4d8e938eb..60bfa08975b 100644 --- a/docs/en/operations/system-tables/functions.md +++ b/docs/en/operations/system-tables/functions.md @@ -7,8 +7,8 @@ Contains information about normal and aggregate functions. Columns: -- `name`(`String`) – The name of the function. -- `is_aggregate`(`UInt8`) — Whether the function is aggregate. +- `name`(`String`) – The name of the function. +- `is_aggregate`(`UInt8`) — Whether the function is aggregate. **Example** diff --git a/docs/en/operations/system-tables/grants.md b/docs/en/operations/system-tables/grants.md index 7f41f5f07dd..b12f656cb75 100644 --- a/docs/en/operations/system-tables/grants.md +++ b/docs/en/operations/system-tables/grants.md @@ -6,20 +6,20 @@ slug: /en/operations/system-tables/grants Privileges granted to ClickHouse user accounts. Columns: -- `user_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — User name. +- `user_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — User name. -- `role_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Role assigned to user account. 
+- `role_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Role assigned to user account. -- `access_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Access parameters for ClickHouse user account. +- `access_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Access parameters for a ClickHouse user account. -- `database` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Name of a database. +- `database` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Name of a database. -- `table` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Name of a table. +- `table` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Name of a table. -- `column` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Name of a column to which access is granted. +- `column` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Name of a column to which access is granted. -- `is_partial_revoke` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Logical value. It shows whether some privileges have been revoked. Possible values: +- `is_partial_revoke` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Logical value. It shows whether some privileges have been revoked. Possible values: - `0` — The row describes a grant. - `1` — The row describes a partial revoke. -- `grant_option` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Permission is granted `WITH GRANT OPTION`, see [GRANT](../../sql-reference/statements/grant.md#grant-privigele-syntax). +- `grant_option` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Permission is granted `WITH GRANT OPTION`, see [GRANT](../../sql-reference/statements/grant.md#grant-privigele-syntax). diff --git a/docs/en/operations/system-tables/graphite_retentions.md b/docs/en/operations/system-tables/graphite_retentions.md index 230fa4cede9..19a751b6ef2 100644 --- a/docs/en/operations/system-tables/graphite_retentions.md +++ b/docs/en/operations/system-tables/graphite_retentions.md @@ -7,12 +7,12 @@ Contains information about parameters [graphite_rollup](../../operations/server- Columns: -- `config_name` (String) - `graphite_rollup` parameter name. -- `regexp` (String) - A pattern for the metric name. -- `function` (String) - The name of the aggregating function. -- `age` (UInt64) - The minimum age of the data in seconds. -- `precision` (UInt64) - How precisely to define the age of the data in seconds. -- `priority` (UInt16) - Pattern priority.
+- `is_default` (UInt8) - Whether the pattern is the default. +- `Tables.database` (Array(String)) - Array of names of database tables that use the `config_name` parameter. +- `Tables.table` (Array(String)) - Array of table names that use the `config_name` parameter. diff --git a/docs/en/operations/system-tables/index.md b/docs/en/operations/system-tables/index.md index 284ba866cc8..508419783ef 100644 --- a/docs/en/operations/system-tables/index.md +++ b/docs/en/operations/system-tables/index.md @@ -11,14 +11,14 @@ pagination_next: 'en/operations/system-tables/asynchronous_metric_log' System tables provide information about: -- Server states, processes, and environment. -- Server’s internal processes. +- Server states, processes, and environment. +- Server’s internal processes. System tables: -- Located in the `system` database. -- Available only for reading data. -- Can’t be dropped or altered, but can be detached. +- Located in the `system` database. +- Available only for reading data. +- Can’t be dropped or altered, but can be detached. Most system tables store their data in RAM. A ClickHouse server creates such system tables at startup. @@ -26,12 +26,12 @@ Unlike other system tables, the system log tables [metric_log](../../operations/ System log tables can be customized by creating a config file with the same name as the table under `/etc/clickhouse-server/config.d/`, or setting corresponding elements in `/etc/clickhouse-server/config.xml`. Elements that can be customized are: -- `database`: database the system log table belongs to. This option is deprecated now. All system log tables are under database `system`. -- `table`: table to insert data. -- `partition_by`: specify [PARTITION BY](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) expression. -- `ttl`: specify table [TTL](../../sql-reference/statements/alter/ttl.md) expression. -- `flush_interval_milliseconds`: interval of flushing data to disk. -- `engine`: provide full engine expression (starting with `ENGINE =` ) with parameters. This option is contradict with `partition_by` and `ttl`. If set together, the server would raise an exception and exit. +- `database`: database the system log table belongs to. This option is now deprecated. All system log tables are under database `system`. +- `table`: table to insert data into. +- `partition_by`: specify [PARTITION BY](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) expression. +- `ttl`: specify table [TTL](../../sql-reference/statements/alter/ttl.md) expression. +- `flush_interval_milliseconds`: interval of flushing data to disk. +- `engine`: provide full engine expression (starting with `ENGINE =`) with parameters. This option conflicts with `partition_by` and `ttl`. If set together, the server raises an exception and exits. An example: @@ -56,8 +56,8 @@ By default, table growth is unlimited. To control the size of a table, you can use For collecting system metrics, the ClickHouse server uses: -- `CAP_NET_ADMIN` capability. -- [procfs](https://en.wikipedia.org/wiki/Procfs) (only in Linux). +- `CAP_NET_ADMIN` capability. +- [procfs](https://en.wikipedia.org/wiki/Procfs) (only in Linux).
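+
+One way to judge whether a `ttl` or a custom `engine` clause is worth configuring for the log tables is to check how much disk they already use. A minimal sketch, assuming only the standard `system.parts` table (this query is an illustration, not part of the original page):
+
+``` sql
+-- On-disk size of each table in the `system` database, largest first.
+SELECT
+    table,
+    formatReadableSize(sum(bytes_on_disk)) AS size
+FROM system.parts
+WHERE database = 'system' AND active
+GROUP BY table
+ORDER BY sum(bytes_on_disk) DESC;
+```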
**procfs** @@ -65,13 +65,13 @@ If ClickHouse server does not have `CAP_NET_ADMIN` capability, it tries to fall If procfs is supported and enabled on the system, ClickHouse server collects these metrics: -- `OSCPUVirtualTimeMicroseconds` -- `OSCPUWaitMicroseconds` -- `OSIOWaitMicroseconds` -- `OSReadChars` -- `OSWriteChars` -- `OSReadBytes` -- `OSWriteBytes` +- `OSCPUVirtualTimeMicroseconds` +- `OSCPUWaitMicroseconds` +- `OSIOWaitMicroseconds` +- `OSReadChars` +- `OSWriteChars` +- `OSReadBytes` +- `OSWriteBytes` ## Related content diff --git a/docs/en/operations/system-tables/information_schema.md b/docs/en/operations/system-tables/information_schema.md index a8e516f02a3..07e9a9e2f58 100644 --- a/docs/en/operations/system-tables/information_schema.md +++ b/docs/en/operations/system-tables/information_schema.md @@ -22,10 +22,10 @@ SHOW TABLES FROM INFORMATION_SCHEMA; `INFORMATION_SCHEMA` contains the following views: -- [COLUMNS](#columns) -- [SCHEMATA](#schemata) -- [TABLES](#tables) -- [VIEWS](#views) +- [COLUMNS](#columns) +- [SCHEMATA](#schemata) +- [TABLES](#tables) +- [VIEWS](#views) ## COLUMNS {#columns} @@ -33,29 +33,29 @@ Contains columns read from the [system.columns](../../operations/system-tables/c Columns: -- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. -- `table_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. -- `table_name` ([String](../../sql-reference/data-types/string.md)) — Table name. -- `column_name` ([String](../../sql-reference/data-types/string.md)) — Column name. -- `ordinal_position` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Ordinal position of a column in a table starting with 1. -- `column_default` ([String](../../sql-reference/data-types/string.md)) — Expression for the default value, or an empty string if it is not defined. -- `is_nullable` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column type is `Nullable`. -- `data_type` ([String](../../sql-reference/data-types/string.md)) — Column type. -- `character_maximum_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse makes sense only for `FixedString` data type. Otherwise, the `NULL` value is returned. -- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse makes sense only for `FixedString` data type. Otherwise, the `NULL` value is returned. -- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bitness for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned. -- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The base of the number system is the accuracy of approximate numeric data, exact numeric data, integer data or monetary data. In ClickHouse it's 2 for integer types and 10 for `Decimal` types. Otherwise, the `NULL` value is returned. 
-- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse makes sense only for `Decimal` types. Otherwise, the `NULL` value is returned. -- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Decimal precision of `DateTime64` data type. For other data types, the `NULL` value is returned. -- `character_set_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. -- `character_set_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. -- `character_set_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. -- `collation_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. -- `collation_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. -- `collation_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. -- `domain_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. -- `domain_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. -- `domain_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. +- `table_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. +- `table_name` ([String](../../sql-reference/data-types/string.md)) — Table name. +- `column_name` ([String](../../sql-reference/data-types/string.md)) — Column name. +- `ordinal_position` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Ordinal position of a column in a table starting with 1. +- `column_default` ([String](../../sql-reference/data-types/string.md)) — Expression for the default value, or an empty string if it is not defined. +- `is_nullable` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column type is `Nullable`. +- `data_type` ([String](../../sql-reference/data-types/string.md)) — Column type. +- `character_maximum_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse it makes sense only for the `FixedString` data type. Otherwise, the `NULL` value is returned. +- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse it makes sense only for the `FixedString` data type. Otherwise, the `NULL` value is returned.
+- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bitness for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned. +- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The base of the number system for the precision of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it's 2 for integer types and 10 for `Decimal` types. Otherwise, the `NULL` value is returned. +- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it makes sense only for `Decimal` types. Otherwise, the `NULL` value is returned. +- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Decimal precision of `DateTime64` data type. For other data types, the `NULL` value is returned. +- `character_set_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `character_set_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `character_set_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `collation_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `collation_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `collation_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `domain_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `domain_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `domain_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. **Example** @@ -101,13 +101,13 @@ Contains columns read from the [system.databases](../../operations/system-tables Columns: -- `catalog_name` ([String](../../sql-reference/data-types/string.md)) — The name of the database. -- `schema_name` ([String](../../sql-reference/data-types/string.md)) — The name of the database. -- `schema_owner` ([String](../../sql-reference/data-types/string.md)) — Schema owner name, always `'default'`. -- `default_character_set_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. -- `default_character_set_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
-- `default_character_set_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. -- `sql_path` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `catalog_name` ([String](../../sql-reference/data-types/string.md)) — The name of the database. +- `schema_name` ([String](../../sql-reference/data-types/string.md)) — The name of the database. +- `schema_owner` ([String](../../sql-reference/data-types/string.md)) — Schema owner name, always `'default'`. +- `default_character_set_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `default_character_set_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `default_character_set_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `sql_path` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. **Example** @@ -137,15 +137,15 @@ Contains columns read from the [system.tables](../../operations/system-tables/ta Columns: -- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. -- `table_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. -- `table_name` ([String](../../sql-reference/data-types/string.md)) — Table name. -- `table_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Table type. Possible values: - - `BASE TABLE` - - `VIEW` - - `FOREIGN TABLE` - - `LOCAL TEMPORARY` - - `SYSTEM VIEW` +- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. +- `table_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. +- `table_name` ([String](../../sql-reference/data-types/string.md)) — Table name. +- `table_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Table type. Possible values: + - `BASE TABLE` + - `VIEW` + - `FOREIGN TABLE` + - `LOCAL TEMPORARY` + - `SYSTEM VIEW` **Example** @@ -172,18 +172,18 @@ Contains columns read from the [system.tables](../../operations/system-tables/ta Columns: -- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. -- `table_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. -- `table_name` ([String](../../sql-reference/data-types/string.md)) — Table name. -- `view_definition` ([String](../../sql-reference/data-types/string.md)) — `SELECT` query for view. -- `check_option` ([String](../../sql-reference/data-types/string.md)) — `NONE`, no checking. -- `is_updatable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, the view is not updated. -- `is_insertable_into` ([Enum8](../../sql-reference/data-types/enum.md)) — Shows whether the created view is [materialized](../../sql-reference/statements/create/view.md/#materialized-view). Possible values: - - `NO` — The created view is not materialized. - - `YES` — The created view is materialized. 
-- `is_trigger_updatable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, the trigger is not updated. -- `is_trigger_deletable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, the trigger is not deleted. -- `is_trigger_insertable_into` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, no data is inserted into the trigger. +- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. +- `table_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. +- `table_name` ([String](../../sql-reference/data-types/string.md)) — Table name. +- `view_definition` ([String](../../sql-reference/data-types/string.md)) — `SELECT` query for view. +- `check_option` ([String](../../sql-reference/data-types/string.md)) — `NONE`, no checking. +- `is_updatable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, the view is not updated. +- `is_insertable_into` ([Enum8](../../sql-reference/data-types/enum.md)) — Shows whether the created view is [materialized](../../sql-reference/statements/create/view.md/#materialized-view). Possible values: + - `NO` — The created view is not materialized. + - `YES` — The created view is materialized. +- `is_trigger_updatable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, the trigger is not updated. +- `is_trigger_deletable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, the trigger is not deleted. +- `is_trigger_insertable_into` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, no data is inserted into the trigger. **Example** diff --git a/docs/en/operations/system-tables/merge_tree_settings.md b/docs/en/operations/system-tables/merge_tree_settings.md index 672c79e335b..d8539908bf7 100644 --- a/docs/en/operations/system-tables/merge_tree_settings.md +++ b/docs/en/operations/system-tables/merge_tree_settings.md @@ -7,11 +7,11 @@ Contains information about settings for `MergeTree` tables. Columns: -- `name` (String) — Setting name. -- `value` (String) — Setting value. -- `description` (String) — Setting description. -- `type` (String) — Setting type (implementation specific string value). -- `changed` (UInt8) — Whether the setting was explicitly defined in the config or explicitly changed. +- `name` (String) — Setting name. +- `value` (String) — Setting value. +- `description` (String) — Setting description. +- `type` (String) — Setting type (implementation specific string value). +- `changed` (UInt8) — Whether the setting was explicitly defined in the config or explicitly changed. **Example** ```sql diff --git a/docs/en/operations/system-tables/merges.md b/docs/en/operations/system-tables/merges.md index a5055639393..ec1d4c8e656 100644 --- a/docs/en/operations/system-tables/merges.md +++ b/docs/en/operations/system-tables/merges.md @@ -7,20 +7,20 @@ Contains information about merges and part mutations currently in process for ta Columns: -- `database` (String) — The name of the database the table is in. -- `table` (String) — Table name. -- `elapsed` (Float64) — The time elapsed (in seconds) since the merge started. -- `progress` (Float64) — The percentage of completed work from 0 to 1. -- `num_parts` (UInt64) — The number of pieces to be merged. -- `result_part_name` (String) — The name of the part that will be formed as the result of merging. -- `is_mutation` (UInt8) — 1 if this process is a part mutation. 
-- `total_size_bytes_compressed` (UInt64) — The total size of the compressed data in the merged chunks. -- `total_size_marks` (UInt64) — The total number of marks in the merged parts. -- `bytes_read_uncompressed` (UInt64) — Number of bytes read, uncompressed. -- `rows_read` (UInt64) — Number of rows read. -- `bytes_written_uncompressed` (UInt64) — Number of bytes written, uncompressed. -- `rows_written` (UInt64) — Number of rows written. -- `memory_usage` (UInt64) — Memory consumption of the merge process. -- `thread_id` (UInt64) — Thread ID of the merge process. -- `merge_type` — The type of current merge. Empty if it's an mutation. -- `merge_algorithm` — The algorithm used in current merge. Empty if it's an mutation. +- `database` (String) — The name of the database the table is in. +- `table` (String) — Table name. +- `elapsed` (Float64) — The time elapsed (in seconds) since the merge started. +- `progress` (Float64) — The percentage of completed work from 0 to 1. +- `num_parts` (UInt64) — The number of pieces to be merged. +- `result_part_name` (String) — The name of the part that will be formed as the result of merging. +- `is_mutation` (UInt8) — 1 if this process is a part mutation. +- `total_size_bytes_compressed` (UInt64) — The total size of the compressed data in the merged chunks. +- `total_size_marks` (UInt64) — The total number of marks in the merged parts. +- `bytes_read_uncompressed` (UInt64) — Number of bytes read, uncompressed. +- `rows_read` (UInt64) — Number of rows read. +- `bytes_written_uncompressed` (UInt64) — Number of bytes written, uncompressed. +- `rows_written` (UInt64) — Number of rows written. +- `memory_usage` (UInt64) — Memory consumption of the merge process. +- `thread_id` (UInt64) — Thread ID of the merge process. +- `merge_type` — The type of current merge. Empty if it's a mutation. +- `merge_algorithm` — The algorithm used in current merge. Empty if it's a mutation. diff --git a/docs/en/operations/system-tables/metric_log.md b/docs/en/operations/system-tables/metric_log.md index b22e672a31b..9ea0dde3f80 100644 --- a/docs/en/operations/system-tables/metric_log.md +++ b/docs/en/operations/system-tables/metric_log.md @@ -6,9 +6,9 @@ slug: /en/operations/system-tables/metric_log Contains a history of metrics values from tables `system.metrics` and `system.events`, periodically flushed to disk. Columns: -- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date. -- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time. -- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds resolution. +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time. +- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds resolution. **Example** @@ -45,8 +45,8 @@ CurrentMetric_DistributedFilesToInsert: 0 **See also** -- [metric_log setting](../../operations/server-configuration-parameters/settings.md#metric_log) — Enabling and disabling the setting. -- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md) — Contains periodically calculated metrics. -- [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that occurred.
-- [system.metrics](../../operations/system-tables/metrics.md) — Contains instantly calculated metrics. -- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. +- [metric_log setting](../../operations/server-configuration-parameters/settings.md#metric_log) — Enabling and disabling the setting. +- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md) — Contains periodically calculated metrics. +- [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that occurred. +- [system.metrics](../../operations/system-tables/metrics.md) — Contains instantly calculated metrics. +- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. diff --git a/docs/en/operations/system-tables/metrics.md b/docs/en/operations/system-tables/metrics.md index 141fc3c82c2..5a7dfd03eb4 100644 --- a/docs/en/operations/system-tables/metrics.md +++ b/docs/en/operations/system-tables/metrics.md @@ -7,9 +7,9 @@ Contains metrics which can be calculated instantly, or have a current value. For Columns: -- `metric` ([String](../../sql-reference/data-types/string.md)) — Metric name. -- `value` ([Int64](../../sql-reference/data-types/int-uint.md)) — Metric value. -- `description` ([String](../../sql-reference/data-types/string.md)) — Metric description. +- `metric` ([String](../../sql-reference/data-types/string.md)) — Metric name. +- `value` ([Int64](../../sql-reference/data-types/int-uint.md)) — Metric value. +- `description` ([String](../../sql-reference/data-types/string.md)) — Metric description. You can find the list of supported metrics in the [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp) source file of ClickHouse. @@ -34,9 +34,723 @@ SELECT * FROM system.metrics LIMIT 10 └──────────────────────────────────────┴───────┴────────────────────────────────────────────────────────────────────────┘ ``` +## Metric descriptions + +### AggregatorThreads + +Number of threads in the Aggregator thread pool. + +### AggregatorThreadsActive + +Number of threads in the Aggregator thread pool running a task. + +### AsyncInsertCacheSize + +Number of async insert hash ids in the cache + +### AsynchronousInsertThreads + +Number of threads in the AsynchronousInsert thread pool. + +### AsynchronousInsertThreadsActive + +Number of threads in the AsynchronousInsert thread pool running a task. + +### AsynchronousReadWait + +Number of threads waiting for asynchronous read. + +### BackgroundBufferFlushSchedulePoolSize + +Limit on number of tasks in BackgroundBufferFlushSchedulePool + +### BackgroundBufferFlushSchedulePoolTask + +Number of active tasks in BackgroundBufferFlushSchedulePool. This pool is used for periodic Buffer flushes + +### BackgroundCommonPoolSize + +Limit on number of tasks in an associated background pool + +### BackgroundCommonPoolTask + +Number of active tasks in an associated background pool + +### BackgroundDistributedSchedulePoolSize + +Limit on number of tasks in BackgroundDistributedSchedulePool + +### BackgroundDistributedSchedulePoolTask + +Number of active tasks in BackgroundDistributedSchedulePool. This pool is used for distributed sends that are done in the background.
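+
+Any metric described here can be read with a plain `SELECT` against `system.metrics`; a minimal sketch (an illustration, not from the original page):
+
+``` sql
+-- Current number of active tasks in one of the background pools.
+SELECT value
+FROM system.metrics
+WHERE metric = 'BackgroundDistributedSchedulePoolTask';
+```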
+ +### BackgroundFetchesPoolSize + +Limit on number of simultaneous fetches in an associated background pool + +### BackgroundFetchesPoolTask + +Number of active fetches in an associated background pool + +### BackgroundMergesAndMutationsPoolSize + +Limit on number of active merges and mutations in an associated background pool + +### BackgroundMergesAndMutationsPoolTask + +Number of active merges and mutations in an associated background pool + +### BackgroundMessageBrokerSchedulePoolSize + +Limit on number of tasks in BackgroundProcessingPool for message streaming + +### BackgroundMessageBrokerSchedulePoolTask + +Number of active tasks in BackgroundProcessingPool for message streaming + +### BackgroundMovePoolSize + +Limit on number of tasks in BackgroundProcessingPool for moves + +### BackgroundMovePoolTask + +Number of active tasks in BackgroundProcessingPool for moves + +### BackgroundSchedulePoolSize + +Limit on number of tasks in BackgroundSchedulePool. This pool is used for periodic ReplicatedMergeTree tasks, like cleaning old data parts, altering data parts, replica re-initialization, etc. + +### BackgroundSchedulePoolTask + +Number of active tasks in BackgroundSchedulePool. This pool is used for periodic ReplicatedMergeTree tasks, like cleaning old data parts, altering data parts, replica re-initialization, etc. + +### BackupsIOThreads + +Number of threads in the BackupsIO thread pool. + +### BackupsIOThreadsActive + +Number of threads in the BackupsIO thread pool running a task. + +### BackupsThreads + +Number of threads in the thread pool for BACKUP. + +### BackupsThreadsActive + +Number of threads in thread pool for BACKUP running a task. + +### BrokenDistributedFilesToInsert + +Number of files for asynchronous insertion into Distributed tables that have been marked as broken. This metric starts from 0 on server start. Number of files for every shard is summed. + +### CacheDetachedFileSegments + +Number of existing detached cache file segments + +### CacheDictionaryThreads + +Number of threads in the CacheDictionary thread pool. + +### CacheDictionaryThreadsActive + +Number of threads in the CacheDictionary thread pool running a task. + +### CacheDictionaryUpdateQueueBatches + +Number of 'batches' (a set of keys) in update queue in CacheDictionaries. + +### CacheDictionaryUpdateQueueKeys + +Exact number of keys in update queue in CacheDictionaries. + +### CacheFileSegments + +Number of existing cache file segments + +### ContextLockWait + +Number of threads waiting for lock in Context. This is a global lock. + +### DDLWorkerThreads + +Number of threads in the DDLWorker thread pool for ON CLUSTER queries. + +### DDLWorkerThreadsActive + +Number of threads in the DDLWorker thread pool for ON CLUSTER queries running a task. + +### DatabaseCatalogThreads + +Number of threads in the DatabaseCatalog thread pool. + +### DatabaseCatalogThreadsActive + +Number of threads in the DatabaseCatalog thread pool running a task. + +### DatabaseOnDiskThreads + +Number of threads in the DatabaseOnDisk thread pool. + +### DatabaseOnDiskThreadsActive + +Number of threads in the DatabaseOnDisk thread pool running a task. + +### DatabaseOrdinaryThreads + +Number of threads in the Ordinary database thread pool. + +### DatabaseOrdinaryThreadsActive + +Number of threads in the Ordinary database thread pool running a task. + +### DelayedInserts + +Number of INSERT queries that are throttled due to a high number of active data parts for a partition in a MergeTree table.
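+
+Because every background pool above exposes a paired `...PoolSize` limit and `...PoolTask` counter, the two can be lined up in one query. A sketch, assuming only the standard `system.metrics` table:
+
+``` sql
+-- Active tasks vs. configured limit for each background pool.
+SELECT
+    replaceRegexpOne(metric, 'Pool(Task|Size)$', '') AS pool,
+    anyIf(value, metric LIKE '%PoolTask') AS active_tasks,
+    anyIf(value, metric LIKE '%PoolSize') AS task_limit
+FROM system.metrics
+WHERE match(metric, '^Background.*Pool(Task|Size)$')
+GROUP BY pool;
+```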
+ +### DestroyAggregatesThreads + +Number of threads in the thread pool for destroying aggregate states. + +### DestroyAggregatesThreadsActive + +Number of threads in the thread pool for destroying aggregate states running a task. + +### DictCacheRequests + +Number of requests in flight to data sources of dictionaries of cache type. + +### DiskObjectStorageAsyncThreads + +Number of threads in the async thread pool for DiskObjectStorage. + +### DiskObjectStorageAsyncThreadsActive + +Number of threads in the async thread pool for DiskObjectStorage running a task. + +### DiskSpaceReservedForMerge + +Disk space reserved for currently running background merges. It is slightly more than the total size of currently merging parts. + +### DistributedFilesToInsert + +Number of pending files to process for asynchronous insertion into Distributed tables. Number of files for every shard is summed. + +### DistributedSend + +Number of connections to remote servers sending data that was INSERTed into Distributed tables. Both synchronous and asynchronous modes. + +### EphemeralNode + +Number of ephemeral nodes held in ZooKeeper. + +### FilesystemCacheElements + +Filesystem cache elements (file segments) + +### FilesystemCacheReadBuffers + +Number of active cache buffers + +### FilesystemCacheSize + +Filesystem cache size in bytes + +### GlobalThread + +Number of threads in global thread pool. + +### GlobalThreadActive + +Number of threads in global thread pool running a task. + +### HTTPConnection + +Number of connections to HTTP server + +### HashedDictionaryThreads + +Number of threads in the HashedDictionary thread pool. + +### HashedDictionaryThreadsActive + +Number of threads in the HashedDictionary thread pool running a task. + +### IOPrefetchThreads + +Number of threads in the IO prefetch thread pool. + +### IOPrefetchThreadsActive + +Number of threads in the IO prefetch thread pool running a task. + +### IOThreads + +Number of threads in the IO thread pool. + +### IOThreadsActive + +Number of threads in the IO thread pool running a task. + +### IOUringInFlightEvents + +Number of io_uring SQEs in flight + +### IOUringPendingEvents + +Number of io_uring SQEs waiting to be submitted + +### IOWriterThreads + +Number of threads in the IO writer thread pool. + +### IOWriterThreadsActive + +Number of threads in the IO writer thread pool running a task. + +### InterserverConnection + +Number of connections from other replicas to fetch parts + +### KafkaAssignedPartitions + +Number of partitions that Kafka tables are currently assigned to + +### KafkaBackgroundReads + +Number of background reads currently working (populating materialized views from Kafka) + +### KafkaConsumers + +Number of active Kafka consumers + +### KafkaConsumersInUse + +Number of consumers which are currently used by direct or background reads + +### KafkaConsumersWithAssignment + +Number of active Kafka consumers which have some partitions assigned. + +### KafkaLibrdkafkaThreads + +Number of active librdkafka threads + +### KafkaProducers + +Number of active Kafka producers created + +### KafkaWrites + +Number of currently running inserts to Kafka + +### KeeperAliveConnections + +Number of alive connections + +### KeeperOutstandingRequets + +Number of outstanding requests + +### LocalThread + +Number of threads in local thread pools. The threads in local thread pools are taken from the global thread pool. + +### LocalThreadActive + +Number of threads in local thread pools running a task.
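+
+As a hedged aside, the global and local thread pool metrics above can be watched together in one snapshot:
+
+``` sql
+SELECT metric, value
+FROM system.metrics
+WHERE metric IN ('GlobalThread', 'GlobalThreadActive', 'LocalThread', 'LocalThreadActive');
+```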
+ +### MMappedAllocBytes + +Sum bytes of mmapped allocations + +### MMappedAllocs + +Total number of mmapped allocations + +### MMappedFileBytes + +Sum size of mmapped file regions. + +### MMappedFiles + +Total number of mmapped files. + +### MarksLoaderThreads + +Number of threads in thread pool for loading marks. + +### MarksLoaderThreadsActive + +Number of threads in the thread pool for loading marks running a task. + +### MaxDDLEntryID + +Max processed DDL entry of DDLWorker. + +### MaxPushedDDLEntryID + +Max DDL entry of DDLWorker that was pushed to ZooKeeper. + +### MemoryTracking + +Total amount of memory (bytes) allocated by the server. + +### Merge + +Number of executing background merges + +### MergeTreeAllRangesAnnouncementsSent + +The current number of announcements being sent in flight from the remote server to the initiator server about the set of data parts (for MergeTree tables). Measured on the remote server side. + +### MergeTreeBackgroundExecutorThreads + +Number of threads in the MergeTreeBackgroundExecutor thread pool. + +### MergeTreeBackgroundExecutorThreadsActive + +Number of threads in the MergeTreeBackgroundExecutor thread pool running a task. + +### MergeTreeDataSelectExecutorThreads + +Number of threads in the MergeTreeDataSelectExecutor thread pool. + +### MergeTreeDataSelectExecutorThreadsActive + +Number of threads in the MergeTreeDataSelectExecutor thread pool running a task. + +### MergeTreePartsCleanerThreads + +Number of threads in the MergeTree parts cleaner thread pool. + +### MergeTreePartsCleanerThreadsActive + +Number of threads in the MergeTree parts cleaner thread pool running a task. + +### MergeTreePartsLoaderThreads + +Number of threads in the MergeTree parts loader thread pool. + +### MergeTreePartsLoaderThreadsActive + +Number of threads in the MergeTree parts loader thread pool running a task. + +### MergeTreeReadTaskRequestsSent + +The current number of callback requests in flight from the remote server back to the initiator server to choose the read task (for MergeTree tables). Measured on the remote server side. + +### Move + +Number of currently executing moves + +### MySQLConnection + +Number of client connections using MySQL protocol + +### NetworkReceive + +Number of threads receiving data from network. Only ClickHouse-related network interaction is included, not that of 3rd-party libraries. + +### NetworkSend + +Number of threads sending data to network. Only ClickHouse-related network interaction is included, not that of 3rd-party libraries. + +### OpenFileForRead + +Number of files open for reading + +### OpenFileForWrite + +Number of files open for writing + +### ParallelFormattingOutputFormatThreads + +Number of threads in the ParallelFormattingOutputFormatThreads thread pool. + +### ParallelFormattingOutputFormatThreadsActive + +Number of threads in the ParallelFormattingOutputFormatThreads thread pool running a task. + +### ParallelParsingInputFormatThreads + +Number of threads in the ParallelParsingInputFormat thread pool. + +### ParallelParsingInputFormatThreadsActive + +Number of threads in the ParallelParsingInputFormat thread pool running a task. + +### PartMutation + +Number of mutations (ALTER DELETE/UPDATE) + +### PartsActive + +Active data part, used by current and upcoming SELECTs. + +### PartsCommitted + +Deprecated. See PartsActive. + +### PartsCompact + +Compact parts. + +### PartsDeleteOnDestroy + +Part was moved to another disk and should be deleted in its own destructor.
+ +### PartsDeleting + +Inactive data part with an identity refcounter; it is currently being deleted by a cleaner. + +### PartsInMemory + +In-memory parts. + +### PartsOutdated + +Inactive data part that can be used only by current SELECTs; it can be deleted after those SELECTs finish. + +### PartsPreActive + +The part is in data_parts, but not used for SELECTs. + +### PartsPreCommitted + +Deprecated. See PartsPreActive. + +### PartsTemporary + +The part is being generated now; it is not in the data_parts list. + +### PartsWide + +Wide parts. + +### PendingAsyncInsert + +Number of asynchronous inserts that are waiting for flush. + +### PostgreSQLConnection + +Number of client connections using PostgreSQL protocol + +### Query + +Number of executing queries + +### QueryPreempted + +Number of queries that are stopped and waiting due to 'priority' setting. + +### QueryThread + +Number of query processing threads + +### RWLockActiveReaders + +Number of threads holding read lock in a table RWLock. + +### RWLockActiveWriters + +Number of threads holding write lock in a table RWLock. + +### RWLockWaitingReaders + +Number of threads waiting for read on a table RWLock. + +### RWLockWaitingWriters + +Number of threads waiting for write on a table RWLock. + +### Read + +Number of read (read, pread, io_getevents, etc.) syscalls in flight + +### ReadTaskRequestsSent + +The current number of callback requests in flight from the remote server back to the initiator server to choose the read task (for s3Cluster table function and similar). Measured on the remote server side. + +### ReadonlyReplica + +Number of Replicated tables that are currently in readonly state due to re-initialization after ZooKeeper session loss or due to startup without ZooKeeper configured. + +### RemoteRead + +Number of reads with remote reader in flight + +### ReplicatedChecks + +Number of data parts checking for consistency + +### ReplicatedFetch + +Number of data parts being fetched from replica + +### ReplicatedSend + +Number of data parts being sent to replicas + +### RestartReplicaThreads + +Number of threads in the RESTART REPLICA thread pool. + +### RestartReplicaThreadsActive + +Number of threads in the RESTART REPLICA thread pool running a task. + +### RestoreThreads + +Number of threads in the thread pool for RESTORE. + +### RestoreThreadsActive + +Number of threads in the thread pool for RESTORE running a task. + +### Revision + +Revision of the server. It is a number incremented for every release or release candidate except patch releases. + +### S3Requests + +S3 requests + +### SendExternalTables + +Number of connections that are sending data for external tables to remote servers. External tables are used to implement GLOBAL IN and GLOBAL JOIN operators with distributed subqueries. + +### SendScalars + +Number of connections that are sending data for scalars to remote servers. + +### StartupSystemTablesThreads + +Number of threads in the StartupSystemTables thread pool. + +### StartupSystemTablesThreadsActive + +Number of threads in the StartupSystemTables thread pool running a task. + +### StorageBufferBytes + +Number of bytes in buffers of Buffer tables + +### StorageBufferRows + +Number of rows in buffers of Buffer tables + +### StorageDistributedThreads + +Number of threads in the StorageDistributed thread pool. + +### StorageDistributedThreadsActive + +Number of threads in the StorageDistributed thread pool running a task. + +### StorageHiveThreads + +Number of threads in the StorageHive thread pool.
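+
+The `Parts*` metrics above describe the lifecycle states of data parts. A sketch for taking a snapshot of all of them at once (standard `system.metrics` assumed):
+
+``` sql
+SELECT metric, value
+FROM system.metrics
+WHERE metric LIKE 'Parts%'
+ORDER BY metric;
+```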
+ +### StorageHiveThreadsActive + +Number of threads in the StorageHive thread pool running a task. + +### StorageS3Threads + +Number of threads in the StorageS3 thread pool. + +### StorageS3ThreadsActive + +Number of threads in the StorageS3 thread pool running a task. + +### SystemReplicasThreads + +Number of threads in the system.replicas thread pool. + +### SystemReplicasThreadsActive + +Number of threads in the system.replicas thread pool running a task. + +### TCPConnection + +Number of connections to TCP server (clients with native interface); also includes server-server distributed query connections + +### TablesLoaderThreads + +Number of threads in the tables loader thread pool. + +### TablesLoaderThreadsActive + +Number of threads in the tables loader thread pool running a task. + +### TablesToDropQueueSize + +Number of dropped tables that are waiting for background data removal. + +### TemporaryFilesForAggregation + +Number of temporary files created for external aggregation + +### TemporaryFilesForJoin + +Number of temporary files created for JOIN + +### TemporaryFilesForSort + +Number of temporary files created for external sorting + +### TemporaryFilesUnknown + +Number of temporary files created without known purpose + +### ThreadPoolFSReaderThreads + +Number of threads in the thread pool for local_filesystem_read_method=threadpool. + +### ThreadPoolFSReaderThreadsActive + +Number of threads in the thread pool for local_filesystem_read_method=threadpool running a task. + +### ThreadPoolRemoteFSReaderThreads + +Number of threads in the thread pool for remote_filesystem_read_method=threadpool. + +### ThreadPoolRemoteFSReaderThreadsActive + +Number of threads in the thread pool for remote_filesystem_read_method=threadpool running a task. + +### ThreadsInOvercommitTracker + +Number of waiting threads inside of OvercommitTracker + +### TotalTemporaryFiles + +Number of temporary files created + +### VersionInteger + +Version of the server in a single integer number in base-1000. For example, version 11.22.33 is translated to 11022033. + +### Write + +Number of write (write, pwrite, io_getevents, etc.) syscalls in flight + +### ZooKeeperRequest + +Number of requests to ZooKeeper in flight. + +### ZooKeeperSession + +Number of sessions (connections) to ZooKeeper. Should be no more than one, because using more than one connection to ZooKeeper may lead to bugs due to lack of linearizability (stale reads) that the ZooKeeper consistency model allows. + +### ZooKeeperWatch + +Number of watches (event subscriptions) in ZooKeeper. + **See Also** -- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics. -- [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that occurred.
+- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` and `system.events`. +- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. diff --git a/docs/en/operations/system-tables/moves.md b/docs/en/operations/system-tables/moves.md index 54f07540507..cd543ce7536 100644 --- a/docs/en/operations/system-tables/moves.md +++ b/docs/en/operations/system-tables/moves.md @@ -7,21 +7,21 @@ The table contains information about in-progress [data part moves](/docs/en/sql- Columns: -- `database` ([String](/docs/en/sql-reference/data-types/string.md)) — Name of the database. +- `database` ([String](/docs/en/sql-reference/data-types/string.md)) — Name of the database. -- `table` ([String](/docs/en/sql-reference/data-types/string.md)) — Name of the table containing moving data part. +- `table` ([String](/docs/en/sql-reference/data-types/string.md)) — Name of the table containing the moving data part. -- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — Time elapsed (in seconds) since data part movement started. +- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — Time elapsed (in seconds) since data part movement started. -- `target_disk_name` ([String](disks.md)) — Name of [disk](/docs/en/operations/system-tables/disks/) to which the data part is moving. +- `target_disk_name` ([String](/docs/en/sql-reference/data-types/string.md)) — Name of [disk](/docs/en/operations/system-tables/disks/) to which the data part is moving. -- `target_disk_path` ([String](disks.md)) — Path to the mount point of the [disk](/docs/en/operations/system-tables/disks/) in the file system. +- `target_disk_path` ([String](/docs/en/sql-reference/data-types/string.md)) — Path to the mount point of the [disk](/docs/en/operations/system-tables/disks/) in the file system. -- `part_name` ([String](/docs/en/sql-reference/data-types/string.md)) — Name of the data part being moved. +- `part_name` ([String](/docs/en/sql-reference/data-types/string.md)) — Name of the data part being moved. -- `part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Data part size. +- `part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Data part size. -- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Identifier of a thread performing the movement. +- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Identifier of a thread performing the movement. **Example** @@ -37,6 +37,6 @@ SELECT * FROM system.moves **See Also** -- [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) table engine -- [Using Multiple Block Devices for Data Storage](/docs/en/engines/table-engines/mergetree-family/mergetree#table_engine-mergetree-multiple-volumes) -- [ALTER TABLE ... MOVE PART](/docs/en/sql-reference/statements/alter/partition#move-partitionpart) command +- [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) table engine +- [Using Multiple Block Devices for Data Storage](/docs/en/engines/table-engines/mergetree-family/mergetree#table_engine-mergetree-multiple-volumes) +- [ALTER TABLE ...
MOVE PART](/docs/en/sql-reference/statements/alter/partition#move-partitionpart) command diff --git a/docs/en/operations/system-tables/mutations.md b/docs/en/operations/system-tables/mutations.md index d65bccb9aaa..64e86992af9 100644 --- a/docs/en/operations/system-tables/mutations.md +++ b/docs/en/operations/system-tables/mutations.md @@ -5,31 +5,31 @@ slug: /en/operations/system-tables/mutations The table contains information about [mutations](/docs/en/sql-reference/statements/alter/index.md#mutations) of [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) tables and their progress. Each mutation command is represented by a single row. -Columns: +## Columns -- `database` ([String](/docs/en/sql-reference/data-types/string.md)) — The name of the database to which the mutation was applied. +- `database` ([String](/docs/en/sql-reference/data-types/string.md)) — The name of the database to which the mutation was applied. -- `table` ([String](/docs/en/sql-reference/data-types/string.md)) — The name of the table to which the mutation was applied. +- `table` ([String](/docs/en/sql-reference/data-types/string.md)) — The name of the table to which the mutation was applied. -- `mutation_id` ([String](/docs/en/sql-reference/data-types/string.md)) — The ID of the mutation. For replicated tables these IDs correspond to znode names in the `/mutations/` directory in ClickHouse Keeper. For non-replicated tables the IDs correspond to file names in the data directory of the table. +- `mutation_id` ([String](/docs/en/sql-reference/data-types/string.md)) — The ID of the mutation. For replicated tables these IDs correspond to znode names in the `/mutations/` directory in ClickHouse Keeper. For non-replicated tables the IDs correspond to file names in the data directory of the table. -- `command` ([String](/docs/en/sql-reference/data-types/string.md)) — The mutation command string (the part of the query after `ALTER TABLE [db.]table`). +- `command` ([String](/docs/en/sql-reference/data-types/string.md)) — The mutation command string (the part of the query after `ALTER TABLE [db.]table`). -- `create_time` ([DateTime](/docs/en/sql-reference/data-types/datetime.md)) — Date and time when the mutation command was submitted for execution. +- `create_time` ([DateTime](/docs/en/sql-reference/data-types/datetime.md)) — Date and time when the mutation command was submitted for execution. -- `block_numbers.partition_id` ([Array](/docs/en/sql-reference/data-types/array.md)([String](/docs/en/sql-reference/data-types/string.md))) — For mutations of replicated tables, the array contains the partitions' IDs (one record for each partition). For mutations of non-replicated tables the array is empty. +- `block_numbers.partition_id` ([Array](/docs/en/sql-reference/data-types/array.md)([String](/docs/en/sql-reference/data-types/string.md))) — For mutations of replicated tables, the array contains the partitions' IDs (one record for each partition). For mutations of non-replicated tables the array is empty. -- `block_numbers.number` ([Array](/docs/en/sql-reference/data-types/array.md)([Int64](/docs/en/sql-reference/data-types/int-uint.md))) — For mutations of replicated tables, the array contains one record for each partition, with the block number that was acquired by the mutation. Only parts that contain blocks with numbers less than this number will be mutated in the partition.
+- `block_numbers.number` ([Array](/docs/en/sql-reference/data-types/array.md)([Int64](/docs/en/sql-reference/data-types/int-uint.md))) — For mutations of replicated tables, the array contains one record for each partition, with the block number that was acquired by the mutation. Only parts that contain blocks with numbers less than this number will be mutated in the partition. In non-replicated tables, block numbers in all partitions form a single sequence. This means that for mutations of non-replicated tables, the column will contain one record with a single block number acquired by the mutation. -- `parts_to_do_names` ([Array](/docs/en/sql-reference/data-types/array.md)([String](/docs/en/sql-reference/data-types/string.md))) — An array of names of data parts that need to be mutated for the mutation to complete. +- `parts_to_do_names` ([Array](/docs/en/sql-reference/data-types/array.md)([String](/docs/en/sql-reference/data-types/string.md))) — An array of names of data parts that need to be mutated for the mutation to complete. -- `parts_to_do` ([Int64](/docs/en/sql-reference/data-types/int-uint.md)) — The number of data parts that need to be mutated for the mutation to complete. +- `parts_to_do` ([Int64](/docs/en/sql-reference/data-types/int-uint.md)) — The number of data parts that need to be mutated for the mutation to complete. -- `is_done` ([UInt8](/docs/en/sql-reference/data-types/int-uint.md)) — The flag whether the mutation is done or not. Possible values: - - `1` if the mutation is completed, - - `0` if the mutation is still in process. +- `is_done` ([UInt8](/docs/en/sql-reference/data-types/int-uint.md)) — Flag indicating whether the mutation is done. Possible values: + - `1` if the mutation is completed, + - `0` if the mutation is still in process. :::note Even if `parts_to_do = 0` it is possible that a mutation of a replicated table is not completed yet because of a long-running `INSERT` query that will create a new data part that needs to be mutated. @@ -37,14 +37,27 @@ If there were problems with mutating some data parts, the following columns contain additional information: -- `latest_failed_part` ([String](/docs/en/sql-reference/data-types/string.md)) — The name of the most recent part that could not be mutated. +- `latest_failed_part` ([String](/docs/en/sql-reference/data-types/string.md)) — The name of the most recent part that could not be mutated. -- `latest_fail_time` ([DateTime](/docs/en/sql-reference/data-types/datetime.md)) — The date and time of the most recent part mutation failure. +- `latest_fail_time` ([DateTime](/docs/en/sql-reference/data-types/datetime.md)) — The date and time of the most recent part mutation failure. -- `latest_fail_reason` ([String](/docs/en/sql-reference/data-types/string.md)) — The exception message that caused the most recent part mutation failure. +- `latest_fail_reason` ([String](/docs/en/sql-reference/data-types/string.md)) — The exception message that caused the most recent part mutation failure. + +## Monitoring Mutations + +To track the progress of mutations, query the `system.mutations` table with something like the following. This requires read permissions on the `system.*` tables: + +``` sql +SELECT * FROM clusterAllReplicas('cluster_name', 'db', system.mutations) +WHERE is_done=0 AND table='tmp'; +``` + +:::tip +Replace `tmp` in `table='tmp'` with the name of the table that you are checking mutations on.
+::: **See Also** -- [Mutations](/docs/en/sql-reference/statements/alter/index.md#mutations) -- [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) table engine -- [ReplicatedMergeTree](/docs/en/engines/table-engines/mergetree-family/replication.md) family +- [Mutations](/docs/en/sql-reference/statements/alter/index.md#mutations) +- [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) table engine +- [ReplicatedMergeTree](/docs/en/engines/table-engines/mergetree-family/replication.md) family diff --git a/docs/en/operations/system-tables/opentelemetry_span_log.md b/docs/en/operations/system-tables/opentelemetry_span_log.md index 9d8aea46218..a605a46c14c 100644 --- a/docs/en/operations/system-tables/opentelemetry_span_log.md +++ b/docs/en/operations/system-tables/opentelemetry_span_log.md @@ -7,23 +7,30 @@ Contains information about [trace spans](https://opentracing.io/docs/overview/sp Columns: -- `trace_id` ([UUID](../../sql-reference/data-types/uuid.md)) — ID of the trace for executed query. +- `trace_id` ([UUID](../../sql-reference/data-types/uuid.md)) — ID of the trace for executed query. -- `span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the `trace span`. +- `span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the `trace span`. -- `parent_span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the parent `trace span`. +- `parent_span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the parent `trace span`. -- `operation_name` ([String](../../sql-reference/data-types/string.md)) — The name of the operation. +- `operation_name` ([String](../../sql-reference/data-types/string.md)) — The name of the operation. -- `start_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The start time of the `trace span` (in microseconds). +- `kind` ([Enum8](../../sql-reference/data-types/enum.md)) — The [SpanKind](https://opentelemetry.io/docs/reference/specification/trace/api/#spankind) of the span. + - `INTERNAL` — Indicates that the span represents an internal operation within an application. + - `SERVER` — Indicates that the span covers server-side handling of a synchronous RPC or other remote request. + - `CLIENT` — Indicates that the span describes a request to some remote service. + - `PRODUCER` — Indicates that the span describes the initiators of an asynchronous request. This parent span will often end before the corresponding child CONSUMER span, possibly even before the child span starts. + - `CONSUMER` - Indicates that the span describes a child of an asynchronous PRODUCER request. -- `finish_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The finish time of the `trace span` (in microseconds). +- `start_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The start time of the `trace span` (in microseconds). -- `finish_date` ([Date](../../sql-reference/data-types/date.md)) — The finish date of the `trace span`. +- `finish_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The finish time of the `trace span` (in microseconds). -- `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — [Attribute](https://opentelemetry.io/docs/go/instrumentation/#attributes) names depending on the `trace span`. They are filled in according to the recommendations in the [OpenTelemetry](https://opentelemetry.io/) standard. 
+- `finish_date` ([Date](../../sql-reference/data-types/date.md)) — The finish date of the `trace span`.
-- `attribute.values` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Attribute values depending on the `trace span`. They are filled in according to the recommendations in the `OpenTelemetry` standard.
+- `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — [Attribute](https://opentelemetry.io/docs/go/instrumentation/#attributes) names depending on the `trace span`. They are filled in according to the recommendations in the [OpenTelemetry](https://opentelemetry.io/) standard.
+
+- `attribute.values` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Attribute values depending on the `trace span`. They are filled in according to the recommendations in the `OpenTelemetry` standard.
**Example**
@@ -42,6 +49,7 @@ trace_id: cdab0847-0d62-61d5-4d38-dd65b19a1914
span_id: 701487461015578150
parent_span_id: 2991972114672045096
operation_name: DB::Block DB::InterpreterSelectQuery::getSampleBlockImpl()
+kind: INTERNAL
start_time_us: 1612374594529090
finish_time_us: 1612374594529108
finish_date: 2021-02-03
@@ -51,4 +59,4 @@ attribute.values: []
**See Also**
-- [OpenTelemetry](../../operations/opentelemetry.md)
+- [OpenTelemetry](../../operations/opentelemetry.md)
diff --git a/docs/en/operations/system-tables/part_log.md b/docs/en/operations/system-tables/part_log.md
index 8ccde889289..c9e34962c79 100644
--- a/docs/en/operations/system-tables/part_log.md
+++ b/docs/en/operations/system-tables/part_log.md
@@ -9,41 +9,41 @@ This table contains information about events that occurred with [data parts](../
The `system.part_log` table contains the following columns:
-- `query_id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the `INSERT` query that created this data part.
-- `event_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of the event that occurred with the data part. Can have one of the following values:
- - `NEW_PART` — Inserting of a new data part.
- - `MERGE_PARTS` — Merging of data parts.
- - `DOWNLOAD_PART` — Downloading a data part.
- - `REMOVE_PART` — Removing or detaching a data part using [DETACH PARTITION](../../sql-reference/statements/alter/partition.md#alter_detach-partition).
- - `MUTATE_PART` — Mutating of a data part.
- - `MOVE_PART` — Moving the data part from the one disk to another one.
-- `merge_reason` ([Enum8](../../sql-reference/data-types/enum.md)) — The reason for the event with type `MERGE_PARTS`. Can have one of the following values:
- - `NOT_A_MERGE` — The current event has the type other than `MERGE_PARTS`.
- - `REGULAR_MERGE` — Some regular merge.
- - `TTL_DELETE_MERGE` — Cleaning up expired data.
- - `TTL_RECOMPRESS_MERGE` — Recompressing data part with the.
-- `merge_algorithm` ([Enum8](../../sql-reference/data-types/enum.md)) — Merge algorithm for the event with type `MERGE_PARTS`. Can have one of the following values:
- - `UNDECIDED`
- - `HORIZONTAL`
- - `VERTICAL`
-- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date.
-- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
-- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds precision.
-- `duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration.
-- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database the data part is in.
-- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table the data part is in.
-- `part_name` ([String](../../sql-reference/data-types/string.md)) — Name of the data part.
-- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition that the data part was inserted to. The column takes the `all` value if the partitioning is by `tuple()`.
-- `path_on_disk` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the folder with data part files.
-- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows in the data part.
-- `size_in_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of the data part in bytes.
-- `merged_from` ([Array(String)](../../sql-reference/data-types/array.md)) — An array of names of the parts which the current part was made up from (after the merge).
-- `bytes_uncompressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of uncompressed bytes.
-- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows was read during the merge.
-- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of bytes was read during the merge.
-- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this thread.
-- `error` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The code number of the occurred error.
-- `exception` ([String](../../sql-reference/data-types/string.md)) — Text message of the occurred error.
+- `query_id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the `INSERT` query that created this data part.
+- `event_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of the event that occurred with the data part. Can have one of the following values:
+ - `NEW_PART` — Inserting of a new data part.
+ - `MERGE_PARTS` — Merging of data parts.
+ - `DOWNLOAD_PART` — Downloading a data part.
+ - `REMOVE_PART` — Removing or detaching a data part using [DETACH PARTITION](../../sql-reference/statements/alter/partition.md#alter_detach-partition).
+ - `MUTATE_PART` — Mutating of a data part.
+ - `MOVE_PART` — Moving the data part from one disk to another.
+- `merge_reason` ([Enum8](../../sql-reference/data-types/enum.md)) — The reason for the event with type `MERGE_PARTS`. Can have one of the following values:
+ - `NOT_A_MERGE` — The current event has a type other than `MERGE_PARTS`.
+ - `REGULAR_MERGE` — Some regular merge.
+ - `TTL_DELETE_MERGE` — Cleaning up expired data.
+ - `TTL_RECOMPRESS_MERGE` — Recompressing the data part according to a recompression TTL rule.
+- `merge_algorithm` ([Enum8](../../sql-reference/data-types/enum.md)) — Merge algorithm for the event with type `MERGE_PARTS`. Can have one of the following values:
+ - `UNDECIDED`
+ - `HORIZONTAL`
+ - `VERTICAL`
+- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date.
+- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
+- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds precision.
+- `duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration in milliseconds.
+- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database the data part is in.
+- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table the data part is in.
+- `part_name` ([String](../../sql-reference/data-types/string.md)) — Name of the data part.
+- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition that the data part was inserted to. The column takes the `all` value if the partitioning is by `tuple()`.
+- `path_on_disk` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the folder with data part files.
+- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows in the data part.
+- `size_in_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of the data part in bytes.
+- `merged_from` ([Array(String)](../../sql-reference/data-types/array.md)) — An array of names of the parts which the current part was made up from (after the merge).
+- `bytes_uncompressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of uncompressed bytes.
+- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows read during the merge.
+- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of bytes read during the merge.
+- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in the context of this thread.
+- `error` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The code of the error that occurred.
+- `exception` ([String](../../sql-reference/data-types/string.md)) — Text message of the error that occurred.
The `system.part_log` table is created after the first insertion of data into the `MergeTree` table.
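+
+For example, a sketch that summarizes the last day of part events for a hypothetical table `tmp`, using only the columns described above:
+
+``` sql
+SELECT event_type, count() AS events, sum(rows) AS total_rows
+FROM system.part_log
+WHERE event_date >= today() - 1 AND table = 'tmp'
+GROUP BY event_type;
+```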
diff --git a/docs/en/operations/system-tables/parts.md b/docs/en/operations/system-tables/parts.md
index 106d3c59dea..e61c6ed2ba4 100644
--- a/docs/en/operations/system-tables/parts.md
+++ b/docs/en/operations/system-tables/parts.md
@@ -9,107 +9,107 @@ Each row describes one data part.
Columns:
-- `partition` ([String](../../sql-reference/data-types/string.md)) – The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter/index.md#query_language_queries_alter) query.
+- `partition` ([String](../../sql-reference/data-types/string.md)) – The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter/index.md#query_language_queries_alter) query.
Formats:
- - `YYYYMM` for automatic partitioning by month.
- - `any_string` when partitioning manually.
+ - `YYYYMM` for automatic partitioning by month.
+ - `any_string` when partitioning manually.
-- `name` ([String](../../sql-reference/data-types/string.md)) – Name of the data part.
+- `name` ([String](../../sql-reference/data-types/string.md)) – Name of the data part.
-- `part_type` ([String](../../sql-reference/data-types/string.md)) — The data part storing format.
+- `part_type` ([String](../../sql-reference/data-types/string.md)) — The data part storing format.
Possible Values:
- - `Wide` — Each column is stored in a separate file in a filesystem.
- - `Compact` — All columns are stored in one file in a filesystem.
+ - `Wide` — Each column is stored in a separate file in a filesystem.
+ - `Compact` — All columns are stored in one file in a filesystem.
Data storing format is controlled by the `min_bytes_for_wide_part` and `min_rows_for_wide_part` settings of the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table.
- - `active` ([UInt8](../../sql-reference/data-types/int-uint.md)) – Flag that indicates whether the data part is active. If a data part is active, it’s used in a table. Otherwise, it’s deleted. Inactive data parts remain after merging.
+- `active` ([UInt8](../../sql-reference/data-types/int-uint.md)) – Flag that indicates whether the data part is active. If a data part is active, it’s used in a table. Otherwise, it’s deleted. Inactive data parts remain after merging.
-- `marks` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The number of marks. To get the approximate number of rows in a data part, multiply `marks` by the index granularity (usually 8192) (this hint does not work for adaptive granularity).
+- `marks` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The number of marks. To get the approximate number of rows in a data part, multiply `marks` by the index granularity (usually 8192) (this hint does not work for adaptive granularity).
-- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The number of rows.
+- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The number of rows.
-- `bytes_on_disk` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of all the data part files in bytes.
+- `bytes_on_disk` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of all the data part files in bytes.
-- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of compressed data in the data part. All the auxiliary files (for example, files with marks) are not included.
+- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of compressed data in the data part. All the auxiliary files (for example, files with marks) are not included.
-- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included.
+- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included.
-- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The size of the file with marks.
+- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The size of the file with marks.
-- `secondary_indices_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of compressed data for secondary indices in the data part. All the auxiliary files (for example, files with marks) are not included.
+- `secondary_indices_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of compressed data for secondary indices in the data part. All the auxiliary files (for example, files with marks) are not included.
-- `secondary_indices_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of uncompressed data for secondary indices in the data part. All the auxiliary files (for example, files with marks) are not included.
+- `secondary_indices_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of uncompressed data for secondary indices in the data part. All the auxiliary files (for example, files with marks) are not included.
-- `secondary_indices_marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The size of the file with marks for secondary indices.
+- `secondary_indices_marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The size of the file with marks for secondary indices.
-- `modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) – The time the directory with the data part was modified. This usually corresponds to the time of data part creation.
+- `modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) – The time the directory with the data part was modified. This usually corresponds to the time of data part creation.
-- `remove_time` ([DateTime](../../sql-reference/data-types/datetime.md)) – The time when the data part became inactive.
+- `remove_time` ([DateTime](../../sql-reference/data-types/datetime.md)) – The time when the data part became inactive.
-- `refcount` ([UInt32](../../sql-reference/data-types/int-uint.md)) – The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges.
+- `refcount` ([UInt32](../../sql-reference/data-types/int-uint.md)) – The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges.
-- `min_date` ([Date](../../sql-reference/data-types/date.md)) – The minimum value of the date key in the data part.
+- `min_date` ([Date](../../sql-reference/data-types/date.md)) – The minimum value of the date key in the data part.
-- `max_date` ([Date](../../sql-reference/data-types/date.md)) – The maximum value of the date key in the data part.
+- `max_date` ([Date](../../sql-reference/data-types/date.md)) – The maximum value of the date key in the data part.
-- `min_time` ([DateTime](../../sql-reference/data-types/datetime.md)) – The minimum value of the date and time key in the data part.
+- `min_time` ([DateTime](../../sql-reference/data-types/datetime.md)) – The minimum value of the date and time key in the data part.
-- `max_time`([DateTime](../../sql-reference/data-types/datetime.md)) – The maximum value of the date and time key in the data part.
+- `max_time` ([DateTime](../../sql-reference/data-types/datetime.md)) – The maximum value of the date and time key in the data part.
-- `partition_id` ([String](../../sql-reference/data-types/string.md)) – ID of the partition.
+- `partition_id` ([String](../../sql-reference/data-types/string.md)) – ID of the partition.
-- `min_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The minimum number of data parts that make up the current part after merging.
+- `min_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The minimum number of data parts that make up the current part after merging.
-- `max_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The maximum number of data parts that make up the current part after merging.
+- `max_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The maximum number of data parts that make up the current part after merging.
-- `level` ([UInt32](../../sql-reference/data-types/int-uint.md)) – Depth of the merge tree. Zero means that the current part was created by insert rather than by merging other parts.
+- `level` ([UInt32](../../sql-reference/data-types/int-uint.md)) – Depth of the merge tree. Zero means that the current part was created by an insert rather than by merging other parts.
-- `data_version` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than `data_version`).
+- `data_version` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than `data_version`).
-- `primary_key_bytes_in_memory` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The amount of memory (in bytes) used by primary key values.
+- `primary_key_bytes_in_memory` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The amount of memory (in bytes) used by primary key values.
-- `primary_key_bytes_in_memory_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The amount of memory (in bytes) reserved for primary key values.
+- `primary_key_bytes_in_memory_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The amount of memory (in bytes) reserved for primary key values.
-- `is_frozen` ([UInt8](../../sql-reference/data-types/int-uint.md)) – Flag that shows that a partition data backup exists. 1, the backup exists. 0, the backup does not exist. For more details, see [FREEZE PARTITION](../../sql-reference/statements/alter/partition.md/#alter_freeze-partition)
+- `is_frozen` ([UInt8](../../sql-reference/data-types/int-uint.md)) – Flag that shows that a partition data backup exists. 1, the backup exists. 0, the backup does not exist. For more details, see [FREEZE PARTITION](../../sql-reference/statements/alter/partition.md/#alter_freeze-partition)
-- `database` ([String](../../sql-reference/data-types/string.md)) – Name of the database.
+- `database` ([String](../../sql-reference/data-types/string.md)) – Name of the database.
-- `table` ([String](../../sql-reference/data-types/string.md)) – Name of the table.
+- `table` ([String](../../sql-reference/data-types/string.md)) – Name of the table.
-- `engine` ([String](../../sql-reference/data-types/string.md)) – Name of the table engine without parameters.
+- `engine` ([String](../../sql-reference/data-types/string.md)) – Name of the table engine without parameters.
-- `path` ([String](../../sql-reference/data-types/string.md)) – Absolute path to the folder with data part files.
+- `path` ([String](../../sql-reference/data-types/string.md)) – Absolute path to the folder with data part files.
-- `disk_name` ([String](../../sql-reference/data-types/string.md)) – Name of a disk that stores the data part.
+- `disk_name` ([String](../../sql-reference/data-types/string.md)) – Name of a disk that stores the data part.
-- `hash_of_all_files` ([String](../../sql-reference/data-types/string.md)) – [sipHash128](../../sql-reference/functions/hash-functions.md/#hash_functions-siphash128) of compressed files.
+- `hash_of_all_files` ([String](../../sql-reference/data-types/string.md)) – [sipHash128](../../sql-reference/functions/hash-functions.md/#hash_functions-siphash128) of compressed files.
-- `hash_of_uncompressed_files` ([String](../../sql-reference/data-types/string.md)) – [sipHash128](../../sql-reference/functions/hash-functions.md/#hash_functions-siphash128) of uncompressed files (files with marks, index file etc.).
+- `hash_of_uncompressed_files` ([String](../../sql-reference/data-types/string.md)) – [sipHash128](../../sql-reference/functions/hash-functions.md/#hash_functions-siphash128) of uncompressed files (files with marks, index file etc.).
-- `uncompressed_hash_of_compressed_files` ([String](../../sql-reference/data-types/string.md)) – [sipHash128](../../sql-reference/functions/hash-functions.md/#hash_functions-siphash128) of data in the compressed files as if they were uncompressed.
+- `uncompressed_hash_of_compressed_files` ([String](../../sql-reference/data-types/string.md)) – [sipHash128](../../sql-reference/functions/hash-functions.md/#hash_functions-siphash128) of data in the compressed files as if they were uncompressed.
-- `delete_ttl_info_min` ([DateTime](../../sql-reference/data-types/datetime.md)) — The minimum value of the date and time key for [TTL DELETE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
+- `delete_ttl_info_min` ([DateTime](../../sql-reference/data-types/datetime.md)) — The minimum value of the date and time key for [TTL DELETE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
-- `delete_ttl_info_max` ([DateTime](../../sql-reference/data-types/datetime.md)) — The maximum value of the date and time key for [TTL DELETE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
+- `delete_ttl_info_max` ([DateTime](../../sql-reference/data-types/datetime.md)) — The maximum value of the date and time key for [TTL DELETE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
-- `move_ttl_info.expression` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of expressions. Each expression defines a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
+- `move_ttl_info.expression` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of expressions. Each expression defines a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
-:::warning
+:::note
The `move_ttl_info.expression` array is kept mostly for backward compatibility; now the simplest way to check a `TTL MOVE` rule is to use the `move_ttl_info.min` and `move_ttl_info.max` fields (see the example query after this list).
:::
-- `move_ttl_info.min` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Array of date and time values. Each element describes the minimum key value for a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
+- `move_ttl_info.min` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Array of date and time values. Each element describes the minimum key value for a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
-- `move_ttl_info.max` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Array of date and time values. Each element describes the maximum key value for a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
+- `move_ttl_info.max` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Array of date and time values. Each element describes the maximum key value for a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
-- `bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Alias for `bytes_on_disk`.
+- `bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Alias for `bytes_on_disk`.
-- `marks_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Alias for `marks_bytes`.
+- `marks_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Alias for `marks_bytes`.
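+
+For example, a sketch of the check suggested in the note above, listing the `TTL MOVE` key ranges of the active parts of a hypothetical table `tmp`:
+
+``` sql
+SELECT name, `move_ttl_info.min`, `move_ttl_info.max`
+FROM system.parts
+WHERE active AND table = 'tmp';
+```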
**Example**
@@ -165,5 +165,5 @@ move_ttl_info.max: []
**See Also**
-- [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md)
-- [TTL for Columns and Tables](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl)
+- [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md)
+- [TTL for Columns and Tables](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl)
diff --git a/docs/en/operations/system-tables/parts_columns.md b/docs/en/operations/system-tables/parts_columns.md
index d389d5bbb29..00d7164af59 100644
--- a/docs/en/operations/system-tables/parts_columns.md
+++ b/docs/en/operations/system-tables/parts_columns.md
@@ -9,93 +9,93 @@ Each row describes one data part.
Columns:
-- `partition` ([String](../../sql-reference/data-types/string.md)) — The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter/index.md#query_language_queries_alter) query.
+- `partition` ([String](../../sql-reference/data-types/string.md)) — The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter/index.md#query_language_queries_alter) query.
Formats:
- - `YYYYMM` for automatic partitioning by month.
- - `any_string` when partitioning manually.
+ - `YYYYMM` for automatic partitioning by month.
+ - `any_string` when partitioning manually.
-- `name` ([String](../../sql-reference/data-types/string.md)) — Name of the data part.
+- `name` ([String](../../sql-reference/data-types/string.md)) — Name of the data part.
-- `part_type` ([String](../../sql-reference/data-types/string.md)) — The data part storing format.
+- `part_type` ([String](../../sql-reference/data-types/string.md)) — The data part storing format.
Possible values:
- - `Wide` — Each column is stored in a separate file in a filesystem.
- - `Compact` — All columns are stored in one file in a filesystem.
+ - `Wide` — Each column is stored in a separate file in a filesystem.
+ - `Compact` — All columns are stored in one file in a filesystem.
Data storing format is controlled by the `min_bytes_for_wide_part` and `min_rows_for_wide_part` settings of the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table.
-- `active` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the data part is active. If a data part is active, it’s used in a table. Otherwise, it’s deleted. Inactive data parts remain after merging.
+- `active` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the data part is active. If a data part is active, it’s used in a table. Otherwise, it’s deleted. Inactive data parts remain after merging.
-- `marks` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of marks. To get the approximate number of rows in a data part, multiply `marks` by the index granularity (usually 8192) (this hint does not work for adaptive granularity).
+- `marks` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of marks. To get the approximate number of rows in a data part, multiply `marks` by the index granularity (usually 8192) (this hint does not work for adaptive granularity).
-- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows.
+- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows.
-- `bytes_on_disk` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of all the data part files in bytes.
+- `bytes_on_disk` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of all the data part files in bytes.
-- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of compressed data in the data part. All the auxiliary files (for example, files with marks) are not included.
+- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of compressed data in the data part. All the auxiliary files (for example, files with marks) are not included.
-- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included.
+- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included.
-- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of the file with marks.
+- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of the file with marks.
-- `modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The time the directory with the data part was modified. This usually corresponds to the time of data part creation.
+- `modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The time the directory with the data part was modified. This usually corresponds to the time of data part creation.
-- `remove_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The time when the data part became inactive.
+- `remove_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The time when the data part became inactive.
-- `refcount` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges.
+- `refcount` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges.
-- `min_date` ([Date](../../sql-reference/data-types/date.md)) — The minimum value of the date key in the data part.
+- `min_date` ([Date](../../sql-reference/data-types/date.md)) — The minimum value of the date key in the data part.
-- `max_date` ([Date](../../sql-reference/data-types/date.md)) — The maximum value of the date key in the data part.
+- `max_date` ([Date](../../sql-reference/data-types/date.md)) — The maximum value of the date key in the data part.
-- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition.
+- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition.
-- `min_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The minimum number of data parts that make up the current part after merging.
+- `min_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The minimum number of data parts that make up the current part after merging.
-- `max_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The maximum number of data parts that make up the current part after merging.
+- `max_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The maximum number of data parts that make up the current part after merging.
-- `level` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Depth of the merge tree. Zero means that the current part was created by insert rather than by merging other parts.
+- `level` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Depth of the merge tree. Zero means that the current part was created by an insert rather than by merging other parts.
-- `data_version` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than `data_version`).
+- `data_version` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than `data_version`).
-- `primary_key_bytes_in_memory` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The amount of memory (in bytes) used by primary key values.
+- `primary_key_bytes_in_memory` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The amount of memory (in bytes) used by primary key values.
-- `primary_key_bytes_in_memory_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The amount of memory (in bytes) reserved for primary key values.
+- `primary_key_bytes_in_memory_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The amount of memory (in bytes) reserved for primary key values.
-- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database.
+- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database.
-- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table.
+- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table.
-- `engine` ([String](../../sql-reference/data-types/string.md)) — Name of the table engine without parameters.
+- `engine` ([String](../../sql-reference/data-types/string.md)) — Name of the table engine without parameters.
-- `disk_name` ([String](../../sql-reference/data-types/string.md)) — Name of a disk that stores the data part.
+- `disk_name` ([String](../../sql-reference/data-types/string.md)) — Name of a disk that stores the data part.
-- `path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the folder with data part files.
+- `path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the folder with data part files.
-- `column` ([String](../../sql-reference/data-types/string.md)) — Name of the column.
+- `column` ([String](../../sql-reference/data-types/string.md)) — Name of the column.
-- `type` ([String](../../sql-reference/data-types/string.md)) — Column type.
+- `type` ([String](../../sql-reference/data-types/string.md)) — Column type.
-- `column_position` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Ordinal position of a column in a table starting with 1.
+- `column_position` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Ordinal position of a column in a table starting with 1.
-- `default_kind` ([String](../../sql-reference/data-types/string.md)) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string if it is not defined.
+- `default_kind` ([String](../../sql-reference/data-types/string.md)) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string if it is not defined.
-- `default_expression` ([String](../../sql-reference/data-types/string.md)) — Expression for the default value, or an empty string if it is not defined.
+- `default_expression` ([String](../../sql-reference/data-types/string.md)) — Expression for the default value, or an empty string if it is not defined.
-- `column_bytes_on_disk` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of the column in bytes.
+- `column_bytes_on_disk` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of the column in bytes.
-- `column_data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of compressed data in the column, in bytes.
+- `column_data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of compressed data in the column, in bytes.
-- `column_data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of the decompressed data in the column, in bytes.
+- `column_data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of the decompressed data in the column, in bytes.
-- `column_marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of the column with marks, in bytes.
+- `column_marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of the column with marks, in bytes.
-- `bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Alias for `bytes_on_disk`.
+- `bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Alias for `bytes_on_disk`.
-- `marks_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Alias for `marks_bytes`.
+- `marks_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Alias for `marks_bytes`.
**Example**
@@ -146,4 +146,4 @@ column_marks_bytes: 48
**See Also**
-- [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md)
+- [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md)
diff --git a/docs/en/operations/system-tables/processes.md b/docs/en/operations/system-tables/processes.md
index 76219813ad7..2e729920ed0 100644
--- a/docs/en/operations/system-tables/processes.md
+++ b/docs/en/operations/system-tables/processes.md
@@ -7,17 +7,17 @@ This system table is used for implementing the `SHOW PROCESSLIST` query.
Columns:
-- `user` (String) – The user who made the query. Keep in mind that for distributed processing, queries are sent to remote servers under the `default` user. The field contains the username for a specific query, not for a query that this query initiated.
-- `address` (String) – The IP address the request was made from. The same for distributed processing. To track where a distributed query was originally made from, look at `system.processes` on the query requestor server.
-- `elapsed` (Float64) – The time in seconds since request execution started.
-- `rows_read` (UInt64) – The number of rows read from the table. For distributed processing, on the requestor server, this is the total for all remote servers.
-- `bytes_read` (UInt64) – The number of uncompressed bytes read from the table. For distributed processing, on the requestor server, this is the total for all remote servers.
-- `total_rows_approx` (UInt64) – The approximation of the total number of rows that should be read. For distributed processing, on the requestor server, this is the total for all remote servers. It can be updated during request processing, when new sources to process become known.
-- `memory_usage` (UInt64) – Amount of RAM the request uses. It might not include some types of dedicated memory. See the [max_memory_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage) setting.
-- `query` (String) – The query text. For `INSERT`, it does not include the data to insert.
-- `query_id` (String) – Query ID, if defined.
-- `is_cancelled` (Int8) – Was query cancelled.
-- `is_all_data_sent` (Int8) – Was all data sent to the client (in other words query had been finished on the server).
+- `user` (String) – The user who made the query. Keep in mind that for distributed processing, queries are sent to remote servers under the `default` user. The field contains the username for a specific query, not for a query that this query initiated.
+- `address` (String) – The IP address the request was made from. The same for distributed processing. To track where a distributed query was originally made from, look at `system.processes` on the query requestor server.
+- `elapsed` (Float64) – The time in seconds since request execution started.
+- `rows_read` (UInt64) – The number of rows read from the table. For distributed processing, on the requestor server, this is the total for all remote servers.
+- `bytes_read` (UInt64) – The number of uncompressed bytes read from the table. For distributed processing, on the requestor server, this is the total for all remote servers.
+- `total_rows_approx` (UInt64) – The approximation of the total number of rows that should be read. For distributed processing, on the requestor server, this is the total for all remote servers. It can be updated during request processing, when new sources to process become known.
+- `memory_usage` (UInt64) – Amount of RAM the request uses. It might not include some types of dedicated memory. See the [max_memory_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage) setting.
+- `query` (String) – The query text. For `INSERT`, it does not include the data to insert.
+- `query_id` (String) – Query ID, if defined.
+- `is_cancelled` (Int8) – Whether the query was cancelled.
+- `is_all_data_sent` (Int8) – Whether all data was sent to the client (in other words, the query has finished on the server).
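+
+For example, a sketch that lists the currently running queries with the longest-running first, using only the columns described above:
+
+``` sql
+SELECT query_id, user, elapsed, memory_usage, query
+FROM system.processes
+ORDER BY elapsed DESC;
+```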
```sql
SELECT * FROM system.processes LIMIT 10 FORMAT Vertical;
diff --git a/docs/en/operations/system-tables/processors_profile_log.md b/docs/en/operations/system-tables/processors_profile_log.md
index a2e7a9ebabd..5eedb5a5dae 100644
--- a/docs/en/operations/system-tables/processors_profile_log.md
+++ b/docs/en/operations/system-tables/processors_profile_log.md
@@ -1,24 +1,26 @@
-# system.processors_profile_log {#system-processors_profile_log}
+# processors_profile_log
This table contains profiling information at the processor level (which you can find in [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline)).
Columns:
-- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the event happened.
-- `event_time` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — The date and time when the event happened.
-- `id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of processor
-- `parent_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Parent processors IDs
-- `query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the query
-- `name` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Name of the processor.
-- `elapsed_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of microseconds this processor was executed.
-- `input_wait_elapsed_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of microseconds this processor was waiting for data (from other processor).
-- `output_wait_elapsed_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of microseconds this processor was waiting because output port was full.
-- `plan_step` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the query plan step which created this processor. The value is zero if the processor was not added from any step.
-- `plan_group` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Group of the processor if it was created by query plan step. A group is a logical partitioning of processors added from the same query plan step. Group is used only for beautifying the result of EXPLAIN PIPELINE result.
-- `input_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows consumed by processor.
-- `input_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of bytes consumed by processor.
-- `output_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows generated by processor.
-- `output_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of bytes generated by processor.
+- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the event happened.
+- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the event happened.
+- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — The date and time with microseconds precision when the event happened.
+- `id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the processor.
+- `parent_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Parent processors' IDs.
+- `plan_step` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the query plan step which created this processor. The value is zero if the processor was not added from any step.
+- `plan_group` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Group of the processor if it was created by a query plan step. A group is a logical partitioning of processors added from the same query plan step. The group is used only for beautifying the result of `EXPLAIN PIPELINE`.
+- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution).
+- `query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the query.
+- `name` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Name of the processor.
+- `elapsed_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of microseconds this processor was executed.
+- `input_wait_elapsed_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of microseconds this processor was waiting for data (from another processor).
+- `output_wait_elapsed_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of microseconds this processor was waiting because the output port was full.
+- `input_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows consumed by the processor.
+- `input_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of bytes consumed by the processor.
+- `output_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows generated by the processor.
+- `output_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of bytes generated by the processor.
**Example**
Query:
@@ -67,10 +69,10 @@ Result:
Here you can see:
-- `ExpressionTransform` was executing `sleep(1)` function, so it `work` will takes 1e6, and so `elapsed_us` > 1e6.
-- `SourceFromSingleChunk` need to wait, because `ExpressionTransform` does not accept any data during execution of `sleep(1)`, so it will be in `PortFull` state for 1e6 us, and so `output_wait_elapsed_us` > 1e6.
-- `LimitsCheckingTransform`/`NullSource`/`LazyOutputFormat` need to wait until `ExpressionTransform` will execute `sleep(1)` to process the result, so `input_wait_elapsed_us` > 1e6.
+- `ExpressionTransform` was executing the `sleep(1)` function, so its `work` will take about 1e6 us, and so `elapsed_us` > 1e6.
+- `SourceFromSingleChunk` needs to wait, because `ExpressionTransform` does not accept any data during the execution of `sleep(1)`, so it will be in the `PortFull` state for about 1e6 us, and so `output_wait_elapsed_us` > 1e6.
+- `LimitsCheckingTransform`/`NullSource`/`LazyOutputFormat` need to wait until `ExpressionTransform` executes `sleep(1)` to process the result, so `input_wait_elapsed_us` > 1e6.
**See Also**
-- [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline)
\ No newline at end of file
+- [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline)
diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md
index a04214f6488..71e1452cef1 100644
--- a/docs/en/operations/system-tables/query_log.md
+++ b/docs/en/operations/system-tables/query_log.md
@@ -34,82 +34,83 @@ You can use the [log_formatted_queries](../../operations/settings/settings.md#se
Columns:
-- `type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of an event that occurred when executing the query. Values:
- - `'QueryStart' = 1` — Successful start of query execution.
- - `'QueryFinish' = 2` — Successful end of query execution.
- - `'ExceptionBeforeStart' = 3` — Exception before the start of query execution.
- - `'ExceptionWhileProcessing' = 4` — Exception during the query execution.
-- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Query starting date.
-- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query starting time.
-- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query starting time with microseconds precision.
-- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of query execution.
-- `query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Start time of query execution with microsecond precision.
-- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution in milliseconds.
-- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of rows read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` and `JOIN`. For distributed queries `read_rows` includes the total number of rows read at all replicas. Each replica sends it’s `read_rows` value, and the server-initiator of the query summarizes all received and local values. The cache volumes do not affect this value.
-- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of bytes read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` and `JOIN`. For distributed queries `read_bytes` includes the total number of rows read at all replicas. Each replica sends it’s `read_bytes` value, and the server-initiator of the query summarizes all received and local values. The cache volumes do not affect this value.
-- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0.
-- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0.
-- `result_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of rows in a result of the `SELECT` query, or a number of rows in the `INSERT` query.
-- `result_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — RAM volume in bytes used to store a query result.
-- `memory_usage` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Memory consumption by the query.
-- `current_database` ([String](../../sql-reference/data-types/string.md)) — Name of the current database.
-- `query` ([String](../../sql-reference/data-types/string.md)) — Query string.
-- `formatted_query` ([String](../../sql-reference/data-types/string.md)) — Formatted query string.
-- `normalized_query_hash` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Identical hash value without the values of literals for similar queries.
-- `query_kind` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Type of the query.
-- `databases` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the databases present in the query.
-- `tables` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the tables present in the query.
-- `views` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the (materialized or live) views present in the query.
-- `columns` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the columns present in the query.
-- `projections` ([String](../../sql-reference/data-types/string.md)) — Names of the projections used during the query execution.
-- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — Code of an exception.
-- `exception` ([String](../../sql-reference/data-types/string.md)) — Exception message.
-- `stack_trace` ([String](../../sql-reference/data-types/string.md)) — [Stack trace](https://en.wikipedia.org/wiki/Stack_trace). An empty string, if the query was completed successfully.
-- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Query type. Possible values:
- - 1 — Query was initiated by the client.
- - 0 — Query was initiated by another query as part of distributed query execution.
-- `user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who initiated the current query.
-- `query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the query.
-- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that was used to make the query.
-- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The client port that was used to make the query.
-- `initial_user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who ran the initial query (for distributed query execution).
-- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution).
-- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from.
-- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The client port that was used to make the parent query.
-- `initial_query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Initial query starting time (for distributed query execution).
-- `initial_query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Initial query starting time with microseconds precision (for distributed query execution).
-- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Interface that the query was initiated from. Possible values:
- - 1 — TCP.
- - 2 — HTTP.
-- `os_user` ([String](../../sql-reference/data-types/string.md)) — Operating system username who runs [clickhouse-client](../../interfaces/cli.md).
-- `client_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the client machine where the [clickhouse-client](../../interfaces/cli.md) or another TCP client is run.
-- `client_name` ([String](../../sql-reference/data-types/string.md)) — The [clickhouse-client](../../interfaces/cli.md) or another TCP client name.
-- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Revision of the [clickhouse-client](../../interfaces/cli.md) or another TCP client.
-- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Major version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client.
-- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Minor version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client.
-- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Patch component of the [clickhouse-client](../../interfaces/cli.md) or another TCP client version.
-- `http_method` (UInt8) — HTTP method that initiated the query. Possible values:
- - 0 — The query was launched from the TCP interface.
- - 1 — `GET` method was used.
- - 2 — `POST` method was used.
-- `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — HTTP header `UserAgent` passed in the HTTP query.
-- `http_referer` ([String](../../sql-reference/data-types/string.md)) — HTTP header `Referer` passed in the HTTP query (contains an absolute or partial address of the page making the query).
-- `forwarded_for` ([String](../../sql-reference/data-types/string.md)) — HTTP header `X-Forwarded-For` passed in the HTTP query.
-- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The `quota key` specified in the [quotas](../../operations/quotas.md) setting (see `keyed`).
-- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision.
-- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — ProfileEvents that measure different metrics. The description of them could be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events)
-- `Settings` ([Map(String, String)](../../sql-reference/data-types/array.md)) — Settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1.
-- `log_comment` ([String](../../sql-reference/data-types/string.md)) — Log comment. It can be set to arbitrary string no longer than [max_query_size](../../operations/settings/settings.md#settings-max_query_size). An empty string if it is not defined.
-- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Thread ids that are participating in query execution.
-- `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions`, which were used during query execution.
-- `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions combinators`, which were used during query execution.
-- `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `database engines`, which were used during query execution.
-- `used_data_type_families` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `data type families`, which were used during query execution.
-- `used_dictionaries` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `dictionaries`, which were used during query execution.
-- `used_formats` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `formats`, which were used during query execution.
-- `used_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `functions`, which were used during query execution.
-- `used_storages` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `storages`, which were used during query execution.
-- `used_table_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `table functions`, which were used during query execution. +- `type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of an event that occurred when executing the query. Values: + - `'QueryStart' = 1` — Successful start of query execution. + - `'QueryFinish' = 2` — Successful end of query execution. + - `'ExceptionBeforeStart' = 3` — Exception before the start of query execution. + - `'ExceptionWhileProcessing' = 4` — Exception during the query execution. +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Query starting date. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query starting time. +- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query starting time with microseconds precision. +- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of query execution. +- `query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Start time of query execution with microsecond precision. +- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution in milliseconds. +- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of rows read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` and `JOIN`. For distributed queries `read_rows` includes the total number of rows read at all replicas. Each replica sends it’s `read_rows` value, and the server-initiator of the query summarizes all received and local values. The cache volumes do not affect this value. +- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of bytes read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` and `JOIN`. For distributed queries `read_bytes` includes the total number of rows read at all replicas. Each replica sends it’s `read_bytes` value, and the server-initiator of the query summarizes all received and local values. The cache volumes do not affect this value. +- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0. +- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0. +- `result_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of rows in a result of the `SELECT` query, or a number of rows in the `INSERT` query. +- `result_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — RAM volume in bytes used to store a query result. +- `memory_usage` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Memory consumption by the query. +- `current_database` ([String](../../sql-reference/data-types/string.md)) — Name of the current database. +- `query` ([String](../../sql-reference/data-types/string.md)) — Query string. +- `formatted_query` ([String](../../sql-reference/data-types/string.md)) — Formatted query string. +- `normalized_query_hash` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Identical hash value without the values of literals for similar queries. 
+- `query_kind` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Type of the query. +- `databases` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the databases present in the query. +- `tables` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the tables present in the query. +- `columns` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the columns present in the query. +- `partitions` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the partitions present in the query. +- `projections` ([String](../../sql-reference/data-types/string.md)) — Names of the projections used during the query execution. +- `views` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the (materialized or live) views present in the query. +- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — Code of an exception. +- `exception` ([String](../../sql-reference/data-types/string.md)) — Exception message. +- `stack_trace` ([String](../../sql-reference/data-types/string.md)) — [Stack trace](https://en.wikipedia.org/wiki/Stack_trace). An empty string, if the query was completed successfully. +- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Query type. Possible values: + - 1 — Query was initiated by the client. + - 0 — Query was initiated by another query as part of distributed query execution. +- `user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who initiated the current query. +- `query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the query. +- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that was used to make the query. +- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The client port that was used to make the query. +- `initial_user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who ran the initial query (for distributed query execution). +- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution). +- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from. +- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The client port that was used to make the parent query. +- `initial_query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Initial query starting time (for distributed query execution). +- `initial_query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Initial query starting time with microseconds precision (for distributed query execution). +- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Interface that the query was initiated from. Possible values: + - 1 — TCP. + - 2 — HTTP. +- `os_user` ([String](../../sql-reference/data-types/string.md)) — Operating system username who runs [clickhouse-client](../../interfaces/cli.md). 
+- `client_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the client machine where the [clickhouse-client](../../interfaces/cli.md) or another TCP client is run. +- `client_name` ([String](../../sql-reference/data-types/string.md)) — The [clickhouse-client](../../interfaces/cli.md) or another TCP client name. +- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Revision of the [clickhouse-client](../../interfaces/cli.md) or another TCP client. +- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Major version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client. +- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Minor version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client. +- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Patch component of the [clickhouse-client](../../interfaces/cli.md) or another TCP client version. +- `http_method` (UInt8) — HTTP method that initiated the query. Possible values: + - 0 — The query was launched from the TCP interface. + - 1 — `GET` method was used. + - 2 — `POST` method was used. +- `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — HTTP header `UserAgent` passed in the HTTP query. +- `http_referer` ([String](../../sql-reference/data-types/string.md)) — HTTP header `Referer` passed in the HTTP query (contains an absolute or partial address of the page making the query). +- `forwarded_for` ([String](../../sql-reference/data-types/string.md)) — HTTP header `X-Forwarded-For` passed in the HTTP query. +- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The `quota key` specified in the [quotas](../../operations/quotas.md) setting (see `keyed`). +- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision. +- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/map.md)) — ProfileEvents that measure different metrics. The description of them could be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events) +- `Settings` ([Map(String, String)](../../sql-reference/data-types/map.md)) — Settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1. +- `log_comment` ([String](../../sql-reference/data-types/string.md)) — Log comment. It can be set to arbitrary string no longer than [max_query_size](../../operations/settings/settings.md#settings-max_query_size). An empty string if it is not defined. +- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Thread ids that are participating in query execution. +- `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions`, which were used during query execution. +- `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions combinators`, which were used during query execution. +- `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `database engines`, which were used during query execution. +- `used_data_type_families` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `data type families`, which were used during query execution. 
+- `used_dictionaries` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `dictionaries`, which were used during query execution. +- `used_formats` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `formats`, which were used during query execution. +- `used_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `functions`, which were used during query execution. +- `used_storages` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `storages`, which were used during query execution. +- `used_table_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `table functions`, which were used during query execution. **Example** @@ -189,4 +190,4 @@ used_table_functions: [] **See Also** -- [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread. +- [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread. diff --git a/docs/en/operations/system-tables/query_thread_log.md b/docs/en/operations/system-tables/query_thread_log.md index d7bbaa63471..cdd23bb15db 100644 --- a/docs/en/operations/system-tables/query_thread_log.md +++ b/docs/en/operations/system-tables/query_thread_log.md @@ -18,52 +18,52 @@ You can use the [log_queries_probability](../../operations/settings/settings.md# Columns: -- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the thread has finished execution of the query. -- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query. -- `event_time_microsecinds` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query with microseconds precision. -- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of query execution. -- `query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Start time of query execution with microsecond precision. -- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution. -- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read rows. -- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read bytes. -- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0. -- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0. -- `memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The difference between the amount of allocated and freed memory in context of this thread. -- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this thread. -- `thread_name` ([String](../../sql-reference/data-types/string.md)) — Name of the thread. -- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Internal thread ID. 
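As a reading aid for the column list above (not part of the patch): a minimal sketch of how `system.query_log` is typically queried, using only columns documented in this file. It assumes query logging is enabled (`log_queries = 1`, the default), and the filter values are illustrative.

```sql
-- Illustrative only: recent queries that failed while executing.
SELECT
    event_time,
    query_id,
    query_duration_ms,
    exception_code,
    exception
FROM system.query_log
WHERE type = 'ExceptionWhileProcessing'
  AND event_date = today()
ORDER BY event_time DESC
LIMIT 5;
```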
diff --git a/docs/en/operations/system-tables/query_thread_log.md b/docs/en/operations/system-tables/query_thread_log.md
index d7bbaa63471..cdd23bb15db 100644
--- a/docs/en/operations/system-tables/query_thread_log.md
+++ b/docs/en/operations/system-tables/query_thread_log.md
@@ -18,52 +18,52 @@ You can use the [log_queries_probability](../../operations/settings/settings.md#

 Columns:

-- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the thread has finished execution of the query.
-- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query.
-- `event_time_microsecinds` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query with microseconds precision.
-- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of query execution.
-- `query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Start time of query execution with microsecond precision.
-- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution.
-- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read rows.
-- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read bytes.
-- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0.
-- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0.
-- `memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The difference between the amount of allocated and freed memory in context of this thread.
-- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this thread.
-- `thread_name` ([String](../../sql-reference/data-types/string.md)) — Name of the thread.
-- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Internal thread ID.
-- `thread_id` ([Int32](../../sql-reference/data-types/int-uint.md)) — thread ID.
-- `master_thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — OS initial ID of initial thread.
-- `query` ([String](../../sql-reference/data-types/string.md)) — Query string.
-- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Query type. Possible values:
-  - 1 — Query was initiated by the client.
-  - 0 — Query was initiated by another query for distributed query execution.
-- `user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who initiated the current query.
-- `query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the query.
-- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that was used to make the query.
-- `port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the query.
-- `initial_user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who ran the initial query (for distributed query execution).
-- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution).
-- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from.
-- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the parent query.
-- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Interface that the query was initiated from. Possible values:
-  - 1 — TCP.
-  - 2 — HTTP.
-- `os_user` ([String](../../sql-reference/data-types/string.md)) — OS’s username who runs [clickhouse-client](../../interfaces/cli.md).
-- `client_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the client machine where the [clickhouse-client](../../interfaces/cli.md) or another TCP client is run.
-- `client_name` ([String](../../sql-reference/data-types/string.md)) — The [clickhouse-client](../../interfaces/cli.md) or another TCP client name.
-- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Revision of the [clickhouse-client](../../interfaces/cli.md) or another TCP client.
-- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Major version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client.
-- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Minor version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client.
-- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Patch component of the [clickhouse-client](../../interfaces/cli.md) or another TCP client version.
-- `http_method` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — HTTP method that initiated the query. Possible values:
-  - 0 — The query was launched from the TCP interface.
-  - 1 — `GET` method was used.
-  - 2 — `POST` method was used.
-- `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — The `UserAgent` header passed in the HTTP request.
-- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The “quota key” specified in the [quotas](../../operations/quotas.md) setting (see `keyed`).
-- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision.
-- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — ProfileEvents that measure different metrics for this thread. The description of them could be found in the table [system.events](#system_tables-events).
+- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the thread finished execution of the query.
+- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread finished execution of the query.
+- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — The date and time when the thread finished execution of the query with microseconds precision.
+- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of query execution.
+- `query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Start time of query execution with microsecond precision.
+- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution in milliseconds.
+- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read rows.
+- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read bytes.
+- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0.
+- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0.
+- `memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The difference between the amount of allocated and freed memory in the context of this thread.
+- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in the context of this thread.
+- `thread_name` ([String](../../sql-reference/data-types/string.md)) — Name of the thread.
+- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Internal thread ID.
+- `thread_id` ([Int32](../../sql-reference/data-types/int-uint.md)) — Thread ID.
+- `master_thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — OS thread ID of the initial thread.
+- `query` ([String](../../sql-reference/data-types/string.md)) — Query string.
+- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Query type. Possible values:
+  - 1 — Query was initiated by the client.
+  - 0 — Query was initiated by another query for distributed query execution.
+- `user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who initiated the current query.
+- `query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the query.
+- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that was used to make the query.
+- `port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the query.
+- `initial_user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who ran the initial query (for distributed query execution).
+- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution).
+- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from.
+- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the parent query.
+- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Interface that the query was initiated from. Possible values:
+  - 1 — TCP.
+  - 2 — HTTP.
+- `os_user` ([String](../../sql-reference/data-types/string.md)) — Operating system username of the user who runs [clickhouse-client](../../interfaces/cli.md).
+- `client_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the client machine where the [clickhouse-client](../../interfaces/cli.md) or another TCP client is run.
+- `client_name` ([String](../../sql-reference/data-types/string.md)) — The [clickhouse-client](../../interfaces/cli.md) or another TCP client name.
+- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Revision of the [clickhouse-client](../../interfaces/cli.md) or another TCP client.
+- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Major version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client.
+- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Minor version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client.
+- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Patch component of the [clickhouse-client](../../interfaces/cli.md) or another TCP client version.
+- `http_method` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — HTTP method that initiated the query. Possible values:
+  - 0 — The query was launched from the TCP interface.
+  - 1 — `GET` method was used.
+  - 2 — `POST` method was used.
+- `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — The `User-Agent` header passed in the HTTP request.
+- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The “quota key” specified in the [quotas](../../operations/quotas.md) setting (see `keyed`).
+- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision.
+- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/map.md)) — ProfileEvents that measure different metrics for this thread. Their description can be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events).

 **Example**

@@ -116,5 +116,5 @@ ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'Compr

 **See Also**

-- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — Description of the `query_log` system table which contains common information about queries execution.
-- [system.query_views_log](../../operations/system-tables/query_views_log.md#system_tables-query_views_log) — This table contains information about each view executed during a query.
+- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — Description of the `query_log` system table which contains common information about query execution.
+- [system.query_views_log](../../operations/system-tables/query_views_log.md#system_tables-query_views_log) — This table contains information about each view executed during a query.
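For orientation (not part of the patch): a sketch of how the thread-level log complements `query_log`. It assumes `log_query_threads = 1` so that thread rows are recorded; column names match the list above.

```sql
-- Illustrative only: thread fan-out and per-thread peak memory per query today.
SELECT
    query_id,
    count() AS thread_count,
    max(peak_memory_usage) AS max_peak_memory_per_thread
FROM system.query_thread_log
WHERE event_date = today()
GROUP BY query_id
ORDER BY thread_count DESC
LIMIT 5;
```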
diff --git a/docs/en/operations/system-tables/query_views_log.md b/docs/en/operations/system-tables/query_views_log.md
index 55cab49e52f..e107e4f926c 100644
--- a/docs/en/operations/system-tables/query_views_log.md
+++ b/docs/en/operations/system-tables/query_views_log.md
@@ -18,33 +18,33 @@ You can use the [log_queries_probability](../../operations/settings/settings.md#

 Columns:

-- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the last event of the view happened.
-- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the view finished execution.
-- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the view finished execution with microseconds precision.
-- `view_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of view execution (sum of its stages) in milliseconds.
-- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution).
-- `view_name` ([String](../../sql-reference/data-types/string.md)) — Name of the view.
-- `view_uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — UUID of the view.
-- `view_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of the view. Values:
-  - `'Default' = 1` — [Default views](../../sql-reference/statements/create/view.md#normal). Should not appear in this log.
-  - `'Materialized' = 2` — [Materialized views](../../sql-reference/statements/create/view.md#materialized).
-  - `'Live' = 3` — [Live views](../../sql-reference/statements/create/view.md#live-view).
-- `view_query` ([String](../../sql-reference/data-types/string.md)) — The query executed by the view.
-- `view_target` ([String](../../sql-reference/data-types/string.md)) — The name of the view target table.
-- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read rows.
-- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read bytes.
-- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of written rows.
-- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of written bytes.
-- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this view.
-- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — ProfileEvents that measure different metrics. The description of them could be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events).
-- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Status of the view. Values:
-  - `'QueryStart' = 1` — Successful start the view execution. Should not appear.
-  - `'QueryFinish' = 2` — Successful end of the view execution.
-  - `'ExceptionBeforeStart' = 3` — Exception before the start of the view execution.
-  - `'ExceptionWhileProcessing' = 4` — Exception during the view execution.
-- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — Code of an exception.
-- `exception` ([String](../../sql-reference/data-types/string.md)) — Exception message.
-- `stack_trace` ([String](../../sql-reference/data-types/string.md)) — [Stack trace](https://en.wikipedia.org/wiki/Stack_trace). An empty string, if the query was completed successfully.
+- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the last event of the view happened.
+- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the view finished execution.
+- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — The date and time when the view finished execution with microseconds precision.
+- `view_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of view execution (sum of its stages) in milliseconds.
+- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution).
+- `view_name` ([String](../../sql-reference/data-types/string.md)) — Name of the view.
+- `view_uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — UUID of the view.
+- `view_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of the view. Values:
+  - `'Default' = 1` — [Default views](../../sql-reference/statements/create/view.md#normal). Should not appear in this log.
+  - `'Materialized' = 2` — [Materialized views](../../sql-reference/statements/create/view.md#materialized).
+  - `'Live' = 3` — [Live views](../../sql-reference/statements/create/view.md#live-view).
+- `view_query` ([String](../../sql-reference/data-types/string.md)) — The query executed by the view.
+- `view_target` ([String](../../sql-reference/data-types/string.md)) — The name of the view target table.
+- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read rows.
+- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read bytes.
+- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of written rows.
+- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of written bytes.
+- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in the context of this view.
+- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/map.md)) — ProfileEvents that measure different metrics. Their description can be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events).
+- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Status of the view. Values:
+  - `'QueryStart' = 1` — Successful start of the view execution. Should not appear.
+  - `'QueryFinish' = 2` — Successful end of the view execution.
+  - `'ExceptionBeforeStart' = 3` — Exception before the start of the view execution.
+  - `'ExceptionWhileProcessing' = 4` — Exception during the view execution.
+- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — Code of an exception.
+- `exception` ([String](../../sql-reference/data-types/string.md)) — Exception message.
+- `stack_trace` ([String](../../sql-reference/data-types/string.md)) — [Stack trace](https://en.wikipedia.org/wiki/Stack_trace). An empty string if the query was completed successfully.

 **Example**

@@ -83,5 +83,5 @@ stack_trace:

 **See Also**

-- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — Description of the `query_log` system table which contains common information about queries execution.
-- [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread.
+- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — Description of the `query_log` system table which contains common information about query execution.
+- [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread.
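For orientation (not part of the patch): a minimal query over the view-level log documented above. It assumes `log_query_views = 1` so that view executions are recorded.

```sql
-- Illustrative only: slowest materialized/live view executions today.
SELECT
    view_name,
    view_type,
    view_duration_ms,
    status
FROM system.query_views_log
WHERE event_date = today()
ORDER BY view_duration_ms DESC
LIMIT 5;
```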
diff --git a/docs/en/operations/system-tables/quota_usage.md b/docs/en/operations/system-tables/quota_usage.md
index 7286ad9efa9..0dca7c525f2 100644
--- a/docs/en/operations/system-tables/quota_usage.md
+++ b/docs/en/operations/system-tables/quota_usage.md
@@ -30,4 +30,4 @@ Columns:

 ## See Also {#see-also}

-- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)
+- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)
diff --git a/docs/en/operations/system-tables/quotas.md b/docs/en/operations/system-tables/quotas.md
index ca8fc4d166f..a9748a2b464 100644
--- a/docs/en/operations/system-tables/quotas.md
+++ b/docs/en/operations/system-tables/quotas.md
@@ -20,9 +20,9 @@ Columns:
 - `apply_to_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Logical value. It shows which users the quota is applied to. Values:
   - `0` — The quota applies to users specify in the `apply_to_list`.
   - `1` — The quota applies to all users except those listed in `apply_to_except`.
-- `apply_to_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of user names/[roles](../../operations/access-rights.md#role-management) that the quota should be applied to.
+- `apply_to_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of user names/[roles](../../guides/sre/user-management/index.md#role-management) that the quota should be applied to.
 - `apply_to_except` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of user names/roles that the quota should not apply to.

 ## See Also {#see-also}

-- [SHOW QUOTAS](../../sql-reference/statements/show.md#show-quotas-statement)
+- [SHOW QUOTAS](../../sql-reference/statements/show.md#show-quotas-statement)
diff --git a/docs/en/operations/system-tables/quotas_usage.md b/docs/en/operations/system-tables/quotas_usage.md
index e29ffff6b41..a04018ac2c8 100644
--- a/docs/en/operations/system-tables/quotas_usage.md
+++ b/docs/en/operations/system-tables/quotas_usage.md
@@ -33,4 +33,4 @@ Columns:

 ## See Also {#see-also}

-- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)
+- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement)
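A quick sketch against the quota tables touched above (not part of the patch). `name` is a column of `system.quotas` that lies outside the excerpt shown in this hunk, so treat it as an assumption here.

```sql
-- Illustrative only: who each quota applies to.
SELECT
    name,          -- assumed column, not shown in the excerpt above
    apply_to_all,
    apply_to_list,
    apply_to_except
FROM system.quotas;
```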
diff --git a/docs/en/operations/system-tables/replicas.md b/docs/en/operations/system-tables/replicas.md
index e711d9a7784..122a03ca629 100644
--- a/docs/en/operations/system-tables/replicas.md
+++ b/docs/en/operations/system-tables/replicas.md
@@ -50,6 +50,7 @@ last_queue_update: 2021-10-12 14:50:08
 absolute_delay: 99
 total_replicas: 5
 active_replicas: 5
+lost_part_count: 0
 last_queue_update_exception:
 zookeeper_exception:
 replica_is_active: {'r1':1,'r2':1}
@@ -57,42 +58,43 @@ replica_is_active: {'r1':1,'r2':1}

 Columns:

-- `database` (`String`) - Database name
-- `table` (`String`) - Table name
-- `engine` (`String`) - Table engine name
-- `is_leader` (`UInt8`) - Whether the replica is the leader.
+- `database` (`String`) - Database name
+- `table` (`String`) - Table name
+- `engine` (`String`) - Table engine name
+- `is_leader` (`UInt8`) - Whether the replica is the leader.
 Multiple replicas can be leaders at the same time. A replica can be prevented from becoming a leader using the `merge_tree` setting `replicated_can_become_leader`. The leaders are responsible for scheduling background merges.
 Note that writes can be performed to any replica that is available and has a session in ZK, regardless of whether it is a leader.
-- `can_become_leader` (`UInt8`) - Whether the replica can be a leader.
-- `is_readonly` (`UInt8`) - Whether the replica is in read-only mode.
+- `can_become_leader` (`UInt8`) - Whether the replica can be a leader.
+- `is_readonly` (`UInt8`) - Whether the replica is in read-only mode.
 This mode is turned on if the config does not have sections with ClickHouse Keeper, if an unknown error occurred when reinitializing sessions in ClickHouse Keeper, and during session reinitialization in ClickHouse Keeper.
-- `is_session_expired` (`UInt8`) - the session with ClickHouse Keeper has expired. Basically the same as `is_readonly`.
-- `future_parts` (`UInt32`) - The number of data parts that will appear as the result of INSERTs or merges that haven’t been done yet.
-- `parts_to_check` (`UInt32`) - The number of data parts in the queue for verification. A part is put in the verification queue if there is suspicion that it might be damaged.
-- `zookeeper_path` (`String`) - Path to table data in ClickHouse Keeper.
-- `replica_name` (`String`) - Replica name in ClickHouse Keeper. Different replicas of the same table have different names.
-- `replica_path` (`String`) - Path to replica data in ClickHouse Keeper. The same as concatenating ‘zookeeper_path/replicas/replica_path’.
-- `columns_version` (`Int32`) - Version number of the table structure. Indicates how many times ALTER was performed. If replicas have different versions, it means some replicas haven’t made all of the ALTERs yet.
-- `queue_size` (`UInt32`) - Size of the queue for operations waiting to be performed. Operations include inserting blocks of data, merges, and certain other actions. It usually coincides with `future_parts`.
-- `inserts_in_queue` (`UInt32`) - Number of inserts of blocks of data that need to be made. Insertions are usually replicated fairly quickly. If this number is large, it means something is wrong.
-- `merges_in_queue` (`UInt32`) - The number of merges waiting to be made. Sometimes merges are lengthy, so this value may be greater than zero for a long time.
-- `part_mutations_in_queue` (`UInt32`) - The number of mutations waiting to be made.
-- `queue_oldest_time` (`DateTime`) - If `queue_size` greater than 0, shows when the oldest operation was added to the queue.
-- `inserts_oldest_time` (`DateTime`) - See `queue_oldest_time`
-- `merges_oldest_time` (`DateTime`) - See `queue_oldest_time`
-- `part_mutations_oldest_time` (`DateTime`) - See `queue_oldest_time`
+- `is_session_expired` (`UInt8`) - the session with ClickHouse Keeper has expired. Basically the same as `is_readonly`.
+- `future_parts` (`UInt32`) - The number of data parts that will appear as the result of INSERTs or merges that haven’t been done yet.
+- `parts_to_check` (`UInt32`) - The number of data parts in the queue for verification. A part is put in the verification queue if there is suspicion that it might be damaged.
+- `zookeeper_path` (`String`) - Path to table data in ClickHouse Keeper.
+- `replica_name` (`String`) - Replica name in ClickHouse Keeper. Different replicas of the same table have different names.
+- `replica_path` (`String`) - Path to replica data in ClickHouse Keeper. The same as concatenating ‘zookeeper_path/replicas/replica_path’.
+- `columns_version` (`Int32`) - Version number of the table structure. Indicates how many times ALTER was performed. If replicas have different versions, it means some replicas haven’t made all of the ALTERs yet.
+- `queue_size` (`UInt32`) - Size of the queue for operations waiting to be performed. Operations include inserting blocks of data, merges, and certain other actions. It usually coincides with `future_parts`.
+- `inserts_in_queue` (`UInt32`) - Number of inserts of blocks of data that need to be made. Insertions are usually replicated fairly quickly. If this number is large, it means something is wrong.
+- `merges_in_queue` (`UInt32`) - The number of merges waiting to be made. Sometimes merges are lengthy, so this value may be greater than zero for a long time.
+- `part_mutations_in_queue` (`UInt32`) - The number of mutations waiting to be made.
+- `queue_oldest_time` (`DateTime`) - If `queue_size` is greater than 0, shows when the oldest operation was added to the queue.
+- `inserts_oldest_time` (`DateTime`) - See `queue_oldest_time`
+- `merges_oldest_time` (`DateTime`) - See `queue_oldest_time`
+- `part_mutations_oldest_time` (`DateTime`) - See `queue_oldest_time`

 The next 4 columns have a non-zero value only where there is an active session with ZK.

-- `log_max_index` (`UInt64`) - Maximum entry number in the log of general activity.
-- `log_pointer` (`UInt64`) - Maximum entry number in the log of general activity that the replica copied to its execution queue, plus one. If `log_pointer` is much smaller than `log_max_index`, something is wrong.
-- `last_queue_update` (`DateTime`) - When the queue was updated last time.
-- `absolute_delay` (`UInt64`) - How big lag in seconds the current replica has.
-- `total_replicas` (`UInt8`) - The total number of known replicas of this table.
-- `active_replicas` (`UInt8`) - The number of replicas of this table that have a session in ClickHouse Keeper (i.e., the number of functioning replicas).
-- `last_queue_update_exception` (`String`) - When the queue contains broken entries. Especially important when ClickHouse breaks backward compatibility between versions and log entries written by newer versions aren't parseable by old versions.
-- `zookeeper_exception` (`String`) - The last exception message, got if the error happened when fetching the info from ClickHouse Keeper.
-- `replica_is_active` ([Map(String, UInt8)](../../sql-reference/data-types/map.md)) — Map between replica name and is replica active.
+- `log_max_index` (`UInt64`) - Maximum entry number in the log of general activity.
+- `log_pointer` (`UInt64`) - Maximum entry number in the log of general activity that the replica copied to its execution queue, plus one. If `log_pointer` is much smaller than `log_max_index`, something is wrong.
+- `last_queue_update` (`DateTime`) - When the queue was updated last time.
+- `absolute_delay` (`UInt64`) - How big a lag in seconds the current replica has.
+- `total_replicas` (`UInt8`) - The total number of known replicas of this table.
+- `active_replicas` (`UInt8`) - The number of replicas of this table that have a session in ClickHouse Keeper (i.e., the number of functioning replicas).
+- `lost_part_count` (`UInt64`) - The number of data parts lost in the table by all replicas in total since table creation. Value is persisted in ClickHouse Keeper and can only increase.
+- `last_queue_update_exception` (`String`) - The last exception raised when the queue contains broken entries. Especially important when ClickHouse breaks backward compatibility between versions and log entries written by newer versions aren't parseable by old versions.
+- `zookeeper_exception` (`String`) - The last exception message, received if an error happened when fetching the info from ClickHouse Keeper.
+- `replica_is_active` ([Map(String, UInt8)](../../sql-reference/data-types/map.md)) — Map between replica name and whether the replica is active.

 If you request all the columns, the table may work a bit slowly, since several reads from ClickHouse Keeper are made for each row.
 If you do not request the last 4 columns (log_max_index, log_pointer, total_replicas, active_replicas), the table works quickly.
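Not part of the patch, but the columns above combine naturally into a replica health check; the thresholds here are illustrative only:

```sql
-- Illustrative only: replicas that look unhealthy or lagging.
SELECT
    database,
    table,
    is_readonly,
    absolute_delay,
    queue_size,
    active_replicas,
    total_replicas
FROM system.replicas
WHERE is_readonly
   OR absolute_delay > 30
   OR active_replicas < total_replicas;
```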
diff --git a/docs/en/operations/system-tables/replicated_fetches.md b/docs/en/operations/system-tables/replicated_fetches.md
index 9bd068e3c58..0f9be7bac83 100644
--- a/docs/en/operations/system-tables/replicated_fetches.md
+++ b/docs/en/operations/system-tables/replicated_fetches.md
@@ -7,37 +7,37 @@ Contains information about currently running background fetches.

 Columns:

-- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database.
+- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database.

-- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table.
+- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table.

-- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — The time elapsed (in seconds) since showing currently running background fetches started.
+- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — The time elapsed (in seconds) since the currently running background fetch started.

-- `progress` ([Float64](../../sql-reference/data-types/float.md)) — The percentage of completed work from 0 to 1.
+- `progress` ([Float64](../../sql-reference/data-types/float.md)) — The percentage of completed work from 0 to 1.

-- `result_part_name` ([String](../../sql-reference/data-types/string.md)) — The name of the part that will be formed as the result of showing currently running background fetches.
+- `result_part_name` ([String](../../sql-reference/data-types/string.md)) — The name of the part that will be formed as the result of the currently running background fetch.

-- `result_part_path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the part that will be formed as the result of showing currently running background fetches.
+- `result_part_path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the part that will be formed as the result of the currently running background fetch.

-- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition.
+- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition.

-- `total_size_bytes_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The total size (in bytes) of the compressed data in the result part.
+- `total_size_bytes_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The total size (in bytes) of the compressed data in the result part.

-- `bytes_read_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of compressed bytes read from the result part.
+- `bytes_read_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of compressed bytes read from the result part.

-- `source_replica_path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the source replica.
+- `source_replica_path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the source replica.

-- `source_replica_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the source replica.
+- `source_replica_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the source replica.

-- `source_replica_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Port number of the source replica.
+- `source_replica_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Port number of the source replica.

-- `interserver_scheme` ([String](../../sql-reference/data-types/string.md)) — Name of the interserver scheme.
+- `interserver_scheme` ([String](../../sql-reference/data-types/string.md)) — Name of the interserver scheme.

-- `URI` ([String](../../sql-reference/data-types/string.md)) — Uniform resource identifier.
+- `URI` ([String](../../sql-reference/data-types/string.md)) — Uniform resource identifier.

-- `to_detached` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The flag indicates whether the currently running background fetch is being performed using the `TO DETACHED` expression.
+- `to_detached` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The flag indicates whether the currently running background fetch is being performed using the `TO DETACHED` expression.

-- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread identifier.
+- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread identifier.

 **Example**

@@ -68,4 +68,4 @@ thread_id: 54

 **See Also**

-- [Managing ReplicatedMergeTree Tables](../../sql-reference/statements/system.md/#managing-replicatedmergetree-tables)
+- [Managing ReplicatedMergeTree Tables](../../sql-reference/statements/system.md/#managing-replicatedmergetree-tables)
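For orientation (not part of the patch): a sketch showing fetch progress with the columns documented above.

```sql
-- Illustrative only: progress of fetches currently in flight.
SELECT
    database,
    table,
    result_part_name,
    round(100 * progress, 1) AS progress_pct,
    formatReadableSize(bytes_read_compressed) AS read,
    formatReadableSize(total_size_bytes_compressed) AS total
FROM system.replicated_fetches;
```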
diff --git a/docs/en/operations/system-tables/replication_queue.md b/docs/en/operations/system-tables/replication_queue.md
index dcc61b8e6a7..dd8f6328688 100644
--- a/docs/en/operations/system-tables/replication_queue.md
+++ b/docs/en/operations/system-tables/replication_queue.md
@@ -7,55 +7,55 @@ Contains information about tasks from replication queues stored in ClickHouse Ke

 Columns:

-- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database.
+- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database.

-- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table.
+- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table.

-- `replica_name` ([String](../../sql-reference/data-types/string.md)) — Replica name in ClickHouse Keeper. Different replicas of the same table have different names.
+- `replica_name` ([String](../../sql-reference/data-types/string.md)) — Replica name in ClickHouse Keeper. Different replicas of the same table have different names.

-- `position` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Position of the task in the queue.
+- `position` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Position of the task in the queue.

-- `node_name` ([String](../../sql-reference/data-types/string.md)) — Node name in ClickHouse Keeper.
+- `node_name` ([String](../../sql-reference/data-types/string.md)) — Node name in ClickHouse Keeper.

-- `type` ([String](../../sql-reference/data-types/string.md)) — Type of the task in the queue, one of:
+- `type` ([String](../../sql-reference/data-types/string.md)) — Type of the task in the queue, one of:

-  - `GET_PART` — Get the part from another replica.
-  - `ATTACH_PART` — Attach the part, possibly from our own replica (if found in the `detached` folder). You may think of it as a `GET_PART` with some optimizations as they're nearly identical.
-  - `MERGE_PARTS` — Merge the parts.
-  - `DROP_RANGE` — Delete the parts in the specified partition in the specified number range.
-  - `CLEAR_COLUMN` — NOTE: Deprecated. Drop specific column from specified partition.
-  - `CLEAR_INDEX` — NOTE: Deprecated. Drop specific index from specified partition.
-  - `REPLACE_RANGE` — Drop a certain range of parts and replace them with new ones.
-  - `MUTATE_PART` — Apply one or several mutations to the part.
-  - `ALTER_METADATA` — Apply alter modification according to global /metadata and /columns paths.
+  - `GET_PART` — Get the part from another replica.
+  - `ATTACH_PART` — Attach the part, possibly from our own replica (if found in the `detached` folder). You may think of it as a `GET_PART` with some optimizations as they're nearly identical.
+  - `MERGE_PARTS` — Merge the parts.
+  - `DROP_RANGE` — Delete the parts in the specified partition in the specified number range.
+  - `CLEAR_COLUMN` — NOTE: Deprecated. Drop specific column from specified partition.
+  - `CLEAR_INDEX` — NOTE: Deprecated. Drop specific index from specified partition.
+  - `REPLACE_RANGE` — Drop a certain range of parts and replace them with new ones.
+  - `MUTATE_PART` — Apply one or several mutations to the part.
+  - `ALTER_METADATA` — Apply alter modification according to global /metadata and /columns paths.

-- `create_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was submitted for execution.
+- `create_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was submitted for execution.

-- `required_quorum` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of replicas waiting for the task to complete with confirmation of completion. This column is only relevant for the `GET_PARTS` task.
+- `required_quorum` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of replicas waiting for the task to complete with confirmation of completion. This column is only relevant for the `GET_PART` task.

-- `source_replica` ([String](../../sql-reference/data-types/string.md)) — Name of the source replica.
+- `source_replica` ([String](../../sql-reference/data-types/string.md)) — Name of the source replica.

-- `new_part_name` ([String](../../sql-reference/data-types/string.md)) — Name of the new part.
+- `new_part_name` ([String](../../sql-reference/data-types/string.md)) — Name of the new part.

-- `parts_to_merge` ([Array](../../sql-reference/data-types/array.md) ([String](../../sql-reference/data-types/string.md))) — Names of parts to merge or update.
+- `parts_to_merge` ([Array](../../sql-reference/data-types/array.md) ([String](../../sql-reference/data-types/string.md))) — Names of parts to merge or update.

-- `is_detach` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The flag indicates whether the `DETACH_PARTS` task is in the queue.
+- `is_detach` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The flag indicates whether the `DETACH_PARTS` task is in the queue.

-- `is_currently_executing` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The flag indicates whether a specific task is being performed right now.
+- `is_currently_executing` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The flag indicates whether a specific task is being performed right now.

-- `num_tries` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of failed attempts to complete the task.
+- `num_tries` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of failed attempts to complete the task.

-- `last_exception` ([String](../../sql-reference/data-types/string.md)) — Text message about the last error that occurred (if any).
+- `last_exception` ([String](../../sql-reference/data-types/string.md)) — Text message about the last error that occurred (if any).

-- `last_attempt_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was last attempted.
+- `last_attempt_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was last attempted.

-- `num_postponed` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of postponed tasks.
+- `num_postponed` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of times the task was postponed.

-- `postpone_reason` ([String](../../sql-reference/data-types/string.md)) — The reason why the task was postponed.
+- `postpone_reason` ([String](../../sql-reference/data-types/string.md)) — The reason why the task was postponed.

-- `last_postpone_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was last postponed.
+- `last_postpone_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was last postponed.

-- `merge_type` ([String](../../sql-reference/data-types/string.md)) — Type of the current merge. Empty if it's a mutation.
+- `merge_type` ([String](../../sql-reference/data-types/string.md)) — Type of the current merge. Empty if it's a mutation.

 **Example**

@@ -89,4 +89,4 @@ last_postpone_time: 1970-01-01 03:00:00

 **See Also**

-- [Managing ReplicatedMergeTree Tables](../../sql-reference/statements/system.md#query-language-system-replicated)
+- [Managing ReplicatedMergeTree Tables](../../sql-reference/statements/system.md#query-language-system-replicated)
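Not part of the patch: a sketch of how the queue columns above are typically used to spot stuck replication tasks; the ordering and limit are illustrative.

```sql
-- Illustrative only: queue entries that keep failing or being postponed.
SELECT
    database,
    table,
    type,
    num_tries,
    num_postponed,
    postpone_reason,
    last_exception
FROM system.replication_queue
ORDER BY num_tries DESC
LIMIT 5;
```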
Here "myfilter ON mydb.mytable" is the name of the row policy, "myfilter" is it's short name. +- `short_name` ([String](../../sql-reference/data-types/string.md)) — Short name of a row policy. Names of row policies are compound, for example: myfilter ON mydb.mytable. Here "myfilter ON mydb.mytable" is the name of the row policy, "myfilter" is it's short name. -- `database` ([String](../../sql-reference/data-types/string.md)) — Database name. +- `database` ([String](../../sql-reference/data-types/string.md)) — Database name. -- `table` ([String](../../sql-reference/data-types/string.md)) — Table name. +- `table` ([String](../../sql-reference/data-types/string.md)) — Table name. Empty if policy for database. -- `id` ([UUID](../../sql-reference/data-types/uuid.md)) — Row policy ID. +- `id` ([UUID](../../sql-reference/data-types/uuid.md)) — Row policy ID. -- `storage` ([String](../../sql-reference/data-types/string.md)) — Name of the directory where the row policy is stored. +- `storage` ([String](../../sql-reference/data-types/string.md)) — Name of the directory where the row policy is stored. -- `select_filter` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Condition which is used to filter rows. +- `select_filter` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Condition which is used to filter rows. -- `is_restrictive` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the row policy restricts access to rows, see [CREATE ROW POLICY](../../sql-reference/statements/create/row-policy.md#create-row-policy-as). Value: +- `is_restrictive` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the row policy restricts access to rows, see [CREATE ROW POLICY](../../sql-reference/statements/create/row-policy.md#create-row-policy-as). Value: - `0` — The row policy is defined with `AS PERMISSIVE` clause. - `1` — The row policy is defined with `AS RESTRICTIVE` clause. -- `apply_to_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows that the row policies set for all roles and/or users. +- `apply_to_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows that the row policies set for all roles and/or users. -- `apply_to_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of the roles and/or users to which the row policies is applied. +- `apply_to_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of the roles and/or users to which the row policies is applied. -- `apply_to_except` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — The row policies is applied to all roles and/or users excepting of the listed ones. +- `apply_to_except` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — The row policies is applied to all roles and/or users excepting of the listed ones. 
## See Also {#see-also} -- [SHOW POLICIES](../../sql-reference/statements/show.md#show-policies-statement) +- [SHOW POLICIES](../../sql-reference/statements/show.md#show-policies-statement) diff --git a/docs/en/operations/system-tables/schema_inference_cache.md b/docs/en/operations/system-tables/schema_inference_cache.md index 8a65f29bc30..8624ee9cec9 100644 --- a/docs/en/operations/system-tables/schema_inference_cache.md +++ b/docs/en/operations/system-tables/schema_inference_cache.md @@ -66,4 +66,4 @@ schema: id Nullable(Float64), age Nullable(Float64), name Nullab **See also** -- [Automatic schema inference from input data](/docs/en/interfaces/schema-inference.md) +- [Automatic schema inference from input data](/docs/en/interfaces/schema-inference.md) diff --git a/docs/en/operations/system-tables/server_settings.md b/docs/en/operations/system-tables/server_settings.md index e1bf8c3d63f..3085b1acaf4 100644 --- a/docs/en/operations/system-tables/server_settings.md +++ b/docs/en/operations/system-tables/server_settings.md @@ -8,12 +8,12 @@ Currently, the table shows only settings from the first layer of `config.xml` an Columns: -- `name` ([String](../../sql-reference/data-types/string.md)) — Server setting name. -- `value` ([String](../../sql-reference/data-types/string.md)) — Server setting value. -- `default` ([String](../../sql-reference/data-types/string.md)) — Server setting default value. -- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting was specified in `config.xml` -- `description` ([String](../../sql-reference/data-types/string.md)) — Short server setting description. -- `type` ([String](../../sql-reference/data-types/string.md)) — Server setting value type. +- `name` ([String](../../sql-reference/data-types/string.md)) — Server setting name. +- `value` ([String](../../sql-reference/data-types/string.md)) — Server setting value. +- `default` ([String](../../sql-reference/data-types/string.md)) — Server setting default value. +- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting was specified in `config.xml`. +- `description` ([String](../../sql-reference/data-types/string.md)) — Short server setting description. +- `type` ([String](../../sql-reference/data-types/string.md)) — Server setting value type. **Example** @@ -47,6 +47,6 @@ SELECT * FROM system.server_settings WHERE changed AND name='max_thread_pool_siz **See also** -- [Settings](../../operations/system-tables/settings.md) -- [Configuration Files](../../operations/configuration-files.md) -- [Server Settings](../../operations/server-configuration-parameters/settings.md) +- [Settings](../../operations/system-tables/settings.md) +- [Configuration Files](../../operations/configuration-files.md) +- [Server Settings](../../operations/server-configuration-parameters/settings.md) diff --git a/docs/en/operations/system-tables/session_log.md b/docs/en/operations/system-tables/session_log.md index cdf86b57ef6..661d34677e4 100644 --- a/docs/en/operations/system-tables/session_log.md +++ b/docs/en/operations/system-tables/session_log.md @@ -7,42 +7,42 @@ Contains information about all successful and failed login and logout events. Columns: -- `type` ([Enum8](../../sql-reference/data-types/enum.md)) — Login/logout result. Possible values: - - `LoginFailure` — Login error. - - `LoginSuccess` — Successful login. - - `Logout` — Logout from the system.
-- `auth_id` ([UUID](../../sql-reference/data-types/uuid.md)) — Authentication ID, which is a UUID that is automatically generated each time user logins. -- `session_id` ([String](../../sql-reference/data-types/string.md)) — Session ID that is passed by client via [HTTP](../../interfaces/http.md) interface. -- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Login/logout date. -- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Login/logout time. -- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Login/logout starting time with microseconds precision. -- `user` ([String](../../sql-reference/data-types/string.md)) — User name. -- `auth_type` ([Enum8](../../sql-reference/data-types/enum.md)) — The authentication type. Possible values: - - `NO_PASSWORD` - - `PLAINTEXT_PASSWORD` - - `SHA256_PASSWORD` - - `DOUBLE_SHA1_PASSWORD` - - `LDAP` - - `KERBEROS` - - `SSL_CERTIFICATE` -- `profiles` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — The list of profiles set for all roles and/or users. -- `roles` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — The list of roles to which the profile is applied. -- `settings` ([Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md), [String](../../sql-reference/data-types/string.md)))) — Settings that were changed when the client logged in/out. -- `client_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — The IP address that was used to log in/out. -- `client_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The client port that was used to log in/out. -- `interface` ([Enum8](../../sql-reference/data-types/enum.md)) — The interface from which the login was initiated. Possible values: - - `TCP` - - `HTTP` - - `gRPC` - - `MySQL` - - `PostgreSQL` -- `client_hostname` ([String](../../sql-reference/data-types/string.md)) — The hostname of the client machine where the [clickhouse-client](../../interfaces/cli.md) or another TCP client is run. -- `client_name` ([String](../../sql-reference/data-types/string.md)) — The `clickhouse-client` or another TCP client name. -- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Revision of the `clickhouse-client` or another TCP client. -- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The major version of the `clickhouse-client` or another TCP client. -- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The minor version of the `clickhouse-client` or another TCP client. -- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Patch component of the `clickhouse-client` or another TCP client version. -- `failure_reason` ([String](../../sql-reference/data-types/string.md)) — The exception message containing the reason for the login/logout failure. +- `type` ([Enum8](../../sql-reference/data-types/enum.md)) — Login/logout result. Possible values: + - `LoginFailure` — Login error. + - `LoginSuccess` — Successful login. + - `Logout` — Logout from the system. +- `auth_id` ([UUID](../../sql-reference/data-types/uuid.md)) — Authentication ID, which is a UUID that is automatically generated each time a user logs in.
+- `session_id` ([String](../../sql-reference/data-types/string.md)) — Session ID that is passed by the client via the [HTTP](../../interfaces/http.md) interface. +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Login/logout date. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Login/logout time. +- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Login/logout starting time with microseconds precision. +- `user` ([String](../../sql-reference/data-types/string.md)) — User name. +- `auth_type` ([Enum8](../../sql-reference/data-types/enum.md)) — The authentication type. Possible values: + - `NO_PASSWORD` + - `PLAINTEXT_PASSWORD` + - `SHA256_PASSWORD` + - `DOUBLE_SHA1_PASSWORD` + - `LDAP` + - `KERBEROS` + - `SSL_CERTIFICATE` +- `profiles` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — The list of profiles set for all roles and/or users. +- `roles` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — The list of roles to which the profile is applied. +- `settings` ([Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md), [String](../../sql-reference/data-types/string.md)))) — Settings that were changed when the client logged in/out. +- `client_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — The IP address that was used to log in/out. +- `client_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The client port that was used to log in/out. +- `interface` ([Enum8](../../sql-reference/data-types/enum.md)) — The interface from which the login was initiated. Possible values: + - `TCP` + - `HTTP` + - `gRPC` + - `MySQL` + - `PostgreSQL` +- `client_hostname` ([String](../../sql-reference/data-types/string.md)) — The hostname of the client machine where the [clickhouse-client](../../interfaces/cli.md) or another TCP client is run. +- `client_name` ([String](../../sql-reference/data-types/string.md)) — The `clickhouse-client` or another TCP client name. +- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Revision of the `clickhouse-client` or another TCP client. +- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The major version of the `clickhouse-client` or another TCP client. +- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The minor version of the `clickhouse-client` or another TCP client. +- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Patch component of the `clickhouse-client` or another TCP client version. +- `failure_reason` ([String](../../sql-reference/data-types/string.md)) — The exception message containing the reason for the login/logout failure. **Example** diff --git a/docs/en/operations/system-tables/settings.md b/docs/en/operations/system-tables/settings.md index a3dfa937abe..afae45077cc 100644 --- a/docs/en/operations/system-tables/settings.md +++ b/docs/en/operations/system-tables/settings.md @@ -7,16 +7,16 @@ Contains information about session settings for current user. Columns: -- `name` ([String](../../sql-reference/data-types/string.md)) — Setting name. -- `value` ([String](../../sql-reference/data-types/string.md)) — Setting value.
-- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting is changed from its default value. -- `description` ([String](../../sql-reference/data-types/string.md)) — Short setting description. -- `min` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Minimum value of the setting, if any is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no minimum value, contains [NULL](../../sql-reference/syntax.md#null-literal). -- `max` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Maximum value of the setting, if any is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no maximum value, contains [NULL](../../sql-reference/syntax.md#null-literal). -- `readonly` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the current user can change the setting: - - `0` — Current user can change the setting. - - `1` — Current user can’t change the setting. -- `default` ([String](../../sql-reference/data-types/string.md)) — Setting default value. +- `name` ([String](../../sql-reference/data-types/string.md)) — Setting name. +- `value` ([String](../../sql-reference/data-types/string.md)) — Setting value. +- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting is changed from its default value. +- `description` ([String](../../sql-reference/data-types/string.md)) — Short setting description. +- `min` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Minimum value of the setting, if any is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no minimum value, contains [NULL](../../sql-reference/syntax.md#null-literal). +- `max` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Maximum value of the setting, if any is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no maximum value, contains [NULL](../../sql-reference/syntax.md#null-literal). +- `readonly` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the current user can change the setting: + - `0` — Current user can change the setting. + - `1` — Current user can’t change the setting. +- `default` ([String](../../sql-reference/data-types/string.md)) — Setting default value. **Example** @@ -38,8 +38,8 @@ WHERE name LIKE '%min_i%' Using of `WHERE changed` can be useful, for example, when you want to check: -- Whether settings in configuration files are loaded correctly and are in use. -- Settings that changed in the current session. +- Whether settings in configuration files are loaded correctly and are in use. +- Settings that changed in the current session. 
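+
+For instance, a minimal sketch of such a check (illustrative only; the output depends on the session):
+
+``` sql
+-- lists every setting whose value differs from its default
+SELECT name, value
+FROM system.settings
+WHERE changed;
+```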
@@ -49,7 +49,7 @@ SELECT * FROM system.settings WHERE changed AND name='load_balancing' **See also** -- [Settings](../../operations/settings/index.md#session-settings-intro) -- [Permissions for Queries](../../operations/settings/permissions-for-queries.md#settings_readonly) -- [Constraints on Settings](../../operations/settings/constraints-on-settings.md) -- [SHOW SETTINGS](../../sql-reference/statements/show.md#show-settings) statement +- [Settings](../../operations/settings/index.md#session-settings-intro) +- [Permissions for Queries](../../operations/settings/permissions-for-queries.md#settings_readonly) +- [Constraints on Settings](../../operations/settings/constraints-on-settings.md) +- [SHOW SETTINGS](../../sql-reference/statements/show.md#show-settings) statement diff --git a/docs/en/operations/system-tables/settings_profile_elements.md b/docs/en/operations/system-tables/settings_profile_elements.md index f4c77dfc76a..c1fc562e1e9 100644 --- a/docs/en/operations/system-tables/settings_profile_elements.md +++ b/docs/en/operations/system-tables/settings_profile_elements.md @@ -10,22 +10,22 @@ Describes the content of the settings profile: - Parent settings profiles. Columns: -- `profile_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Setting profile name. +- `profile_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Setting profile name. -- `user_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — User name. +- `user_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — User name. -- `role_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Role name. +- `role_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Role name. -- `index` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Sequential number of the settings profile element. +- `index` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Sequential number of the settings profile element. -- `setting_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Setting name. +- `setting_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Setting name. -- `value` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Setting value. +- `value` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Setting value. -- `min` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The minimum value of the setting. `NULL` if not set. +- `min` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The minimum value of the setting. `NULL` if not set. -- `max` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The maximum value of the setting. NULL if not set. +- `max` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The maximum value of the setting. `NULL` if not set.
-- `readonly` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges))) — Profile that allows only read queries. +- `readonly` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges))) — Profile that allows only read queries. -- `inherit_profile` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — A parent profile for this setting profile. `NULL` if not set. Setting profile will inherit all the settings' values and constraints (`min`, `max`, `readonly`) from its parent profiles. +- `inherit_profile` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — A parent profile for this setting profile. `NULL` if not set. Setting profile will inherit all the settings' values and constraints (`min`, `max`, `readonly`) from its parent profiles. diff --git a/docs/en/operations/system-tables/settings_profiles.md b/docs/en/operations/system-tables/settings_profiles.md index ef9068ae1b8..635a4e47dfd 100644 --- a/docs/en/operations/system-tables/settings_profiles.md +++ b/docs/en/operations/system-tables/settings_profiles.md @@ -6,20 +6,20 @@ slug: /en/operations/system-tables/settings_profiles Contains properties of configured setting profiles. Columns: -- `name` ([String](../../sql-reference/data-types/string.md)) — Setting profile name. +- `name` ([String](../../sql-reference/data-types/string.md)) — Setting profile name. -- `id` ([UUID](../../sql-reference/data-types/uuid.md)) — Setting profile ID. +- `id` ([UUID](../../sql-reference/data-types/uuid.md)) — Setting profile ID. -- `storage` ([String](../../sql-reference/data-types/string.md)) — Path to the storage of setting profiles. Configured in the `access_control_path` parameter. +- `storage` ([String](../../sql-reference/data-types/string.md)) — Path to the storage of setting profiles. Configured in the `access_control_path` parameter. -- `num_elements` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of elements for this profile in the `system.settings_profile_elements` table. +- `num_elements` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of elements for this profile in the `system.settings_profile_elements` table. -- `apply_to_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows that the settings profile set for all roles and/or users. +- `apply_to_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows that the settings profile is set for all roles and/or users. -- `apply_to_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of the roles and/or users to which the setting profile is applied. +- `apply_to_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of the roles and/or users to which the setting profile is applied. -- `apply_to_except` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — The setting profile is applied to all roles and/or users excepting of the listed ones. +- `apply_to_except` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — The setting profile is applied to all roles and/or users except the listed ones.
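+
+An illustrative query (output omitted) that shows each profile and whom it applies to:
+
+``` sql
+-- apply_to_all = 1 means the profile is set for all roles and/or users
+SELECT name, num_elements, apply_to_all, apply_to_list
+FROM system.settings_profiles;
+```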
## See Also {#see-also} -- [SHOW PROFILES](../../sql-reference/statements/show.md#show-profiles-statement) +- [SHOW PROFILES](../../sql-reference/statements/show.md#show-profiles-statement) diff --git a/docs/en/operations/system-tables/stack_trace.md b/docs/en/operations/system-tables/stack_trace.md index c64cf067220..52ee7088597 100644 --- a/docs/en/operations/system-tables/stack_trace.md +++ b/docs/en/operations/system-tables/stack_trace.md @@ -9,10 +9,14 @@ To analyze stack frames, use the `addressToLine`, `addressToLineWithInlines`, `a Columns: -- `thread_name` ([String](../../sql-reference/data-types/string.md)) — Thread name. -- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread identifier. -- `query_id` ([String](../../sql-reference/data-types/string.md)) — Query identifier that can be used to get details about a query that was running from the [query_log](../system-tables/query_log.md) system table. -- `trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — A [stack trace](https://en.wikipedia.org/wiki/Stack_trace) which represents a list of physical addresses where the called methods are stored. +- `thread_name` ([String](../../sql-reference/data-types/string.md)) — Thread name. +- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread identifier. +- `query_id` ([String](../../sql-reference/data-types/string.md)) — Query identifier that can be used to get details about a query that was running from the [query_log](../system-tables/query_log.md) system table. +- `trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — A [stack trace](https://en.wikipedia.org/wiki/Stack_trace) which represents a list of physical addresses where the called methods are stored. + +:::tip +Check out the Knowledge Base for some handy queries, including [how to see what threads are currently running](https://clickhouse.com/docs/knowledgebase/find-expensive-queries) and [useful queries for troubleshooting](https://clickhouse.com/docs/knowledgebase/useful-queries-for-troubleshooting). +::: **Example** @@ -88,7 +92,7 @@ res: /lib/x86_64-linux-gnu/libc-2.27.so **See Also** -- [Introspection Functions](../../sql-reference/functions/introspection.md) — Which introspection functions are available and how to use them. -- [system.trace_log](../system-tables/trace_log.md) — Contains stack traces collected by the sampling query profiler. -- [arrayMap](../../sql-reference/functions/array-functions.md#array-map) — Description and usage example of the `arrayMap` function. -- [arrayFilter](../../sql-reference/functions/array-functions.md#array-filter) — Description and usage example of the `arrayFilter` function. +- [Introspection Functions](../../sql-reference/functions/introspection.md) — Which introspection functions are available and how to use them. +- [system.trace_log](../system-tables/trace_log.md) — Contains stack traces collected by the sampling query profiler. +- [arrayMap](../../sql-reference/functions/array-functions.md#array-map) — Description and usage example of the `arrayMap` function. +- [arrayFilter](../../sql-reference/functions/array-functions.md#array-filter) — Description and usage example of the `arrayFilter` function. 
diff --git a/docs/en/operations/system-tables/storage_policies.md b/docs/en/operations/system-tables/storage_policies.md index 966b677c7e3..5c7184b2b22 100644 --- a/docs/en/operations/system-tables/storage_policies.md +++ b/docs/en/operations/system-tables/storage_policies.md @@ -7,12 +7,20 @@ Contains information about storage policies and volumes defined in the [server c Columns: -- `policy_name` ([String](../../sql-reference/data-types/string.md)) — Name of the storage policy. -- `volume_name` ([String](../../sql-reference/data-types/string.md)) — Volume name defined in the storage policy. -- `volume_priority` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Volume order number in the configuration, the data fills the volumes according this priority, i.e. data during inserts and merges is written to volumes with a lower priority (taking into account other rules: TTL, `max_data_part_size`, `move_factor`). -- `disks` ([Array(String)](../../sql-reference/data-types/array.md)) — Disk names, defined in the storage policy. -- `max_data_part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Maximum size of a data part that can be stored on volume disks (0 — no limit). -- `move_factor` ([Float64](../../sql-reference/data-types/float.md)) — Ratio of free disk space. When the ratio exceeds the value of configuration parameter, ClickHouse start to move data to the next volume in order. -- `prefer_not_to_merge` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Value of the `prefer_not_to_merge` setting. When this setting is enabled, merging data on this volume is not allowed. This allows controlling how ClickHouse works with slow disks. +- `policy_name` ([String](../../sql-reference/data-types/string.md)) — Name of the storage policy. +- `volume_name` ([String](../../sql-reference/data-types/string.md)) — Volume name defined in the storage policy. +- `volume_priority` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Volume order number in the configuration, the data fills the volumes according to this priority, i.e. data during inserts and merges is written to volumes with a lower priority (taking into account other rules: TTL, `max_data_part_size`, `move_factor`). +- `disks` ([Array(String)](../../sql-reference/data-types/array.md)) — Disk names, defined in the storage policy. +- `volume_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of volume. Can have one of the following values: + - `JBOD` + - `SINGLE_DISK` + - `UNKNOWN` +- `max_data_part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Maximum size of a data part that can be stored on volume disks (0 — no limit). +- `move_factor` ([Float64](../../sql-reference/data-types/float.md)) — Ratio of free disk space. When the ratio exceeds the value of the configuration parameter, ClickHouse starts to move data to the next volume in order. +- `prefer_not_to_merge` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Value of the `prefer_not_to_merge` setting. When this setting is enabled, merging data on this volume is not allowed. This allows controlling how ClickHouse works with slow disks. +- `perform_ttl_move_on_insert` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Value of the `perform_ttl_move_on_insert` setting. Disables TTL move on data part INSERT. By default, if we insert a data part that has already expired by the TTL move rule, it immediately goes to the volume/disk declared in the move rule. This can significantly slow down inserts if the destination volume/disk is slow (e.g.
S3). +- `load_balancing` ([Enum8](../../sql-reference/data-types/enum.md)) — Policy for disk balancing. Can have one of the following values: + - `ROUND_ROBIN` + - `LEAST_USED` If the storage policy contains more then one volume, then information for each volume is stored in the individual row of the table. diff --git a/docs/en/operations/system-tables/table_engines.md b/docs/en/operations/system-tables/table_engines.md index 5178f1640c7..08594739ecf 100644 --- a/docs/en/operations/system-tables/table_engines.md +++ b/docs/en/operations/system-tables/table_engines.md @@ -7,14 +7,14 @@ Contains description of table engines supported by server and their feature supp This table contains the following columns (the column type is shown in brackets): -- `name` (String) — The name of table engine. -- `supports_settings` (UInt8) — Flag that indicates if table engine supports `SETTINGS` clause. -- `supports_skipping_indices` (UInt8) — Flag that indicates if table engine supports [skipping indices](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-data_skipping-indexes). -- `supports_ttl` (UInt8) — Flag that indicates if table engine supports [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl). -- `supports_sort_order` (UInt8) — Flag that indicates if table engine supports clauses `PARTITION_BY`, `PRIMARY_KEY`, `ORDER_BY` and `SAMPLE_BY`. -- `supports_replication` (UInt8) — Flag that indicates if table engine supports [data replication](../../engines/table-engines/mergetree-family/replication.md). -- `supports_duduplication` (UInt8) — Flag that indicates if table engine supports data deduplication. -- `supports_parallel_insert` (UInt8) — Flag that indicates if table engine supports parallel insert (see [`max_insert_threads`](../../operations/settings/settings.md#settings-max-insert-threads) setting). +- `name` (String) — The name of table engine. +- `supports_settings` (UInt8) — Flag that indicates if table engine supports `SETTINGS` clause. +- `supports_skipping_indices` (UInt8) — Flag that indicates if table engine supports [skipping indices](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-data_skipping-indexes). +- `supports_ttl` (UInt8) — Flag that indicates if table engine supports [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl). +- `supports_sort_order` (UInt8) — Flag that indicates if table engine supports clauses `PARTITION_BY`, `PRIMARY_KEY`, `ORDER_BY` and `SAMPLE_BY`. +- `supports_replication` (UInt8) — Flag that indicates if table engine supports [data replication](../../engines/table-engines/mergetree-family/replication.md). +- `supports_duduplication` (UInt8) — Flag that indicates if table engine supports data deduplication. +- `supports_parallel_insert` (UInt8) — Flag that indicates if table engine supports parallel insert (see [`max_insert_threads`](../../operations/settings/settings.md#settings-max-insert-threads) setting). 
Example: @@ -34,6 +34,6 @@ WHERE name in ('Kafka', 'MergeTree', 'ReplicatedCollapsingMergeTree') **See also** -- MergeTree family [query clauses](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-query-clauses) -- Kafka [settings](../../engines/table-engines/integrations/kafka.md#table_engine-kafka-creating-a-table) -- Join [settings](../../engines/table-engines/special/join.md#join-limitations-and-settings) +- MergeTree family [query clauses](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-query-clauses) +- Kafka [settings](../../engines/table-engines/integrations/kafka.md#table_engine-kafka-creating-a-table) +- Join [settings](../../engines/table-engines/special/join.md#join-limitations-and-settings) diff --git a/docs/en/operations/system-tables/tables.md b/docs/en/operations/system-tables/tables.md index 497e23dd7ca..82e9fa206ea 100644 --- a/docs/en/operations/system-tables/tables.md +++ b/docs/en/operations/system-tables/tables.md @@ -11,67 +11,67 @@ Contains metadata of each table that the server knows about. Columns: -- `database` ([String](../../sql-reference/data-types/string.md)) — The name of the database the table is in. +- `database` ([String](../../sql-reference/data-types/string.md)) — The name of the database the table is in. -- `name` ([String](../../sql-reference/data-types/string.md)) — Table name. +- `name` ([String](../../sql-reference/data-types/string.md)) — Table name. -- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Table uuid (Atomic database). +- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Table uuid (Atomic database). -- `engine` ([String](../../sql-reference/data-types/string.md)) — Table engine name (without parameters). +- `engine` ([String](../../sql-reference/data-types/string.md)) — Table engine name (without parameters). -- `is_temporary` ([UInt8](../../sql-reference/data-types/int-uint.md)) - Flag that indicates whether the table is temporary. +- `is_temporary` ([UInt8](../../sql-reference/data-types/int-uint.md)) - Flag that indicates whether the table is temporary. -- `data_paths` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Paths to the table data in the file systems. +- `data_paths` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Paths to the table data in the file systems. -- `metadata_path` ([String](../../sql-reference/data-types/string.md)) - Path to the table metadata in the file system. +- `metadata_path` ([String](../../sql-reference/data-types/string.md)) - Path to the table metadata in the file system. -- `metadata_modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) - Time of latest modification of the table metadata. +- `metadata_modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) - Time of latest modification of the table metadata. -- `dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Database dependencies. +- `dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Database dependencies. -- `dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Table dependencies ([MaterializedView](../../engines/table-engines/special/materializedview.md) tables based on the current table). 
+- `dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Table dependencies ([MaterializedView](../../engines/table-engines/special/materializedview.md) tables based on the current table). -- `create_table_query` ([String](../../sql-reference/data-types/string.md)) - The query that was used to create the table. +- `create_table_query` ([String](../../sql-reference/data-types/string.md)) - The query that was used to create the table. -- `engine_full` ([String](../../sql-reference/data-types/string.md)) - Parameters of the table engine. +- `engine_full` ([String](../../sql-reference/data-types/string.md)) - Parameters of the table engine. -- `as_select` ([String](../../sql-reference/data-types/string.md)) - `SELECT` query for view. +- `as_select` ([String](../../sql-reference/data-types/string.md)) - `SELECT` query for view. -- `partition_key` ([String](../../sql-reference/data-types/string.md)) - The partition key expression specified in the table. +- `partition_key` ([String](../../sql-reference/data-types/string.md)) - The partition key expression specified in the table. -- `sorting_key` ([String](../../sql-reference/data-types/string.md)) - The sorting key expression specified in the table. +- `sorting_key` ([String](../../sql-reference/data-types/string.md)) - The sorting key expression specified in the table. -- `primary_key` ([String](../../sql-reference/data-types/string.md)) - The primary key expression specified in the table. +- `primary_key` ([String](../../sql-reference/data-types/string.md)) - The primary key expression specified in the table. -- `sampling_key` ([String](../../sql-reference/data-types/string.md)) - The sampling key expression specified in the table. +- `sampling_key` ([String](../../sql-reference/data-types/string.md)) - The sampling key expression specified in the table. -- `storage_policy` ([String](../../sql-reference/data-types/string.md)) - The storage policy: +- `storage_policy` ([String](../../sql-reference/data-types/string.md)) - The storage policy: - - [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) - - [Distributed](../../engines/table-engines/special/distributed.md#distributed) + - [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) + - [Distributed](../../engines/table-engines/special/distributed.md#distributed) -- `total_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise `NULL` (including underying `Buffer` table). +- `total_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise `NULL` (including underlying `Buffer` table). -- `total_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of bytes, if it is possible to quickly determine exact number of bytes for the table on storage, otherwise `NULL` (does not includes any underlying storage).
+- `total_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of bytes, if it is possible to quickly determine exact number of bytes for the table on storage, otherwise `NULL` (does not include any underlying storage). - - If the table stores data on disk, returns used space on disk (i.e. compressed). - - If the table stores data in memory, returns approximated number of used bytes in memory. + - If the table stores data on disk, returns used space on disk (i.e. compressed). + - If the table stores data in memory, returns the approximate number of used bytes in memory. -- `lifetime_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows INSERTed since server start (only for `Buffer` tables). +- `lifetime_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows INSERTed since server start (only for `Buffer` tables). -- `lifetime_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of bytes INSERTed since server start (only for `Buffer` tables). +- `lifetime_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of bytes INSERTed since server start (only for `Buffer` tables). -- `comment` ([String](../../sql-reference/data-types/string.md)) - The comment for the table. +- `comment` ([String](../../sql-reference/data-types/string.md)) - The comment for the table. -- `has_own_data` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the table itself stores some data on disk or only accesses some other source. +- `has_own_data` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the table itself stores some data on disk or only accesses some other source. -- `loading_dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Database loading dependencies (list of objects which should be loaded before the current object). +- `loading_dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Database loading dependencies (list of objects which should be loaded before the current object). -- `loading_dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Table loading dependencies (list of objects which should be loaded before the current object). +- `loading_dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Table loading dependencies (list of objects which should be loaded before the current object). -- `loading_dependent_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Dependent loading database. +- `loading_dependent_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Dependent loading database. -- `loading_dependent_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Dependent loading table.
+- `loading_dependent_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Dependent loading table. The `system.tables` table is used in `SHOW TABLES` query implementation. diff --git a/docs/en/operations/system-tables/text_log.md b/docs/en/operations/system-tables/text_log.md index c0ddacc719c..897cefab0be 100644 --- a/docs/en/operations/system-tables/text_log.md +++ b/docs/en/operations/system-tables/text_log.md @@ -7,28 +7,28 @@ Contains logging entries. The logging level which goes to this table can be limi Columns: -- `event_date` (Date) — Date of the entry. -- `event_time` (DateTime) — Time of the entry. -- `event_time_microseconds` (DateTime) — Time of the entry with microseconds precision. -- `microseconds` (UInt32) — Microseconds of the entry. -- `thread_name` (String) — Name of the thread from which the logging was done. -- `thread_id` (UInt64) — OS thread ID. -- `level` (`Enum8`) — Entry level. Possible values: - - `1` or `'Fatal'`. - - `2` or `'Critical'`. - - `3` or `'Error'`. - - `4` or `'Warning'`. - - `5` or `'Notice'`. - - `6` or `'Information'`. - - `7` or `'Debug'`. - - `8` or `'Trace'`. -- `query_id` (String) — ID of the query. -- `logger_name` (LowCardinality(String)) — Name of the logger (i.e. `DDLWorker`). -- `message` (String) — The message itself. -- `revision` (UInt32) — ClickHouse revision. -- `source_file` (LowCardinality(String)) — Source file from which the logging was done. -- `source_line` (UInt64) — Source line from which the logging was done. -- `message_format_string` (LowCardinality(String)) — A format string that was used to format the message. +- `event_date` (Date) — Date of the entry. +- `event_time` (DateTime) — Time of the entry. +- `event_time_microseconds` (DateTime) — Time of the entry with microseconds precision. +- `microseconds` (UInt32) — Microseconds of the entry. +- `thread_name` (String) — Name of the thread from which the logging was done. +- `thread_id` (UInt64) — OS thread ID. +- `level` (`Enum8`) — Entry level. Possible values: + - `1` or `'Fatal'`. + - `2` or `'Critical'`. + - `3` or `'Error'`. + - `4` or `'Warning'`. + - `5` or `'Notice'`. + - `6` or `'Information'`. + - `7` or `'Debug'`. + - `8` or `'Trace'`. +- `query_id` (String) — ID of the query. +- `logger_name` (LowCardinality(String)) — Name of the logger (i.e. `DDLWorker`). +- `message` (String) — The message itself. +- `revision` (UInt32) — ClickHouse revision. +- `source_file` (LowCardinality(String)) — Source file from which the logging was done. +- `source_line` (UInt64) — Source line from which the logging was done. +- `message_format_string` (LowCardinality(String)) — A format string that was used to format the message. **Example** diff --git a/docs/en/operations/system-tables/time_zones.md b/docs/en/operations/system-tables/time_zones.md index 498c9bfd217..906651b2960 100644 --- a/docs/en/operations/system-tables/time_zones.md +++ b/docs/en/operations/system-tables/time_zones.md @@ -7,7 +7,7 @@ Contains a list of time zones that are supported by the ClickHouse server. This Columns: -- `time_zone` (String) — List of supported time zones. +- `time_zone` (String) — List of supported time zones. 
**Example** diff --git a/docs/en/operations/system-tables/trace_log.md b/docs/en/operations/system-tables/trace_log.md index 4994248ce5c..a5aae422be7 100644 --- a/docs/en/operations/system-tables/trace_log.md +++ b/docs/en/operations/system-tables/trace_log.md @@ -12,38 +12,38 @@ To analyze logs, use the `addressToLine`, `addressToLineWithInlines`, `addressTo Columns: -- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Date of sampling moment. +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Date of sampling moment. -- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Timestamp of the sampling moment. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Timestamp of the sampling moment. -- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Timestamp of the sampling moment with microseconds precision. +- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Timestamp of the sampling moment with microseconds precision. -- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the sampling moment in nanoseconds. +- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the sampling moment in nanoseconds. -- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server build revision. +- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server build revision. When connecting to the server by `clickhouse-client`, you see the string similar to `Connected to ClickHouse server version 19.18.1 revision 54429.`. This field contains the `revision`, but not the `version` of a server. -- `trace_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Trace type: +- `trace_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Trace type: - - `Real` represents collecting stack traces by wall-clock time. - - `CPU` represents collecting stack traces by CPU time. - - `Memory` represents collecting allocations and deallocations when memory allocation exceeds the subsequent watermark. - - `MemorySample` represents collecting random allocations and deallocations. - - `MemoryPeak` represents collecting updates of peak memory usage. - - `ProfileEvent` represents collecting of increments of profile events. + - `Real` represents collecting stack traces by wall-clock time. + - `CPU` represents collecting stack traces by CPU time. + - `Memory` represents collecting allocations and deallocations when memory allocation exceeds the subsequent watermark. + - `MemorySample` represents collecting random allocations and deallocations. + - `MemoryPeak` represents collecting updates of peak memory usage. + - `ProfileEvent` represents collecting of increments of profile events. -- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Thread identifier. +- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Thread identifier. -- `query_id` ([String](../../sql-reference/data-types/string.md)) — Query identifier that can be used to get details about a query that was running from the [query_log](#system_tables-query_log) system table. +- `query_id` ([String](../../sql-reference/data-types/string.md)) — Query identifier that can be used to get details about a query that was running from the [query_log](#system_tables-query_log) system table. 
-- `trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Stack trace at the moment of sampling. Each element is a virtual memory address inside ClickHouse server process. +- `trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Stack trace at the moment of sampling. Each element is a virtual memory address inside ClickHouse server process. -- `size` ([Int64](../../sql-reference/data-types/int-uint.md)) - For trace types `Memory`, `MemorySample` or `MemoryPeak` is the amount of memory allocated, for other trace types is 0. +- `size` ([Int64](../../sql-reference/data-types/int-uint.md)) - For trace types `Memory`, `MemorySample` or `MemoryPeak` is the amount of memory allocated, for other trace types is 0. -- `event` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) - For trace type `ProfileEvent` is the name of updated profile event, for other trace types is an empty string. +- `event` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) - For trace type `ProfileEvent` is the name of the updated profile event, for other trace types is an empty string. -- `increment` ([UInt64](../../sql-reference/data-types/int-uint.md)) - For trace type `ProfileEvent` is the amount of incremnt of profile event, for other trace types is 0. +- `increment` ([UInt64](../../sql-reference/data-types/int-uint.md)) - For trace type `ProfileEvent` is the amount of increment of the profile event, for other trace types is 0. **Example** diff --git a/docs/en/operations/system-tables/users.md b/docs/en/operations/system-tables/users.md index b8c0403b8d6..58cdb82d31f 100644 --- a/docs/en/operations/system-tables/users.md +++ b/docs/en/operations/system-tables/users.md @@ -3,33 +3,33 @@ slug: /en/operations/system-tables/users --- # users -Contains a list of [user accounts](../../operations/access-rights.md#user-account-management) configured at the server. +Contains a list of [user accounts](../../guides/sre/user-management/index.md#user-account-management) configured at the server. Columns: -- `name` ([String](../../sql-reference/data-types/string.md)) — User name. +- `name` ([String](../../sql-reference/data-types/string.md)) — User name. -- `id` ([UUID](../../sql-reference/data-types/uuid.md)) — User ID. +- `id` ([UUID](../../sql-reference/data-types/uuid.md)) — User ID. -- `storage` ([String](../../sql-reference/data-types/string.md)) — Path to the storage of users. Configured in the `access_control_path` parameter. +- `storage` ([String](../../sql-reference/data-types/string.md)) — Path to the storage of users. Configured in the `access_control_path` parameter. -- `auth_type` ([Enum8](../../sql-reference/data-types/enum.md)('no_password' = 0,'plaintext_password' = 1, 'sha256_password' = 2, 'double_sha1_password' = 3, 'ldap' = 4, 'kerberos' = 5, 'ssl_certificate' = 6)) — Shows the authentication type. There are multiple ways of user identification: with no password, with plain text password, with [SHA256](https://ru.wikipedia.org/wiki/SHA-2)-encoded password or with [double SHA-1](https://ru.wikipedia.org/wiki/SHA-1)-encoded password. +- `auth_type` ([Enum8](../../sql-reference/data-types/enum.md)('no_password' = 0, 'plaintext_password' = 1, 'sha256_password' = 2, 'double_sha1_password' = 3, 'ldap' = 4, 'kerberos' = 5, 'ssl_certificate' = 6, 'bcrypt_password' = 7)) — Shows the authentication type.
There are multiple ways of user identification: with no password, with plain text password, with [SHA256](https://en.wikipedia.org/wiki/SHA-2)-encoded password, with [double SHA-1](https://en.wikipedia.org/wiki/SHA-1)-encoded password or with [bcrypt](https://en.wikipedia.org/wiki/Bcrypt)-encoded password. -- `auth_params` ([String](../../sql-reference/data-types/string.md)) — Authentication parameters in the JSON format depending on the `auth_type`. +- `auth_params` ([String](../../sql-reference/data-types/string.md)) — Authentication parameters in the JSON format depending on the `auth_type`. -- `host_ip` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — IP addresses of hosts that are allowed to connect to the ClickHouse server. +- `host_ip` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — IP addresses of hosts that are allowed to connect to the ClickHouse server. -- `host_names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Names of hosts that are allowed to connect to the ClickHouse server. +- `host_names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Names of hosts that are allowed to connect to the ClickHouse server. -- `host_names_regexp` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Regular expression for host names that are allowed to connect to the ClickHouse server. +- `host_names_regexp` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Regular expression for host names that are allowed to connect to the ClickHouse server. -- `host_names_like` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Names of hosts that are allowed to connect to the ClickHouse server, set using the LIKE predicate. +- `host_names_like` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Names of hosts that are allowed to connect to the ClickHouse server, set using the LIKE predicate. -- `default_roles_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows that all granted roles set for user by default. +- `default_roles_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows that all granted roles are set for the user by default. -- `default_roles_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of granted roles provided by default. +- `default_roles_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of granted roles provided by default. -- `default_roles_except` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — All the granted roles set as default excepting of the listed ones. +- `default_roles_except` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — All the granted roles set as default except the listed ones.
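+
+A minimal illustrative query that lists the configured users together with how they authenticate:
+
+``` sql
+-- shows each user with its authentication type and the host IPs allowed to connect
+SELECT name, auth_type, host_ip
+FROM system.users;
+```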
## See Also {#see-also} -- [SHOW USERS](../../sql-reference/statements/show.md#show-users-statement) +- [SHOW USERS](../../sql-reference/statements/show.md#show-users-statement) diff --git a/docs/en/operations/system-tables/zookeeper.md b/docs/en/operations/system-tables/zookeeper.md index 503debf4302..7f948a238ac 100644 --- a/docs/en/operations/system-tables/zookeeper.md +++ b/docs/en/operations/system-tables/zookeeper.md @@ -16,20 +16,20 @@ It can be used to do a batch of Keeper path queries. Columns: -- `name` (String) — The name of the node. -- `path` (String) — The path to the node. -- `value` (String) — Node value. -- `dataLength` (Int32) — Size of the value. -- `numChildren` (Int32) — Number of descendants. -- `czxid` (Int64) — ID of the transaction that created the node. -- `mzxid` (Int64) — ID of the transaction that last changed the node. -- `pzxid` (Int64) — ID of the transaction that last deleted or added descendants. -- `ctime` (DateTime) — Time of node creation. -- `mtime` (DateTime) — Time of the last modification of the node. -- `version` (Int32) — Node version: the number of times the node was changed. -- `cversion` (Int32) — Number of added or removed descendants. -- `aversion` (Int32) — Number of changes to the ACL. -- `ephemeralOwner` (Int64) — For ephemeral nodes, the ID of the session that owns this node. +- `name` (String) — The name of the node. +- `path` (String) — The path to the node. +- `value` (String) — Node value. +- `dataLength` (Int32) — Size of the value. +- `numChildren` (Int32) — Number of descendants. +- `czxid` (Int64) — ID of the transaction that created the node. +- `mzxid` (Int64) — ID of the transaction that last changed the node. +- `pzxid` (Int64) — ID of the transaction that last deleted or added descendants. +- `ctime` (DateTime) — Time of node creation. +- `mtime` (DateTime) — Time of the last modification of the node. +- `version` (Int32) — Node version: the number of times the node was changed. +- `cversion` (Int32) — Number of added or removed descendants. +- `aversion` (Int32) — Number of changes to the ACL. +- `ephemeralOwner` (Int64) — For ephemeral nodes, the ID of the session that owns this node. Example: diff --git a/docs/en/operations/system-tables/zookeeper_connection.md b/docs/en/operations/system-tables/zookeeper_connection.md new file mode 100644 index 00000000000..9438cda1808 --- /dev/null +++ b/docs/en/operations/system-tables/zookeeper_connection.md @@ -0,0 +1,29 @@ +--- +slug: /en/operations/system-tables/zookeeper_connection +--- +# zookeeper_connection + +This table does not exist if ZooKeeper is not configured. The `system.zookeeper_connection` table shows current connections to ZooKeeper (including auxiliary ZooKeepers). Each row shows information about one connection. + +Columns: + +- `name` ([String](../../sql-reference/data-types/string.md)) — ZooKeeper cluster's name. +- `host` ([String](../../sql-reference/data-types/string.md)) — The hostname/IP of the ZooKeeper node that ClickHouse connected to. +- `port` ([String](../../sql-reference/data-types/string.md)) — The port of the ZooKeeper node that ClickHouse connected to. +- `index` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The index of the ZooKeeper node that ClickHouse connected to. The index is from ZooKeeper config. +- `connected_time` ([String](../../sql-reference/data-types/string.md)) — When the connection was established. +- `is_expired` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Whether the current connection is expired.
+- `keeper_api_version` ([String](../../sql-reference/data-types/string.md)) — Keeper API version. +- `client_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Session ID of the connection. + +Example: + +``` sql +SELECT * FROM system.zookeeper_connection; +``` + +``` text +┌─name──────────────┬─host─────────┬─port─┬─index─┬──────connected_time─┬─is_expired─┬─keeper_api_version─┬──────────client_id─┐ +│ default_zookeeper │ 127.0.0.1 │ 2181 │ 0 │ 2023-05-19 14:30:16 │ 0 │ 0 │ 216349144108826660 │ +└───────────────────┴──────────────┴──────┴───────┴─────────────────────┴────────────┴────────────────────┴────────────────────┘ +``` diff --git a/docs/en/operations/system-tables/zookeeper_log.md b/docs/en/operations/system-tables/zookeeper_log.md index 58c44325737..970ed192a48 100644 --- a/docs/en/operations/system-tables/zookeeper_log.md +++ b/docs/en/operations/system-tables/zookeeper_log.md @@ -9,46 +9,46 @@ For requests, only columns with request parameters are filled in, and the remain Columns with request parameters: -- `type` ([Enum](../../sql-reference/data-types/enum.md)) — Event type in the ZooKeeper client. Can have one of the following values: - - `Request` — The request has been sent. - - `Response` — The response was received. - - `Finalize` — The connection is lost, no response was received. -- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the event happened. -- `event_time` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — The date and time when the event happened. -- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address of ZooKeeper server that was used to make the request. -- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The port of ZooKeeper server that was used to make the request. -- `session_id` ([Int64](../../sql-reference/data-types/int-uint.md)) — The session ID that the ZooKeeper server sets for each connection. -- `xid` ([Int32](../../sql-reference/data-types/int-uint.md)) — The ID of the request within the session. This is usually a sequential request number. It is the same for the request row and the paired `response`/`finalize` row. -- `has_watch` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The request whether the [watch](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html#ch_zkWatches) has been set. -- `op_num` ([Enum](../../sql-reference/data-types/enum.md)) — The type of request or response. -- `path` ([String](../../sql-reference/data-types/string.md)) — The path to the ZooKeeper node specified in the request, or an empty string if the request not requires specifying a path. -- `data` ([String](../../sql-reference/data-types/string.md)) — The data written to the ZooKeeper node (for the `SET` and `CREATE` requests — what the request wanted to write, for the response to the `GET` request — what was read) or an empty string. -- `is_ephemeral` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Is the ZooKeeper node being created as an [ephemeral](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html#Ephemeral+Nodes). -- `is_sequential` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Is the ZooKeeper node being created as an [sequential](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html#Sequence+Nodes+--+Unique+Naming). -- `version` ([Nullable(Int32)](../../sql-reference/data-types/nullable.md)) — The version of the ZooKeeper node that the request expects when executing.
This is supported for `CHECK`, `SET`, `REMOVE` requests (is relevant `-1` if the request does not check the version or `NULL` for other requests that do not support version checking). -- `requests_size` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of requests included in the multi request (this is a special request that consists of several consecutive ordinary requests and executes them atomically). All requests included in multi request will have the same `xid`. -- `request_idx` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of the request included in multi request (for multi request — `0`, then in order from `1`). +- `type` ([Enum](../../sql-reference/data-types/enum.md)) — Event type in the ZooKeeper client. Can have one of the following values: + - `Request` — The request has been sent. + - `Response` — The response was received. + - `Finalize` — The connection is lost, no response was received. +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the event happened. +- `event_time` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — The date and time when the event happened. +- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address of the ZooKeeper server that was used to make the request. +- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The port of the ZooKeeper server that was used to make the request. +- `session_id` ([Int64](../../sql-reference/data-types/int-uint.md)) — The session ID that the ZooKeeper server sets for each connection. +- `xid` ([Int32](../../sql-reference/data-types/int-uint.md)) — The ID of the request within the session. This is usually a sequential request number. It is the same for the request row and the paired `response`/`finalize` row. +- `has_watch` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Whether the [watch](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html#ch_zkWatches) has been set in the request. +- `op_num` ([Enum](../../sql-reference/data-types/enum.md)) — The type of request or response. +- `path` ([String](../../sql-reference/data-types/string.md)) — The path to the ZooKeeper node specified in the request, or an empty string if the request does not require specifying a path. +- `data` ([String](../../sql-reference/data-types/string.md)) — The data written to the ZooKeeper node (for the `SET` and `CREATE` requests — what the request wanted to write, for the response to the `GET` request — what was read) or an empty string. +- `is_ephemeral` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Is the ZooKeeper node being created as an [ephemeral](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html#Ephemeral+Nodes) node. +- `is_sequential` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Is the ZooKeeper node being created as a [sequential](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html#Sequence+Nodes+--+Unique+Naming) node. +- `version` ([Nullable(Int32)](../../sql-reference/data-types/nullable.md)) — The version of the ZooKeeper node that the request expects when executing. This is supported for `CHECK`, `SET`, `REMOVE` requests (`-1` if the request does not check the version, or `NULL` for other requests that do not support version checking).
+- `requests_size` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of requests included in the multi request (this is a special request that consists of several consecutive ordinary requests and executes them atomically). All requests included in the multi request will have the same `xid`. +- `request_idx` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of the request included in the multi request (`0` for the multi request itself, then in order from `1`). Columns with request response parameters: -- `zxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — ZooKeeper transaction ID. The serial number issued by the ZooKeeper server in response to a successfully executed request (`0` if the request was not executed/returned an error/the client does not know whether the request was executed). -- `error` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — Error code. Can have many values, here are just some of them: - - `ZOK` — The request was executed seccessfully. - - `ZCONNECTIONLOSS` — The connection was lost. - - `ZOPERATIONTIMEOUT` — The request execution timeout has expired.
+ - `ZSESSIONEXPIRED` — The session has expired. + - `NULL` — The request is completed. +- `watch_type` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — The type of the `watch` event (for responses with `op_num` = `Watch`), for the remaining responses: `NULL`. +- `watch_state` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — The status of the `watch` event (for responses with `op_num` = `Watch`), for the remaining responses: `NULL`. +- `path_created` ([String](../../sql-reference/data-types/string.md)) — The path to the created ZooKeeper node (for responses to the `CREATE` request), may differ from the `path` if the node is created as a `sequential`. +- `stat_czxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The `zxid` of the change that caused this ZooKeeper node to be created. +- `stat_mzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The `zxid` of the change that last modified this ZooKeeper node. +- `stat_pzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The transaction ID of the change that last modified children of this ZooKeeper node. +- `stat_version` ([Int32](../../sql-reference/data-types/int-uint.md)) — The number of changes to the data of this ZooKeeper node. +- `stat_cversion` ([Int32](../../sql-reference/data-types/int-uint.md)) — The number of changes to the children of this ZooKeeper node. +- `stat_dataLength` ([Int32](../../sql-reference/data-types/int-uint.md)) — The length of the data field of this ZooKeeper node. +- `stat_numChildren` ([Int32](../../sql-reference/data-types/int-uint.md)) — The number of children of this ZooKeeper node. +- `children` ([Array(String)](../../sql-reference/data-types/array.md)) — The list of child ZooKeeper nodes (for responses to `LIST` request). **Example** @@ -128,5 +128,5 @@ children: ['query-0000000006','query-0000000005','query-0000000004','que **See Also** -- [ZooKeeper](../../operations/tips.md#zookeeper) -- [ZooKeeper guide](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html) +- [ZooKeeper](../../operations/tips.md#zookeeper) +- [ZooKeeper guide](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html) diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index da34a6b7e9c..8f6cf6ad147 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -51,10 +51,14 @@ But for storing archives with rare queries, shelves will work. ## RAID {#raid} When using HDD, you can combine their RAID-10, RAID-5, RAID-6 or RAID-50. -For Linux, software RAID is better (with `mdadm`). We do not recommend using LVM. +For Linux, software RAID is better (with `mdadm`). When creating RAID-10, select the `far` layout. If your budget allows, choose RAID-10. +LVM by itself (without RAID or `mdadm`) is OK, but making RAID with it or combining it with `mdadm` is a less explored option, and there are more chances for mistakes +(selecting a wrong chunk size; misaligning chunks; choosing a wrong RAID type; forgetting to clean up disks). If you are confident +in using LVM, there is nothing against using it. + If you have more than 4 disks, use RAID-6 (preferred) or RAID-50, instead of RAID-5. When using RAID-5, RAID-6 or RAID-50, always increase stripe_cache_size, since the default value is usually not the best choice. @@ -70,7 +74,7 @@ Never set the block size too small or too large. You can use RAID-0 on SSD. Regardless of RAID use, always use replication for data security. -Enable NCQ with a long queue.
For HDD, choose the CFQ scheduler, and for SSD, choose noop. Don’t reduce the ‘readahead’ setting. +Enable NCQ with a long queue. For HDD, choose the mq-deadline or CFQ scheduler, and for SSD, choose noop. Don’t reduce the ‘readahead’ setting. For HDD, enable the write cache. Make sure that [`fstrim`](https://en.wikipedia.org/wiki/Trim_(computing)) is enabled for NVME and SSD disks in your OS (usually it's implemented using a cronjob or systemd service). @@ -126,7 +130,7 @@ Otherwise you may get `Illegal instruction` crashes when hypervisor is run on ol ## ClickHouse Keeper and ZooKeeper {#zookeeper} -ClickHouse Keeper is recommended to replace ZooKeeper for ClickHouse clusters. See the documentation for [ClickHouse Keeper](clickhouse-keeper.md) +ClickHouse Keeper is recommended to replace ZooKeeper for ClickHouse clusters. See the documentation for [ClickHouse Keeper](../guides/sre/keeper/index.md). If you would like to continue using ZooKeeper then it is best to use a fresh version of ZooKeeper – 3.4.9 or later. The version in stable Linux distributions may be outdated. @@ -134,7 +138,7 @@ You should never use manually written scripts to transfer data between different If you want to divide an existing ZooKeeper cluster into two, the correct way is to increase the number of its replicas and then reconfigure it as two independent clusters. -You can run ClickHouse Keeper on the same server as ClickHouse in test environments, or in environments with low ingestion rate. +You can run ClickHouse Keeper on the same server as ClickHouse in test environments, or in environments with low ingestion rate. For production environments we suggest using separate servers for ClickHouse and ZooKeeper/Keeper, or placing ClickHouse files and Keeper files on separate disks, because ZooKeeper/Keeper is very sensitive to disk latency and ClickHouse may utilize all available system resources. You can have ZooKeeper observers in an ensemble but ClickHouse servers should not interact with observers. diff --git a/docs/en/operations/utilities/clickhouse-benchmark.md b/docs/en/operations/utilities/clickhouse-benchmark.md index 0126cda160a..8620b44c368 100644 --- a/docs/en/operations/utilities/clickhouse-benchmark.md +++ b/docs/en/operations/utilities/clickhouse-benchmark.md @@ -41,24 +41,24 @@ clickhouse-benchmark [keys] < queries_file; ## Keys {#clickhouse-benchmark-keys} -- `--query=QUERY` — Query to execute. If this parameter is not passed, `clickhouse-benchmark` will read queries from standard input. -- `-c N`, `--concurrency=N` — Number of queries that `clickhouse-benchmark` sends simultaneously. Default value: 1. -- `-d N`, `--delay=N` — Interval in seconds between intermediate reports (to disable reports set 0). Default value: 1. -- `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-h` keys. -- `-p N`, `--port=N` — Server port. Default value: 9000. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-p` keys. -- `-i N`, `--iterations=N` — Total number of queries. Default value: 0 (repeat forever). -- `-r`, `--randomize` — Random order of queries execution if there is more than one input query. -- `-s`, `--secure` — Using `TLS` connection. -- `-t N`, `--timelimit=N` — Time limit in seconds. `clickhouse-benchmark` stops sending queries when the specified time limit is reached. Default value: 0 (time limit disabled). -- `--confidence=N` — Level of confidence for T-test.
Possible values: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Default value: 5. In the [comparison mode](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` performs the [Independent two-sample Student’s t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) to determine whether the two distributions aren’t different with the selected level of confidence. -- `--cumulative` — Printing cumulative data instead of data per interval. -- `--database=DATABASE_NAME` — ClickHouse database name. Default value: `default`. -- `--json=FILEPATH` — `JSON` output. When the key is set, `clickhouse-benchmark` outputs a report to the specified JSON-file. -- `--user=USERNAME` — ClickHouse user name. Default value: `default`. -- `--password=PSWD` — ClickHouse user password. Default value: empty string. -- `--stacktrace` — Stack traces output. When the key is set, `clickhouse-bencmark` outputs stack traces of exceptions. -- `--stage=WORD` — Query processing stage at server. ClickHouse stops query processing and returns an answer to `clickhouse-benchmark` at the specified stage. Possible values: `complete`, `fetch_columns`, `with_mergeable_state`. Default value: `complete`. -- `--help` — Shows the help message. +- `--query=QUERY` — Query to execute. If this parameter is not passed, `clickhouse-benchmark` will read queries from standard input. +- `-c N`, `--concurrency=N` — Number of queries that `clickhouse-benchmark` sends simultaneously. Default value: 1. +- `-d N`, `--delay=N` — Interval in seconds between intermediate reports (to disable reports set 0). Default value: 1. +- `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-h` keys. +- `-p N`, `--port=N` — Server port. Default value: 9000. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-p` keys. +- `-i N`, `--iterations=N` — Total number of queries. Default value: 0 (repeat forever). +- `-r`, `--randomize` — Random order of queries execution if there is more than one input query. +- `-s`, `--secure` — Using `TLS` connection. +- `-t N`, `--timelimit=N` — Time limit in seconds. `clickhouse-benchmark` stops sending queries when the specified time limit is reached. Default value: 0 (time limit disabled). +- `--confidence=N` — Level of confidence for T-test. Possible values: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Default value: 5. In the [comparison mode](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` performs the [Independent two-sample Student’s t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) to determine whether the two distributions aren’t different with the selected level of confidence. +- `--cumulative` — Printing cumulative data instead of data per interval. +- `--database=DATABASE_NAME` — ClickHouse database name. Default value: `default`. +- `--json=FILEPATH` — `JSON` output. When the key is set, `clickhouse-benchmark` outputs a report to the specified JSON file. +- `--user=USERNAME` — ClickHouse user name. Default value: `default`. +- `--password=PSWD` — ClickHouse user password. Default value: empty string. +- `--stacktrace` — Stack traces output. When the key is set, `clickhouse-benchmark` outputs stack traces of exceptions. +- `--stage=WORD` — Query processing stage at server. ClickHouse stops query processing and returns an answer to `clickhouse-benchmark` at the specified stage.
Possible values: `complete`, `fetch_columns`, `with_mergeable_state`. Default value: `complete`. +- `--help` — Shows the help message. If you want to apply some [settings](../../operations/settings/index.md) for queries, pass them as a key `--<session setting name>= SETTING_VALUE`. For example, `--max_memory_usage=1048576`. @@ -91,19 +91,19 @@ localhost:9000, queries 10, QPS: 6.772, RPS: 67904487.440, MiB/s: 518.070, resul In the report you can find: -- Number of queries in the `Queries executed:` field. +- Number of queries in the `Queries executed:` field. -- Status string containing (in order): +- Status string containing (in order): - - Endpoint of ClickHouse server. - - Number of processed queries. - - QPS: How many queries the server performed per second during a period specified in the `--delay` argument. - - RPS: How many rows the server reads per second during a period specified in the `--delay` argument. - - MiB/s: How many mebibytes the server reads per second during a period specified in the `--delay` argument. - - result RPS: How many rows placed by the server to the result of a query per second during a period specified in the `--delay` argument. - - result MiB/s. How many mebibytes placed by the server to the result of a query per second during a period specified in the `--delay` argument. + - Endpoint of ClickHouse server. + - Number of processed queries. + - QPS: How many queries the server performed per second during a period specified in the `--delay` argument. + - RPS: How many rows the server reads per second during a period specified in the `--delay` argument. + - MiB/s: How many mebibytes the server reads per second during a period specified in the `--delay` argument. + - result RPS: How many rows the server places in the result of a query per second during a period specified in the `--delay` argument. + - result MiB/s: How many mebibytes the server places in the result of a query per second during a period specified in the `--delay` argument. -- Percentiles of queries execution time. +- Percentiles of queries execution time. ## Comparison Mode {#clickhouse-benchmark-comparison-mode} diff --git a/docs/en/operations/utilities/clickhouse-copier.md b/docs/en/operations/utilities/clickhouse-copier.md index 87280bc3ba8..a9b82404b90 100644 --- a/docs/en/operations/utilities/clickhouse-copier.md +++ b/docs/en/operations/utilities/clickhouse-copier.md @@ -8,7 +8,7 @@ sidebar_label: clickhouse-copier Copies data from the tables in one cluster to tables in another (or the same) cluster. -:::warning +:::note To get a consistent copy, the data in the source tables and partitions should not change during the entire process. ::: @@ -16,12 +16,12 @@ You can run multiple `clickhouse-copier` instances on different servers to perfo After starting, `clickhouse-copier`: -- Connects to ClickHouse Keeper and receives: +- Connects to ClickHouse Keeper and receives: - - Copying jobs. - - The state of the copying jobs. + - Copying jobs. + - The state of the copying jobs. -- It performs the jobs. +- It performs the jobs. Each running process chooses the “closest” shard of the source cluster and copies the data into the destination cluster, resharding the data if necessary. @@ -39,12 +39,12 @@ $ clickhouse-copier --daemon --config keeper.xml --task-path /task/path --base-d Parameters: -- `daemon` — Starts `clickhouse-copier` in daemon mode. -- `config` — The path to the `keeper.xml` file with the parameters for the connection to ClickHouse Keeper. -- `task-path` — The path to the ClickHouse Keeper node.
This node is used for syncing `clickhouse-copier` processes and storing tasks. Tasks are stored in `$task-path/description`. -- `task-file` — Optional path to file with task configuration for initial upload to ClickHouse Keeper. -- `task-upload-force` — Force upload `task-file` even if node already exists. -- `base-dir` — The path to logs and auxiliary files. When it starts, `clickhouse-copier` creates `clickhouse-copier_YYYYMMHHSS_` subdirectories in `$base-dir`. If this parameter is omitted, the directories are created in the directory where `clickhouse-copier` was launched. +- `daemon` — Starts `clickhouse-copier` in daemon mode. +- `config` — The path to the `keeper.xml` file with the parameters for the connection to ClickHouse Keeper. +- `task-path` — The path to the ClickHouse Keeper node. This node is used for syncing `clickhouse-copier` processes and storing tasks. Tasks are stored in `$task-path/description`. +- `task-file` — Optional path to file with task configuration for initial upload to ClickHouse Keeper. +- `task-upload-force` — Force upload `task-file` even if node already exists. +- `base-dir` — The path to logs and auxiliary files. When it starts, `clickhouse-copier` creates `clickhouse-copier_YYYYMMHHSS_` subdirectories in `$base-dir`. If this parameter is omitted, the directories are created in the directory where `clickhouse-copier` was launched. ## Format of keeper.xml {#format-of-zookeeper-xml} diff --git a/docs/en/operations/utilities/clickhouse-format.md b/docs/en/operations/utilities/clickhouse-format.md index bf2e618b791..101310cc65e 100644 --- a/docs/en/operations/utilities/clickhouse-format.md +++ b/docs/en/operations/utilities/clickhouse-format.md @@ -27,7 +27,7 @@ $ clickhouse-format --query "select number from numbers(10) where number%2 order Result: -```text +```sql SELECT number FROM numbers(10) WHERE number % 2 @@ -54,7 +54,7 @@ $ clickhouse-format -n <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELECT 1 UNIO Result: -```text +```sql SELECT * FROM ( @@ -75,7 +75,7 @@ $ clickhouse-format --seed Hello --obfuscate <<< "SELECT cost_first_screen BETWE Result: -```text +```sql SELECT treasury_mammoth_hazelnut BETWEEN nutmeg AND span, CASE WHEN chive >= 116 THEN switching ELSE ANYTHING END; ``` @@ -87,7 +87,7 @@ $ clickhouse-format --seed World --obfuscate <<< "SELECT cost_first_screen BETWE Result: -```text +```sql SELECT horse_tape_summer BETWEEN folklore AND moccasins, CASE WHEN intestine >= 116 THEN nonconformist ELSE FORESTRY END; ``` @@ -99,7 +99,7 @@ $ clickhouse-format --backslash <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELE Result: -```text +```sql SELECT * \ FROM \ ( \ diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index a4fa5579638..0443a80cf17 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -4,52 +4,202 @@ sidebar_position: 60 sidebar_label: clickhouse-local --- -# clickhouse-local +# clickhouse-local -The `clickhouse-local` program enables you to perform fast processing on local files, without having to deploy and configure the ClickHouse server. +## Related Content -Accepts data that represent tables and queries them using [ClickHouse SQL dialect](../../sql-reference/). 
+- Blog: [Extracting, Converting, and Querying Data in Local Files using clickhouse-local](https://clickhouse.com/blog/extracting-converting-querying-local-files-with-sql-clickhouse-local) -`clickhouse-local` uses the same core as ClickHouse server, so it supports most of the features and the same set of formats and table engines. +## When to use clickhouse-local vs. ClickHouse -By default `clickhouse-local` does not have access to data on the same host, but it supports loading server configuration using `--config-file` argument. +`clickhouse-local` is an easy-to-use version of ClickHouse that is ideal for developers who need to perform fast processing on local and remote files using SQL without having to install a full database server. With `clickhouse-local`, developers can use SQL commands (using the [ClickHouse SQL dialect](../../sql-reference/index.md)) directly from the command line, providing a simple and efficient way to access ClickHouse features without the need for a full ClickHouse installation. One of the main benefits of `clickhouse-local` is that it is already included when installing [clickhouse-client](https://clickhouse.com/docs/en/integrations/sql-clients/clickhouse-client-local). This means that developers can get started with `clickhouse-local` quickly, without the need for a complex installation process. -:::warning -It is not recommended to load production server configuration into `clickhouse-local` because data can be damaged in case of human error. +While `clickhouse-local` is a great tool for development and testing purposes, and for processing files, it is not suitable for serving end users or applications. In these scenarios, it is recommended to use the open-source [ClickHouse](https://clickhouse.com/docs/en/install). ClickHouse is a powerful OLAP database that is designed to handle large-scale analytical workloads. It provides fast and efficient processing of complex queries on large datasets, making it ideal for use in production environments where high performance is critical. Additionally, ClickHouse offers a wide range of features such as replication, sharding, and high availability, which are essential for scaling up to handle large datasets and serving applications. If you need to handle larger datasets or serve end users or applications, we recommend using open-source ClickHouse instead of `clickhouse-local`. + +Please read the docs below that show example use cases for `clickhouse-local`, such as [querying local CSVs](#query-data-in-a-csv-file-using-sql) or [reading a parquet file in S3](#query-data-in-a-parquet-file-in-aws-s3). + +## Download clickhouse-local + +`clickhouse-local` is executed using the same `clickhouse` binary that runs the ClickHouse server and `clickhouse-client`. The easiest way to download the latest version is with the following command: + +```bash +curl https://clickhouse.com/ | sh +``` + +:::note +The binary you just downloaded can run all sorts of ClickHouse tools and utilities. If you want to run ClickHouse as a database server, check out the [Quick Start](../../quick-start.mdx). +::: + +## Query data in a CSV file using SQL + +A common use of `clickhouse-local` is to run ad-hoc queries on files, where you don't have to insert the data into a table. `clickhouse-local` can stream the data from a file into a temporary table and execute your SQL. + +If the file is sitting on the same machine as `clickhouse-local`, use the `file` table engine.
The following `reviews.tsv` file contains a sampling of Amazon product reviews: + +```bash +./clickhouse local -q "SELECT * FROM file('reviews.tsv')" +``` + +ClickHouse knows the file uses a tab-separated format from the filename extension. If you need to explicitly specify the format, simply add one of the [many ClickHouse input formats](../../interfaces/formats.md): +```bash +./clickhouse local -q "SELECT * FROM file('reviews.tsv', 'TabSeparated')" +``` + +The `file` table function creates a table, and you can use `DESCRIBE` to see the inferred schema: + +```bash +./clickhouse local -q "DESCRIBE file('reviews.tsv')" +``` + +```response +marketplace Nullable(String) +customer_id Nullable(Int64) +review_id Nullable(String) +product_id Nullable(String) +product_parent Nullable(Int64) +product_title Nullable(String) +product_category Nullable(String) +star_rating Nullable(Int64) +helpful_votes Nullable(Int64) +total_votes Nullable(Int64) +vine Nullable(String) +verified_purchase Nullable(String) +review_headline Nullable(String) +review_body Nullable(String) +review_date Nullable(Date) +``` + +Let's find a product with the highest rating: + +```bash +./clickhouse local -q "SELECT + argMax(product_title,star_rating), + max(star_rating) +FROM file('reviews.tsv')" +``` + +```response +Monopoly Junior Board Game 5 +``` + +## Query data in a Parquet file in AWS S3 + +If you have a file in S3, use `clickhouse-local` and the `s3` table function to query the file in place (without inserting the data into a ClickHouse table). We have a file named `house_0.parquet` in a public bucket that contains home prices of property sold in the United Kingdom. Let's see how many rows it has: + +```bash +./clickhouse local -q " +SELECT count() +FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/house_parquet/house_0.parquet')" +``` + +The file has 2.7M rows: + +```response +2772030 +``` + +It's always useful to see the schema that ClickHouse infers from the file: + +```bash +./clickhouse local -q "DESCRIBE s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/house_parquet/house_0.parquet')" +``` + +```response +price Nullable(Int64) +date Nullable(UInt16) +postcode1 Nullable(String) +postcode2 Nullable(String) +type Nullable(String) +is_new Nullable(UInt8) +duration Nullable(String) +addr1 Nullable(String) +addr2 Nullable(String) +street Nullable(String) +locality Nullable(String) +town Nullable(String) +district Nullable(String) +county Nullable(String) +``` + +Let's see what the most expensive neighborhoods are: + +```bash +./clickhouse local -q " +SELECT + town, + district, + count() AS c, + round(avg(price)) AS price, + bar(price, 0, 5000000, 100) +FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/house_parquet/house_0.parquet') +GROUP BY + town, + district +HAVING c >= 100 +ORDER BY price DESC +LIMIT 10" +``` + +```response +LONDON CITY OF LONDON 886 2271305 █████████████████████████████████████████████▍ +LEATHERHEAD ELMBRIDGE 206 1176680 ███████████████████████▌ +LONDON CITY OF WESTMINSTER 12577 1108221 ██████████████████████▏ +LONDON KENSINGTON AND CHELSEA 8728 1094496 █████████████████████▉ +HYTHE FOLKESTONE AND HYTHE 130 1023980 ████████████████████▍ +CHALFONT ST GILES CHILTERN 113 835754 ████████████████▋ +AMERSHAM BUCKINGHAMSHIRE 113 799596 ███████████████▉ +VIRGINIA WATER RUNNYMEDE 356 789301 ███████████████▊ +BARNET ENFIELD 282 740514 ██████████████▊ +NORTHWOOD THREE RIVERS 184 731609 ██████████████▋ +``` + +:::tip +When you are ready to insert
your files into ClickHouse, start up a ClickHouse server and insert the results of your `file` and `s3` table functions into a `MergeTree` table. View the [Quick Start](../../quick-start.mdx) for more details. ::: -For temporary data, a unique temporary data directory is created by default. ## Usage {#usage} -Basic usage: +By default `clickhouse-local` has access to data of a ClickHouse server on the same host, and it does not depend on the server's configuration. It also supports loading server configuration using the `--config-file` argument. For temporary data, a unique temporary data directory is created by default. +Basic usage (Linux): ``` bash -$ clickhouse-local --structure "table_structure" --input-format "format_of_incoming_data" \ - --query "query" +$ clickhouse-local --structure "table_structure" --input-format "format_of_incoming_data" --query "query" ``` +Basic usage (Mac): + +``` bash +$ ./clickhouse local --structure "table_structure" --input-format "format_of_incoming_data" --query "query" +``` + +:::note +`clickhouse-local` is also supported on Windows through WSL2. +::: + Arguments: -- `-S`, `--structure` — table structure for input data. -- `--input-format` — input format, `TSV` by default. -- `-f`, `--file` — path to data, `stdin` by default. -- `-q`, `--query` — queries to execute with `;` as delimeter. You must specify either `query` or `queries-file` option. -- `--queries-file` - file path with queries to execute. You must specify either `query` or `queries-file` option. -- `-N`, `--table` — table name where to put output data, `table` by default. -- `--format`, `--output-format` — output format, `TSV` by default. -- `-d`, `--database` — default database, `_local` by default. -- `--stacktrace` — whether to dump debug output in case of exception. -- `--echo` — print query before execution. -- `--verbose` — more details on query execution. -- `--logger.console` — Log to console. -- `--logger.log` — Log file name. -- `--logger.level` — Log level. -- `--ignore-error` — do not stop processing if a query failed. -- `-c`, `--config-file` — path to configuration file in same format as for ClickHouse server, by default the configuration empty. -- `--no-system-tables` — do not attach system tables. -- `--help` — arguments references for `clickhouse-local`. -- `-V`, `--version` — print version information and exit. +- `-S`, `--structure` — table structure for input data. +- `--input-format` — input format, `TSV` by default. +- `-f`, `--file` — path to data, `stdin` by default. +- `-q`, `--query` — queries to execute with `;` as delimiter. Cannot be used simultaneously with `--queries-file`. +- `--queries-file` - file path with queries to execute. Cannot be used simultaneously with `--query`. +- `-n`, `--multiquery` — If specified, multiple queries separated by semicolons can be listed after the `--query` option. For convenience, it is also possible to omit `--query` and pass the queries directly after `--multiquery`. +- `-N`, `--table` — table name where to put output data, `table` by default. +- `--format`, `--output-format` — output format, `TSV` by default. +- `-d`, `--database` — default database, `_local` by default. +- `--stacktrace` — whether to dump debug output in case of exception. +- `--echo` — print query before execution. +- `--verbose` — more details on query execution. +- `--logger.console` — Log to console. +- `--logger.log` — Log file name. +- `--logger.level` — Log level. +- `--ignore-error` — do not stop processing if a query failed.
+- `-c`, `--config-file` — path to configuration file in same format as for ClickHouse server, by default the configuration is empty. +- `--no-system-tables` — do not attach system tables. +- `--help` — arguments reference for `clickhouse-local`. +- `-V`, `--version` — print version information and exit. Also there are arguments for each ClickHouse configuration variable which are more commonly used instead of `--config-file`. diff --git a/docs/en/operations/utilities/index.md b/docs/en/operations/utilities/index.md index a8c0239c102..112a51cfa97 100644 --- a/docs/en/operations/utilities/index.md +++ b/docs/en/operations/utilities/index.md @@ -1,16 +1,16 @@ --- slug: /en/operations/utilities/ sidebar_position: 56 -sidebar_label: Overview +sidebar_label: List of tools and utilities pagination_next: 'en/operations/utilities/clickhouse-copier' --- -# ClickHouse Utilities +# List of tools and utilities -- [clickhouse-local](../../operations/utilities/clickhouse-local.md) — Allows running SQL queries on data without starting the ClickHouse server, similar to how `awk` does this. -- [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) — Copies (and reshards) data from one cluster to another cluster. -- [clickhouse-benchmark](../../operations/utilities/clickhouse-benchmark.md) — Loads server with the custom queries and settings. -- [clickhouse-format](../../operations/utilities/clickhouse-format.md) — Enables formatting input queries. -- [ClickHouse obfuscator](../../operations/utilities/clickhouse-obfuscator.md) — Obfuscates data. -- [ClickHouse compressor](../../operations/utilities/clickhouse-compressor.md) — Compresses and decompresses data. -- [clickhouse-odbc-bridge](../../operations/utilities/odbc-bridge.md) — A proxy server for ODBC driver. +- [clickhouse-local](../../operations/utilities/clickhouse-local.md) — Allows running SQL queries on data without starting the ClickHouse server, similar to how `awk` does this. +- [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) — Copies (and reshards) data from one cluster to another cluster. +- [clickhouse-benchmark](../../operations/utilities/clickhouse-benchmark.md) — Loads server with the custom queries and settings. +- [clickhouse-format](../../operations/utilities/clickhouse-format.md) — Enables formatting input queries. +- [ClickHouse obfuscator](../../operations/utilities/clickhouse-obfuscator.md) — Obfuscates data. +- [ClickHouse compressor](../../operations/utilities/clickhouse-compressor.md) — Compresses and decompresses data. +- [clickhouse-odbc-bridge](../../operations/utilities/odbc-bridge.md) — A proxy server for ODBC driver. diff --git a/docs/en/sql-reference/_category_.yml b/docs/en/sql-reference/_category_.yml index d799ecef539..45eaa6e7c16 100644 --- a/docs/en/sql-reference/_category_.yml +++ b/docs/en/sql-reference/_category_.yml @@ -1,7 +1,7 @@ -position: 15 +position: 1 label: 'SQL Reference' collapsible: true collapsed: true link: - type: doc - id: en/sql-reference/index + type: generated-index + slug: /en/sql-reference diff --git a/docs/en/sql-reference/aggregate-functions/combinators.md b/docs/en/sql-reference/aggregate-functions/combinators.md index 704e88c6313..e1db5d8d23e 100644 --- a/docs/en/sql-reference/aggregate-functions/combinators.md +++ b/docs/en/sql-reference/aggregate-functions/combinators.md @@ -44,7 +44,7 @@ If you apply this combinator, the aggregate function returns the same value but **Arguments** -- `x` — Aggregate function parameters.
+- `x` — Aggregate function parameters. **Returned values** @@ -72,11 +72,11 @@ If you apply this combinator, the aggregate function does not return the resulti To work with these states, use: -- [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) table engine. -- [finalizeAggregation](../../sql-reference/functions/other-functions.md#function-finalizeaggregation) function. -- [runningAccumulate](../../sql-reference/functions/other-functions.md#runningaccumulate) function. -- [-Merge](#aggregate_functions_combinators-merge) combinator. -- [-MergeState](#aggregate_functions_combinators-mergestate) combinator. +- [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) table engine. +- [finalizeAggregation](../../sql-reference/functions/other-functions.md#function-finalizeaggregation) function. +- [runningAccumulate](../../sql-reference/functions/other-functions.md#runningaccumulate) function. +- [-Merge](#aggregate_functions_combinators-merge) combinator. +- [-MergeState](#aggregate_functions_combinators-mergestate) combinator. ## -Merge @@ -111,7 +111,7 @@ If an aggregate function does not have input values, with this combinator it ret **Arguments** -- `x` — Aggregate function parameters. +- `x` — Aggregate function parameters. **Returned values** @@ -171,12 +171,12 @@ This combinator converts a result of an aggregate function to the [Nullable](../ **Arguments** -- `x` — Aggregate function parameters. +- `x` — Aggregate function parameters. **Returned values** -- The result of the aggregate function, converted to the `Nullable` data type. -- `NULL`, if there is nothing to aggregate. +- The result of the aggregate function, converted to the `Nullable` data type. +- `NULL`, if there is nothing to aggregate. Type: `Nullable(aggregate function return type)`. @@ -228,15 +228,15 @@ Lets you divide data into groups, and then separately aggregates the data in tho **Arguments** -- `start` — Starting value of the whole required interval for `resampling_key` values. -- `stop` — Ending value of the whole required interval for `resampling_key` values. The whole interval does not include the `stop` value `[start, stop)`. -- `step` — Step for separating the whole interval into subintervals. The `aggFunction` is executed over each of those subintervals independently. -- `resampling_key` — Column whose values are used for separating data into intervals. -- `aggFunction_params` — `aggFunction` parameters. +- `start` — Starting value of the whole required interval for `resampling_key` values. +- `stop` — Ending value of the whole required interval for `resampling_key` values. The whole interval does not include the `stop` value `[start, stop)`. +- `step` — Step for separating the whole interval into subintervals. The `aggFunction` is executed over each of those subintervals independently. +- `resampling_key` — Column whose values are used for separating data into intervals. +- `aggFunction_params` — `aggFunction` parameters. **Returned values** -- Array of `aggFunction` results for each subinterval. +- Array of `aggFunction` results for each subinterval. 
**Example** @@ -285,3 +285,8 @@ FROM people │ [3,2] │ [11.5,12.949999809265137] │ └────────┴───────────────────────────┘ ``` + + +## Related Content + +- Blog: [Using Aggregate Combinators in ClickHouse](https://clickhouse.com/blog/aggregate-functions-combinators-in-clickhouse-for-arrays-maps-and-states) diff --git a/docs/en/sql-reference/aggregate-functions/index.md b/docs/en/sql-reference/aggregate-functions/index.md index 56a55c6b7a0..8951ac4ee6a 100644 --- a/docs/en/sql-reference/aggregate-functions/index.md +++ b/docs/en/sql-reference/aggregate-functions/index.md @@ -10,8 +10,8 @@ Aggregate functions work in the [normal](http://www.sql-tutorial.com/sql-aggrega ClickHouse also supports: -- [Parametric aggregate functions](../../sql-reference/aggregate-functions/parametric-functions.md#aggregate_functions_parametric), which accept other parameters in addition to columns. -- [Combinators](../../sql-reference/aggregate-functions/combinators.md#aggregate_functions_combinators), which change the behavior of aggregate functions. +- [Parametric aggregate functions](../../sql-reference/aggregate-functions/parametric-functions.md#aggregate_functions_parametric), which accept other parameters in addition to columns. +- [Combinators](../../sql-reference/aggregate-functions/combinators.md#aggregate_functions_combinators), which change the behavior of aggregate functions. ## NULL Processing diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md index 40184c0aa02..1b20f74d466 100644 --- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md @@ -28,7 +28,7 @@ The functions uses [A Streaming Parallel Decision Tree Algorithm](http://jmlr.or **Returned values** -- [Array](../../sql-reference/data-types/array.md) of [Tuples](../../sql-reference/data-types/tuple.md) of the following format: +- [Array](../../sql-reference/data-types/array.md) of [Tuples](../../sql-reference/data-types/tuple.md) of the following format: ``` [(lower_1, upper_1, height_1), ... (lower_N, upper_N, height_N)] @@ -90,35 +90,35 @@ Checks whether the sequence contains an event chain that matches the pattern. sequenceMatch(pattern)(timestamp, cond1, cond2, ...) ``` -:::warning +:::note Events that occur at the same second may lay in the sequence in an undefined order affecting the result. ::: **Arguments** -- `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../sql-reference/data-types/int-uint.md) data types. +- `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../sql-reference/data-types/int-uint.md) data types. -- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes only the events described in these conditions into account. If the sequence contains data that isn’t described in a condition, the function skips them. +- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes only the events described in these conditions into account. If the sequence contains data that isn’t described in a condition, the function skips them. **Parameters** -- `pattern` — Pattern string. 
See [Pattern syntax](#sequence-function-pattern-syntax). +- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). **Returned values** -- 1, if the pattern is matched. -- 0, if the pattern isn’t matched. +- 1, if the pattern is matched. +- 0, if the pattern isn’t matched. Type: `UInt8`. **Pattern syntax** -- `(?N)` — Matches the condition argument at position `N`. Conditions are numbered in the `[1, 32]` range. For example, `(?1)` matches the argument passed to the `cond1` parameter. +- `(?N)` — Matches the condition argument at position `N`. Conditions are numbered in the `[1, 32]` range. For example, `(?1)` matches the argument passed to the `cond1` parameter. -- `.*` — Matches any number of events. You do not need conditional arguments to match this element of the pattern. +- `.*` — Matches any number of events. You do not need conditional arguments to match this element of the pattern. -- `(?t operator value)` — Sets the time in seconds that should separate two events. For example, pattern `(?1)(?t>1800)(?2)` matches events that occur more than 1800 seconds from each other. An arbitrary number of any events can lay between these events. You can use the `>=`, `>`, `<`, `<=`, `==` operators. +- `(?t operator value)` — Sets the time in seconds that should separate two events. For example, pattern `(?1)(?t>1800)(?2)` matches events that occur more than 1800 seconds from each other. An arbitrary number of any events can lay between these events. You can use the `>=`, `>`, `<`, `<=`, `==` operators. **Examples** @@ -170,13 +170,13 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM **See Also** -- [sequenceCount](#function-sequencecount) +- [sequenceCount](#function-sequencecount) ## sequenceCount(pattern)(time, cond1, cond2, …) Counts the number of event chains that matched the pattern. The function searches event chains that do not overlap. It starts to search for the next chain after the current chain is matched. -:::warning +:::note Events that occur at the same second may lay in the sequence in an undefined order affecting the result. ::: @@ -186,17 +186,17 @@ sequenceCount(pattern)(timestamp, cond1, cond2, ...) **Arguments** -- `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../sql-reference/data-types/int-uint.md) data types. +- `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../sql-reference/data-types/int-uint.md) data types. -- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes only the events described in these conditions into account. If the sequence contains data that isn’t described in a condition, the function skips them. +- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes only the events described in these conditions into account. If the sequence contains data that isn’t described in a condition, the function skips them. **Parameters** -- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). +- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). **Returned values** -- Number of non-overlapping event chains that are matched. 
+- Number of non-overlapping event chains that are matched. Type: `UInt64`. @@ -229,7 +229,7 @@ SELECT sequenceCount('(?1).*(?2)')(time, number = 1, number = 2) FROM t **See Also** -- [sequenceMatch](#function-sequencematch) +- [sequenceMatch](#function-sequencematch) ## windowFunnel @@ -237,11 +237,11 @@ Searches for event chains in a sliding time window and calculates the maximum nu The function works according to the algorithm: -- The function searches for data that triggers the first condition in the chain and sets the event counter to 1. This is the moment when the sliding window starts. +- The function searches for data that triggers the first condition in the chain and sets the event counter to 1. This is the moment when the sliding window starts. -- If events from the chain occur sequentially within the window, the counter is incremented. If the sequence of events is disrupted, the counter isn’t incremented. +- If events from the chain occur sequentially within the window, the counter is incremented. If the sequence of events is disrupted, the counter isn’t incremented. -- If the data has multiple event chains at varying points of completion, the function will only output the size of the longest chain. +- If the data has multiple event chains at varying points of completion, the function will only output the size of the longest chain. **Syntax** ``` sql windowFunnel(window, [mode, [mode, ... ]])(timestamp, cond1, cond2, ..., condN) ``` **Arguments** -- `timestamp` — Name of the column containing the timestamp. Data types supported: [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) and other unsigned integer types (note that even though timestamp supports the `UInt64` type, it’s value can’t exceed the Int64 maximum, which is 2^63 - 1). -- `cond` — Conditions or data describing the chain of events. [UInt8](../../sql-reference/data-types/int-uint.md). +- `timestamp` — Name of the column containing the timestamp. Data types supported: [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) and other unsigned integer types (note that even though timestamp supports the `UInt64` type, its value can’t exceed the Int64 maximum, which is 2^63 - 1). +- `cond` — Conditions or data describing the chain of events. [UInt8](../../sql-reference/data-types/int-uint.md). **Parameters** -- `window` — Length of the sliding window, it is the time interval between the first and the last condition. The unit of `window` depends on the `timestamp` itself and varies. Determined using the expression `timestamp of cond1 <= timestamp of cond2 <= ...
<= timestamp of condN <= timestamp of cond1 + window`. +- `mode` — Optional argument. One or more modes can be set. + - `'strict_deduplication'` — If the same condition holds for the sequence of events, then such a repeating event interrupts further processing. + - `'strict_order'` — Don't allow other events to intervene. E.g. in the case of `A->B->D->C`, it stops finding `A->B->C` at the `D` and the max event level is 2. + - `'strict_increase'` — Apply conditions only to events with strictly increasing timestamps. **Returned value** @@ -341,14 +341,14 @@ retention(cond1, cond2, ..., cond32); **Arguments** -- `cond` — An expression that returns a `UInt8` result (1 or 0). +- `cond` — An expression that returns a `UInt8` result (1 or 0). **Returned value** The array of 1 or 0. -- 1 — Condition was met for the event. -- 0 — Condition wasn’t met for the event. +- 1 — Condition was met for the event. +- 0 — Condition wasn’t met for the event. Type: `UInt8`. @@ -481,9 +481,9 @@ Result: Where: -- `r1`- the number of unique visitors who visited the site during 2020-01-01 (the `cond1` condition). -- `r2`- the number of unique visitors who visited the site during a specific time period between 2020-01-01 and 2020-01-02 (`cond1` and `cond2` conditions). -- `r3`- the number of unique visitors who visited the site during a specific time period between 2020-01-01 and 2020-01-03 (`cond1` and `cond3` conditions). +- `r1` — the number of unique visitors who visited the site during 2020-01-01 (the `cond1` condition). +- `r2` — the number of unique visitors who visited the site during a specific time period between 2020-01-01 and 2020-01-02 (`cond1` and `cond2` conditions). +- `r3` — the number of unique visitors who visited the site during a specific time period between 2020-01-01 and 2020-01-03 (`cond1` and `cond3` conditions). ## uniqUpTo(N)(x) @@ -524,11 +524,11 @@ sequenceNextNode(direction, base)(timestamp, event_column, base_condition, event **Parameters** -- `direction` — Used to navigate to directions. +- `direction` — Used to set the direction of navigation. - forward — Moving forward. - backward — Moving backward. -- `base` — Used to set the base point. +- `base` — Used to set the base point. - head — Set the base point to the first event. - tail — Set the base point to the last event. - first_match — Set the base point to the first matched `event1`. @@ -536,15 +536,15 @@ sequenceNextNode(direction, base)(timestamp, event_column, base_condition, event **Arguments** -- `timestamp` — Name of the column containing the timestamp. Data types supported: [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) and other unsigned integer types. -- `event_column` — Name of the column containing the value of the next event to be returned.
Data types supported: [String](../../sql-reference/data-types/string.md) and [Nullable(String)](../../sql-reference/data-types/nullable.md). +- `base_condition` — Condition that the base point must fulfill. +- `event1`, `event2`, ... — Conditions describing the chain of events. [UInt8](../../sql-reference/data-types/int-uint.md). **Returned values** -- `event_column[next_index]` — If the pattern is matched and next value exists. -- `NULL` - If the pattern isn’t matched or next value doesn't exist. +- `event_column[next_index]` — If the pattern is matched and the next value exists. +- `NULL` — If the pattern isn’t matched or the next value doesn't exist. Type: [Nullable(String)](../../sql-reference/data-types/nullable.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md b/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md index 88a56463de1..9fbc21910f8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md +++ b/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md @@ -13,7 +13,7 @@ anyHeavy(column) **Arguments** -- `column` – The column name. +- `column` – The column name. **Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmax.md b/docs/en/sql-reference/aggregate-functions/reference/argmax.md index 7b99c831010..65c43ab04c0 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmax.md @@ -15,12 +15,12 @@ argMax(arg, val) **Arguments** -- `arg` — Argument. -- `val` — Value. +- `arg` — Argument. +- `val` — Value. **Returned value** -- `arg` value that corresponds to maximum `val` value. +- `arg` value that corresponds to the maximum `val` value. Type: matches `arg` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmin.md b/docs/en/sql-reference/aggregate-functions/reference/argmin.md index 945dda5e46d..a7c21e3f15b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmin.md @@ -15,12 +15,12 @@ argMin(arg, val) **Arguments** -- `arg` — Argument. -- `val` — Value. +- `arg` — Argument. +- `val` — Value. **Returned value** -- `arg` value that corresponds to minimum `val` value. +- `arg` value that corresponds to the minimum `val` value. Type: matches `arg` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/avg.md b/docs/en/sql-reference/aggregate-functions/reference/avg.md index dd37fe62b95..5463d8a1874 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/avg.md +++ b/docs/en/sql-reference/aggregate-functions/reference/avg.md @@ -15,12 +15,12 @@ avg(x) **Arguments** -- `x` — input values, must be [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), or [Decimal](../../../sql-reference/data-types/decimal.md). +- `x` — Input values; must be [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), or [Decimal](../../../sql-reference/data-types/decimal.md). **Returned value** -- The arithmetic mean, always as [Float64](../../../sql-reference/data-types/float.md). -- `NaN` if the input parameter `x` is empty. +- The arithmetic mean, always as [Float64](../../../sql-reference/data-types/float.md). +- `NaN` if the input parameter `x` is empty.
**Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md b/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md index 00dffdc33d2..99d3bac763d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md @@ -15,8 +15,8 @@ avgWeighted(x, weight) **Arguments** -- `x` — Values. -- `weight` — Weights of the values. +- `x` — Values. +- `weight` — Weights of the values. `x` and `weight` must both be [Integer](../../../sql-reference/data-types/int-uint.md), @@ -26,8 +26,8 @@ but may have different types. **Returned value** -- `NaN` if all the weights are equal to 0 or the supplied weights parameter is empty. -- Weighted mean otherwise. +- `NaN` if all the weights are equal to 0 or the supplied weights parameter is empty. +- Weighted mean otherwise. **Return type** is always [Float64](../../../sql-reference/data-types/float.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/contingency.md b/docs/en/sql-reference/aggregate-functions/reference/contingency.md index e75537778fe..1b53ca1528f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/contingency.md +++ b/docs/en/sql-reference/aggregate-functions/reference/contingency.md @@ -5,7 +5,7 @@ sidebar_position: 350 # contingency -The `contingency` function calculates the [contingency coefficient](https://en.wikipedia.org/wiki/Contingency_table#Cram%C3%A9r's_V_and_the_contingency_coefficient_C), a value that measures the association between two columns in a table. The computation is similar to [the `cramersV` function](./cramersv) but with a different denominator in the square root. +The `contingency` function calculates the [contingency coefficient](https://en.wikipedia.org/wiki/Contingency_table#Cram%C3%A9r's_V_and_the_contingency_coefficient_C), a value that measures the association between two columns in a table. The computation is similar to [the `cramersV` function](./cramersv.md) but with a different denominator in the square root. **Syntax** @@ -16,11 +16,11 @@ contingency(column1, column2) **Arguments** -- `column1` and `column2` are the columns to be compared +- `column1` and `column2` are the columns to be compared. **Returned value** -- a value between 0 to 1. The larger the result, the closer the association of the two columns. +- a value between 0 and 1. The larger the result, the closer the association of the two columns. **Return type** is always [Float64](../../../sql-reference/data-types/float.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/count.md b/docs/en/sql-reference/aggregate-functions/reference/count.md index 356f731ff16..a98c8e50174 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/count.md +++ b/docs/en/sql-reference/aggregate-functions/reference/count.md @@ -9,20 +9,20 @@ Counts the number of rows or not-NULL values. ClickHouse supports the following syntaxes for `count`: -- `count(expr)` or `COUNT(DISTINCT expr)`. -- `count()` or `COUNT(*)`. The `count()` syntax is ClickHouse-specific. +- `count(expr)` or `COUNT(DISTINCT expr)`. +- `count()` or `COUNT(*)`. The `count()` syntax is ClickHouse-specific. **Arguments** The function can take: -- Zero parameters. -- One [expression](../../../sql-reference/syntax.md#syntax-expressions). +- Zero parameters. +- One [expression](../../../sql-reference/syntax.md#syntax-expressions). **Returned value** -- If the function is called without parameters it counts the number of rows.
-- If the [expression](../../../sql-reference/syntax.md#syntax-expressions) is passed, then the function counts how many times this expression returned not null. If the expression returns a [Nullable](../../../sql-reference/data-types/nullable.md)-type value, then the result of `count` stays not `Nullable`. The function returns 0 if the expression returned `NULL` for all the rows. +- If the function is called without parameters, it counts the number of rows. +- If the [expression](../../../sql-reference/syntax.md#syntax-expressions) is passed, then the function counts how many times this expression returned a not-NULL value. If the expression returns a [Nullable](../../../sql-reference/data-types/nullable.md)-type value, the result of `count` still stays not `Nullable`. The function returns 0 if the expression returned `NULL` for all the rows. In both cases the type of the returned value is [UInt64](../../../sql-reference/data-types/int-uint.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/cramersv.md b/docs/en/sql-reference/aggregate-functions/reference/cramersv.md index fa37e3b5781..f412724ea08 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/cramersv.md +++ b/docs/en/sql-reference/aggregate-functions/reference/cramersv.md @@ -15,11 +15,11 @@ cramersV(column1, column2) **Arguments** -- `column1` and `column2` are the columns to be compared +- `column1` and `column2` are the columns to be compared. **Returned value** -- a value between 0 (corresponding to no association between the columns' values) to 1 (complete association). +- a value between 0 (corresponding to no association between the columns' values) and 1 (complete association). **Return type** is always [Float64](../../../sql-reference/data-types/float.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md b/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md index 51524033147..8e577efbc4d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md +++ b/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md @@ -6,7 +6,7 @@ sidebar_position: 352 # cramersVBiasCorrected -Cramér's V is a measure of association between two columns in a table. The result of the [`cramersV` function](./cramersv) ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. The function can be heavily biased, so this version of Cramér's V uses the [bias correction](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V#Bias_correction). +Cramér's V is a measure of association between two columns in a table. The result of the [`cramersV` function](./cramersv.md) ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. The function can be heavily biased, so this version of Cramér's V uses the [bias correction](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V#Bias_correction). @@ -18,11 +18,11 @@ cramersVBiasCorrected(column1, column2) **Arguments** -- `column1` and `column2` are the columns to be compared +- `column1` and `column2` are the columns to be compared. **Returned value** -- a value between 0 (corresponding to no association between the columns' values) to 1 (complete association). +- a value between 0 (corresponding to no association between the columns' values) and 1 (complete association).
**Return type** is always [Float64](../../../sql-reference/data-types/float.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/deltasum.md b/docs/en/sql-reference/aggregate-functions/reference/deltasum.md index d5d9e9369a4..37d9d08cbdb 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/deltasum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/deltasum.md @@ -19,7 +19,7 @@ deltaSum(value) **Arguments** -- `value` — Input values, must be [Integer](../../data-types/int-uint.md) or [Float](../../data-types/float.md) type. +- `value` — Input values, must be [Integer](../../data-types/int-uint.md) or [Float](../../data-types/float.md) type. **Returned value** @@ -71,4 +71,4 @@ Result: ## See Also -- [runningDifference](../../functions/other-functions.md#other_functions-runningdifference) +- [runningDifference](../../functions/other-functions.md#other_functions-runningdifference) diff --git a/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md b/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md index e08e69b7cf6..c51d86389b0 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md @@ -6,7 +6,7 @@ title: deltaSumTimestamp Adds the difference between consecutive rows. If the difference is negative, it is ignored. -This function is primarily for [materialized views](../../../sql-reference/statements/create/view.md#materialized) that are ordered by some time bucket-aligned timestamp, for example, a `toStartOfMinute` bucket. Because the rows in such a materialized view will all have the same timestamp, it is impossible for them to be merged in the "right" order. This function keeps track of the `timestamp` of the values it's seen, so it's possible to order the states correctly during merging. +This function is primarily for [materialized views](../../../sql-reference/statements/create/view.md#materialized) that store data ordered by some time bucket-aligned timestamp, for example, a `toStartOfMinute` bucket. Because the rows in such a materialized view will all have the same timestamp, it is impossible for them to be merged in the correct order, without storing the original, unrounded timestamp value. The `deltaSumTimestamp` function keeps track of the original `timestamp` of the values it's seen, so the values (states) of the function are correctly computed during merging of parts. To calculate the delta sum across an ordered collection you can simply use the [deltaSum](../../../sql-reference/aggregate-functions/reference/deltasum.md#agg_functions-deltasum) function. @@ -18,12 +18,12 @@ deltaSumTimestamp(value, timestamp) **Arguments** -- `value` — Input values, must be some [Integer](../../data-types/int-uint.md) type or [Float](../../data-types/float.md) type or a [Date](../../data-types/date.md) or [DateTime](../../data-types/datetime.md). -- `timestamp` — The parameter for order values, must be some [Integer](../../data-types/int-uint.md) type or [Float](../../data-types/float.md) type or a [Date](../../data-types/date.md) or [DateTime](../../data-types/datetime.md). +- `value` — Input values, must be some [Integer](../../data-types/int-uint.md) type or [Float](../../data-types/float.md) type or a [Date](../../data-types/date.md) or [DateTime](../../data-types/datetime.md). 
+- `timestamp` — The parameter used to order values; must be some [Integer](../../data-types/int-uint.md) type or [Float](../../data-types/float.md) type or a [Date](../../data-types/date.md) or [DateTime](../../data-types/datetime.md). **Returned value** -- Accumulated differences between consecutive values, ordered by the `timestamp` parameter. +- Accumulated differences between consecutive values, ordered by the `timestamp` parameter. Type: [Integer](../../data-types/int-uint.md) or [Float](../../data-types/float.md) or [Date](../../data-types/date.md) or [DateTime](../../data-types/datetime.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/entropy.md b/docs/en/sql-reference/aggregate-functions/reference/entropy.md index d86f4f4197a..fc8d627ecab 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/entropy.md +++ b/docs/en/sql-reference/aggregate-functions/reference/entropy.md @@ -15,11 +15,11 @@ entropy(val) **Arguments** -- `val` — Column of values of any type. +- `val` — Column of values of any type. **Returned value** -- Shannon entropy. +- Shannon entropy. Type: [Float64](../../../sql-reference/data-types/float.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md b/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md index 2587bc5533f..a8203c6b3f4 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md +++ b/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md @@ -11,19 +11,19 @@ sidebar_title: exponentialMovingAverage **Syntax** ```sql -exponentialMovingAverage(x)(value, timestamp) +exponentialMovingAverage(x)(value, timeunit) ``` -Each `value` corresponds to the determinate `timestamp`. The half-life `x` is the time lag at which the exponential weights decay by one-half. The function returns a weighted average: the older the time point, the less weight the corresponding value is considered to be. +Each `value` corresponds to a specific `timeunit`. The half-life `x` is the time lag at which the exponential weights decay by one-half. The function returns a weighted average: the older the time point, the less weight the corresponding value carries. **Arguments** -- `value` — Value. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). -- `timestamp` — Timestamp. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `value` — Value. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `timeunit` — Timeunit. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). A timeunit is not a timestamp in seconds; it is an index of the time interval, which can be calculated using [intDiv](../../functions/arithmetic-functions.md#intdiva-b). **Parameters** -- `x` — Half-life period. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `x` — Half-life period.
[Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). **Returned values** @@ -148,3 +148,58 @@ Result: │ 1 │ 49 │ 0.825 │ █████████████████████████████████████████▎│ └───────┴──────┴──────────────────────┴────────────────────────────────────────────┘ ``` + +```sql +CREATE TABLE data +ENGINE = Memory AS +SELECT + 10 AS value, + toDateTime('2020-01-01') + (3600 * number) AS time +FROM numbers_mt(10); + + +-- Calculate timeunit using intDiv +SELECT + value, + time, + exponentialMovingAverage(1)(value, intDiv(toUInt32(time), 3600)) OVER (ORDER BY time ASC) AS res, + intDiv(toUInt32(time), 3600) AS timeunit +FROM data +ORDER BY time ASC; + +┌─value─┬────────────────time─┬─────────res─┬─timeunit─┐ +│ 10 │ 2020-01-01 00:00:00 │ 5 │ 438288 │ +│ 10 │ 2020-01-01 01:00:00 │ 7.5 │ 438289 │ +│ 10 │ 2020-01-01 02:00:00 │ 8.75 │ 438290 │ +│ 10 │ 2020-01-01 03:00:00 │ 9.375 │ 438291 │ +│ 10 │ 2020-01-01 04:00:00 │ 9.6875 │ 438292 │ +│ 10 │ 2020-01-01 05:00:00 │ 9.84375 │ 438293 │ +│ 10 │ 2020-01-01 06:00:00 │ 9.921875 │ 438294 │ +│ 10 │ 2020-01-01 07:00:00 │ 9.9609375 │ 438295 │ +│ 10 │ 2020-01-01 08:00:00 │ 9.98046875 │ 438296 │ +│ 10 │ 2020-01-01 09:00:00 │ 9.990234375 │ 438297 │ +└───────┴─────────────────────┴─────────────┴──────────┘ + + +-- Calculate timeunit using toRelativeHourNum +SELECT + value, + time, + exponentialMovingAverage(1)(value, toRelativeHourNum(time)) OVER (ORDER BY time ASC) AS res, + toRelativeHourNum(time) AS timeunit +FROM data +ORDER BY time ASC; + +┌─value─┬────────────────time─┬─────────res─┬─timeunit─┐ +│ 10 │ 2020-01-01 00:00:00 │ 5 │ 438288 │ +│ 10 │ 2020-01-01 01:00:00 │ 7.5 │ 438289 │ +│ 10 │ 2020-01-01 02:00:00 │ 8.75 │ 438290 │ +│ 10 │ 2020-01-01 03:00:00 │ 9.375 │ 438291 │ +│ 10 │ 2020-01-01 04:00:00 │ 9.6875 │ 438292 │ +│ 10 │ 2020-01-01 05:00:00 │ 9.84375 │ 438293 │ +│ 10 │ 2020-01-01 06:00:00 │ 9.921875 │ 438294 │ +│ 10 │ 2020-01-01 07:00:00 │ 9.9609375 │ 438295 │ +│ 10 │ 2020-01-01 08:00:00 │ 9.98046875 │ 438296 │ +│ 10 │ 2020-01-01 09:00:00 │ 9.990234375 │ 438297 │ +└───────┴─────────────────────┴─────────────┴──────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/first_value.md b/docs/en/sql-reference/aggregate-functions/reference/first_value.md new file mode 100644 index 00000000000..f343ca3f66c --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/first_value.md @@ -0,0 +1,55 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/first_value +sidebar_position: 7 +--- + +# first_value + +Selects the first encountered value, similar to `any`, but it can accept NULL. + +## examples + +```sql +insert into test_data (a,b) values (1,null), (2,3), (4, 5), (6,null) +``` + +### example1 +The NULL value is ignored by default. +```sql +select first_value(b) from test_data +``` + +```text +┌─first_value_ignore_nulls(b)─┐ +│ 3 │ +└─────────────────────────────┘ + +``` + +### example2 +The NULL value is ignored. +```sql +select first_value(b) ignore nulls from test_data +``` + +```text +┌─first_value_ignore_nulls(b)─┐ +│ 3 │ +└─────────────────────────────┘ + +``` + +### example3 +The NULL value is accepted.
+```sql +select first_value(b) respect nulls from test_data +``` + +```text + +┌─first_value_respect_nulls(b)─┐ +│ ᴺᵁᴸᴸ │ +└──────────────────────────────┘ +``` + + diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md b/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md index aafa643a972..d745e8a0e7a 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md @@ -15,19 +15,19 @@ groupArrayInsertAt(default_x, size)(x, pos) If in one query several values are inserted into the same position, the function behaves in the following ways: -- If a query is executed in a single thread, the first one of the inserted values is used. -- If a query is executed in multiple threads, the resulting value is an undetermined one of the inserted values. +- If a query is executed in a single thread, the first one of the inserted values is used. +- If a query is executed in multiple threads, the resulting value is an undetermined one of the inserted values. **Arguments** -- `x` — Value to be inserted. [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in one of the [supported data types](../../../sql-reference/data-types/index.md). -- `pos` — Position at which the specified element `x` is to be inserted. Index numbering in the array starts from zero. [UInt32](../../../sql-reference/data-types/int-uint.md#uint-ranges). -- `default_x` — Default value for substituting in empty positions. Optional parameter. [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in the data type configured for the `x` parameter. If `default_x` is not defined, the [default values](../../../sql-reference/statements/create/table.md#create-default-values) are used. -- `size` — Length of the resulting array. Optional parameter. When using this parameter, the default value `default_x` must be specified. [UInt32](../../../sql-reference/data-types/int-uint.md#uint-ranges). +- `x` — Value to be inserted. [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in one of the [supported data types](../../../sql-reference/data-types/index.md). +- `pos` — Position at which the specified element `x` is to be inserted. Index numbering in the array starts from zero. [UInt32](../../../sql-reference/data-types/int-uint.md#uint-ranges). +- `default_x` — Default value for substituting in empty positions. Optional parameter. [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in the data type configured for the `x` parameter. If `default_x` is not defined, the [default values](../../../sql-reference/statements/create/table.md#create-default-values) are used. +- `size` — Length of the resulting array. Optional parameter. When using this parameter, the default value `default_x` must be specified. [UInt32](../../../sql-reference/data-types/int-uint.md#uint-ranges). **Returned value** -- Array with inserted values. +- Array with inserted values. Type: [Array](../../../sql-reference/data-types/array.md#data-type-array). 
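To make the interaction of `default_x`, `size`, `x`, and `pos` concrete, here is a hedged sketch; the query and its expected output are worked out from the argument descriptions above rather than copied from this page, so treat them as assumptions to verify:

```sql
-- Insert each value at the position given by `pos` (here `number * 2`),
-- pad the empty positions with the default value '-',
-- and cap the resulting array length at 5.
SELECT groupArrayInsertAt('-', 5)(toString(number), number * 2) AS arr
FROM numbers(3);

-- Expected by the semantics described above: ['0','-','1','-','2']
```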
diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md index 8fa1939e7d3..32c0608afeb 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md @@ -16,12 +16,12 @@ The function can take the window size as a parameter. If left unspecified, the f **Arguments** -- `numbers_for_summing` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in a numeric data type value. -- `window_size` — Size of the calculation window. +- `numbers_for_summing` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in a numeric data type value. +- `window_size` — Size of the calculation window. **Returned values** -- Array of the same size and type as the input data. +- Array of the same size and type as the input data. The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero). It truncates the decimal places insignificant for the resulting data type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md index a51857418c6..6f2a60dd080 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md @@ -16,12 +16,12 @@ The function can take the window size as a parameter. If left unspecified, the f **Arguments** -- `numbers_for_summing` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in a numeric data type value. -- `window_size` — Size of the calculation window. +- `numbers_for_summing` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in a numeric data type value. +- `window_size` — Size of the calculation window. **Returned values** -- Array of the same size and type as the input data. +- Array of the same size and type as the input data. **Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md index 26c41c6636b..393087161df 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md @@ -15,13 +15,13 @@ groupArraySample(max_size[, seed])(x) **Arguments** -- `max_size` — Maximum size of the resulting array. [UInt64](../../data-types/int-uint.md). -- `seed` — Seed for the random number generator. Optional. [UInt64](../../data-types/int-uint.md). Default value: `123456`. -- `x` — Argument (column name or expression). +- `max_size` — Maximum size of the resulting array. [UInt64](../../data-types/int-uint.md). +- `seed` — Seed for the random number generator. Optional. [UInt64](../../data-types/int-uint.md). Default value: `123456`. +- `x` — Argument (column name or expression). **Returned values** -- Array of randomly selected `x` arguments. +- Array of randomly selected `x` arguments. Type: [Array](../../data-types/array.md). 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md index f89e3796aaa..5f57407a419 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md @@ -13,11 +13,11 @@ groupBitAnd(expr) **Arguments** -`expr` – An expression that results in `UInt*` type. +`expr` – An expression that results in `UInt*` or `Int*` type. **Return value** -Value of the `UInt*` type. +Value of the `UInt*` or `Int*` type. **Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md index 75b34d9c5a3..59be69540b0 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md @@ -13,11 +13,11 @@ groupBitOr(expr) **Arguments** -`expr` – An expression that results in `UInt*` type. +`expr` – An expression that results in `UInt*` or `Int*` type. **Returned value** -Value of the `UInt*` type. +Value of the `UInt*` or `Int*` type. **Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md index ca6fb9f8352..b00876a2fdf 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md @@ -13,11 +13,11 @@ groupBitXor(expr) **Arguments** -`expr` – An expression that results in `UInt*` type. +`expr` – An expression that results in `UInt*` or `Int*` type. **Return value** -Value of the `UInt*` type. +Value of the `UInt*` or `Int*` type. 
**Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index bd8e72e0fec..50208352f38 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -9,75 +9,77 @@ toc_hidden: true Standard aggregate functions: -- [count](../../../sql-reference/aggregate-functions/reference/count.md) -- [min](../../../sql-reference/aggregate-functions/reference/min.md) -- [max](../../../sql-reference/aggregate-functions/reference/max.md) -- [sum](../../../sql-reference/aggregate-functions/reference/sum.md) -- [avg](../../../sql-reference/aggregate-functions/reference/avg.md) -- [any](../../../sql-reference/aggregate-functions/reference/any.md) -- [stddevPop](../../../sql-reference/aggregate-functions/reference/stddevpop.md) -- [stddevSamp](../../../sql-reference/aggregate-functions/reference/stddevsamp.md) -- [varPop](../../../sql-reference/aggregate-functions/reference/varpop.md) -- [varSamp](../../../sql-reference/aggregate-functions/reference/varsamp.md) -- [covarPop](../../../sql-reference/aggregate-functions/reference/covarpop.md) -- [covarSamp](../../../sql-reference/aggregate-functions/reference/covarsamp.md) +- [count](../../../sql-reference/aggregate-functions/reference/count.md) +- [min](../../../sql-reference/aggregate-functions/reference/min.md) +- [max](../../../sql-reference/aggregate-functions/reference/max.md) +- [sum](../../../sql-reference/aggregate-functions/reference/sum.md) +- [avg](../../../sql-reference/aggregate-functions/reference/avg.md) +- [any](../../../sql-reference/aggregate-functions/reference/any.md) +- [stddevPop](../../../sql-reference/aggregate-functions/reference/stddevpop.md) +- [stddevSamp](../../../sql-reference/aggregate-functions/reference/stddevsamp.md) +- [varPop](../../../sql-reference/aggregate-functions/reference/varpop.md) +- [varSamp](../../../sql-reference/aggregate-functions/reference/varsamp.md) +- [covarPop](../../../sql-reference/aggregate-functions/reference/covarpop.md) +- [covarSamp](../../../sql-reference/aggregate-functions/reference/covarsamp.md) ClickHouse-specific aggregate functions: -- [anyHeavy](../../../sql-reference/aggregate-functions/reference/anyheavy.md) -- [anyLast](../../../sql-reference/aggregate-functions/reference/anylast.md) -- [argMin](../../../sql-reference/aggregate-functions/reference/argmin.md) -- [argMax](../../../sql-reference/aggregate-functions/reference/argmax.md) -- [avgWeighted](../../../sql-reference/aggregate-functions/reference/avgweighted.md) -- [topK](../../../sql-reference/aggregate-functions/reference/topk.md) -- [topKWeighted](../../../sql-reference/aggregate-functions/reference/topkweighted.md) -- [groupArray](../../../sql-reference/aggregate-functions/reference/grouparray.md) -- [groupArrayLast](../../../sql-reference/aggregate-functions/reference/grouparraylast.md) -- [groupUniqArray](../../../sql-reference/aggregate-functions/reference/groupuniqarray.md) -- [groupArrayInsertAt](../../../sql-reference/aggregate-functions/reference/grouparrayinsertat.md) -- [groupArrayMovingAvg](../../../sql-reference/aggregate-functions/reference/grouparraymovingavg.md) -- [groupArrayMovingSum](../../../sql-reference/aggregate-functions/reference/grouparraymovingsum.md) -- [groupBitAnd](../../../sql-reference/aggregate-functions/reference/groupbitand.md) -- [groupBitOr](../../../sql-reference/aggregate-functions/reference/groupbitor.md) -- 
[groupBitXor](../../../sql-reference/aggregate-functions/reference/groupbitxor.md) -- [groupBitmap](../../../sql-reference/aggregate-functions/reference/groupbitmap.md) -- [groupBitmapAnd](../../../sql-reference/aggregate-functions/reference/groupbitmapand.md) -- [groupBitmapOr](../../../sql-reference/aggregate-functions/reference/groupbitmapor.md) -- [groupBitmapXor](../../../sql-reference/aggregate-functions/reference/groupbitmapxor.md) -- [sumWithOverflow](../../../sql-reference/aggregate-functions/reference/sumwithoverflow.md) -- [sumMap](../../../sql-reference/aggregate-functions/reference/summap.md) -- [minMap](../../../sql-reference/aggregate-functions/reference/minmap.md) -- [maxMap](../../../sql-reference/aggregate-functions/reference/maxmap.md) -- [skewSamp](../../../sql-reference/aggregate-functions/reference/skewsamp.md) -- [skewPop](../../../sql-reference/aggregate-functions/reference/skewpop.md) -- [kurtSamp](../../../sql-reference/aggregate-functions/reference/kurtsamp.md) -- [kurtPop](../../../sql-reference/aggregate-functions/reference/kurtpop.md) -- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md) -- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md) -- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md) -- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md) -- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md) -- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md) -- [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md) -- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md) -- [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md) -- [quantileExactLow](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactlow) -- [quantileExactHigh](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexacthigh) -- [quantileExactWeighted](../../../sql-reference/aggregate-functions/reference/quantileexactweighted.md) -- [quantileTiming](../../../sql-reference/aggregate-functions/reference/quantiletiming.md) -- [quantileTimingWeighted](../../../sql-reference/aggregate-functions/reference/quantiletimingweighted.md) -- [quantileDeterministic](../../../sql-reference/aggregate-functions/reference/quantiledeterministic.md) -- [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md) -- [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md) -- [quantileBFloat16](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16) -- [quantileBFloat16Weighted](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16weighted) -- [simpleLinearRegression](../../../sql-reference/aggregate-functions/reference/simplelinearregression.md) -- [stochasticLinearRegression](../../../sql-reference/aggregate-functions/reference/stochasticlinearregression.md) -- [stochasticLogisticRegression](../../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md) -- [categoricalInformationValue](../../../sql-reference/aggregate-functions/reference/categoricalinformationvalue.md) -- [contingency](./contingency.md) -- [cramersV](./cramersv.md) -- [cramersVBiasCorrected](./cramersvbiascorrected.md) -- [theilsU](./theilsu.md) -- 
[maxIntersections](./maxintersections.md) -- [maxIntersectionsPosition](./maxintersectionsposition.md) +- [anyHeavy](../../../sql-reference/aggregate-functions/reference/anyheavy.md) +- [anyLast](../../../sql-reference/aggregate-functions/reference/anylast.md) +- [first_value](../../../sql-reference/aggregate-functions/reference/first_value.md) +- [last_value](../../../sql-reference/aggregate-functions/reference/last_value.md) +- [argMin](../../../sql-reference/aggregate-functions/reference/argmin.md) +- [argMax](../../../sql-reference/aggregate-functions/reference/argmax.md) +- [avgWeighted](../../../sql-reference/aggregate-functions/reference/avgweighted.md) +- [topK](../../../sql-reference/aggregate-functions/reference/topk.md) +- [topKWeighted](../../../sql-reference/aggregate-functions/reference/topkweighted.md) +- [groupArray](../../../sql-reference/aggregate-functions/reference/grouparray.md) +- [groupArrayLast](../../../sql-reference/aggregate-functions/reference/grouparraylast.md) +- [groupUniqArray](../../../sql-reference/aggregate-functions/reference/groupuniqarray.md) +- [groupArrayInsertAt](../../../sql-reference/aggregate-functions/reference/grouparrayinsertat.md) +- [groupArrayMovingAvg](../../../sql-reference/aggregate-functions/reference/grouparraymovingavg.md) +- [groupArrayMovingSum](../../../sql-reference/aggregate-functions/reference/grouparraymovingsum.md) +- [groupBitAnd](../../../sql-reference/aggregate-functions/reference/groupbitand.md) +- [groupBitOr](../../../sql-reference/aggregate-functions/reference/groupbitor.md) +- [groupBitXor](../../../sql-reference/aggregate-functions/reference/groupbitxor.md) +- [groupBitmap](../../../sql-reference/aggregate-functions/reference/groupbitmap.md) +- [groupBitmapAnd](../../../sql-reference/aggregate-functions/reference/groupbitmapand.md) +- [groupBitmapOr](../../../sql-reference/aggregate-functions/reference/groupbitmapor.md) +- [groupBitmapXor](../../../sql-reference/aggregate-functions/reference/groupbitmapxor.md) +- [sumWithOverflow](../../../sql-reference/aggregate-functions/reference/sumwithoverflow.md) +- [sumMap](../../../sql-reference/aggregate-functions/reference/summap.md) +- [minMap](../../../sql-reference/aggregate-functions/reference/minmap.md) +- [maxMap](../../../sql-reference/aggregate-functions/reference/maxmap.md) +- [skewSamp](../../../sql-reference/aggregate-functions/reference/skewsamp.md) +- [skewPop](../../../sql-reference/aggregate-functions/reference/skewpop.md) +- [kurtSamp](../../../sql-reference/aggregate-functions/reference/kurtsamp.md) +- [kurtPop](../../../sql-reference/aggregate-functions/reference/kurtpop.md) +- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md) +- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md) +- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md) +- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md) +- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md) +- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md) +- [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md) +- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md) +- [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md) +- [quantileExactLow](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactlow) +- 
[quantileExactHigh](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexacthigh) +- [quantileExactWeighted](../../../sql-reference/aggregate-functions/reference/quantileexactweighted.md) +- [quantileTiming](../../../sql-reference/aggregate-functions/reference/quantiletiming.md) +- [quantileTimingWeighted](../../../sql-reference/aggregate-functions/reference/quantiletimingweighted.md) +- [quantileDeterministic](../../../sql-reference/aggregate-functions/reference/quantiledeterministic.md) +- [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md) +- [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md) +- [quantileBFloat16](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16) +- [quantileBFloat16Weighted](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16weighted) +- [simpleLinearRegression](../../../sql-reference/aggregate-functions/reference/simplelinearregression.md) +- [stochasticLinearRegression](../../../sql-reference/aggregate-functions/reference/stochasticlinearregression.md) +- [stochasticLogisticRegression](../../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md) +- [categoricalInformationValue](../../../sql-reference/aggregate-functions/reference/categoricalinformationvalue.md) +- [contingency](./contingency.md) +- [cramersV](./cramersv.md) +- [cramersVBiasCorrected](./cramersvbiascorrected.md) +- [theilsU](./theilsu.md) +- [maxIntersections](./maxintersections.md) +- [maxIntersectionsPosition](./maxintersectionsposition.md) diff --git a/docs/en/sql-reference/aggregate-functions/reference/intervalLengthSum.md b/docs/en/sql-reference/aggregate-functions/reference/intervalLengthSum.md index e161a8c5754..5990345b765 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/intervalLengthSum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/intervalLengthSum.md @@ -15,8 +15,8 @@ intervalLengthSum(start, end) **Arguments** -- `start` — The starting value of the interval. [Int32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Int64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Float32](../../../sql-reference/data-types/float.md#float32-float64), [Float64](../../../sql-reference/data-types/float.md#float32-float64), [DateTime](../../../sql-reference/data-types/datetime.md#data_type-datetime) or [Date](../../../sql-reference/data-types/date.md#data_type-date). -- `end` — The ending value of the interval. 
[Int32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Int64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Float32](../../../sql-reference/data-types/float.md#float32-float64), [Float64](../../../sql-reference/data-types/float.md#float32-float64), [DateTime](../../../sql-reference/data-types/datetime.md#data_type-datetime) or [Date](../../../sql-reference/data-types/date.md#data_type-date). +- `start` — The starting value of the interval. [Int32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Int64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Float32](../../../sql-reference/data-types/float.md#float32-float64), [Float64](../../../sql-reference/data-types/float.md#float32-float64), [DateTime](../../../sql-reference/data-types/datetime.md#data_type-datetime) or [Date](../../../sql-reference/data-types/date.md#data_type-date). +- `end` — The ending value of the interval. [Int32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Int64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Float32](../../../sql-reference/data-types/float.md#float32-float64), [Float64](../../../sql-reference/data-types/float.md#float32-float64), [DateTime](../../../sql-reference/data-types/datetime.md#data_type-datetime) or [Date](../../../sql-reference/data-types/date.md#data_type-date). :::note Arguments must be of the same data type. Otherwise, an exception will be thrown. @@ -24,7 +24,7 @@ Arguments must be of the same data type. Otherwise, an exception will be thrown. **Returned value** -- Total length of union of all ranges (segments on numeric axis). Depending on the type of the argument, the return value may be [UInt64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64) type. +- Total length of union of all ranges (segments on numeric axis). Depending on the type of the argument, the return value may be [UInt64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64) type. 
**Examples** diff --git a/docs/en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md b/docs/en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md new file mode 100644 index 00000000000..3da9645181e --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md @@ -0,0 +1,118 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest +sidebar_position: 300 +sidebar_label: kolmogorovSmirnovTest +--- + +# kolmogorovSmirnovTest + +Applies the Kolmogorov-Smirnov test to samples from two populations. + +**Syntax** + +``` sql +kolmogorovSmirnovTest([alternative, computation_method])(sample_data, sample_index) +``` + +Values of both samples are in the `sample_data` column. If `sample_index` equals 0, then the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population. +Samples must belong to continuous, one-dimensional probability distributions. + +**Arguments** + +- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md). + +**Parameters** + +- `alternative` — alternative hypothesis. (Optional, default: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md). + Let F(x) and G(x) be the CDFs of the first and second distributions respectively. + - `'two-sided'` + The null hypothesis is that samples come from the same distribution, i.e. F(x) = G(x) for all x. + And the alternative is that the distributions are not identical. + - `'greater'` + The null hypothesis is that values in the first sample are *stochastically smaller* than those in the second one, + i.e. the CDF of the first distribution lies above and hence to the left of that for the second one. + Which in fact means that F(x) >= G(x) for all x. And the alternative in this case is that F(x) < G(x) for at least one x. + - `'less'`. + The null hypothesis is that values in the first sample are *stochastically greater* than those in the second one, + i.e. the CDF of the first distribution lies below and hence to the right of that for the second one. + Which in fact means that F(x) <= G(x) for all x. And the alternative in this case is that F(x) > G(x) for at least one x. +- `computation_method` — the method used to compute the p-value. (Optional, default: `'auto'`.) [String](../../../sql-reference/data-types/string.md). + - `'exact'` - calculation is performed using the precise probability distribution of the test statistic. Computationally intensive and wasteful except for small samples. + - `'asymp'` (`'asymptotic'`) - calculation is performed using an approximation. For large sample sizes, the exact and asymptotic p-values are very similar. + - `'auto'` - the `'exact'` method is used when the maximum number of samples is less than 10'000. + + +**Returned values** + +[Tuple](../../../sql-reference/data-types/tuple.md) with two elements: + +- calculated statistic. [Float64](../../../sql-reference/data-types/float.md). +- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
+ + +**Example** + +Query: + +``` sql +SELECT kolmogorovSmirnovTest('less', 'exact')(value, num) +FROM +( + SELECT + randNormal(0, 10) AS value, + 0 AS num + FROM numbers(10000) + UNION ALL + SELECT + randNormal(0, 10) AS value, + 1 AS num + FROM numbers(10000) +) +``` + +Result: + +``` text +┌─kolmogorovSmirnovTest('less', 'exact')(value, num)─┐ +│ (0.009899999999999996,0.37528595205132287) │ +└────────────────────────────────────────────────────┘ +``` + +Note: +The p-value is greater than 0.05 (for a confidence level of 95%), so the null hypothesis is not rejected. + + +Query: + +``` sql +SELECT kolmogorovSmirnovTest('two-sided', 'exact')(value, num) +FROM +( + SELECT + randStudentT(10) AS value, + 0 AS num + FROM numbers(100) + UNION ALL + SELECT + randNormal(0, 10) AS value, + 1 AS num + FROM numbers(100) +) +``` + +Result: + +``` text +┌─kolmogorovSmirnovTest('two-sided', 'exact')(value, num)─┐ +│ (0.4100000000000002,6.61735760482795e-8) │ +└─────────────────────────────────────────────────────────┘ +``` + +Note: +The p-value is less than 0.05 (for a confidence level of 95%), so the null hypothesis is rejected. + + +**See Also** + +- [Kolmogorov–Smirnov test](https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test) diff --git a/docs/en/sql-reference/aggregate-functions/reference/last_value.md b/docs/en/sql-reference/aggregate-functions/reference/last_value.md new file mode 100644 index 00000000000..7b6e14e4a55 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/last_value.md @@ -0,0 +1,53 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/last_value +sidebar_position: 8 +--- + +# last_value + +Selects the last encountered value, similar to `anyLast`, but it can accept NULL. + + +## examples + +```sql +insert into test_data (a,b) values (1,null), (2,3), (4, 5), (6,null) +``` + +### example1 +The NULL value is ignored by default. +```sql +select last_value(b) from test_data +``` + +```text +┌─last_value_ignore_nulls(b)─┐ +│ 5 │ +└────────────────────────────┘ +``` + +### example2 +The NULL value is ignored. +```sql +select last_value(b) ignore nulls from test_data +``` + +```text +┌─last_value_ignore_nulls(b)─┐ +│ 5 │ +└────────────────────────────┘ +``` + +### example3 +The NULL value is accepted. +```sql +select last_value(b) respect nulls from test_data +``` + +```text +┌─last_value_respect_nulls(b)─┐ +│ ᴺᵁᴸᴸ │ +└─────────────────────────────┘ +``` + + diff --git a/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md index 70c75b898c6..af744f445d9 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md @@ -19,23 +19,23 @@ The null hypothesis is that two populations are stochastically equal. Also one-s **Arguments** -- `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). -- `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md). +- `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md). **Parameters** -- `alternative` — alternative hypothesis.
(Optional, default: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md). - - `'two-sided'`; - - `'greater'`; - - `'less'`. -- `continuity_correction` — if not 0 then continuity correction in the normal approximation for the p-value is applied. (Optional, default: 1.) [UInt64](../../../sql-reference/data-types/int-uint.md). +- `alternative` — alternative hypothesis. (Optional, default: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md). + - `'two-sided'`; + - `'greater'`; + - `'less'`. +- `continuity_correction` — if not 0 then continuity correction in the normal approximation for the p-value is applied. (Optional, default: 1.) [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** [Tuple](../../../sql-reference/data-types/tuple.md) with two elements: -- calculated U-statistic. [Float64](../../../sql-reference/data-types/float.md). -- calculated p-value. [Float64](../../../sql-reference/data-types/float.md). +- calculated U-statistic. [Float64](../../../sql-reference/data-types/float.md). +- calculated p-value. [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -69,5 +69,5 @@ Result: **See Also** -- [Mann–Whitney U test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test) -- [Stochastic ordering](https://en.wikipedia.org/wiki/Stochastic_ordering) +- [Mann–Whitney U test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test) +- [Stochastic ordering](https://en.wikipedia.org/wiki/Stochastic_ordering) diff --git a/docs/en/sql-reference/aggregate-functions/reference/meanztest.md b/docs/en/sql-reference/aggregate-functions/reference/meanztest.md index 89742ca1509..1cf2bebf26f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/meanztest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/meanztest.md @@ -19,23 +19,23 @@ The null hypothesis is that means of populations are equal. Normal distribution **Arguments** -- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). -- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md). +- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md). **Parameters** -- `population_variance_x` — Variance for population x. [Float](../../../sql-reference/data-types/float.md). -- `population_variance_y` — Variance for population y. [Float](../../../sql-reference/data-types/float.md). -- `confidence_level` — Confidence level in order to calculate confidence intervals. [Float](../../../sql-reference/data-types/float.md). +- `population_variance_x` — Variance for population x. [Float](../../../sql-reference/data-types/float.md). +- `population_variance_y` — Variance for population y. [Float](../../../sql-reference/data-types/float.md). +- `confidence_level` — Confidence level in order to calculate confidence intervals. [Float](../../../sql-reference/data-types/float.md). **Returned values** [Tuple](../../../sql-reference/data-types/tuple.md) with four elements: -- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md). -- calculated p-value. [Float64](../../../sql-reference/data-types/float.md). 
-- calculated confidence-interval-low. [Float64](../../../sql-reference/data-types/float.md). -- calculated confidence-interval-high. [Float64](../../../sql-reference/data-types/float.md). +- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md). +- calculated p-value. [Float64](../../../sql-reference/data-types/float.md). +- calculated confidence-interval-low. [Float64](../../../sql-reference/data-types/float.md). +- calculated confidence-interval-high. [Float64](../../../sql-reference/data-types/float.md). **Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/median.md b/docs/en/sql-reference/aggregate-functions/reference/median.md index 5ac3c6ef721..f20b23a0c8b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/median.md +++ b/docs/en/sql-reference/aggregate-functions/reference/median.md @@ -9,15 +9,15 @@ The `median*` functions are the aliases for the corresponding `quantile*` functi Functions: -- `median` — Alias for [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile). -- `medianDeterministic` — Alias for [quantileDeterministic](../../../sql-reference/aggregate-functions/reference/quantiledeterministic.md#quantiledeterministic). -- `medianExact` — Alias for [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexact). -- `medianExactWeighted` — Alias for [quantileExactWeighted](../../../sql-reference/aggregate-functions/reference/quantileexactweighted.md#quantileexactweighted). -- `medianTiming` — Alias for [quantileTiming](../../../sql-reference/aggregate-functions/reference/quantiletiming.md#quantiletiming). -- `medianTimingWeighted` — Alias for [quantileTimingWeighted](../../../sql-reference/aggregate-functions/reference/quantiletimingweighted.md#quantiletimingweighted). -- `medianTDigest` — Alias for [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md#quantiletdigest). -- `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md#quantiletdigestweighted). -- `medianBFloat16` — Alias for [quantileBFloat16](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16). +- `median` — Alias for [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile). +- `medianDeterministic` — Alias for [quantileDeterministic](../../../sql-reference/aggregate-functions/reference/quantiledeterministic.md#quantiledeterministic). +- `medianExact` — Alias for [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexact). +- `medianExactWeighted` — Alias for [quantileExactWeighted](../../../sql-reference/aggregate-functions/reference/quantileexactweighted.md#quantileexactweighted). +- `medianTiming` — Alias for [quantileTiming](../../../sql-reference/aggregate-functions/reference/quantiletiming.md#quantiletiming). +- `medianTimingWeighted` — Alias for [quantileTimingWeighted](../../../sql-reference/aggregate-functions/reference/quantiletimingweighted.md#quantiletimingweighted). +- `medianTDigest` — Alias for [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md#quantiletdigest). +- `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md#quantiletdigestweighted). 
+- `medianBFloat16` — Alias for [quantileBFloat16](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16).
 
**Example**
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantile.md b/docs/en/sql-reference/aggregate-functions/reference/quantile.md
index 414574e00e6..91b6b1b0d80 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/quantile.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantile.md
@@ -23,18 +23,18 @@ Alias: `median`.
 
**Arguments**
 
-- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
-- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
+- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
+- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
 
**Returned value**
 
-- Approximate quantile of the specified level.
+- Approximate quantile of the specified level.
 
Type:
 
-- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
-- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
-- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
+- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
+- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
+- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
 
**Example**
 
@@ -65,5 +65,5 @@ Result:
 
**See Also**
 
-- [median](../../../sql-reference/aggregate-functions/reference/median.md#median)
-- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles)
+- [median](../../../sql-reference/aggregate-functions/reference/median.md#median)
+- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles)
diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileGK.md b/docs/en/sql-reference/aggregate-functions/reference/quantileGK.md
new file mode 100644
index 00000000000..7352781d126
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantileGK.md
@@ -0,0 +1,76 @@
+---
+slug: /en/sql-reference/aggregate-functions/reference/quantileGK
+sidebar_position: 204
+---
+
+# quantileGK
+
+Computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [Greenwald-Khanna](http://infolab.stanford.edu/~datar/courses/cs361a/papers/quantiles.pdf) algorithm. The Greenwald-Khanna algorithm computes quantiles over a stream of data in a highly efficient manner. It was introduced by Michael Greenwald and Sanjeev Khanna in 2001. 
It is widely used in databases and big data systems where computing accurate quantiles on a large stream of data in real time is necessary. The algorithm is highly efficient, taking only O(log n) space and O(log log n) time per item (where n is the size of the input). It is also highly accurate, providing an approximate quantile value with high probability.
+
+`quantileGK` differs from other quantile functions in ClickHouse in that it lets the user control the accuracy of the approximate quantile result.
+
+**Syntax**
+
+``` sql
+quantileGK(accuracy, level)(expr)
+```
+
+Alias: `medianGK`.
+
+**Arguments**
+
+- `accuracy` — Accuracy of quantile. Constant positive integer. A larger accuracy value means a smaller error. For example, if the accuracy argument is set to 100, the computed quantile will have an error no greater than 1% with high probability. There is a trade-off between the accuracy of the computed quantiles and the computational complexity of the algorithm. A larger accuracy requires more memory and computational resources to compute the quantile accurately, while a smaller accuracy argument allows for a faster and more memory-efficient computation at the cost of slightly lower accuracy.
+
+- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
+
+- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
+
+
+**Returned value**
+
+- Quantile of the specified level and accuracy.
+
+
+Type:
+
+- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
+- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
+- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
+
+**Example**
+
+``` sql
+SELECT quantileGK(1, 0.25)(number + 1)
+FROM numbers(1000)
+
+┌─quantileGK(1, 0.25)(plus(number, 1))─┐
+│                                    1 │
+└──────────────────────────────────────┘
+
+SELECT quantileGK(10, 0.25)(number + 1)
+FROM numbers(1000)
+
+┌─quantileGK(10, 0.25)(plus(number, 1))─┐
+│                                   156 │
+└───────────────────────────────────────┘
+
+SELECT quantileGK(100, 0.25)(number + 1)
+FROM numbers(1000)
+
+┌─quantileGK(100, 0.25)(plus(number, 1))─┐
+│                                    251 │
+└────────────────────────────────────────┘
+
+SELECT quantileGK(1000, 0.25)(number + 1)
+FROM numbers(1000)
+
+┌─quantileGK(1000, 0.25)(plus(number, 1))─┐
+│                                     249 │
+└─────────────────────────────────────────┘
+```
+
+
+**See Also**
+
+- [median](../../../sql-reference/aggregate-functions/reference/median.md#median)
+- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles)
diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md b/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md
index 94feca9e69e..4377f2f1b17 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md
@@ -18,15 +18,15 @@ Alias: `medianBFloat16`
 
**Arguments**
 
-- `expr` — Column with numeric data. 
[Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md). **Parameters** -- `level` — Level of quantile. Optional. Possible values are in the range from 0 to 1. Default value: 0.5. [Float](../../../sql-reference/data-types/float.md). +- `level` — Level of quantile. Optional. Possible values are in the range from 0 to 1. Default value: 0.5. [Float](../../../sql-reference/data-types/float.md). **Returned value** -- Approximate quantile of the specified level. +- Approximate quantile of the specified level. Type: [Float64](../../../sql-reference/data-types/float.md#float32-float64). @@ -64,5 +64,5 @@ Like `quantileBFloat16` but takes into account the weight of each sequence membe **See Also** -- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) -- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) +- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) +- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md b/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md index 26826afd126..7235c47da70 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md @@ -21,19 +21,19 @@ Alias: `medianDeterministic`. **Arguments** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). -- `determinator` — Number whose hash is used instead of a random number generator in the reservoir sampling algorithm to make the result of sampling deterministic. As a determinator you can use any deterministic positive number, for example, a user id or an event id. If the same determinator value occures too often, the function works incorrectly. +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +- `determinator` — Number whose hash is used instead of a random number generator in the reservoir sampling algorithm to make the result of sampling deterministic. As a determinator you can use any deterministic positive number, for example, a user id or an event id. If the same determinator value occurs too often, the function works incorrectly. **Returned value** -- Approximate quantile of the specified level. +- Approximate quantile of the specified level. Type: -- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. 
-- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. **Example** @@ -64,5 +64,5 @@ Result: **See Also** -- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) -- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) +- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) +- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md index 04fe597a34e..d7d7413c283 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md @@ -23,18 +23,18 @@ Alias: `medianExact`. **Arguments** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). **Returned value** -- Quantile of the specified level. +- Quantile of the specified level. Type: -- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. **Example** @@ -82,18 +82,18 @@ Alias: `medianExactLow`. **Arguments** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). 
+- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). **Returned value** -- Quantile of the specified level. +- Quantile of the specified level. Type: -- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. **Example** @@ -132,18 +132,18 @@ Alias: `medianExactHigh`. **Arguments** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). **Returned value** -- Quantile of the specified level. +- Quantile of the specified level. Type: -- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. **Example** @@ -179,21 +179,21 @@ quantileExactExclusive(level)(expr) **Arguments** -- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). 
**Parameters** -- `level` — Level of quantile. Optional. Possible values: (0, 1) — bounds not included. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). [Float](../../../sql-reference/data-types/float.md). +- `level` — Level of quantile. Optional. Possible values: (0, 1) — bounds not included. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). [Float](../../../sql-reference/data-types/float.md). **Returned value** -- Quantile of the specified level. +- Quantile of the specified level. Type: -- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. **Example** @@ -231,21 +231,21 @@ quantileExactInclusive(level)(expr) **Arguments** -- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). **Parameters** -- `level` — Level of quantile. Optional. Possible values: [0, 1] — bounds included. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). [Float](../../../sql-reference/data-types/float.md). +- `level` — Level of quantile. Optional. Possible values: [0, 1] — bounds included. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). [Float](../../../sql-reference/data-types/float.md). **Returned value** -- Quantile of the specified level. +- Quantile of the specified level. Type: -- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. 
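+
+The practical difference between the exclusive and inclusive variants can be seen on a tiny data set. A minimal sketch (it assumes the `values` table function is available; the expected results follow the conventional exclusive/inclusive interpolation definitions):
+
+``` sql
+-- For x = 1..4 at level 0.25, the conventional definitions give
+-- 1.25 for the exclusive variant and 1.75 for the inclusive one.
+SELECT
+    quantileExactExclusive(0.25)(x) AS excl,
+    quantileExactInclusive(0.25)(x) AS incl
+FROM values('x UInt32', 1, 2, 3, 4)
+```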
**Example** @@ -267,5 +267,5 @@ Result: **See Also** -- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) -- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) +- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) +- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md index 16e6438a3bf..34def8d7411 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md @@ -21,19 +21,19 @@ Alias: `medianExactWeighted`. **Arguments** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). -- `weight` — Column with weights of sequence members. Weight is a number of value occurrences. +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +- `weight` — Column with weights of sequence members. Weight is a number of value occurrences. **Returned value** -- Quantile of the specified level. +- Quantile of the specified level. Type: -- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. 
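+
+Because a weight is simply a number of occurrences, a weighted call behaves like the unweighted call over each value repeated `weight` times. A minimal sketch (assuming the `values` table function is available):
+
+``` sql
+-- Value 1 with weight 3 and value 2 with weight 1 act like the
+-- multiset [1, 1, 1, 2], so the median is expected to be 1.
+SELECT quantileExactWeighted(0.5)(x, w)
+FROM values('x UInt32, w UInt32', (1, 3), (2, 1))
+```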
**Example** @@ -64,5 +64,5 @@ Result: **See Also** -- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) -- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) +- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) +- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md index 07fcd187217..41d2627fb7b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md @@ -21,19 +21,19 @@ Alias: `medianInterpolatedWeighted`. **Arguments** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). -- `weight` — Column with weights of sequence members. Weight is a number of value occurrences. +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +- `weight` — Column with weights of sequence members. Weight is a number of value occurrences. **Returned value** -- Quantile of the specified level. +- Quantile of the specified level. Type: -- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. 
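+
+Unlike `quantileExactWeighted`, this function linearly interpolates between adjacent values, so the result does not have to be one of the input values. A minimal sketch (assuming the `values` table function is available):
+
+``` sql
+-- With the values 1 and 2 at equal weight, the interpolated median
+-- is expected to fall between the two inputs rather than snap to one of them.
+SELECT quantileInterpolatedWeighted(0.5)(x, w)
+FROM values('x UInt32, w UInt32', (1, 1), (2, 1))
+```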
**Example** @@ -64,5 +64,5 @@ Result: **See Also** -- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) -- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) +- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) +- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md index 57151915336..38db39d2eec 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md @@ -29,21 +29,21 @@ quantilesExactExclusive(level1, level2, ...)(expr) **Arguments** -- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). **Parameters** -- `level` — Levels of quantiles. Possible values: (0, 1) — bounds not included. [Float](../../../sql-reference/data-types/float.md). +- `level` — Levels of quantiles. Possible values: (0, 1) — bounds not included. [Float](../../../sql-reference/data-types/float.md). **Returned value** -- [Array](../../../sql-reference/data-types/array.md) of quantiles of the specified levels. +- [Array](../../../sql-reference/data-types/array.md) of quantiles of the specified levels. Type of array values: -- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. **Example** @@ -81,21 +81,21 @@ quantilesExactInclusive(level1, level2, ...)(expr) **Arguments** -- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). **Parameters** -- `level` — Levels of quantiles. Possible values: [0, 1] — bounds included. [Float](../../../sql-reference/data-types/float.md). +- `level` — Levels of quantiles. Possible values: [0, 1] — bounds included. [Float](../../../sql-reference/data-types/float.md). **Returned value** -- [Array](../../../sql-reference/data-types/array.md) of quantiles of the specified levels. +- [Array](../../../sql-reference/data-types/array.md) of quantiles of the specified levels. 
Type of array values:
 
-- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
-- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
-- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
+- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
+- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
+- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
 
**Example**
 
@@ -114,3 +114,59 @@ Result:
│ [249.75,499.5,749.25,899.1,949.05,989.01,998.001] │
└─────────────────────────────────────────────────────────────────────┘
```
+
+## quantilesGK
+
+`quantilesGK` works similarly to `quantileGK` but allows calculating quantiles at several levels simultaneously and returns an array.
+
+**Syntax**
+
+``` sql
+quantilesGK(accuracy, level1, level2, ...)(expr)
+```
+
+**Returned value**
+
+- [Array](../../../sql-reference/data-types/array.md) of quantiles of the specified levels.
+
+Type of array values:
+
+- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
+- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
+- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
+
+**Example**
+
+Query:
+
+``` sql
+SELECT quantilesGK(1, 0.25, 0.5, 0.75)(number + 1)
+FROM numbers(1000)
+
+┌─quantilesGK(1, 0.25, 0.5, 0.75)(plus(number, 1))─┐
+│ [1,1,1]                                          │
+└──────────────────────────────────────────────────┘
+
+SELECT quantilesGK(10, 0.25, 0.5, 0.75)(number + 1)
+FROM numbers(1000)
+
+┌─quantilesGK(10, 0.25, 0.5, 0.75)(plus(number, 1))─┐
+│ [156,413,659]                                     │
+└───────────────────────────────────────────────────┘
+
+SELECT quantilesGK(100, 0.25, 0.5, 0.75)(number + 1)
+FROM numbers(1000)
+
+┌─quantilesGK(100, 0.25, 0.5, 0.75)(plus(number, 1))─┐
+│ [251,498,741]                                      │
+└────────────────────────────────────────────────────┘
+
+SELECT quantilesGK(1000, 0.25, 0.5, 0.75)(number + 1)
+FROM numbers(1000)
+
+┌─quantilesGK(1000, 0.25, 0.5, 0.75)(plus(number, 1))─┐
+│ [249,499,749]                                       │
+└─────────────────────────────────────────────────────┘
+```
diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md
index 5da37a4832f..796e87b02d8 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md
@@ -23,18 +23,18 @@ Alias: `medianTDigest`.
 
**Arguments**
 
-- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
-- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
+- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). 
+- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). **Returned value** -- Approximate quantile of the specified level. +- Approximate quantile of the specified level. Type: -- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. **Example** @@ -54,5 +54,5 @@ Result: **See Also** -- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) -- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) +- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) +- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md index e7abe08e39f..7b9addbbdde 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md @@ -27,19 +27,19 @@ Alias: `medianTDigestWeighted`. **Arguments** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). -- `weight` — Column with weights of sequence elements. Weight is a number of value occurrences. +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +- `weight` — Column with weights of sequence elements. Weight is a number of value occurrences. **Returned value** -- Approximate quantile of the specified level. +- Approximate quantile of the specified level. Type: -- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. 
+- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. **Example** @@ -59,5 +59,5 @@ Result: **See Also** -- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) -- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) +- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) +- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md index ead381b4497..b5b1c8a0c01 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md @@ -21,19 +21,19 @@ Alias: `medianTiming`. **Arguments** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) over a column values returning a [Float\*](../../../sql-reference/data-types/float.md)-type number. +- `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) over a column values returning a [Float\*](../../../sql-reference/data-types/float.md)-type number. - - If negative values are passed to the function, the behavior is undefined. - - If the value is greater than 30,000 (a page loading time of more than 30 seconds), it is assumed to be 30,000. + - If negative values are passed to the function, the behavior is undefined. + - If the value is greater than 30,000 (a page loading time of more than 30 seconds), it is assumed to be 30,000. **Accuracy** The calculation is accurate if: -- Total number of values does not exceed 5670. -- Total number of values exceeds 5670, but the page loading time is less than 1024ms. +- Total number of values does not exceed 5670. +- Total number of values exceeds 5670, but the page loading time is less than 1024ms. Otherwise, the result of the calculation is rounded to the nearest multiple of 16 ms. @@ -43,7 +43,7 @@ For calculating page loading time quantiles, this function is more effective and **Returned value** -- Quantile of the specified level. +- Quantile of the specified level. Type: `Float32`. 
@@ -85,5 +85,5 @@ Result: **See Also** -- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) -- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) +- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) +- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md index f65c6b1e6ec..df483aac01e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md @@ -21,21 +21,21 @@ Alias: `medianTimingWeighted`. **Arguments** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) over a column values returning a [Float\*](../../../sql-reference/data-types/float.md)-type number. +- `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) over a column values returning a [Float\*](../../../sql-reference/data-types/float.md)-type number. - If negative values are passed to the function, the behavior is undefined. - If the value is greater than 30,000 (a page loading time of more than 30 seconds), it is assumed to be 30,000. -- `weight` — Column with weights of sequence elements. Weight is a number of value occurrences. +- `weight` — Column with weights of sequence elements. Weight is a number of value occurrences. **Accuracy** The calculation is accurate if: -- Total number of values does not exceed 5670. -- Total number of values exceeds 5670, but the page loading time is less than 1024ms. +- Total number of values does not exceed 5670. +- Total number of values exceeds 5670, but the page loading time is less than 1024ms. Otherwise, the result of the calculation is rounded to the nearest multiple of 16 ms. @@ -45,7 +45,7 @@ For calculating page loading time quantiles, this function is more effective and **Returned value** -- Quantile of the specified level. +- Quantile of the specified level. Type: `Float32`. @@ -118,5 +118,5 @@ Result: **See Also** -- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) -- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) +- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) +- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md b/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md index 231eb2b091b..27f2dd124e4 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md +++ b/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md @@ -15,12 +15,12 @@ rankCorr(x, y) **Arguments** -- `x` — Arbitrary value. 
[Float32](../../../sql-reference/data-types/float.md#float32-float64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64).
-- `y` — Arbitrary value. [Float32](../../../sql-reference/data-types/float.md#float32-float64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64).
+- `x` — Arbitrary value. [Float32](../../../sql-reference/data-types/float.md#float32-float64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64).
+- `y` — Arbitrary value. [Float32](../../../sql-reference/data-types/float.md#float32-float64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64).
 
**Returned value(s)**
 
-- Returns a rank correlation coefficient of the ranks of x and y. The value of the correlation coefficient ranges from -1 to +1. If less than two arguments are passed, the function will return an exception. The value close to +1 denotes a high linear relationship, and with an increase of one random variable, the second random variable also increases. The value close to -1 denotes a high linear relationship, and with an increase of one random variable, the second random variable decreases. The value close or equal to 0 denotes no relationship between the two random variables.
+- Returns a rank correlation coefficient of the ranks of x and y. The value of the correlation coefficient ranges from -1 to +1. If fewer than two arguments are passed, the function will throw an exception. A value close to +1 denotes a high linear relationship, and as one random variable increases, the second random variable also increases. A value close to -1 denotes a high linear relationship, and as one random variable increases, the second random variable decreases. A value close or equal to 0 denotes no relationship between the two random variables.
 
Type: [Float64](../../../sql-reference/data-types/float.md#float32-float64).
 
@@ -55,4 +55,4 @@ Result:
```
 
**See Also**
-- [Spearman's rank correlation coefficient](https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient)
\ No newline at end of file
+- [Spearman's rank correlation coefficient](https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient)
\ No newline at end of file
diff --git a/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md b/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md
index a6380b78a79..bcff05ada47 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md
@@ -13,8 +13,8 @@ simpleLinearRegression(x, y)
 
Parameters:
 
-- `x` — Column with dependent variable values.
-- `y` — Column with explanatory variable values.
+- `x` — Column with explanatory variable values.
+- `y` — Column with dependent variable values.
 
Returned values:
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md
index 05ea373d4da..e21dad5b2f5 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md
@@ -21,18 +21,18 @@ sparkbar(buckets[, min_x, max_x])(x, y)
 
**Parameters**
 
-- `buckets` — The number of segments. Type: [Integer](../../../sql-reference/data-types/int-uint.md).
-- `min_x` — The interval start. Optional parameter.
-- `max_x` — The interval end. Optional parameter.
+- `buckets` — The number of segments. 
Type: [Integer](../../../sql-reference/data-types/int-uint.md). +- `min_x` — The interval start. Optional parameter. +- `max_x` — The interval end. Optional parameter. **Arguments** -- `x` — The field with values. -- `y` — The field with the frequency of values. +- `x` — The field with values. +- `y` — The field with the frequency of values. **Returned value** -- The frequency histogram. +- The frequency histogram. **Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md b/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md index 8126a80e25e..9481172c25b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md @@ -72,5 +72,5 @@ The query will return a column of predicted values. Note that first argument of **See Also** -- [stochasticLogisticRegression](../../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md#agg_functions-stochasticlogisticregression) -- [Difference between linear and logistic regressions](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression) +- [stochasticLogisticRegression](../../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md#agg_functions-stochasticlogisticregression) +- [Difference between linear and logistic regressions](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression) diff --git a/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md b/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md index 41eeb70c04f..0a040689681 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md @@ -52,5 +52,5 @@ stochasticLogisticRegression(1.0, 1.0, 10, 'SGD') **See Also** -- [stochasticLinearRegression](../../../sql-reference/aggregate-functions/reference/stochasticlinearregression.md#agg_functions-stochasticlinearregression) -- [Difference between linear and logistic regressions.](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression) +- [stochasticLinearRegression](../../../sql-reference/aggregate-functions/reference/stochasticlinearregression.md#agg_functions-stochasticlinearregression) +- [Difference between linear and logistic regressions.](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression) diff --git a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md index 5ce0c769576..29b43851f44 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md @@ -19,22 +19,22 @@ The null hypothesis is that means of populations are equal. Normal distribution **Arguments** -- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). -- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md). +- `sample_data` — Sample data. 
[Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md). **Parameters** -- `confidence_level` — Confidence level in order to calculate confidence intervals. [Float](../../../sql-reference/data-types/float.md). +- `confidence_level` — Confidence level in order to calculate confidence intervals. [Float](../../../sql-reference/data-types/float.md). **Returned values** [Tuple](../../../sql-reference/data-types/tuple.md) with two or four elements (if the optional `confidence_level` is specified): -- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md). -- calculated p-value. [Float64](../../../sql-reference/data-types/float.md). -- [calculated confidence-interval-low.] [Float64](../../../sql-reference/data-types/float.md). -- [calculated confidence-interval-high.] [Float64](../../../sql-reference/data-types/float.md). +- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md). +- calculated p-value. [Float64](../../../sql-reference/data-types/float.md). +- [calculated confidence-interval-low.] [Float64](../../../sql-reference/data-types/float.md). +- [calculated confidence-interval-high.] [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -68,5 +68,5 @@ Result: **See Also** -- [Student's t-test](https://en.wikipedia.org/wiki/Student%27s_t-test) -- [welchTTest function](welchttest.md#welchttest) +- [Student's t-test](https://en.wikipedia.org/wiki/Student%27s_t-test) +- [welchTTest function](welchttest.md#welchttest) diff --git a/docs/en/sql-reference/aggregate-functions/reference/sumcount.md b/docs/en/sql-reference/aggregate-functions/reference/sumcount.md index b7773b0d09b..a59b87022d6 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sumcount.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sumcount.md @@ -14,11 +14,11 @@ sumCount(x) **Arguments** -- `x` — Input value, must be [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), or [Decimal](../../../sql-reference/data-types/decimal.md). +- `x` — Input value, must be [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), or [Decimal](../../../sql-reference/data-types/decimal.md). **Returned value** -- Tuple `(sum, count)`, where `sum` is the sum of numbers and `count` is the number of rows with not-NULL values. +- Tuple `(sum, count)`, where `sum` is the sum of numbers and `count` is the number of rows with not-NULL values. Type: [Tuple](../../../sql-reference/data-types/tuple.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/sumkahan.md b/docs/en/sql-reference/aggregate-functions/reference/sumkahan.md index 3e0783e9ad2..1a729b18b42 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sumkahan.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sumkahan.md @@ -17,11 +17,11 @@ sumKahan(x) **Arguments** -- `x` — Input value, must be [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), or [Decimal](../../../sql-reference/data-types/decimal.md). +- `x` — Input value, must be [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), or [Decimal](../../../sql-reference/data-types/decimal.md). 
**Returned value**
 
-- the sum of numbers, with type [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), or [Decimal](../../../sql-reference/data-types/decimal.md) depends on type of input arguments
+- The sum of the numbers, with type [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), or [Decimal](../../../sql-reference/data-types/decimal.md), depending on the type of the input arguments.
 
**Example**
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/theilsu.md b/docs/en/sql-reference/aggregate-functions/reference/theilsu.md
index f2103d7862b..ef19438a53a 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/theilsu.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/theilsu.md
@@ -16,11 +16,11 @@ theilsU(column1, column2)
 
**Arguments**
 
-- `column1` and `column2` are the columns to be compared
+- `column1` and `column2` are the columns to be compared.
 
**Returned value**
 
-- a value between -1 and 1
+- A value between -1 and 1.
 
**Return type** is always [Float64](../../../sql-reference/data-types/float.md).
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/topk.md b/docs/en/sql-reference/aggregate-functions/reference/topk.md
index b025f6f6d54..bde29275f79 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/topk.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/topk.md
@@ -19,13 +19,13 @@ We recommend using the `N < 10` value; performance is reduced with large `N` val
 
**Parameters**
 
-- `N` – The number of elements to return.
+- `N` – The number of elements to return.
 
If the parameter is omitted, default value 10 is used.
 
**Arguments**
 
-- `x` – The value to calculate frequency.
+- `x` – The value whose frequency is to be calculated.
 
**Example**
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md
index 32b234fd6b8..03932e88a6a 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md
@@ -15,9 +15,9 @@ topKWeighted(N)(x, weight)
 
**Arguments**
 
-- `N` — The number of elements to return.
-- `x` — The value.
-- `weight` — The weight. Every value is accounted `weight` times for frequency calculation. [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `N` — The number of elements to return.
+- `x` — The value.
+- `weight` — The weight. Every value is accounted `weight` times for frequency calculation. [UInt64](../../../sql-reference/data-types/int-uint.md).
 
**Returned value**
 
@@ -42,4 +42,4 @@ Result:
 
**See Also**
 
-- [topK](../../../sql-reference/aggregate-functions/reference/topk.md)
+- [topK](../../../sql-reference/aggregate-functions/reference/topk.md)
diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniq.md b/docs/en/sql-reference/aggregate-functions/reference/uniq.md
index d72311b3ede..b1c8336630b 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/uniq.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/uniq.md
@@ -17,24 +17,24 @@ The function takes a variable number of parameters. Parameters can be `Tuple`, `
 
**Returned value**
 
-- A [UInt64](../../../sql-reference/data-types/int-uint.md)-type number.
+- A [UInt64](../../../sql-reference/data-types/int-uint.md)-type number.
 
**Implementation details**
 
Function:
 
-- Calculates a hash for all parameters in the aggregate, then uses it in calculations. 
+- Calculates a hash for all parameters in the aggregate, then uses it in calculations. -- Uses an adaptive sampling algorithm. For the calculation state, the function uses a sample of element hash values up to 65536. This algorithm is very accurate and very efficient on the CPU. When the query contains several of these functions, using `uniq` is almost as fast as using other aggregate functions. +- Uses an adaptive sampling algorithm. For the calculation state, the function uses a sample of up to 65536 element hash values. This algorithm is very accurate and very efficient on the CPU. When the query contains several of these functions, using `uniq` is almost as fast as using other aggregate functions. -- Provides the result deterministically (it does not depend on the query processing order). +- Provides the result deterministically (it does not depend on the query processing order). We recommend using this function in almost all scenarios. **See Also** -- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined) -- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) -- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12) -- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) -- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch) +- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined) +- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) +- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12) +- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) +- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch) diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md index f1287c6ff9b..2f3efde859d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md @@ -21,19 +21,19 @@ The function takes a variable number of parameters. Parameters can be `Tuple`, ` **Returned value** -- A number [UInt64](../../../sql-reference/data-types/int-uint.md)-type number. +- A [UInt64](../../../sql-reference/data-types/int-uint.md)-type number. **Implementation details** Function: -- Calculates a hash (64-bit hash for `String` and 32-bit otherwise) for all parameters in the aggregate, then uses it in calculations. +- Calculates a hash (64-bit hash for `String` and 32-bit otherwise) for all parameters in the aggregate, then uses it in calculations. -- Uses a combination of three algorithms: array, hash table, and HyperLogLog with an error correction table. +- Uses a combination of three algorithms: array, hash table, and HyperLogLog with an error correction table. For a small number of distinct elements, an array is used. When the set size is larger, a hash table is used. For a larger number of elements, HyperLogLog is used, which will occupy a fixed amount of memory. -- Provides the result deterministically (it does not depend on the query processing order). 
+- Provides the result deterministically (it does not depend on the query processing order). :::note Since it uses 32-bit hash for non-`String` type, the result will have very high error for cardinalities significantly larger than `UINT_MAX` (the error will rise quickly after a few tens of billions of distinct values), hence in this case you should use [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) @@ -41,14 +41,14 @@ Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) function, the `uniqCombined`: -- Consumes several times less memory. -- Calculates with several times higher accuracy. -- Usually has slightly lower performance. In some scenarios, `uniqCombined` can perform better than `uniq`, for example, with distributed queries that transmit a large number of aggregation states over the network. +- Consumes several times less memory. +- Calculates with several times higher accuracy. +- Usually has slightly lower performance. In some scenarios, `uniqCombined` can perform better than `uniq`, for example, with distributed queries that transmit a large number of aggregation states over the network. **See Also** -- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) -- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) -- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12) -- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) -- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch) +- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) +- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) +- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12) +- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) +- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch) diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md b/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md index 901c631b756..fd68a464881 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md @@ -21,7 +21,7 @@ The function takes a variable number of parameters. 
Parameters can be `Tuple`, ` **See Also** -- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) -- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniqcombined) -- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniqhll12) -- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch) +- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) +- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniqcombined) +- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniqhll12) +- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch) diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md b/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md index b598ad23df3..8594ebb3782 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md @@ -17,25 +17,25 @@ The function takes a variable number of parameters. Parameters can be `Tuple`, ` **Returned value** -- A [UInt64](../../../sql-reference/data-types/int-uint.md)-type number. +- A [UInt64](../../../sql-reference/data-types/int-uint.md)-type number. **Implementation details** Function: -- Calculates a hash for all parameters in the aggregate, then uses it in calculations. +- Calculates a hash for all parameters in the aggregate, then uses it in calculations. -- Uses the HyperLogLog algorithm to approximate the number of different argument values. +- Uses the HyperLogLog algorithm to approximate the number of different argument values. 2^12 5-bit cells are used. The size of the state is slightly more than 2.5 KB. The result is not very accurate (up to ~10% error) for small data sets (<10K elements). However, the result is fairly accurate for high-cardinality data sets (10K-100M), with a maximum error of ~1.6%. Starting from 100M, the estimation error increases, and the function will return very inaccurate results for data sets with extremely high cardinality (1B+ elements). -- Provides the determinate result (it does not depend on the query processing order). +- Provides a deterministic result (it does not depend on the query processing order). We do not recommend using this function. In most cases, use the [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) or [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined) function. 
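To see these accuracy characteristics side by side, here is a minimal sketch on generated data (illustrative only; the approximate estimates vary between versions and runs):

``` sql
-- uniqExact computes the true count; the approximate functions trade
-- accuracy for memory and CPU, as described above.
SELECT
    uniqExact(number)    AS exact,
    uniq(number)         AS uniq_estimate,
    uniqCombined(number) AS combined_estimate,
    uniqHLL12(number)    AS hll12_estimate
FROM numbers(10000000);
```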
**See Also** -- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) -- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined) -- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) -- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch) +- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) +- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined) +- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) +- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch) diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md b/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md index e2adf672909..45970f144cb 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md @@ -16,24 +16,24 @@ The function takes a variable number of parameters. Parameters can be `Tuple`, ` **Returned value** -- A [UInt64](../../../sql-reference/data-types/int-uint.md)-type number. +- A [UInt64](../../../sql-reference/data-types/int-uint.md)-type number. **Implementation details** Function: -- Calculates a hash for all parameters in the aggregate, then uses it in calculations. +- Calculates a hash for all parameters in the aggregate, then uses it in calculations. -- Uses the [KMV](https://datasketches.apache.org/docs/Theta/InverseEstimate.html) algorithm to approximate the number of different argument values. +- Uses the [KMV](https://datasketches.apache.org/docs/Theta/InverseEstimate.html) algorithm to approximate the number of different argument values. 4096 (2^12) 64-bit sketch entries are used. The size of the state is about 41 KB. -- The relative error is 3.125% (95% confidence), see the [relative error table](https://datasketches.apache.org/docs/Theta/ThetaErrorTable.html) for detail. +- The relative error is 3.125% (95% confidence), see the [relative error table](https://datasketches.apache.org/docs/Theta/ThetaErrorTable.html) for details. 
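A minimal sketch for checking that bound on generated data (assuming a build where `uniqTheta` is available, since it relies on the DataSketches library):

``` sql
-- The relative deviation from the exact count should normally stay
-- within the ~3.125% bound quoted above.
SELECT
    uniqExact(number) AS exact,
    uniqTheta(number) AS theta,
    abs(toFloat64(theta) - exact) / exact AS relative_error
FROM numbers(1000000);
```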
**See Also** -- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) -- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined) -- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) -- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12) -- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) +- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) +- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined) +- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) +- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12) +- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) diff --git a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md index ab35f8794e6..4f1085e65b4 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md @@ -19,21 +19,21 @@ The null hypothesis is that means of populations are equal. Normal distribution **Arguments** -- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). -- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md). +- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md). **Parameters** -- `confidence_level` — Confidence level in order to calculate confidence intervals. [Float](../../../sql-reference/data-types/float.md). +- `confidence_level` — Confidence level in order to calculate confidence intervals. [Float](../../../sql-reference/data-types/float.md). **Returned values** [Tuple](../../../sql-reference/data-types/tuple.md) with two or four elements (if the optional `confidence_level` is specified) -- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md). -- calculated p-value. [Float64](../../../sql-reference/data-types/float.md). -- calculated confidence-interval-low. [Float64](../../../sql-reference/data-types/float.md). -- calculated confidence-interval-high. [Float64](../../../sql-reference/data-types/float.md). +- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md). +- calculated p-value. [Float64](../../../sql-reference/data-types/float.md). +- calculated confidence-interval-low. [Float64](../../../sql-reference/data-types/float.md). +- calculated confidence-interval-high. [Float64](../../../sql-reference/data-types/float.md). 
**Example** @@ -67,5 +67,5 @@ Result: **See Also** -- [Welch's t-test](https://en.wikipedia.org/wiki/Welch%27s_t-test) -- [studentTTest function](studentttest.md#studentttest) +- [Welch's t-test](https://en.wikipedia.org/wiki/Welch%27s_t-test) +- [studentTTest function](studentttest.md#studentttest) diff --git a/docs/en/sql-reference/data-types/aggregatefunction.md b/docs/en/sql-reference/data-types/aggregatefunction.md index d8547f03714..fe6d7ebe0dc 100644 --- a/docs/en/sql-reference/data-types/aggregatefunction.md +++ b/docs/en/sql-reference/data-types/aggregatefunction.md @@ -12,9 +12,9 @@ Aggregate functions can have an implementation-defined intermediate state that c **Parameters** -- Name of the aggregate function. If the function is parametric, specify its parameters too. +- Name of the aggregate function. If the function is parametric, specify its parameters too. -- Types of the aggregate function arguments. +- Types of the aggregate function arguments. **Example** @@ -63,3 +63,8 @@ SELECT uniqMerge(state) FROM (SELECT uniqState(UserID) AS state FROM table GROUP ## Usage Example See [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) engine description. + + +## Related Content + +- Blog: [Using Aggregate Combinators in ClickHouse](https://clickhouse.com/blog/aggregate-functions-combinators-in-clickhouse-for-arrays-maps-and-states) diff --git a/docs/en/sql-reference/data-types/array.md b/docs/en/sql-reference/data-types/array.md index 707acbda760..20ce7d2ed52 100644 --- a/docs/en/sql-reference/data-types/array.md +++ b/docs/en/sql-reference/data-types/array.md @@ -46,8 +46,6 @@ SELECT [1, 2] AS x, toTypeName(x) ## Working with Data Types -The maximum size of an array is limited to one million elements. - When creating an array on the fly, ClickHouse automatically defines the argument type as the narrowest data type that can store all the listed arguments. If there are any [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable) or literal [NULL](../../sql-reference/syntax.md#null-literal) values, the type of an array element also becomes [Nullable](../../sql-reference/data-types/nullable.md). If ClickHouse couldn’t determine the data type, it generates an exception. For instance, this happens when trying to create an array with strings and numbers simultaneously (`SELECT array(1, 'a')`). 
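A minimal sketch of this inference behavior (the aliases `x` and `y` are illustrative):

``` sql
SELECT array(1, 2.5) AS x, toTypeName(x);   -- Array(Float64): the narrowest common type
SELECT array(1, NULL) AS y, toTypeName(y);  -- Array(Nullable(UInt8)) because of the NULL
-- SELECT array(1, 'a');                    -- throws: no common type for a number and a string
```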
diff --git a/docs/en/sql-reference/data-types/date.md b/docs/en/sql-reference/data-types/date.md index d43a00312dd..048466f7ae4 100644 --- a/docs/en/sql-reference/data-types/date.md +++ b/docs/en/sql-reference/data-types/date.md @@ -39,6 +39,6 @@ SELECT * FROM dt; **See Also** -- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md) -- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime) -- [`DateTime` data type](../../sql-reference/data-types/datetime.md) +- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md) +- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime) +- [`DateTime` data type](../../sql-reference/data-types/datetime.md) diff --git a/docs/en/sql-reference/data-types/date32.md b/docs/en/sql-reference/data-types/date32.md index 826dc20a96d..7cf8b1b95fe 100644 --- a/docs/en/sql-reference/data-types/date32.md +++ b/docs/en/sql-reference/data-types/date32.md @@ -35,6 +35,6 @@ SELECT * FROM new; **See Also** -- [toDate32](../../sql-reference/functions/type-conversion-functions.md#todate32) -- [toDate32OrZero](../../sql-reference/functions/type-conversion-functions.md#todate32-or-zero) -- [toDate32OrNull](../../sql-reference/functions/type-conversion-functions.md#todate32-or-null) +- [toDate32](../../sql-reference/functions/type-conversion-functions.md#todate32) +- [toDate32OrZero](../../sql-reference/functions/type-conversion-functions.md#todate32-or-zero) +- [toDate32OrNull](../../sql-reference/functions/type-conversion-functions.md#todate32-or-null) diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md index b10ceb79d13..059c6acdb9e 100644 --- a/docs/en/sql-reference/data-types/datetime.md +++ b/docs/en/sql-reference/data-types/datetime.md @@ -62,8 +62,8 @@ SELECT * FROM dt; └─────────────────────┴──────────┘ ``` -- When inserting datetime as an integer, it is treated as Unix Timestamp (UTC). `1546300800` represents `'2019-01-01 00:00:00'` UTC. However, as `timestamp` column has `Asia/Istanbul` (UTC+3) timezone specified, when outputting as string the value will be shown as `'2019-01-01 03:00:00'` -- When inserting string value as datetime, it is treated as being in column timezone. `'2019-01-01 00:00:00'` will be treated as being in `Asia/Istanbul` timezone and saved as `1546290000`. +- When inserting datetime as an integer, it is treated as Unix Timestamp (UTC). `1546300800` represents `'2019-01-01 00:00:00'` UTC. However, as the `timestamp` column has the `Asia/Istanbul` (UTC+3) timezone specified, when output as a string the value will be shown as `'2019-01-01 03:00:00'`. +- When inserting a string value as datetime, it is treated as being in the column timezone. `'2019-01-01 00:00:00'` will be treated as being in the `Asia/Istanbul` timezone and saved as `1546290000`. **2.** Filtering on `DateTime` values @@ -137,11 +137,11 @@ Time shifts for multiple days. 
Some pacific islands changed their timezone offse ## See Also -- [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md) -- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md) -- [Functions for working with arrays](../../sql-reference/functions/array-functions.md) -- [The `date_time_input_format` setting](../../operations/settings/settings.md#settings-date_time_input_format) -- [The `date_time_output_format` setting](../../operations/settings/settings.md#settings-date_time_output_format) -- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) -- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime) -- [The `Date` data type](../../sql-reference/data-types/date.md) +- [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md) +- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md) +- [Functions for working with arrays](../../sql-reference/functions/array-functions.md) +- [The `date_time_input_format` setting](../../operations/settings/settings.md#settings-date_time_input_format) +- [The `date_time_output_format` setting](../../operations/settings/settings.md#settings-date_time_output_format) +- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) +- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime) +- [The `Date` data type](../../sql-reference/data-types/date.md) diff --git a/docs/en/sql-reference/data-types/datetime64.md b/docs/en/sql-reference/data-types/datetime64.md index fa3a1eecd46..2d4035831fa 100644 --- a/docs/en/sql-reference/data-types/datetime64.md +++ b/docs/en/sql-reference/data-types/datetime64.md @@ -52,8 +52,8 @@ SELECT * FROM dt; └─────────────────────────┴──────────┘ ``` -- When inserting datetime as an integer, it is treated as an appropriately scaled Unix Timestamp (UTC). `1546300800000` (with precision 3) represents `'2019-01-01 00:00:00'` UTC. However, as `timestamp` column has `Asia/Istanbul` (UTC+3) timezone specified, when outputting as a string the value will be shown as `'2019-01-01 03:00:00'`. Inserting datetime as a decimal will treat it similarly as an integer, except the value before the decimal point is the Unix Timestamp up to and including the seconds, and after the decimal point will be treated as the precision. -- When inserting string value as datetime, it is treated as being in column timezone. `'2019-01-01 00:00:00'` will be treated as being in `Asia/Istanbul` timezone and stored as `1546290000000`. +- When inserting datetime as an integer, it is treated as an appropriately scaled Unix Timestamp (UTC). `1546300800000` (with precision 3) represents `'2019-01-01 00:00:00'` UTC. However, as the `timestamp` column has the `Asia/Istanbul` (UTC+3) timezone specified, when output as a string the value will be shown as `'2019-01-01 03:00:00'`. Inserting datetime as a decimal will treat it similarly to an integer, except that the value before the decimal point is the Unix Timestamp up to and including the seconds, and the part after the decimal point will be treated as the precision. +- When inserting a string value as datetime, it is treated as being in the column timezone. 
`'2019-01-01 00:00:00'` will be treated as being in the `Asia/Istanbul` timezone and stored as `1546290000000`. 2. Filtering on `DateTime64` values @@ -113,12 +113,12 @@ FROM dt; **See Also** -- [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md) -- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md) -- [Functions for working with arrays](../../sql-reference/functions/array-functions.md) -- [The `date_time_input_format` setting](../../operations/settings/settings.md#settings-date_time_input_format) -- [The `date_time_output_format` setting](../../operations/settings/settings.md#settings-date_time_output_format) -- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) -- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime) -- [`Date` data type](../../sql-reference/data-types/date.md) -- [`DateTime` data type](../../sql-reference/data-types/datetime.md) +- [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md) +- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md) +- [Functions for working with arrays](../../sql-reference/functions/array-functions.md) +- [The `date_time_input_format` setting](../../operations/settings/settings.md#settings-date_time_input_format) +- [The `date_time_output_format` setting](../../operations/settings/settings.md#settings-date_time_output_format) +- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) +- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime) +- [`Date` data type](../../sql-reference/data-types/date.md) +- [`DateTime` data type](../../sql-reference/data-types/datetime.md) diff --git a/docs/en/sql-reference/data-types/decimal.md b/docs/en/sql-reference/data-types/decimal.md index 7e4cb5ecaac..8df8b2519e3 100644 --- a/docs/en/sql-reference/data-types/decimal.md +++ b/docs/en/sql-reference/data-types/decimal.md @@ -10,8 +10,8 @@ Signed fixed-point numbers that keep precision during add, subtract and multiply ## Parameters -- P - precision. Valid range: \[ 1 : 76 \]. Determines how many decimal digits number can have (including fraction). -- S - scale. Valid range: \[ 0 : P \]. Determines how many decimal digits fraction can have. +- P - precision. Valid range: \[ 1 : 76 \]. Determines how many decimal digits the number can have (including the fraction). +- S - scale. Valid range: \[ 0 : P \]. Determines how many decimal digits the fraction can have. Depending on P parameter value Decimal(P, S) is a synonym for: - P from \[ 1 : 9 \] - for Decimal32(S) @@ -21,10 +21,10 @@ Depending on P parameter value Decimal(P, S) is a synonym for: ## Decimal Value Ranges -- Decimal32(S) - ( -1 \* 10^(9 - S), 1 \* 10^(9 - S) ) -- Decimal64(S) - ( -1 \* 10^(18 - S), 1 \* 10^(18 - S) ) -- Decimal128(S) - ( -1 \* 10^(38 - S), 1 \* 10^(38 - S) ) -- Decimal256(S) - ( -1 \* 10^(76 - S), 1 \* 10^(76 - S) ) +- Decimal32(S) - ( -1 \* 10^(9 - S), 1 \* 10^(9 - S) ) +- Decimal64(S) - ( -1 \* 10^(18 - S), 1 \* 10^(18 - S) ) +- Decimal128(S) - ( -1 \* 10^(38 - S), 1 \* 10^(38 - S) ) +- Decimal256(S) - ( -1 \* 10^(76 - S), 1 \* 10^(76 - S) ) For example, Decimal32(4) can contain numbers from -99999.9999 to 99999.9999 with 0.0001 step. 
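As an illustration of these bounds, a minimal sketch for `Decimal32(4)`:

``` sql
SELECT
    toDecimal32(99999.9999, 4) AS near_max,  -- close to the upper bound of Decimal32(4)
    toDecimal32(0.0001, 4)     AS step,      -- the smallest positive increment at scale 4
    toTypeName(near_max)       AS type;      -- Decimal(9, 4)
```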
@@ -38,16 +38,16 @@ Because modern CPUs do not support 128-bit integers natively, operations on Deci Binary operations on Decimal result in wider result type (with any order of arguments). -- `Decimal64(S1) <op> Decimal32(S2) -> Decimal64(S)` -- `Decimal128(S1) <op> Decimal32(S2) -> Decimal128(S)` -- `Decimal128(S1) <op> Decimal64(S2) -> Decimal128(S)` -- `Decimal256(S1) <op> Decimal<32|64|128>(S2) -> Decimal256(S)` +- `Decimal64(S1) <op> Decimal32(S2) -> Decimal64(S)` +- `Decimal128(S1) <op> Decimal32(S2) -> Decimal128(S)` +- `Decimal128(S1) <op> Decimal64(S2) -> Decimal128(S)` +- `Decimal256(S1) <op> Decimal<32|64|128>(S2) -> Decimal256(S)` Rules for scale: -- add, subtract: S = max(S1, S2). -- multiply: S = S1 + S2. -- divide: S = S1. +- add, subtract: S = max(S1, S2). +- multiply: S = S1 + S2. +- divide: S = S1. For similar operations between Decimal and integers, the result is Decimal of the same size as an argument. @@ -109,5 +109,5 @@ DB::Exception: Can't compare. ``` **See also** -- [isDecimalOverflow](../../sql-reference/functions/other-functions.md#is-decimal-overflow) -- [countDigits](../../sql-reference/functions/other-functions.md#count-digits) +- [isDecimalOverflow](../../sql-reference/functions/other-functions.md#is-decimal-overflow) +- [countDigits](../../sql-reference/functions/other-functions.md#count-digits) diff --git a/docs/en/sql-reference/data-types/domains/index.md b/docs/en/sql-reference/data-types/domains/index.md index f6f14186dcc..13ec1735d4d 100644 --- a/docs/en/sql-reference/data-types/domains/index.md +++ b/docs/en/sql-reference/data-types/domains/index.md @@ -10,20 +10,20 @@ Domains are special-purpose types that add some extra features atop of existing You can use domains anywhere corresponding base type can be used, for example: -- Create a column of a domain type -- Read/write values from/to domain column -- Use it as an index if a base type can be used as an index -- Call functions with values of domain column +- Create a column of a domain type +- Read/write values from/to domain column +- Use it as an index if a base type can be used as an index +- Call functions with values of domain column ### Extra Features of Domains -- Explicit column type name in `SHOW CREATE TABLE` or `DESCRIBE TABLE` -- Input from human-friendly format with `INSERT INTO domain_table(domain_column) VALUES(...)` -- Output to human-friendly format for `SELECT domain_column FROM domain_table` -- Loading data from an external source in the human-friendly format: `INSERT INTO domain_table FORMAT CSV ...` +- Explicit column type name in `SHOW CREATE TABLE` or `DESCRIBE TABLE` +- Input from human-friendly format with `INSERT INTO domain_table(domain_column) VALUES(...)` +- Output to human-friendly format for `SELECT domain_column FROM domain_table` +- Loading data from an external source in the human-friendly format: `INSERT INTO domain_table FORMAT CSV ...` ### Limitations -- Can’t convert index column of base type to domain type via `ALTER TABLE`. -- Can’t implicitly convert string values into domain values when inserting data from another column or table. -- Domain adds no constrains on stored values. +- Can’t convert index column of base type to domain type via `ALTER TABLE`. +- Can’t implicitly convert string values into domain values when inserting data from another column or table. +- Domains add no constraints on stored values. 
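A minimal sketch of the human-friendly input and output described above, using the `IPv4` domain (the table and data are illustrative):

``` sql
-- IPv4 is stored as UInt32 internally, but accepts and displays
-- the familiar dotted-quad text form.
CREATE TABLE hits (url String, from IPv4) ENGINE = MergeTree() ORDER BY url;
INSERT INTO hits (url, from) VALUES ('https://clickhouse.com', '116.106.34.242');
SELECT from, toTypeName(from) FROM hits;
```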
diff --git a/docs/en/sql-reference/data-types/enum.md b/docs/en/sql-reference/data-types/enum.md index a685b341414..02e73a0360e 100644 --- a/docs/en/sql-reference/data-types/enum.md +++ b/docs/en/sql-reference/data-types/enum.md @@ -12,8 +12,8 @@ Named values can be declared as `'string' = integer` pairs or `'string'` names . ClickHouse supports: -- 8-bit `Enum`. It can contain up to 256 values enumerated in the `[-128, 127]` range. -- 16-bit `Enum`. It can contain up to 65536 values enumerated in the `[-32768, 32767]` range. +- 8-bit `Enum`. It can contain up to 256 values enumerated in the `[-128, 127]` range. +- 16-bit `Enum`. It can contain up to 65536 values enumerated in the `[-32768, 32767]` range. ClickHouse automatically chooses the type of `Enum` when data is inserted. You can also use `Enum8` or `Enum16` types to be sure in the size of storage. diff --git a/docs/en/sql-reference/data-types/fixedstring.md b/docs/en/sql-reference/data-types/fixedstring.md index e28f957e49c..a56b3fccbc1 100644 --- a/docs/en/sql-reference/data-types/fixedstring.md +++ b/docs/en/sql-reference/data-types/fixedstring.md @@ -20,17 +20,17 @@ The `FixedString` type is efficient when data has the length of precisely `N` by Examples of the values that can be efficiently stored in `FixedString`-typed columns: -- The binary representation of IP addresses (`FixedString(16)` for IPv6). -- Language codes (ru_RU, en_US … ). -- Currency codes (USD, RUB … ). -- Binary representation of hashes (`FixedString(16)` for MD5, `FixedString(32)` for SHA256). +- The binary representation of IP addresses (`FixedString(16)` for IPv6). +- Language codes (ru_RU, en_US … ). +- Currency codes (USD, RUB … ). +- Binary representation of hashes (`FixedString(16)` for MD5, `FixedString(32)` for SHA256). To store UUID values, use the [UUID](../../sql-reference/data-types/uuid.md) data type. When inserting the data, ClickHouse: -- Complements a string with null bytes if the string contains fewer than `N` bytes. -- Throws the `Too large value for FixedString(N)` exception if the string contains more than `N` bytes. +- Complements a string with null bytes if the string contains fewer than `N` bytes. +- Throws the `Too large value for FixedString(N)` exception if the string contains more than `N` bytes. When selecting the data, ClickHouse does not remove the null bytes at the end of the string. If you use the `WHERE` clause, you should add null bytes manually to match the `FixedString` value. The following example illustrates how to use the `WHERE` clause with `FixedString`. diff --git a/docs/en/sql-reference/data-types/float.md b/docs/en/sql-reference/data-types/float.md index 38c414fa8cd..3b55271f707 100644 --- a/docs/en/sql-reference/data-types/float.md +++ b/docs/en/sql-reference/data-types/float.md @@ -6,7 +6,7 @@ sidebar_label: Float32, Float64 # Float32, Float64 -:::warning +:::note If you need accurate calculations, in particular if you work with financial or business data requiring a high precision you should consider using Decimal instead. Floats might lead to inaccurate results as illustrated below: ``` @@ -30,19 +30,19 @@ SELECT sumKahan(my_float), sumKahan(my_decimal) FROM float_vs_decimal; Types are equivalent to types of C: -- `Float32` — `float`. -- `Float64` — `double`. +- `Float32` — `float`. +- `Float64` — `double`. Aliases: -- `Float32` — `FLOAT`. -- `Float64` — `DOUBLE`. +- `Float32` — `FLOAT`. +- `Float64` — `DOUBLE`. When creating tables, numeric parameters for floating point numbers can be set (e.g. 
`FLOAT(12)`, `FLOAT(15, 22)`, `DOUBLE(12)`, `DOUBLE(4, 18)`), but ClickHouse ignores them. ## Using Floating-point Numbers -- Computations with floating-point numbers might produce a rounding error. +- Computations with floating-point numbers might produce a rounding error. @@ -56,15 +56,15 @@ SELECT 1 - 0.9 └─────────────────────┘ ``` -- The result of the calculation depends on the calculation method (the processor type and architecture of the computer system). -- Floating-point calculations might result in numbers such as infinity (`Inf`) and “not-a-number” (`NaN`). This should be taken into account when processing the results of calculations. -- When parsing floating-point numbers from text, the result might not be the nearest machine-representable number. +- The result of the calculation depends on the calculation method (the processor type and architecture of the computer system). +- Floating-point calculations might result in numbers such as infinity (`Inf`) and “not-a-number” (`NaN`). This should be taken into account when processing the results of calculations. +- When parsing floating-point numbers from text, the result might not be the nearest machine-representable number. ## NaN and Inf In contrast to standard SQL, ClickHouse supports the following categories of floating-point numbers: -- `Inf` – Infinity. +- `Inf` – Infinity. @@ -78,7 +78,7 @@ SELECT 0.5 / 0 └────────────────┘ ``` -- `-Inf` — Negative infinity. +- `-Inf` — Negative infinity. @@ -92,7 +92,7 @@ SELECT -0.5 / 0 └─────────────────┘ ``` -- `NaN` — Not a number. +- `NaN` — Not a number. diff --git a/docs/en/sql-reference/data-types/index.md b/docs/en/sql-reference/data-types/index.md index ef6a0fb3ea5..88663968e50 100644 --- a/docs/en/sql-reference/data-types/index.md +++ b/docs/en/sql-reference/data-types/index.md @@ -1,13 +1,33 @@ --- slug: /en/sql-reference/data-types/ -sidebar_label: Data Types +sidebar_label: List of data types sidebar_position: 37 --- -# Data Types +# ClickHouse Data Types -ClickHouse can store various kinds of data in table cells. +ClickHouse can store various kinds of data in table cells. This section describes the supported data types and special considerations for using and/or implementing them if any. -This section describes the supported data types and special considerations for using and/or implementing them if any. +:::note +You can check whether a data type name is case-sensitive in the [system.data_type_families](../../operations/system-tables/data_type_families.md#system_tables-data_type_families) table. +::: -You can check whether data type name is case-sensitive in the [system.data_type_families](../../operations/system-tables/data_type_families.md#system_tables-data_type_families) table. 
+ClickHouse data types include: + +- **Integer types**: [signed and unsigned integers](./int-uint.md) (`UInt8`, `UInt16`, `UInt32`, `UInt64`, `UInt128`, `UInt256`, `Int8`, `Int16`, `Int32`, `Int64`, `Int128`, `Int256`) +- **Floating-point numbers**: [floats](./float.md) (`Float32` and `Float64`) and [`Decimal` values](./decimal.md) +- **Boolean**: ClickHouse has a [`Boolean` type](./boolean.md) +- **Strings**: [`String`](./string.md) and [`FixedString`](./fixedstring.md) +- **Dates**: use [`Date`](./date.md) and [`Date32`](./date32.md) for days, and [`DateTime`](./datetime.md) and [`DateTime64`](./datetime64.md) for instants in time +- **JSON**: the [`JSON` object](./json.md) stores a JSON document in a single column +- **UUID**: a performant option for storing [`UUID` values](./uuid.md) +- **Low cardinality types**: use an [`Enum`](./enum.md) when you have a handful of unique values, or use [`LowCardinality`](./lowcardinality.md) when you have up to 10,000 unique values of a column +- **Arrays**: any column can be defined as an [`Array` of values](./array.md) +- **Maps**: use [`Map`](./map.md) for storing key/value pairs +- **Aggregation function types**: use [`SimpleAggregateFunction`](./simpleaggregatefunction.md) and [`AggregateFunction`](./aggregatefunction.md) for storing the intermediate status of aggregate function results +- **Nested data structures**: a [`Nested` data structure](./nested-data-structures/index.md) is like a table inside a cell +- **Tuples**: a [`Tuple` of elements](./tuple.md), each having an individual type +- **Nullable**: [`Nullable`](./nullable.md) allows you to store a value as `NULL` when a value is "missing" (instead of the column getting its default value for the data type) +- **IP addresses**: use [`IPv4`](./domains/ipv4.md) and [`IPv6`](./domains/ipv6.md) to efficiently store IP addresses +- **Geo types**: for [geographical data](./geo.md), including `Point`, `Ring`, `Polygon` and `MultiPolygon` +- **Special data types**: including [`Expression`](./special-data-types/expression.md), [`Set`](./special-data-types/set.md), [`Nothing`](./special-data-types/nothing.md) and [`Interval`](./special-data-types/interval.md) diff --git a/docs/en/sql-reference/data-types/int-uint.md b/docs/en/sql-reference/data-types/int-uint.md index b8a55b62e36..b551143d92f 100644 --- a/docs/en/sql-reference/data-types/int-uint.md +++ b/docs/en/sql-reference/data-types/int-uint.md @@ -12,25 +12,25 @@ When creating tables, numeric parameters for integer numbers can be set (e.g. `T ## Int Ranges -- `Int8` — \[-128 : 127\] -- `Int16` — \[-32768 : 32767\] -- `Int32` — \[-2147483648 : 2147483647\] -- `Int64` — \[-9223372036854775808 : 9223372036854775807\] -- `Int128` — \[-170141183460469231731687303715884105728 : 170141183460469231731687303715884105727\] -- `Int256` — \[-57896044618658097711785492504343953926634992332820282019728792003956564819968 : 57896044618658097711785492504343953926634992332820282019728792003956564819967\] +- `Int8` — \[-128 : 127\] +- `Int16` — \[-32768 : 32767\] +- `Int32` — \[-2147483648 : 2147483647\] +- `Int64` — \[-9223372036854775808 : 9223372036854775807\] +- `Int128` — \[-170141183460469231731687303715884105728 : 170141183460469231731687303715884105727\] +- `Int256` — \[-57896044618658097711785492504343953926634992332820282019728792003956564819968 : 57896044618658097711785492504343953926634992332820282019728792003956564819967\] Aliases: -- `Int8` — `TINYINT`, `BOOL`, `BOOLEAN`, `INT1`. -- `Int16` — `SMALLINT`, `INT2`. 
-- `Int32` — `INT`, `INT4`, `INTEGER`. -- `Int64` — `BIGINT`. +- `Int8` — `TINYINT`, `BOOL`, `BOOLEAN`, `INT1`. +- `Int16` — `SMALLINT`, `INT2`. +- `Int32` — `INT`, `INT4`, `INTEGER`. +- `Int64` — `BIGINT`. ## UInt Ranges -- `UInt8` — \[0 : 255\] -- `UInt16` — \[0 : 65535\] -- `UInt32` — \[0 : 4294967295\] -- `UInt64` — \[0 : 18446744073709551615\] -- `UInt128` — \[0 : 340282366920938463463374607431768211455\] -- `UInt256` — \[0 : 115792089237316195423570985008687907853269984665640564039457584007913129639935\] +- `UInt8` — \[0 : 255\] +- `UInt16` — \[0 : 65535\] +- `UInt32` — \[0 : 4294967295\] +- `UInt64` — \[0 : 18446744073709551615\] +- `UInt128` — \[0 : 340282366920938463463374607431768211455\] +- `UInt256` — \[0 : 115792089237316195423570985008687907853269984665640564039457584007913129639935\] diff --git a/docs/en/sql-reference/data-types/json.md b/docs/en/sql-reference/data-types/json.md index d9099ba5ad3..f727f0d75f7 100644 --- a/docs/en/sql-reference/data-types/json.md +++ b/docs/en/sql-reference/data-types/json.md @@ -6,15 +6,15 @@ sidebar_label: JSON # JSON -:::warning -This feature is experimental and is not production ready. If you need to work with JSON documents, consider using [this guide](/docs/en/guides/developer/working-with-json/json-load-data.md) instead. +:::note +This feature is experimental and is not production ready. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json.md) instead. ::: Stores JavaScript Object Notation (JSON) documents in a single column. `JSON` is an alias for `Object('json')`. -:::warning +:::note The JSON data type is an experimental feature. To use it, set `allow_experimental_object_type = 1`. ::: diff --git a/docs/en/sql-reference/data-types/lowcardinality.md b/docs/en/sql-reference/data-types/lowcardinality.md index 58a99baa09e..7810f4c5324 100644 --- a/docs/en/sql-reference/data-types/lowcardinality.md +++ b/docs/en/sql-reference/data-types/lowcardinality.md @@ -16,7 +16,7 @@ LowCardinality(data_type) **Parameters** -- `data_type` — [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), and numbers excepting [Decimal](../../sql-reference/data-types/decimal.md). `LowCardinality` is not efficient for some data types, see the [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types) setting description. +- `data_type` — [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), and numbers excepting [Decimal](../../sql-reference/data-types/decimal.md). `LowCardinality` is not efficient for some data types, see the [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types) setting description. 
## Description @@ -44,19 +44,19 @@ ORDER BY id Settings: -- [low_cardinality_max_dictionary_size](../../operations/settings/settings.md#low_cardinality_max_dictionary_size) -- [low_cardinality_use_single_dictionary_for_part](../../operations/settings/settings.md#low_cardinality_use_single_dictionary_for_part) -- [low_cardinality_allow_in_native_format](../../operations/settings/settings.md#low_cardinality_allow_in_native_format) -- [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types) -- [output_format_arrow_low_cardinality_as_dictionary](../../operations/settings/settings.md#output-format-arrow-low-cardinality-as-dictionary) +- [low_cardinality_max_dictionary_size](../../operations/settings/settings.md#low_cardinality_max_dictionary_size) +- [low_cardinality_use_single_dictionary_for_part](../../operations/settings/settings.md#low_cardinality_use_single_dictionary_for_part) +- [low_cardinality_allow_in_native_format](../../operations/settings/settings.md#low_cardinality_allow_in_native_format) +- [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types) +- [output_format_arrow_low_cardinality_as_dictionary](../../operations/settings/settings.md#output-format-arrow-low-cardinality-as-dictionary) Functions: -- [toLowCardinality](../../sql-reference/functions/type-conversion-functions.md#tolowcardinality) +- [toLowCardinality](../../sql-reference/functions/type-conversion-functions.md#tolowcardinality) ## Related content -- [Reducing ClickHouse Storage Cost with the Low Cardinality Type – Lessons from an Instana Engineer](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/) -- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf) +- [Reducing ClickHouse Storage Cost with the Low Cardinality Type – Lessons from an Instana Engineer](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/) +- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf) - Blog: [Optimizing ClickHouse with Schemas and Codecs](https://clickhouse.com/blog/optimize-clickhouse-codecs-compression-schema) - Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md index b0659746ba7..0ea183d73d8 100644 --- a/docs/en/sql-reference/data-types/map.md +++ b/docs/en/sql-reference/data-types/map.md @@ -10,8 +10,8 @@ sidebar_label: Map(key, value) **Parameters** -- `key` — The key part of the pair. 
[String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UUID](../../sql-reference/data-types/uuid.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), [Date32](../../sql-reference/data-types/date32.md), [Enum](../../sql-reference/data-types/enum.md). -- `value` — The value part of the pair. Arbitrary type, including [Map](../../sql-reference/data-types/map.md) and [Array](../../sql-reference/data-types/array.md). +- `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UUID](../../sql-reference/data-types/uuid.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), [Date32](../../sql-reference/data-types/date32.md), [Enum](../../sql-reference/data-types/enum.md). +- `value` — The value part of the pair. Arbitrary type, including [Map](../../sql-reference/data-types/map.md) and [Array](../../sql-reference/data-types/array.md). To get the value from an `a Map('key', 'value')` column, use `a['key']` syntax. This lookup works now with a linear complexity. @@ -106,5 +106,10 @@ Result: **See Also** -- [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) function -- [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function +- [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) function +- [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function + + +## Related content + +- Blog: [Building an Observability Solution with ClickHouse - Part 2 - Traces](https://clickhouse.com/blog/storing-traces-and-spans-open-telemetry-in-clickhouse) diff --git a/docs/en/sql-reference/data-types/nested-data-structures/index.md b/docs/en/sql-reference/data-types/nested-data-structures/index.md index 1d958c018d8..d118170cd39 100644 --- a/docs/en/sql-reference/data-types/nested-data-structures/index.md +++ b/docs/en/sql-reference/data-types/nested-data-structures/index.md @@ -1,7 +1,105 @@ --- -slug: /en/sql-reference/data-types/nested-data-structures/ -sidebar_label: Nested Data Structures -sidebar_position: 54 +slug: /en/sql-reference/data-types/nested-data-structures/nested +sidebar_position: 57 +sidebar_label: Nested(Name1 Type1, Name2 Type2, ...) --- -# Nested Data Structures +# Nested + +## Nested(name1 Type1, Name2 Type2, …) + +A nested data structure is like a table inside a cell. The parameters of a nested data structure – the column names and types – are specified the same way as in a [CREATE TABLE](../../../sql-reference/statements/create/table.md) query. Each table row can correspond to any number of rows in a nested data structure. + +Example: + +``` sql +CREATE TABLE test.visits +( + CounterID UInt32, + StartDate Date, + Sign Int8, + IsNew UInt8, + VisitID UInt64, + UserID UInt64, + ... + Goals Nested + ( + ID UInt32, + Serial UInt32, + EventTime DateTime, + Price Int64, + OrderID String, + CurrencyID UInt32 + ), + ... 
+) ENGINE = CollapsingMergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192, Sign) +``` + +This example declares the `Goals` nested data structure, which contains data about conversions (goals reached). Each row in the ‘visits’ table can correspond to zero or any number of conversions. + +When [flatten_nested](../../../operations/settings/settings.md#flatten-nested) is set to `0` (which is not by default), arbitrary levels of nesting are supported. + +In most cases, when working with a nested data structure, its columns are specified with column names separated by a dot. These columns make up an array of matching types. All the column arrays of a single nested data structure have the same length. + +Example: + +``` sql +SELECT + Goals.ID, + Goals.EventTime +FROM test.visits +WHERE CounterID = 101500 AND length(Goals.ID) < 5 +LIMIT 10 +``` + +``` text +┌─Goals.ID───────────────────────┬─Goals.EventTime───────────────────────────────────────────────────────────────────────────┐ +│ [1073752,591325,591325] │ ['2014-03-17 16:38:10','2014-03-17 16:38:48','2014-03-17 16:42:27'] │ +│ [1073752] │ ['2014-03-17 00:28:25'] │ +│ [1073752] │ ['2014-03-17 10:46:20'] │ +│ [1073752,591325,591325,591325] │ ['2014-03-17 13:59:20','2014-03-17 22:17:55','2014-03-17 22:18:07','2014-03-17 22:18:51'] │ +│ [] │ [] │ +│ [1073752,591325,591325] │ ['2014-03-17 11:37:06','2014-03-17 14:07:47','2014-03-17 14:36:21'] │ +│ [] │ [] │ +│ [] │ [] │ +│ [591325,1073752] │ ['2014-03-17 00:46:05','2014-03-17 00:46:05'] │ +│ [1073752,591325,591325,591325] │ ['2014-03-17 13:28:33','2014-03-17 13:30:26','2014-03-17 18:51:21','2014-03-17 18:51:45'] │ +└────────────────────────────────┴───────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +It is easiest to think of a nested data structure as a set of multiple column arrays of the same length. + +The only place where a SELECT query can specify the name of an entire nested data structure instead of individual columns is the ARRAY JOIN clause. For more information, see “ARRAY JOIN clause”. Example: + +``` sql +SELECT + Goal.ID, + Goal.EventTime +FROM test.visits +ARRAY JOIN Goals AS Goal +WHERE CounterID = 101500 AND length(Goals.ID) < 5 +LIMIT 10 +``` + +``` text +┌─Goal.ID─┬──────Goal.EventTime─┐ +│ 1073752 │ 2014-03-17 16:38:10 │ +│ 591325 │ 2014-03-17 16:38:48 │ +│ 591325 │ 2014-03-17 16:42:27 │ +│ 1073752 │ 2014-03-17 00:28:25 │ +│ 1073752 │ 2014-03-17 10:46:20 │ +│ 1073752 │ 2014-03-17 13:59:20 │ +│ 591325 │ 2014-03-17 22:17:55 │ +│ 591325 │ 2014-03-17 22:18:07 │ +│ 591325 │ 2014-03-17 22:18:51 │ +│ 1073752 │ 2014-03-17 11:37:06 │ +└─────────┴─────────────────────┘ +``` + +You can’t perform SELECT for an entire nested data structure. You can only explicitly list individual columns that are part of it. + +For an INSERT query, you should pass all the component column arrays of a nested data structure separately (as if they were individual column arrays). During insertion, the system checks that they have the same length. + +For a DESCRIBE query, the columns in a nested data structure are listed separately in the same way. + +The ALTER query for elements in a nested data structure has limitations. 
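As an illustration of the INSERT rule above, a minimal sketch on a simplified, hypothetical table `visits_lite`:

``` sql
CREATE TABLE visits_lite
(
    VisitID UInt64,
    Goals Nested
    (
        ID UInt32,
        EventTime DateTime
    )
) ENGINE = MergeTree ORDER BY VisitID;

-- Each nested column is passed as its own array; all arrays belonging
-- to one Nested structure must have the same length.
INSERT INTO visits_lite (VisitID, `Goals.ID`, `Goals.EventTime`)
VALUES (42, [1073752, 591325], ['2014-03-17 16:38:10', '2014-03-17 16:38:48']);
```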
diff --git a/docs/en/sql-reference/data-types/nested-data-structures/nested.md b/docs/en/sql-reference/data-types/nested-data-structures/nested.md deleted file mode 100644 index d118170cd39..00000000000 --- a/docs/en/sql-reference/data-types/nested-data-structures/nested.md +++ /dev/null @@ -1,105 +0,0 @@ ---- -slug: /en/sql-reference/data-types/nested-data-structures/nested -sidebar_position: 57 -sidebar_label: Nested(Name1 Type1, Name2 Type2, ...) ---- - -# Nested - -## Nested(name1 Type1, Name2 Type2, …) - -A nested data structure is like a table inside a cell. The parameters of a nested data structure – the column names and types – are specified the same way as in a [CREATE TABLE](../../../sql-reference/statements/create/table.md) query. Each table row can correspond to any number of rows in a nested data structure. - -Example: - -``` sql -CREATE TABLE test.visits -( - CounterID UInt32, - StartDate Date, - Sign Int8, - IsNew UInt8, - VisitID UInt64, - UserID UInt64, - ... - Goals Nested - ( - ID UInt32, - Serial UInt32, - EventTime DateTime, - Price Int64, - OrderID String, - CurrencyID UInt32 - ), - ... -) ENGINE = CollapsingMergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192, Sign) -``` - -This example declares the `Goals` nested data structure, which contains data about conversions (goals reached). Each row in the ‘visits’ table can correspond to zero or any number of conversions. - -When [flatten_nested](../../../operations/settings/settings.md#flatten-nested) is set to `0` (which is not by default), arbitrary levels of nesting are supported. - -In most cases, when working with a nested data structure, its columns are specified with column names separated by a dot. These columns make up an array of matching types. All the column arrays of a single nested data structure have the same length. - -Example: - -``` sql -SELECT - Goals.ID, - Goals.EventTime -FROM test.visits -WHERE CounterID = 101500 AND length(Goals.ID) < 5 -LIMIT 10 -``` - -``` text -┌─Goals.ID───────────────────────┬─Goals.EventTime───────────────────────────────────────────────────────────────────────────┐ -│ [1073752,591325,591325] │ ['2014-03-17 16:38:10','2014-03-17 16:38:48','2014-03-17 16:42:27'] │ -│ [1073752] │ ['2014-03-17 00:28:25'] │ -│ [1073752] │ ['2014-03-17 10:46:20'] │ -│ [1073752,591325,591325,591325] │ ['2014-03-17 13:59:20','2014-03-17 22:17:55','2014-03-17 22:18:07','2014-03-17 22:18:51'] │ -│ [] │ [] │ -│ [1073752,591325,591325] │ ['2014-03-17 11:37:06','2014-03-17 14:07:47','2014-03-17 14:36:21'] │ -│ [] │ [] │ -│ [] │ [] │ -│ [591325,1073752] │ ['2014-03-17 00:46:05','2014-03-17 00:46:05'] │ -│ [1073752,591325,591325,591325] │ ['2014-03-17 13:28:33','2014-03-17 13:30:26','2014-03-17 18:51:21','2014-03-17 18:51:45'] │ -└────────────────────────────────┴───────────────────────────────────────────────────────────────────────────────────────────┘ -``` - -It is easiest to think of a nested data structure as a set of multiple column arrays of the same length. - -The only place where a SELECT query can specify the name of an entire nested data structure instead of individual columns is the ARRAY JOIN clause. For more information, see “ARRAY JOIN clause”. 
Example: - -``` sql -SELECT - Goal.ID, - Goal.EventTime -FROM test.visits -ARRAY JOIN Goals AS Goal -WHERE CounterID = 101500 AND length(Goals.ID) < 5 -LIMIT 10 -``` - -``` text -┌─Goal.ID─┬──────Goal.EventTime─┐ -│ 1073752 │ 2014-03-17 16:38:10 │ -│ 591325 │ 2014-03-17 16:38:48 │ -│ 591325 │ 2014-03-17 16:42:27 │ -│ 1073752 │ 2014-03-17 00:28:25 │ -│ 1073752 │ 2014-03-17 10:46:20 │ -│ 1073752 │ 2014-03-17 13:59:20 │ -│ 591325 │ 2014-03-17 22:17:55 │ -│ 591325 │ 2014-03-17 22:18:07 │ -│ 591325 │ 2014-03-17 22:18:51 │ -│ 1073752 │ 2014-03-17 11:37:06 │ -└─────────┴─────────────────────┘ -``` - -You can’t perform SELECT for an entire nested data structure. You can only explicitly list individual columns that are part of it. - -For an INSERT query, you should pass all the component column arrays of a nested data structure separately (as if they were individual column arrays). During insertion, the system checks that they have the same length. - -For a DESCRIBE query, the columns in a nested data structure are listed separately in the same way. - -The ALTER query for elements in a nested data structure has limitations. diff --git a/docs/en/sql-reference/data-types/nullable.md b/docs/en/sql-reference/data-types/nullable.md index 230b4af7960..28180f7f991 100644 --- a/docs/en/sql-reference/data-types/nullable.md +++ b/docs/en/sql-reference/data-types/nullable.md @@ -8,7 +8,7 @@ sidebar_label: Nullable Allows to store special marker ([NULL](../../sql-reference/syntax.md)) that denotes “missing value” alongside normal values allowed by `TypeName`. For example, a `Nullable(Int8)` type column can store `Int8` type values, and the rows that do not have a value will store `NULL`. -For a `TypeName`, you can’t use composite data types [Array](../../sql-reference/data-types/array.md) and [Tuple](../../sql-reference/data-types/tuple.md). Composite data types can contain `Nullable` type values, such as `Array(Nullable(Int8))`. +For a `TypeName`, you can’t use composite data types [Array](../../sql-reference/data-types/array.md), [Map](../../sql-reference/data-types/map.md) and [Tuple](../../sql-reference/data-types/tuple.md). Composite data types can contain `Nullable` type values, such as `Array(Nullable(Int8))`. A `Nullable` type field can’t be included in table indexes. 
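A minimal sketch of how `NULL` behaves in a `Nullable` column (the table name `t_null` is illustrative):

``` sql
CREATE TABLE t_null (x Int8, y Nullable(Int8)) ENGINE = TinyLog;
INSERT INTO t_null VALUES (1, NULL), (2, 3);
SELECT x + y FROM t_null;  -- the first row yields NULL: NULL propagates through arithmetic
```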
diff --git a/docs/en/sql-reference/data-types/simpleaggregatefunction.md b/docs/en/sql-reference/data-types/simpleaggregatefunction.md index 342a0294eb6..517a28576f0 100644 --- a/docs/en/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/en/sql-reference/data-types/simpleaggregatefunction.md @@ -9,20 +9,20 @@ The common way to produce an aggregate function value is by calling the aggregat The following aggregate functions are supported: -- [`any`](../../sql-reference/aggregate-functions/reference/any.md#agg_function-any) -- [`anyLast`](../../sql-reference/aggregate-functions/reference/anylast.md#anylastx) -- [`min`](../../sql-reference/aggregate-functions/reference/min.md#agg_function-min) -- [`max`](../../sql-reference/aggregate-functions/reference/max.md#agg_function-max) -- [`sum`](../../sql-reference/aggregate-functions/reference/sum.md#agg_function-sum) -- [`sumWithOverflow`](../../sql-reference/aggregate-functions/reference/sumwithoverflow.md#sumwithoverflowx) -- [`groupBitAnd`](../../sql-reference/aggregate-functions/reference/groupbitand.md#groupbitand) -- [`groupBitOr`](../../sql-reference/aggregate-functions/reference/groupbitor.md#groupbitor) -- [`groupBitXor`](../../sql-reference/aggregate-functions/reference/groupbitxor.md#groupbitxor) -- [`groupArrayArray`](../../sql-reference/aggregate-functions/reference/grouparray.md#agg_function-grouparray) -- [`groupUniqArrayArray`](../../sql-reference/aggregate-functions/reference/groupuniqarray.md) -- [`sumMap`](../../sql-reference/aggregate-functions/reference/summap.md#agg_functions-summap) -- [`minMap`](../../sql-reference/aggregate-functions/reference/minmap.md#agg_functions-minmap) -- [`maxMap`](../../sql-reference/aggregate-functions/reference/maxmap.md#agg_functions-maxmap) +- [`any`](../../sql-reference/aggregate-functions/reference/any.md#agg_function-any) +- [`anyLast`](../../sql-reference/aggregate-functions/reference/anylast.md#anylastx) +- [`min`](../../sql-reference/aggregate-functions/reference/min.md#agg_function-min) +- [`max`](../../sql-reference/aggregate-functions/reference/max.md#agg_function-max) +- [`sum`](../../sql-reference/aggregate-functions/reference/sum.md#agg_function-sum) +- [`sumWithOverflow`](../../sql-reference/aggregate-functions/reference/sumwithoverflow.md#sumwithoverflowx) +- [`groupBitAnd`](../../sql-reference/aggregate-functions/reference/groupbitand.md#groupbitand) +- [`groupBitOr`](../../sql-reference/aggregate-functions/reference/groupbitor.md#groupbitor) +- [`groupBitXor`](../../sql-reference/aggregate-functions/reference/groupbitxor.md#groupbitxor) +- [`groupArrayArray`](../../sql-reference/aggregate-functions/reference/grouparray.md#agg_function-grouparray) +- [`groupUniqArrayArray`](../../sql-reference/aggregate-functions/reference/groupuniqarray.md) +- [`sumMap`](../../sql-reference/aggregate-functions/reference/summap.md#agg_functions-summap) +- [`minMap`](../../sql-reference/aggregate-functions/reference/minmap.md#agg_functions-minmap) +- [`maxMap`](../../sql-reference/aggregate-functions/reference/maxmap.md#agg_functions-maxmap) :::note @@ -33,8 +33,8 @@ Values of the `SimpleAggregateFunction(func, Type)` look and stored the same way **Parameters** -- Name of the aggregate function. -- Types of the aggregate function arguments. +- Name of the aggregate function. +- Types of the aggregate function arguments. 
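As a sketch of how the two parameters fit together (the table name is illustrative), a column declaration passes the aggregate function name and the type of its argument:

``` sql
-- SimpleAggregateFunction(func, Type): `sum` is the aggregate function name,
-- `Double` is the type of the function's argument.
CREATE TABLE simple
(
    id UInt64,
    val SimpleAggregateFunction(sum, Double)
)
ENGINE = AggregatingMergeTree
ORDER BY id;
```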
**Example**
diff --git a/docs/en/sql-reference/data-types/special-data-types/interval.md b/docs/en/sql-reference/data-types/special-data-types/interval.md
index 5169bc646c9..bedbcf0bd28 100644
--- a/docs/en/sql-reference/data-types/special-data-types/interval.md
+++ b/docs/en/sql-reference/data-types/special-data-types/interval.md
@@ -8,25 +8,24 @@ sidebar_label: Interval
 The family of data types representing time and date intervals. The resulting types of the [INTERVAL](../../../sql-reference/operators/index.md#operator-interval) operator.

-:::warning
-`Interval` data type values can’t be stored in tables.
-:::
-
 Structure:

-- Time interval as an unsigned integer value.
-- Type of an interval.
+- Time interval as an unsigned integer value.
+- Type of an interval.

 Supported interval types:

-- `SECOND`
-- `MINUTE`
-- `HOUR`
-- `DAY`
-- `WEEK`
-- `MONTH`
-- `QUARTER`
-- `YEAR`
+- `NANOSECOND`
+- `MICROSECOND`
+- `MILLISECOND`
+- `SECOND`
+- `MINUTE`
+- `HOUR`
+- `DAY`
+- `WEEK`
+- `MONTH`
+- `QUARTER`
+- `YEAR`

 For each interval type, there is a separate data type. For example, the `DAY` interval corresponds to the `IntervalDay` data type:

@@ -81,5 +80,5 @@ Code: 43. DB::Exception: Received from localhost:9000. DB::Exception: Wrong argu

 ## See Also

-- [INTERVAL](../../../sql-reference/operators/index.md#operator-interval) operator
-- [toInterval](../../../sql-reference/functions/type-conversion-functions.md#function-tointerval) type conversion functions
+- [INTERVAL](../../../sql-reference/operators/index.md#operator-interval) operator
+- [toInterval](../../../sql-reference/functions/type-conversion-functions.md#function-tointerval) type conversion functions
diff --git a/docs/en/sql-reference/data-types/string.md b/docs/en/sql-reference/data-types/string.md
index fce16320240..f891a9303e5 100644
--- a/docs/en/sql-reference/data-types/string.md
+++ b/docs/en/sql-reference/data-types/string.md
@@ -13,7 +13,7 @@ When creating tables, numeric parameters for string fields can be set (e.g. `VAR

 Aliases:

-- `String` — `LONGTEXT`, `MEDIUMTEXT`, `TINYTEXT`, `TEXT`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `VARCHAR`, `CHAR`.
+- `String` — `LONGTEXT`, `MEDIUMTEXT`, `TINYTEXT`, `TEXT`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `VARCHAR`, `CHAR`.
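A quick way to see the aliasing in action (a sketch; the table name is illustrative): a column declared with one of the aliases is stored and reported as plain `String`:

``` sql
CREATE TABLE string_alias_demo (s VARCHAR(255)) ENGINE = Memory;

-- The column is reported with type String, since VARCHAR is only an alias
-- and the numeric parameter is ignored.
DESCRIBE TABLE string_alias_demo;
```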
## Encodings
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md b/docs/en/sql-reference/dictionaries/_snippet_dictionary_in_cloud.md
similarity index 100%
rename from docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md
rename to docs/en/sql-reference/dictionaries/_snippet_dictionary_in_cloud.md
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/_category_.yml b/docs/en/sql-reference/dictionaries/external-dictionaries/_category_.yml
deleted file mode 100644
index af79ff9af23..00000000000
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/_category_.yml
+++ /dev/null
@@ -1,8 +0,0 @@
-position: 37
-label: 'Dictionaries'
-collapsible: true
-collapsed: true
-link:
-  type: generated-index
-  title: Dictionaries
-  slug: /en/sql-reference/dictionaries/external-dictionaries
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md
deleted file mode 100644
index ee9cd2c1f2e..00000000000
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md
+++ /dev/null
@@ -1,67 +0,0 @@
----
-slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical
-sidebar_position: 45
-sidebar_label: Hierarchical dictionaries
----
-
-# Hierarchical Dictionaries
-
-ClickHouse supports hierarchical dictionaries with a [numeric key](../../dictionaries/external-dictionaries/external-dicts-dict-structure.md#numeric-key).
-
-Look at the following hierarchical structure:
-
-``` text
-0 (Common parent)
-│
-├── 1 (Russia)
-│   │
-│   └── 2 (Moscow)
-│       │
-│       └── 3 (Center)
-│
-└── 4 (Great Britain)
-    │
-    └── 5 (London)
-```
-
-This hierarchy can be expressed as the following dictionary table.
-
-| region_id | parent_region | region_name   |
-|-----------|---------------|---------------|
-| 1         | 0             | Russia        |
-| 2         | 1             | Moscow        |
-| 3         | 2             | Center        |
-| 4         | 0             | Great Britain |
-| 5         | 4             | London        |
-
-This table contains a column `parent_region` that holds the key of the nearest parent for the element.
-
-ClickHouse supports the [hierarchical](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#hierarchical-dict-attr) property for [external dictionary](../../../sql-reference/dictionaries/external-dictionaries/) attributes. This property allows you to configure a hierarchical dictionary similar to the one described above.
-
-The [dictGetHierarchy](../../../sql-reference/functions/ext-dict-functions.md#dictgethierarchy) function allows you to get the parent chain of an element.
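For the table above, a call along the lines of the following sketch (the dictionary name `regions_dict` is illustrative) would walk from `Center` up to the root:

``` sql
-- Returns the key chain from the element up to the root, e.g. [3, 2, 1].
SELECT dictGetHierarchy('regions_dict', toUInt64(3)) AS chain;
```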
-
-For our example, the structure of the dictionary can be the following:
-
-``` xml
-<dictionary>
-    <structure>
-        <id>
-            <name>region_id</name>
-        </id>
-
-        <attribute>
-            <name>parent_region</name>
-            <type>UInt64</type>
-            <null_value>0</null_value>
-            <hierarchical>true</hierarchical>
-        </attribute>
-
-        <attribute>
-            <name>region_name</name>
-            <type>String</type>
-            <null_value></null_value>
-        </attribute>
-    </structure>
-</dictionary>
-```
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
deleted file mode 100644
index 4dc6fd33849..00000000000
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
+++ /dev/null
@@ -1,751 +0,0 @@
----
-slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout
-sidebar_position: 41
-sidebar_label: Storing Dictionaries in Memory
----
-import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md';
-
-# Storing Dictionaries in Memory
-
-There are a variety of ways to store dictionaries in memory.
-
-We recommend [flat](#flat), [hashed](#dicts-external_dicts_dict_layout-hashed) and [complex_key_hashed](#complex-key-hashed), which provide optimal processing speed.
-
-Caching is not recommended because of potentially poor performance and difficulties in selecting optimal parameters. Read more in the section [cache](#cache).
-
-There are several ways to improve dictionary performance:
-
-- Call the function for working with the dictionary after `GROUP BY`.
-- Mark attributes to extract as injective. An attribute is called injective if different attribute values correspond to different keys. So when `GROUP BY` uses a function that fetches an attribute value by the key, this function is automatically taken out of `GROUP BY`.
-
-ClickHouse generates an exception for errors with dictionaries. Examples of errors:
-
-- The dictionary being accessed could not be loaded.
-- Error querying a `cached` dictionary.
-
-You can view the list of dictionaries and their statuses in the [system.dictionaries](../../../operations/system-tables/dictionaries.md) table.
-
-<CloudDetails />
-
-The configuration looks like this:
-
-``` xml
-<dictionary>
-    ...
-    <layout>
-        <layout_type>
-            <!-- layout settings -->
-        </layout_type>
-    </layout>
-    ...
-</dictionary>
-```
-
-Corresponding [DDL-query](../../../sql-reference/statements/create/dictionary.md):
-
-``` sql
-CREATE DICTIONARY (...)
-...
-LAYOUT(LAYOUT_TYPE(param value)) -- layout settings
-...
-```
-
-Dictionaries without the word `complex-key*` in their layout have a key of [UInt64](../../../sql-reference/data-types/int-uint.md) type; `complex-key*` dictionaries have a composite key (complex, with arbitrary types).
-
-[UInt64](../../../sql-reference/data-types/int-uint.md) keys in XML dictionaries are defined with the `<id>` tag.
-
-Configuration example (column key_column has UInt64 type):
-```xml
-...
-<structure>
-    <id>
-        <name>key_column</name>
-    </id>
-</structure>
-...
-```
-
-Composite (`complex`) keys in XML dictionaries are defined with the `<key>` tag.
-
-Configuration example of a composite key (key has one element with [String](../../../sql-reference/data-types/string.md) type):
-```xml
-...
-<structure>
-    <key>
-        <attribute>
-            <name>country_code</name>
-            <type>String</type>
-        </attribute>
-    </key>
-</structure>
-...
-```
-
-## Ways to Store Dictionaries in Memory
-
-- [flat](#flat)
-- [hashed](#dicts-external_dicts_dict_layout-hashed)
-- [sparse_hashed](#dicts-external_dicts_dict_layout-sparse_hashed)
-- [complex_key_hashed](#complex-key-hashed)
-- [complex_key_sparse_hashed](#complex-key-sparse-hashed)
-- [hashed_array](#dicts-external_dicts_dict_layout-hashed-array)
-- [complex_key_hashed_array](#complex-key-hashed-array)
-- [range_hashed](#range-hashed)
-- [complex_key_range_hashed](#complex-key-range-hashed)
-- [cache](#cache)
-- [complex_key_cache](#complex-key-cache)
-- [ssd_cache](#ssd-cache)
-- [complex_key_ssd_cache](#complex-key-ssd-cache)
-- [direct](#direct)
-- [complex_key_direct](#complex-key-direct)
-- [ip_trie](#ip-trie)
-
-### flat
-
-The dictionary is completely stored in memory in the form of flat arrays. The amount of memory used is proportional to the size of the largest key (in space used).
-
-The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type and the value is limited to `max_array_size` (by default — 500,000). If a larger key is discovered when creating the dictionary, ClickHouse throws an exception and does not create the dictionary. The initial size of the dictionary's flat arrays is controlled by the `initial_array_size` setting (by default — 1024).
-
-All types of sources are supported. When updating, data (from a file or from a table) is read in its entirety.
-
-This method provides the best performance among all available methods of storing the dictionary.
-
-Configuration example:
-
-``` xml
-<layout>
-  <flat>
-    <initial_array_size>50000</initial_array_size>
-    <max_array_size>5000000</max_array_size>
-  </flat>
-</layout>
-```
-
-or
-
-``` sql
-LAYOUT(FLAT(INITIAL_ARRAY_SIZE 50000 MAX_ARRAY_SIZE 5000000))
-```
-
-### hashed
-
-The dictionary is completely stored in memory in the form of a hash table. The dictionary can contain any number of elements with any identifiers. In practice, the number of keys can reach tens of millions of items.
-
-The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.
-
-All types of sources are supported. When updating, data (from a file or from a table) is read in its entirety.
-
-Configuration example:
-
-``` xml
-<layout>
-  <hashed />
-</layout>
-```
-
-or
-
-``` sql
-LAYOUT(HASHED())
-```
-
-If `shards` is greater than 1 (default is `1`), the dictionary will load data in parallel, which is useful if you have a huge number of elements in one dictionary.
-
-Configuration example:
-
-``` xml
-<layout>
-  <hashed>
-    <shards>10</shards>
-    <!-- Size of the backlog for blocks in the parallel queue. -->
-    <shard_load_queue_backlog>10000</shard_load_queue_backlog>
-  </hashed>
-</layout>
-```
-
-or
-
-``` sql
-LAYOUT(HASHED(SHARDS 10 [SHARD_LOAD_QUEUE_BACKLOG 10000]))
-```
-
-### sparse_hashed
-
-Similar to `hashed`, but uses less memory in favor of more CPU usage.
-
-The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.
-
-Configuration example:
-
-``` xml
-<layout>
-  <sparse_hashed />
-</layout>
-```
-
-or
-
-``` sql
-LAYOUT(SPARSE_HASHED())
-```
-
-It is also possible to use `shards` for this type of dictionary, and again it is more important for `sparse_hashed` than for `hashed`, since `sparse_hashed` is slower.
-
-### complex_key_hashed
-
-This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Similar to `hashed`.
-
-Configuration example:
-
-``` xml
-<layout>
-  <complex_key_hashed>
-    <shards>1</shards>
-    <!-- <shard_load_queue_backlog>10000</shard_load_queue_backlog> -->
-  </complex_key_hashed>
-</layout>
-```
-
-or
-
-``` sql
-LAYOUT(COMPLEX_KEY_HASHED([SHARDS 1] [SHARD_LOAD_QUEUE_BACKLOG 10000]))
-```
-
-### complex_key_sparse_hashed
-
-This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md).
Similar to [sparse_hashed](#dicts-external_dicts_dict_layout-sparse_hashed).
-
-Configuration example:
-
-``` xml
-<layout>
-  <complex_key_sparse_hashed>
-    <shards>1</shards>
-  </complex_key_sparse_hashed>
-</layout>
-```
-
-or
-
-``` sql
-LAYOUT(COMPLEX_KEY_SPARSE_HASHED([SHARDS 1] [SHARD_LOAD_QUEUE_BACKLOG 10000]))
-```
-
-### hashed_array
-
-The dictionary is completely stored in memory. Each attribute is stored in an array. The key attribute is stored in the form of a hashed table where the value is an index in the attributes array. The dictionary can contain any number of elements with any identifiers. In practice, the number of keys can reach tens of millions of items.
-
-The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.
-
-All types of sources are supported. When updating, data (from a file or from a table) is read in its entirety.
-
-Configuration example:
-
-``` xml
-<layout>
-  <hashed_array>
-  </hashed_array>
-</layout>
-```
-
-or
-
-``` sql
-LAYOUT(HASHED_ARRAY())
-```
-
-### complex_key_hashed_array
-
-This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Similar to [hashed_array](#dicts-external_dicts_dict_layout-hashed-array).
-
-Configuration example:
-
-``` xml
-<layout>
-  <complex_key_hashed_array />
-</layout>
-```
-
-or
-
-``` sql
-LAYOUT(COMPLEX_KEY_HASHED_ARRAY())
-```
-
-### range_hashed
-
-The dictionary is stored in memory in the form of a hash table with an ordered array of ranges and their corresponding values.
-
-The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.
-This storage method works the same way as hashed and allows using date/time (arbitrary numeric type) ranges in addition to the key.
-
-Example: The table contains discounts for each advertiser in the format:
-
-``` text
-┌─advertiser_id─┬─discount_start_date─┬─discount_end_date─┬─amount─┐
-│           123 │          2015-01-16 │        2015-01-31 │   0.25 │
-│           123 │          2015-01-01 │        2015-01-15 │   0.15 │
-│           456 │          2015-01-01 │        2015-01-15 │   0.05 │
-└───────────────┴─────────────────────┴───────────────────┴────────┘
-```
-
-To use a sample for date ranges, define the `range_min` and `range_max` elements in the [structure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). These elements must contain elements `name` and `type` (if `type` is not specified, the default type Date is used). `type` can be any numeric type (Date / DateTime / UInt64 / Int32 / others).
-
-:::warning
-Values of `range_min` and `range_max` should fit in `Int64` type.
-:::
-
-Example:
-
-``` xml
-<layout>
-    <range_hashed>
-        <!-- Strategy for overlapping ranges (min/max). -->
-        <range_lookup_strategy>min</range_lookup_strategy>
-    </range_hashed>
-</layout>
-<structure>
-    <id>
-        <name>advertiser_id</name>
-    </id>
-    <range_min>
-        <name>discount_start_date</name>
-        <type>Date</type>
-    </range_min>
-    <range_max>
-        <name>discount_end_date</name>
-        <type>Date</type>
-    </range_max>
-    ...
-```
-
-or
-
-``` sql
-CREATE DICTIONARY discounts_dict (
-    advertiser_id UInt64,
-    discount_start_date Date,
-    discount_end_date Date,
-    amount Float64
-)
-PRIMARY KEY advertiser_id
-SOURCE(CLICKHOUSE(TABLE 'discounts'))
-LIFETIME(MIN 1 MAX 1000)
-LAYOUT(RANGE_HASHED(range_lookup_strategy 'max'))
-RANGE(MIN discount_start_date MAX discount_end_date)
-```
-
-To work with these dictionaries, you need to pass an additional argument to the `dictGet` function, the value for which a range is selected:
-
-``` sql
-dictGet('dict_name', 'attr_name', id, date)
-```
-Query example:
-
-``` sql
-SELECT dictGet('discounts_dict', 'amount', 1, '2022-10-20'::Date);
-```
-
-This function returns the value for the specified `id`s and the date range that includes the passed date.
-
-Details of the algorithm:
-
-- If the `id` is not found or a range is not found for the `id`, it returns the default value of the attribute's type.
-- If there are overlapping ranges and `range_lookup_strategy=min`, it returns a matching range with the minimal `range_min`; if several ranges are found, it returns the range with the minimal `range_max`; if again several ranges are found (several ranges had the same `range_min` and `range_max`), it returns a random one of them.
-- If there are overlapping ranges and `range_lookup_strategy=max`, it returns a matching range with the maximal `range_min`; if several ranges are found, it returns the range with the maximal `range_max`; if again several ranges are found (several ranges had the same `range_min` and `range_max`), it returns a random one of them.
-- If the `range_max` is `NULL`, the range is open. `NULL` is treated as the maximal possible value. For the `range_min`, `1970-01-01` or `0` (-MAX_INT) can be used as the open value.
-
-Configuration example:
-
-``` xml
-<dictionary>
-    ...
-    <layout>
-        <range_hashed />
-    </layout>
-    <structure>
-        <id>
-            <name>Abcdef</name>
-        </id>
-        <range_min>
-            <name>StartTimeStamp</name>
-            <type>UInt64</type>
-        </range_min>
-        <range_max>
-            <name>EndTimeStamp</name>
-            <type>UInt64</type>
-        </range_max>
-        <attribute>
-            <name>XXXType</name>
-            <type>String</type>
-            <null_value />
-        </attribute>
-    </structure>
-</dictionary>
-```
-
-or
-
-``` sql
-CREATE DICTIONARY somedict(
-    Abcdef UInt64,
-    StartTimeStamp UInt64,
-    EndTimeStamp UInt64,
-    XXXType String DEFAULT ''
-)
-PRIMARY KEY Abcdef
-RANGE(MIN StartTimeStamp MAX EndTimeStamp)
-```
-
-Configuration example with overlapping ranges and open ranges:
-
-```sql
-CREATE TABLE discounts
-(
-    advertiser_id UInt64,
-    discount_start_date Date,
-    discount_end_date Nullable(Date),
-    amount Float64
-)
-ENGINE = Memory;
-
-INSERT INTO discounts VALUES (1, '2015-01-01', Null, 0.1);
-INSERT INTO discounts VALUES (1, '2015-01-15', Null, 0.2);
-INSERT INTO discounts VALUES (2, '2015-01-01', '2015-01-15', 0.3);
-INSERT INTO discounts VALUES (2, '2015-01-04', '2015-01-10', 0.4);
-INSERT INTO discounts VALUES (3, '1970-01-01', '2015-01-15', 0.5);
-INSERT INTO discounts VALUES (3, '1970-01-01', '2015-01-10', 0.6);
-
-SELECT * FROM discounts ORDER BY advertiser_id, discount_start_date;
-┌─advertiser_id─┬─discount_start_date─┬─discount_end_date─┬─amount─┐
-│             1 │          2015-01-01 │              ᴺᵁᴸᴸ │    0.1 │
-│             1 │          2015-01-15 │              ᴺᵁᴸᴸ │    0.2 │
-│             2 │          2015-01-01 │        2015-01-15 │    0.3 │
-│             2 │          2015-01-04 │        2015-01-10 │    0.4 │
-│             3 │          1970-01-01 │        2015-01-15 │    0.5 │
-│             3 │          1970-01-01 │        2015-01-10 │    0.6 │
-└───────────────┴─────────────────────┴───────────────────┴────────┘
-
--- RANGE_LOOKUP_STRATEGY 'max'
-
-CREATE DICTIONARY discounts_dict
-(
-    advertiser_id UInt64,
-    discount_start_date Date,
-    discount_end_date Nullable(Date),
-    amount Float64
-)
-PRIMARY KEY advertiser_id
-SOURCE(CLICKHOUSE(TABLE discounts))
-LIFETIME(MIN 600 MAX 900)
-LAYOUT(RANGE_HASHED(RANGE_LOOKUP_STRATEGY 'max'))
-RANGE(MIN discount_start_date MAX discount_end_date);
-
-select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-14')) res;
-┌─res─┐
-│ 0.1 │ -- only one range matches: 2015-01-01 - Null
-└─────┘
-
-select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-16')) res;
-┌─res─┐
-│ 0.2 │ -- two ranges match, range_min 2015-01-15 (0.2) is bigger than 2015-01-01 (0.1)
-└─────┘
-
-select dictGet('discounts_dict', 'amount', 2, toDate('2015-01-06')) res;
-┌─res─┐
-│ 0.4 │ -- two ranges match, range_min 2015-01-04 (0.4) is bigger than 2015-01-01 (0.3)
-└─────┘
-
-select dictGet('discounts_dict', 'amount', 3, toDate('2015-01-01')) res;
-┌─res─┐
-│ 0.5 │ -- two ranges match, the range_min values are equal, range_max 2015-01-15 (0.5) is bigger than 2015-01-10 (0.6)
-└─────┘
-
-DROP DICTIONARY discounts_dict;
-
--- RANGE_LOOKUP_STRATEGY 'min'
-
-CREATE DICTIONARY discounts_dict
-(
-    advertiser_id UInt64,
-    discount_start_date Date,
-    discount_end_date Nullable(Date),
-    amount Float64
-)
-PRIMARY KEY advertiser_id
-SOURCE(CLICKHOUSE(TABLE discounts))
-LIFETIME(MIN 600 MAX 900)
-LAYOUT(RANGE_HASHED(RANGE_LOOKUP_STRATEGY 'min'))
-RANGE(MIN discount_start_date MAX discount_end_date);
-
-select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-14')) res;
-┌─res─┐
-│ 0.1 │ -- only one range matches: 2015-01-01 - Null
-└─────┘
-
-select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-16')) res;
-┌─res─┐
-│ 0.1 │ -- two ranges match, range_min 2015-01-01 (0.1) is less than 2015-01-15 (0.2)
-└─────┘
-
-select dictGet('discounts_dict', 'amount', 2, toDate('2015-01-06')) res;
-┌─res─┐
-│ 0.3 │ -- two ranges match, range_min 2015-01-01 (0.3) is less than 2015-01-04 (0.4)
-└─────┘
-
-select dictGet('discounts_dict', 'amount', 3, toDate('2015-01-01')) res;
-┌─res─┐
-│ 0.6 │ -- two ranges match, the range_min values are equal, range_max 2015-01-10 (0.6) is less than 2015-01-15 (0.5)
-└─────┘
-```
-
-### complex_key_range_hashed
-
-The dictionary is stored in memory in the form of a hash table with an ordered array of ranges and their corresponding values (see [range_hashed](#range-hashed)). This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md).
-
-Configuration example:
-
-``` sql
-CREATE DICTIONARY range_dictionary
-(
-  CountryID UInt64,
-  CountryKey String,
-  StartDate Date,
-  EndDate Date,
-  Tax Float64 DEFAULT 0.2
-)
-PRIMARY KEY CountryID, CountryKey
-SOURCE(CLICKHOUSE(TABLE 'date_table'))
-LIFETIME(MIN 1 MAX 1000)
-LAYOUT(COMPLEX_KEY_RANGE_HASHED())
-RANGE(MIN StartDate MAX EndDate);
-```
-
-### cache
-
-The dictionary is stored in a cache that has a fixed number of cells. These cells contain frequently used elements.
-
-The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.
-
-When searching in a dictionary, the cache is searched first. For each block of data, all keys that are not found in the cache or are outdated are requested from the source using `SELECT attrs... FROM db.table WHERE id IN (k1, k2, ...)`. The received data is then written to the cache.
-
-If keys are not found in the dictionary, a cache-update task is created and added to the update queue. Update queue properties can be controlled with the settings `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds`, `max_threads_for_updates`.
-
-For cache dictionaries, the expiration [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell’s value is not used and the key becomes expired. The key is re-requested the next time it needs to be used. This behaviour can be configured with the setting `allow_read_expired_keys`.
-
-This is the least effective of all the ways to store dictionaries. The speed of the cache depends strongly on correct settings and the usage scenario. A cache type dictionary performs well only when the hit rates are high enough (recommended 99% and higher). You can view the average hit rate in the [system.dictionaries](../../../operations/system-tables/dictionaries.md) table.
-
-If the setting `allow_read_expired_keys` is set to `1` (it is `0` by default), the dictionary supports asynchronous updates: if a client requests keys that are all in the cache but some of them are expired, the dictionary returns the expired keys to the client and requests them asynchronously from the source.
-
-To improve cache performance, use a subquery with `LIMIT`, and call the function with the dictionary externally.
-
-All types of sources are supported.
-
-Example of settings:
-
-``` xml
-<layout>
-    <cache>
-        <!-- The size of the cache, in number of cells. -->
-        <size_in_cells>1000000000</size_in_cells>
-        <!-- Allows to read expired keys. -->
-        <allow_read_expired_keys>0</allow_read_expired_keys>
-        <!-- Max size of the update queue. -->
-        <max_update_queue_size>100000</max_update_queue_size>
-        <!-- Max timeout in milliseconds for pushing an update task into the queue. -->
-        <update_queue_push_timeout_milliseconds>10</update_queue_push_timeout_milliseconds>
-        <!-- Max wait timeout in milliseconds for an update task to complete. -->
-        <query_wait_timeout_milliseconds>60000</query_wait_timeout_milliseconds>
-        <!-- Max threads for cache dictionary update. -->
-        <max_threads_for_updates>4</max_threads_for_updates>
-    </cache>
-</layout>
-```
-
-or
-
-``` sql
-LAYOUT(CACHE(SIZE_IN_CELLS 1000000000))
-```
-
-Set a large enough cache size. You need to experiment to select the number of cells:
-
-1. Set some value.
-2. Run queries until the cache is completely full.
-3. Assess memory consumption using the `system.dictionaries` table.
-4. Increase or decrease the number of cells until the required memory consumption is reached.
-
-:::warning
-Do not use ClickHouse as a source, because it is slow to process queries with random reads.
-:::
-
-### complex_key_cache
-
-This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Similar to `cache`.
-
-### ssd_cache
-
-Similar to `cache`, but stores data on SSD and the index in RAM. All cache dictionary settings related to the update queue can also be applied to SSD cache dictionaries.
-
-The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.
-
-``` xml
-<layout>
-    <ssd_cache>
-        <!-- Size of an elementary read block in bytes. Recommended to be equal to the SSD's page size. -->
-        <block_size>4096</block_size>
-        <!-- Max cache file size in bytes. -->
-        <file_size>16777216</file_size>
-        <!-- Size of the RAM buffer in bytes for reading elements from SSD. -->
-        <read_buffer_size>131072</read_buffer_size>
-        <!-- Size of the RAM buffer in bytes for aggregating elements before flushing to SSD. -->
-        <write_buffer_size>1048576</write_buffer_size>
-        <!-- Path where the cache file will be stored. -->
-        <path>/var/lib/clickhouse/user_files/test_dict</path>
-    </ssd_cache>
-</layout>
-```
-
-or
-
-``` sql
-LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576
-    PATH '/var/lib/clickhouse/user_files/test_dict'))
-```
-
-### complex_key_ssd_cache
-
-This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Similar to `ssd_cache`.
-
-### direct
-
-The dictionary is not stored in memory and directly goes to the source during the processing of a request.
-
-The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.
-
-All types of [sources](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), except local files, are supported.
-
-Configuration example:
-
-``` xml
-<layout>
-  <direct />
-</layout>
-```
-
-or
-
-``` sql
-LAYOUT(DIRECT())
-```
-
-### complex_key_direct
-
-This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Similar to `direct`.
-
-### ip_trie
-
-This type of storage is for mapping network prefixes (IP addresses) to metadata such as ASN.
-
-**Example**
-
-Suppose we have a table in ClickHouse that contains our IP prefixes and mappings:
-
-```sql
-CREATE TABLE my_ip_addresses (
-    prefix String,
-    asn UInt32,
-    cca2 String
-)
-ENGINE = MergeTree
-PRIMARY KEY prefix;
-```
-
-```sql
-INSERT INTO my_ip_addresses VALUES
-    ('202.79.32.0/20', 17501, 'NP'),
-    ('2620:0:870::/48', 3856, 'US'),
-    ('2a02:6b8:1::/48', 13238, 'RU'),
-    ('2001:db8::/32', 65536, 'ZZ')
-;
-```
-
-Let's define an `ip_trie` dictionary for this table. The `ip_trie` layout requires a composite key:
-
-``` xml
-<structure>
-    <key>
-        <attribute>
-            <name>prefix</name>
-            <type>String</type>
-        </attribute>
-    </key>
-    <attribute>
-        <name>asn</name>
-        <type>UInt32</type>
-        <null_value />
-    </attribute>
-    <attribute>
-        <name>cca2</name>
-        <type>String</type>
-        <null_value>??</null_value>
-    </attribute>
-    ...
-</structure>
-<layout>
-    <ip_trie>
-        <access_to_key_from_attributes>true</access_to_key_from_attributes>
-    </ip_trie>
-</layout>
-```
-
-or
-
-``` sql
-CREATE DICTIONARY my_ip_trie_dictionary (
-    prefix String,
-    asn UInt32,
-    cca2 String DEFAULT '??'
-)
-PRIMARY KEY prefix
-SOURCE(CLICKHOUSE(TABLE 'my_ip_addresses'))
-LAYOUT(IP_TRIE)
-LIFETIME(3600);
-```
-
-The key must have only one `String` type attribute that contains an allowed IP prefix. Other types are not supported yet.
-
-For queries, you must use the same functions (`dictGetT` with a tuple) as for dictionaries with composite keys. The syntax is:
-
-``` sql
-dictGetT('dict_name', 'attr_name', tuple(ip))
-```
-
-The function takes either `UInt32` for IPv4, or `FixedString(16)` for IPv6. For example:
-
-``` sql
-select dictGet('my_ip_trie_dictionary', 'asn', tuple(IPv6StringToNum('2001:db8::1')))
-```
-
-Other types are not supported yet. The function returns the attribute for the prefix that corresponds to this IP address. If there are overlapping prefixes, the most specific one is returned.
-
-Data must completely fit into RAM.
-
-## Related Content
-
-- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse)
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
deleted file mode 100644
index 8e9dbd392aa..00000000000
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
+++ /dev/null
@@ -1,142 +0,0 @@
----
-slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime
-sidebar_position: 42
-sidebar_label: Dictionary Updates
----
-import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md';
-
-# Dictionary Updates
-
-ClickHouse periodically updates the dictionaries. The update interval for fully downloaded dictionaries and the invalidation interval for cached dictionaries are defined in the `lifetime` tag in seconds.
-
-Dictionary updates (other than loading for first use) do not block queries. During updates, the old version of a dictionary is used. If an error occurs during an update, the error is written to the server log, and queries continue using the old version of dictionaries.
-
-Example of settings:
-
-<CloudDetails />
-
-``` xml
-<dictionary>
-    ...
-    <lifetime>300</lifetime>
-    ...
-</dictionary>
-```
-
-or
-
-``` sql
-CREATE DICTIONARY (...)
-...
-LIFETIME(300)
-...
-```
-
-Setting `<lifetime>0</lifetime>` (`LIFETIME(0)`) prevents dictionaries from updating.
-
-You can set a time interval for updates, and ClickHouse will choose a uniformly random time within this range. This is necessary in order to distribute the load on the dictionary source when updating on a large number of servers.
-
-Example of settings:
-
-``` xml
-<dictionary>
-    ...
-    <lifetime>
-        <min>300</min>
-        <max>360</max>
-    </lifetime>
-    ...
-</dictionary>
-```
-
-or
-
-``` sql
-LIFETIME(MIN 300 MAX 360)
-```
-
-If `<min>0</min>` and `<max>0</max>`, ClickHouse does not reload the dictionary by timeout.
-In this case, ClickHouse can reload the dictionary earlier if the dictionary configuration file was changed or the `SYSTEM RELOAD DICTIONARY` command was executed.
-
-When updating the dictionaries, the ClickHouse server applies different logic depending on the type of [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md):
-
-- For a text file, it checks the time of modification. If the time differs from the previously recorded time, the dictionary is updated.
-- For MySQL source, the time of modification is checked using a `SHOW TABLE STATUS` query (in case of MySQL 8 you need to disable meta-information caching in MySQL by `set global information_schema_stats_expiry=0`).
-- Dictionaries from other sources are updated every time by default.
-
-For other sources (ODBC, PostgreSQL, ClickHouse, etc), you can set up a query that will update the dictionaries only if they really changed, rather than each time. To do this, follow these steps:
-
-- The dictionary table must have a field that always changes when the source data is updated.
-- The settings of the source must specify a query that retrieves the changing field. The ClickHouse server interprets the query result as a row, and if this row has changed relative to its previous state, the dictionary is updated. Specify the query in the `<invalidate_query>` field in the settings for the [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md).
-
-Example of settings:
-
-``` xml
-<dictionary>
-    ...
-    <odbc>
-        ...
-        <invalidate_query>SELECT update_time FROM dictionary_source where id = 1</invalidate_query>
-    </odbc>
-    ...
-</dictionary>
-```
-
-or
-
-``` sql
-...
-SOURCE(ODBC(... invalidate_query 'SELECT update_time FROM dictionary_source where id = 1'))
-...
-```
-
-For `Cache`, `ComplexKeyCache`, `SSDCache`, and `SSDComplexKeyCache` dictionaries both synchronous and asynchronous updates are supported.
-
-It is also possible for `Flat`, `Hashed`, `ComplexKeyHashed` dictionaries to only request data that was changed after the previous update. If `update_field` is specified as part of the dictionary source configuration, the value of the previous update time in seconds will be added to the data request. Depending on the source type (Executable, HTTP, MySQL, PostgreSQL, ClickHouse, or ODBC), different logic is applied to `update_field` before requesting data from an external source.
-
-- If the source is HTTP then `update_field` will be added as a query parameter with the last update time as the parameter value.
-- If the source is Executable then `update_field` will be added as an executable script argument with the last update time as the argument value.
-- If the source is ClickHouse, MySQL, PostgreSQL, ODBC there will be an additional part of `WHERE`, where `update_field` is compared as greater or equal with the last update time.
-  - By default, this `WHERE` condition is checked at the top level of the SQL query. Alternatively, the condition can be checked in any other `WHERE` clause within the query using the `{condition}` keyword. Example:
-    ```sql
-    ...
-    SOURCE(CLICKHOUSE(...
-        update_field 'added_time'
-        QUERY '
-            SELECT my_arr.1 AS x, my_arr.2 AS y, creation_time
-            FROM (
-                SELECT arrayZip(x_arr, y_arr) AS my_arr, creation_time
-                FROM dictionary_source
-                WHERE {condition}
-            )'
-    ))
-    ...
-    ```
-
-If the `update_field` option is set, the additional option `update_lag` can be set. The value of `update_lag` is subtracted from the previous update time before requesting updated data.
-
-Example of settings:
-
-``` xml
-<dictionary>
-    ...
-    <clickhouse>
-        ...
-        <update_field>added_time</update_field>
-        <update_lag>15</update_lag>
-    </clickhouse>
-    ...
-</dictionary>
-```
-
-or
-
-``` sql
-...
-SOURCE(CLICKHOUSE(... update_field 'added_time' update_lag 15))
-...
-```
-
-## Related Content
-
-- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse)
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md
deleted file mode 100644
index 8ef19a181e7..00000000000
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md
+++ /dev/null
@@ -1,140 +0,0 @@
----
-slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon
-sidebar_position: 46
-sidebar_label: Polygon Dictionaries With Grids
-title: "Polygon dictionaries"
----
-import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md';
-
-Polygon dictionaries allow you to efficiently search for the polygon containing specified points.
-For example: defining a city area by geographical coordinates.
-
-Example of a polygon dictionary configuration:
-
-<CloudDetails />
-
-``` xml
-<dictionary>
-    <structure>
-        <key>
-            <attribute>
-                <name>key</name>
-                <type>Array(Array(Array(Array(Float64))))</type>
-            </attribute>
-        </key>
-
-        <attribute>
-            <name>name</name>
-            <type>String</type>
-            <null_value></null_value>
-        </attribute>
-
-        <attribute>
-            <name>value</name>
-            <type>UInt64</type>
-            <null_value>0</null_value>
-        </attribute>
-    </structure>
-
-    <layout>
-        <polygon>
-            <store_polygon_key_column>1</store_polygon_key_column>
-        </polygon>
-    </layout>
-
-    ...
-</dictionary>
-```
-
-The corresponding [DDL-query](../../../sql-reference/statements/create/dictionary.md#create-dictionary-query):
-``` sql
-CREATE DICTIONARY polygon_dict_name (
-    key Array(Array(Array(Array(Float64)))),
-    name String,
-    value UInt64
-)
-PRIMARY KEY key
-LAYOUT(POLYGON(STORE_POLYGON_KEY_COLUMN 1))
-...
-```
-
-When configuring the polygon dictionary, the key must have one of two types:
-
-- A simple polygon. It is an array of points.
-- MultiPolygon. It is an array of polygons. Each polygon is a two-dimensional array of points. The first element of this array is the outer boundary of the polygon, and subsequent elements specify areas to be excluded from it.
-
-Points can be specified as an array or a tuple of their coordinates. In the current implementation, only two-dimensional points are supported.
-
-The user can [upload their own data](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) in all formats supported by ClickHouse.
-
-There are 3 types of [in-memory storage](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) available:
-
-- `POLYGON_SIMPLE`. This is a naive implementation, where a linear pass through all polygons is made for each query, and membership is checked for each one without using additional indexes.
-
-- `POLYGON_INDEX_EACH`. A separate index is built for each polygon, which allows you to quickly check whether a point belongs to it in most cases (optimized for geographical regions).
-Also, a grid is superimposed on the area under consideration, which significantly narrows the number of polygons under consideration.
-The grid is created by recursively dividing the cell into 16 equal parts and is configured with two parameters.
-The division stops when the recursion depth reaches `MAX_DEPTH` or when the cell crosses no more than `MIN_INTERSECTIONS` polygons.
-To answer a query, the corresponding cell is located, and the indexes of the polygons stored in it are accessed in turn.
-
-- `POLYGON_INDEX_CELL`. This placement also creates the grid described above. The same options are available. For each leaf cell, an index is built on all pieces of polygons that fall into it, which allows you to quickly respond to a request.
-
-- `POLYGON`. Synonym to `POLYGON_INDEX_CELL`.
-
-Dictionary queries are carried out using standard [functions](../../../sql-reference/functions/ext-dict-functions.md) for working with dictionaries.
-An important difference is that here the keys will be the points for which you want to find the polygon containing them.
-
-**Example**
-
-Example of working with the dictionary defined above:
-
-``` sql
-CREATE TABLE points (
-    x Float64,
-    y Float64
-)
-...
-SELECT tuple(x, y) AS key, dictGet(dict_name, 'name', key), dictGet(dict_name, 'value', key) FROM points ORDER BY x, y;
-```
-
-As a result of executing the last command for each point in the 'points' table, the minimum-area polygon containing that point will be found, and the requested attributes will be output.
-
-**Example**
-
-You can read columns from polygon dictionaries via a SELECT query; just turn on `store_polygon_key_column = 1` in the dictionary configuration or the corresponding DDL query.
-
-Query:
-
-``` sql
-CREATE TABLE polygons_test_table
-(
-    key Array(Array(Array(Tuple(Float64, Float64)))),
-    name String
-) ENGINE = TinyLog;
-
-INSERT INTO polygons_test_table VALUES ([[[(3, 1), (0, 1), (0, -1), (3, -1)]]], 'Value');
-
-CREATE DICTIONARY polygons_test_dictionary
-(
-    key Array(Array(Array(Tuple(Float64, Float64)))),
-    name String
-)
-PRIMARY KEY key
-SOURCE(CLICKHOUSE(TABLE 'polygons_test_table'))
-LAYOUT(POLYGON(STORE_POLYGON_KEY_COLUMN 1))
-LIFETIME(0);
-
-SELECT * FROM polygons_test_dictionary;
-```
-
-Result:
-
-``` text
-┌─key─────────────────────────────┬─name──┐
-│ [[[(3,1),(0,1),(0,-1),(3,-1)]]] │ Value │
-└─────────────────────────────────┴───────┘
-```
-
-## Related Content
-
-- [Exploring massive, real-world data sets: 100+ Years of Weather Records in ClickHouse](https://clickhouse.com/blog/real-world-data-noaa-climate-data)
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md
deleted file mode 100644
index 897945a6d9d..00000000000
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md
+++ /dev/null
@@ -1,847 +0,0 @@
----
-slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources
-sidebar_position: 43
-sidebar_label: Dictionary Sources
----
-import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md';
-
-# Dictionary Sources
-
-<CloudDetails />
-
-A dictionary can be connected to ClickHouse from many different sources.
-
-If the dictionary is configured using an XML file, the configuration looks like this:
-
-``` xml
-<dictionary>
-    ...
-    <source>
-        <source_type>
-            <!-- Source configuration -->
-        </source_type>
-    </source>
-    ...
-</dictionary>
-```
-
-In case of [DDL-query](../../../sql-reference/statements/create/dictionary.md), the configuration described above will look like:
-
-``` sql
-CREATE DICTIONARY dict_name (...)
-...
-SOURCE(SOURCE_TYPE(param1 val1 ... paramN valN)) -- Source configuration
-...
-```
-
-The source is configured in the `source` section.
-
-For source types [Local file](#dicts-external_dicts_dict_sources-local_file), [Executable file](#dicts-external_dicts_dict_sources-executable), [HTTP(s)](#dicts-external_dicts_dict_sources-http), [ClickHouse](#dicts-external_dicts_dict_sources-clickhouse)
-optional settings are available:
-
-``` xml
-<source>
-  <file>
-    <path>/opt/dictionaries/os.tsv</path>
-    <format>TabSeparated</format>
-  </file>
-  <settings>
-      <format_csv_allow_single_quotes>0</format_csv_allow_single_quotes>
-  </settings>
-</source>
-```
-
-or
-
-``` sql
-SOURCE(FILE(path './user_files/os.tsv' format 'TabSeparated'))
-SETTINGS(format_csv_allow_single_quotes = 0)
-```
-
-Types of sources (`source_type`):
-
-- [Local file](#dicts-external_dicts_dict_sources-local_file)
-- [Executable File](#dicts-external_dicts_dict_sources-executable)
-- [Executable Pool](#dicts-external_dicts_dict_sources-executable_pool)
-- [HTTP(s)](#dicts-external_dicts_dict_sources-http)
-- DBMS
-  - [ODBC](#odbc)
-  - [MySQL](#mysql)
-  - [ClickHouse](#clickhouse)
-  - [MongoDB](#mongodb)
-  - [Redis](#redis)
-  - [Cassandra](#cassandra)
-  - [PostgreSQL](#postgresql)
-
-## Local File
-
-Example of settings:
-
-``` xml
-<source>
-  <file>
-    <path>/opt/dictionaries/os.tsv</path>
-    <format>TabSeparated</format>
-  </file>
-</source>
-```
-
-or
-
-``` sql
-SOURCE(FILE(path './user_files/os.tsv' format 'TabSeparated'))
-```
-
-Setting fields:
-
-- `path` – The absolute path to the file.
-- `format` – The file format. All the formats described in [Formats](../../../interfaces/formats.md#formats) are supported.
-
-When a dictionary with source `FILE` is created via DDL command (`CREATE DICTIONARY ...`), the source file needs to be located in the `user_files` directory to prevent DB users from accessing arbitrary files on the ClickHouse node.
-
-**See Also**
-
-- [Dictionary function](../../../sql-reference/table-functions/dictionary.md#dictionary-function)
-
-## Executable File
-
-Working with executable files depends on [how the dictionary is stored in memory](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request to the executable file’s STDIN. Otherwise, ClickHouse starts the executable file and treats its output as dictionary data.
-
-Example of settings:
-
-``` xml
-<source>
-    <executable>
-        <command>cat /opt/dictionaries/os.tsv</command>
-        <format>TabSeparated</format>
-        <implicit_key>false</implicit_key>
-    </executable>
-</source>
-```
-
-Setting fields:
-
-- `command` — The absolute path to the executable file, or the file name (if the command's directory is in the `PATH`).
-- `format` — The file format. All the formats described in [Formats](../../../interfaces/formats.md#formats) are supported.
-- `command_termination_timeout` — The executable script should contain a main read-write loop. After the dictionary is destroyed, the pipe is closed, and the executable file will have `command_termination_timeout` seconds to shut down before ClickHouse sends a SIGTERM signal to the child process. `command_termination_timeout` is specified in seconds. Default value is 10. Optional parameter.
-- `command_read_timeout` - Timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
-- `command_write_timeout` - Timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
-- `implicit_key` — The executable source file can return only values, and the correspondence to the requested keys is determined implicitly — by the order of rows in the result. Default value is false.
-- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside the user_scripts folder specified by [user_scripts_path](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using a whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as an argument for `bin/sh -c`. Default value is `0`. Optional parameter.
-- `send_chunk_header` - Controls whether to send the row count before sending a chunk of data to process. Optional. Default value is `false`.
-
-That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled; otherwise, the DB user would be able to execute arbitrary binaries on the ClickHouse node.
-
-## Executable Pool
-
-Executable pool allows loading data from a pool of processes. This source does not work with dictionary layouts that need to load all data from the source. Executable pool works if the dictionary [is stored](external-dicts-dict-layout.md#ways-to-store-dictionaries-in-memory) using `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache`, `direct`, or `complex_key_direct` layouts.
-
-Executable pool will spawn a pool of processes with the specified command and keep them running until they exit. The program should read data from STDIN while it is available and output the result to STDOUT. It can wait for the next block of data on STDIN. ClickHouse will not close STDIN after processing a block of data, but will pipe another chunk of data when needed. The executable script should be ready for this way of data processing — it should poll STDIN and flush data to STDOUT early.
-
-Example of settings:
-
-``` xml
-<source>
-    <executable_pool>
-        <command>while read key; do printf "$key\tData for key $key\n"; done</command>
-        <format>TabSeparated</format>
-        <pool_size>10</pool_size>
-        <max_command_execution_time>10</max_command_execution_time>
-        <implicit_key>false</implicit_key>
-    </executable_pool>
-</source>
-```
-
-Setting fields:
-
-- `command` — The absolute path to the executable file, or the file name (if the program directory is written to `PATH`).
-- `format` — The file format. All the formats described in “[Formats](../../../interfaces/formats.md#formats)” are supported.
-- `pool_size` — Size of the pool. If 0 is specified as `pool_size` then there are no pool size restrictions. Default value is `16`.
-- `command_termination_timeout` — The executable script should contain a main read-write loop. After the dictionary is destroyed, the pipe is closed, and the executable file will have `command_termination_timeout` seconds to shut down before ClickHouse sends a SIGTERM signal to the child process. Specified in seconds. Default value is 10. Optional parameter.
-- `max_command_execution_time` — Maximum executable script command execution time for processing a block of data. Specified in seconds. Default value is 10. Optional parameter.
-- `command_read_timeout` - Timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
-- `command_write_timeout` - Timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
-- `implicit_key` — The executable source file can return only values, and the correspondence to the requested keys is determined implicitly — by the order of rows in the result. Default value is false. Optional parameter.
-- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside the user_scripts folder specified by [user_scripts_path](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using a whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as an argument for `bin/sh -c`. Default value is `1`. Optional parameter.
-- `send_chunk_header` - Controls whether to send the row count before sending a chunk of data to process. Optional. Default value is `false`.
-
-That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled; otherwise, the DB user would be able to execute arbitrary binaries on the ClickHouse node.
-
-## HTTP(s)
-
-Working with an HTTP(s) server depends on [how the dictionary is stored in memory](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method.
-
-Example of settings:
-
-``` xml
-<source>
-    <http>
-        <url>http://[::1]/os.tsv</url>
-        <format>TabSeparated</format>
-        <credentials>
-            <user>user</user>
-            <password>password</password>
-        </credentials>
-        <headers>
-            <header>
-                <name>API-KEY</name>
-                <value>key</value>
-            </header>
-        </headers>
-    </http>
-</source>
-```
-
-or
-
-``` sql
-SOURCE(HTTP(
-    url 'http://[::1]/os.tsv'
-    format 'TabSeparated'
-    credentials(user 'user' password 'password')
-    headers(header(name 'API-KEY' value 'key'))
-))
-```
-
-In order for ClickHouse to access an HTTPS resource, you must [configure openSSL](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-openssl) in the server configuration.
-
-Setting fields:
-
-- `url` – The source URL.
-- `format` – The file format. All the formats described in “[Formats](../../../interfaces/formats.md#formats)” are supported.
-- `credentials` – Basic HTTP authentication. Optional parameter.
-- `user` – Username required for the authentication.
-- `password` – Password required for the authentication.
-- `headers` – All custom HTTP header entries used for the HTTP request. Optional parameter.
-- `header` – Single HTTP header entry.
-- `name` – Identifier name used for the header sent in the request.
-- `value` – Value set for a specific identifier name.
-
-When creating a dictionary using the DDL command (`CREATE DICTIONARY ...`), remote hosts for HTTP dictionaries are checked against the contents of the `remote_url_allow_hosts` section from the config to prevent database users from accessing an arbitrary HTTP server.
-
-### Known Vulnerability of the ODBC Dictionary Functionality
-
-:::note
-When connecting to the database through the ODBC driver, the connection parameter `Servername` can be substituted. In this case values of `USERNAME` and `PASSWORD` from `odbc.ini` are sent to the remote server and can be compromised.
-:::
-
-**Example of insecure use**
-
-Let’s configure unixODBC for PostgreSQL. Content of `/etc/odbc.ini`:
-
-``` text
-[gregtest]
-Driver = /usr/lib/psqlodbca.so
-Servername = localhost
-PORT = 5432
-DATABASE = test_db
-#OPTION = 3
-USERNAME = test
-PASSWORD = test
-```
-
-If you then make a query such as
-
-``` sql
-SELECT * FROM odbc('DSN=gregtest;Servername=some-server.com', 'test_db');
-```
-
-the ODBC driver will send values of `USERNAME` and `PASSWORD` from `odbc.ini` to `some-server.com`.
-
-### Example of Connecting PostgreSQL
-
-Ubuntu OS.
-
-Installing unixODBC and the ODBC driver for PostgreSQL:
-
-``` bash
-$ sudo apt-get install -y unixodbc odbcinst odbc-postgresql
-```
-
-Configuring `/etc/odbc.ini` (or `~/.odbc.ini` if you signed in under a user that runs ClickHouse):
-
-``` text
-    [DEFAULT]
-    Driver = myconnection
-
-    [myconnection]
-    Description         = PostgreSQL connection to my_db
-    Driver              = PostgreSQL Unicode
-    Database            = my_db
-    Servername          = 127.0.0.1
-    UserName            = username
-    Password            = password
-    Port                = 5432
-    Protocol            = 9.3
-    ReadOnly            = No
-    RowVersioning       = No
-    ShowSystemTables    = No
-    ConnSettings        =
-```
-
-The dictionary configuration in ClickHouse:
-
-``` xml
-<dictionary>
-    <name>table_name</name>
-    <source>
-        <odbc>
-            <connection_string>DSN=myconnection</connection_string>
-            <table>postgresql_table</table>
-        </odbc>
-    </source>
-    <lifetime>
-        <min>300</min>
-        <max>360</max>
-    </lifetime>
-    <layout>
-        <hashed/>
-    </layout>
-    <structure>
-        <id>
-            <name>id</name>
-        </id>
-        <attribute>
-            <name>some_column</name>
-            <type>UInt64</type>
-            <null_value>0</null_value>
-        </attribute>
-    </structure>
-</dictionary>
-```
-
-or
-
-``` sql
-CREATE DICTIONARY table_name (
-    id UInt64,
-    some_column UInt64 DEFAULT 0
-)
-PRIMARY KEY id
-SOURCE(ODBC(connection_string 'DSN=myconnection' table 'postgresql_table'))
-LAYOUT(HASHED())
-LIFETIME(MIN 300 MAX 360)
-```
-
-You may need to edit `odbc.ini` to specify the full path to the library with the driver `DRIVER=/usr/local/lib/psqlodbcw.so`.
-
-### Example of Connecting MS SQL Server
-
-Ubuntu OS.
-
-Installing the ODBC driver for connecting to MS SQL:
-
-``` bash
-$ sudo apt-get install tdsodbc freetds-bin sqsh
-```
-
-Configuring the driver:
-
-```bash
-    $ cat /etc/freetds/freetds.conf
-    ...
-
-    [MSSQL]
-    host = 192.168.56.101
-    port = 1433
-    tds version = 7.0
-    client charset = UTF-8
-
-    # test TDS connection
-    $ sqsh -S MSSQL -D database -U user -P password
-
-
-    $ cat /etc/odbcinst.ini
-
-    [FreeTDS]
-    Description     = FreeTDS
-    Driver          = /usr/lib/x86_64-linux-gnu/odbc/libtdsodbc.so
-    Setup           = /usr/lib/x86_64-linux-gnu/odbc/libtdsS.so
-    FileUsage       = 1
-    UsageCount      = 5
-
-    $ cat /etc/odbc.ini
-    # $ cat ~/.odbc.ini # if you signed in under a user that runs ClickHouse
-
-    [MSSQL]
-    Description     = FreeTDS
-    Driver          = FreeTDS
-    Servername      = MSSQL
-    Database        = test
-    UID             = test
-    PWD             = test
-    Port            = 1433
-
-
-    # (optional) test ODBC connection (to use the isql tool, install the [unixodbc](https://packages.debian.org/sid/unixodbc) package)
-    $ isql -v MSSQL "user" "password"
-```
-
-Remarks:
-- To determine the earliest TDS version that is supported by a particular SQL Server version, refer to the product documentation or look at [MS-TDS Product Behavior](https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-tds/135d0ebe-5c4c-4a94-99bf-1811eccb9f4a)
-
-Configuring the dictionary in ClickHouse:
-
-``` xml
-<dictionary>
-    <name>test</name>
-    <source>
-        <odbc>
-            <table>dict</table>
-            <connection_string>DSN=MSSQL;UID=test;PWD=test</connection_string>
-        </odbc>
-    </source>
-    <lifetime>
-        <min>300</min>
-        <max>360</max>
-    </lifetime>
-    <layout>
-        <flat />
-    </layout>
-    <structure>
-        <id>
-            <name>k</name>
-        </id>
-        <attribute>
-            <name>s</name>
-            <type>String</type>
-            <null_value></null_value>
-        </attribute>
-    </structure>
-</dictionary>
-```
-
-or
-
-``` sql
-CREATE DICTIONARY test (
-    k UInt64,
-    s String DEFAULT ''
-)
-PRIMARY KEY k
-SOURCE(ODBC(table 'dict' connection_string 'DSN=MSSQL;UID=test;PWD=test'))
-LAYOUT(FLAT())
-LIFETIME(MIN 300 MAX 360)
-```
-
-## DBMS
-
-### ODBC
-
-You can use this method to connect to any database that has an ODBC driver.
-
-Example of settings:
-
-``` xml
-<source>
-    <odbc>
-        <db>DatabaseName</db>
-        <table>SchemaName.TableName</table>
-        <connection_string>DSN=some_parameters</connection_string>
-        <invalidate_query>SQL_QUERY</invalidate_query>
-        <query>SELECT id, value_1, value_2 FROM SchemaName.TableName</query>
-    </odbc>
- -``` - -or - -``` sql -SOURCE(ODBC( - db 'DatabaseName' - table 'SchemaName.TableName' - connection_string 'DSN=some_parameters' - invalidate_query 'SQL_QUERY' - query 'SELECT id, value_1, value_2 FROM db_name.table_name' -)) -``` - -Setting fields: - -- `db` – Name of the database. Omit it if the database name is set in the `` parameters. -- `table` – Name of the table and schema if exists. -- `connection_string` – Connection string. -- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). -- `query` – The custom query. Optional parameter. - -:::note -The `table` and `query` fields cannot be used together. And either one of the `table` or `query` fields must be declared. -::: - -ClickHouse receives quoting symbols from ODBC-driver and quote all settings in queries to driver, so it’s necessary to set table name accordingly to table name case in database. - -If you have a problems with encodings when using Oracle, see the corresponding [FAQ](../../../faq/integration/oracle-odbc.md) item. - -### Mysql - -Example of settings: - -``` xml - - - 3306 - clickhouse - qwerty - - example01-1 - 1 - - - example01-2 - 1 - - db_name - table_name
- id=10 - SQL_QUERY - true - SELECT id, value_1, value_2 FROM db_name.table_name -
- -``` - -or - -``` sql -SOURCE(MYSQL( - port 3306 - user 'clickhouse' - password 'qwerty' - replica(host 'example01-1' priority 1) - replica(host 'example01-2' priority 1) - db 'db_name' - table 'table_name' - where 'id=10' - invalidate_query 'SQL_QUERY' - fail_on_connection_loss 'true' - query 'SELECT id, value_1, value_2 FROM db_name.table_name' -)) -``` - -Setting fields: - -- `port` – The port on the MySQL server. You can specify it for all replicas, or for each one individually (inside ``). - -- `user` – Name of the MySQL user. You can specify it for all replicas, or for each one individually (inside ``). - -- `password` – Password of the MySQL user. You can specify it for all replicas, or for each one individually (inside ``). - -- `replica` – Section of replica configurations. There can be multiple sections. - - - `replica/host` – The MySQL host. - - `replica/priority` – The replica priority. When attempting to connect, ClickHouse traverses the replicas in order of priority. The lower the number, the higher the priority. - -- `db` – Name of the database. - -- `table` – Name of the table. - -- `where` – The selection criteria. The syntax for conditions is the same as for `WHERE` clause in MySQL, for example, `id > 10 AND id < 20`. Optional parameter. - -- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). - -- `fail_on_connection_loss` – The configuration parameter that controls behavior of the server on connection loss. If `true`, an exception is thrown immediately if the connection between client and server was lost. If `false`, the ClickHouse server retries to execute the query three times before throwing an exception. Note that retrying leads to increased response times. Default value: `false`. - -- `query` – The custom query. Optional parameter. - -:::note -The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared. -::: - -:::note -There is no explicit parameter `secure`. When establishing an SSL-connection security is mandatory. -::: - -MySQL can be connected to on a local host via sockets. To do this, set `host` and `socket`. - -Example of settings: - -``` xml - - - localhost - /path/to/socket/file.sock - clickhouse - qwerty - db_name - table_name
-        <where>id=10</where>
-        <invalidate_query>SQL_QUERY</invalidate_query>
-        <fail_on_connection_loss>true</fail_on_connection_loss>
-        <query>SELECT id, value_1, value_2 FROM db_name.table_name</query>
-    </mysql>
-</source>
-```
-
-or
-
-``` sql
-SOURCE(MYSQL(
-    host 'localhost'
-    socket '/path/to/socket/file.sock'
-    user 'clickhouse'
-    password 'qwerty'
-    db 'db_name'
-    table 'table_name'
-    where 'id=10'
-    invalidate_query 'SQL_QUERY'
-    fail_on_connection_loss 'true'
-    query 'SELECT id, value_1, value_2 FROM db_name.table_name'
-))
-```
-
-### ClickHouse
-
-Example of settings:
-
-``` xml
-<source>
-    <clickhouse>
-        <host>example01-01-1</host>
-        <port>9000</port>
-        <user>default</user>
-        <password></password>
-        <db>default</db>
-        <table>ids</table>
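-        <!-- Optional fields: `where` filters the loaded rows, and `secure` enables an
-             SSL connection (the TLS-enabled native port is typically 9440, not 9000). -->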
-        <where>id=10</where>
-        <secure>1</secure>
-        <query>SELECT id, value_1, value_2 FROM default.ids</query>
-    </clickhouse>
-</source>
-```
-
-or
-
-``` sql
-SOURCE(CLICKHOUSE(
-    host 'example01-01-1'
-    port 9000
-    user 'default'
-    password ''
-    db 'default'
-    table 'ids'
-    where 'id=10'
-    secure 1
-    query 'SELECT id, value_1, value_2 FROM default.ids'
-));
-```
-
-Setting fields:
-
-- `host` – The ClickHouse host. If it is a local host, the query is processed without any network activity. To improve fault tolerance, you can create a [Distributed](../../../engines/table-engines/special/distributed.md) table and enter it in subsequent configurations.
-- `port` – The port on the ClickHouse server.
-- `user` – Name of the ClickHouse user.
-- `password` – Password of the ClickHouse user.
-- `db` – Name of the database.
-- `table` – Name of the table.
-- `where` – The selection criteria. May be omitted.
-- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).
-- `secure` – Use SSL for the connection.
-- `query` – The custom query. Optional parameter.
-
-:::note
-The `table` or `where` fields cannot be used together with the `query` field. Either the `table` or the `query` field must be declared.
-:::
-
-### MongoDB
-
-Example of settings:
-
-``` xml
-<source>
-    <mongodb>
-        <host>localhost</host>
-        <port>27017</port>
-        <user></user>
-        <password></password>
-        <db>test</db>
-        <collection>dictionary_source</collection>
-    </mongodb>
-</source>
-```
-
-or
-
-``` sql
-SOURCE(MONGODB(
-    host 'localhost'
-    port 27017
-    user ''
-    password ''
-    db 'test'
-    collection 'dictionary_source'
-))
-```
-
-Setting fields:
-
-- `host` – The MongoDB host.
-- `port` – The port on the MongoDB server.
-- `user` – Name of the MongoDB user.
-- `password` – Password of the MongoDB user.
-- `db` – Name of the database.
-- `collection` – Name of the collection.
-
-### Redis
-
-Example of settings:
-
-``` xml
-<source>
-    <redis>
-        <host>localhost</host>
-        <port>6379</port>
-        <storage_type>simple</storage_type>
-        <db_index>0</db_index>
-    </redis>
-</source>
-```
-
-or
-
-``` sql
-SOURCE(REDIS(
-    host 'localhost'
-    port 6379
-    storage_type 'simple'
-    db_index 0
-))
-```
-
-Setting fields:
-
-- `host` – The Redis host.
-- `port` – The port on the Redis server.
-- `storage_type` – The structure of internal Redis storage used for working with keys. `simple` is for simple sources and for hashed single-key sources, `hash_map` is for hashed sources with two keys. Ranged sources and cache sources with complex keys are unsupported. May be omitted, default value is `simple`.
-- `db_index` – The specific numeric index of the Redis logical database. May be omitted, default value is 0.
-
-### Cassandra
-
-Example of settings:
-
-``` xml
-<source>
-    <cassandra>
-        <host>localhost</host>
-        <port>9042</port>
-        <user>username</user>
-        <password>qwerty123</password>
-        <keyspace>database_name</keyspace>
-        <column_family>table_name</column_family>
-        <allow_filtering>1</allow_filtering>
-        <partition_key_prefix>1</partition_key_prefix>
-        <consistency>One</consistency>
-        <where>"SomeColumn" = 42</where>
-        <max_threads>8</max_threads>
-        <query>SELECT id, value_1, value_2 FROM database_name.table_name</query>
-    </cassandra>
-</source>
-```
-
-Setting fields:
-
-- `host` – The Cassandra host or comma-separated list of hosts.
-- `port` – The port on the Cassandra servers. If not specified, default port 9042 is used.
-- `user` – Name of the Cassandra user.
-- `password` – Password of the Cassandra user.
-- `keyspace` – Name of the keyspace (database).
-- `column_family` – Name of the column family (table).
-- `allow_filtering` – Flag that allows or disallows potentially expensive conditions on clustering key columns. Default value is 1.
-- `partition_key_prefix` – Number of partition key columns in the primary key of the Cassandra table. Required for composite key dictionaries. The order of key columns in the dictionary definition must be the same as in Cassandra. Default value is 1 (the first key column is a partition key and the other key columns are clustering key).
-- `consistency` – Consistency level. Possible values: `One`, `Two`, `Three`, `All`, `EachQuorum`, `Quorum`, `LocalQuorum`, `LocalOne`, `Serial`, `LocalSerial`. Default value is `One`.
-- `where` – Optional selection criteria.
-- `max_threads` – The maximum number of threads to use for loading data from multiple partitions in composite key dictionaries.
-- `query` – The custom query. Optional parameter.
-
-:::note
-The `column_family` or `where` fields cannot be used together with the `query` field. Either the `column_family` or the `query` field must be declared.
-:::
-
-### PostgreSQL
-
-Example of settings:
-
-``` xml
-<source>
-    <postgresql>
-        <port>5432</port>
-        <user>clickhouse</user>
-        <password>qwerty</password>
-        <db>db_name</db>
-        <table>table_name</table>
-        <where>id=10</where>
-        <invalidate_query>SQL_QUERY</invalidate_query>
-        <query>SELECT id, value_1, value_2 FROM db_name.table_name</query>
-    </postgresql>
-</source>
-```
-
-or
-
-``` sql
-SOURCE(POSTGRESQL(
-    port 5432
-    host 'postgresql-hostname'
-    user 'postgres_user'
-    password 'postgres_password'
-    db 'db_name'
-    table 'table_name'
-    replica(host 'example01-1' port 5432 priority 1)
-    replica(host 'example01-2' port 5432 priority 2)
-    where 'id=10'
-    invalidate_query 'SQL_QUERY'
-    query 'SELECT id, value_1, value_2 FROM db_name.table_name'
-))
-```
-
-Setting fields:
-
-- `host` – The host of the PostgreSQL server. You can specify it for all replicas, or for each one individually (inside `<replica>`).
-- `port` – The port on the PostgreSQL server. You can specify it for all replicas, or for each one individually (inside `<replica>`).
-- `user` – Name of the PostgreSQL user. You can specify it for all replicas, or for each one individually (inside `<replica>`).
-- `password` – Password of the PostgreSQL user. You can specify it for all replicas, or for each one individually (inside `<replica>`).
-- `replica` – Section of replica configurations. There can be multiple sections:
-    - `replica/host` – The PostgreSQL host.
-    - `replica/port` – The PostgreSQL port.
-    - `replica/priority` – The replica priority. When attempting to connect, ClickHouse traverses the replicas in order of priority. The lower the number, the higher the priority.
-- `db` – Name of the database.
-- `table` – Name of the table.
-- `where` – The selection criteria. The syntax for conditions is the same as for the `WHERE` clause in PostgreSQL. For example, `id > 10 AND id < 20`. Optional parameter.
-- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).
-- `query` – The custom query. Optional parameter.
-
-:::note
-The `table` or `where` fields cannot be used together with the `query` field. Either the `table` or the `query` field must be declared.
-:::
-
-## Null
-
-A special source that can be used to create dummy (empty) dictionaries. Such dictionaries can be useful for tests, or in setups where data and query nodes are separated, on nodes with Distributed tables.
-
-``` sql
-CREATE DICTIONARY null_dict (
-    id UInt64,
-    val UInt8,
-    default_val UInt8 DEFAULT 123,
-    nullable_val Nullable(UInt8)
-)
-PRIMARY KEY id
-SOURCE(NULL())
-LAYOUT(FLAT())
-LIFETIME(0);
-```
-
-## Related Content
-
-- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse)
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md
deleted file mode 100644
index 8271a342941..00000000000
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md
+++ /dev/null
@@ -1,181 +0,0 @@
----
-slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure
-sidebar_position: 44
-sidebar_label: Dictionary Key and Fields
----
-import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md';
-
-# Dictionary Key and Fields
-
-<CloudDetails />
-
-The `structure` clause describes the dictionary key and fields available for queries.
-
-XML description:
-
-``` xml
-<dictionary>
-    <structure>
-
-        <id>
-            <name>Id</name>
-        </id>
-
-        <attribute>
-            <!-- Attribute parameters -->
-        </attribute>
-
-        ...
-
-    </structure>
-</dictionary>
-```
-
-Attributes are described in the elements:
-
-- `<id>` — [Key column](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-key).
-- `<attribute>` — [Data column](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes). There can be multiple attributes.
-
-DDL query:
-
-``` sql
-CREATE DICTIONARY dict_name (
-    Id UInt64,
-    -- attributes
-)
-PRIMARY KEY Id
-...
-```
-
-Attributes are described in the query body:
-
-- `PRIMARY KEY` — [Key column](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-key)
-- `AttrName AttrType` — [Data column](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes). There can be multiple attributes.
-
-## Key
-
-ClickHouse supports the following types of keys:
-
-- Numeric key. `UInt64`. Defined in the `<id>` tag or using the `PRIMARY KEY` keyword.
-- Composite key. Set of values of different types. Defined in the `<key>` tag or using the `PRIMARY KEY` keyword.
-
-An xml structure can contain either `<id>` or `<key>`. A DDL query must contain a single `PRIMARY KEY`.
-
-:::warning
-You must not describe the key as an attribute.
-:::
-
-### Numeric Key
-
-Type: `UInt64`.
-
-Configuration example:
-
-``` xml
-<id>
-    <name>Id</name>
-</id>
-```
-
-Configuration fields:
-
-- `name` – The name of the column with keys.
-
-For DDL-query:
-
-``` sql
-CREATE DICTIONARY (
-    Id UInt64,
-    ...
-)
-PRIMARY KEY Id
-...
-```
-
-- `PRIMARY KEY` – The name of the column with keys.
-
-### Composite Key
-
-The key can be a `tuple` of any types of fields. The [layout](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) in this case must be `complex_key_hashed` or `complex_key_cache`.
-
-:::tip
-A composite key can consist of a single element. This makes it possible to use a string as the key, for instance.
-:::
-
-The key structure is set in the element `<key>`. Key fields are specified in the same format as the dictionary [attributes](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Example:
-
-``` xml
-<structure>
-    <key>
-        <attribute>
-            <name>field1</name>
-            <type>String</type>
-        </attribute>
-        <attribute>
-            <name>field2</name>
-            <type>UInt32</type>
-        </attribute>
-        ...
-    </key>
-...
-```
-
-or
-
-``` sql
-CREATE DICTIONARY (
-    field1 String,
-    field2 UInt32
-    ...
-)
-PRIMARY KEY field1, field2
-...
-```
-
-For a query to the `dictGet*` function, a tuple is passed as the key. Example: `dictGetString('dict_name', 'attr_name', tuple('string for field1', num_for_field2))`.
-
-## Attributes
-
-Configuration example:
-
-``` xml
-<structure>
-    ...
-    <attribute>
-        <name>Name</name>
-        <type>ClickHouseDataType</type>
-        <null_value></null_value>
-        <expression>rand64()</expression>
-        <hierarchical>true</hierarchical>
-        <injective>true</injective>
-        <is_object_id>true</is_object_id>
-    </attribute>
-</structure>
-```
-
-or
-
-``` sql
-CREATE DICTIONARY somename (
-    Name ClickHouseDataType DEFAULT '' EXPRESSION rand64() HIERARCHICAL INJECTIVE IS_OBJECT_ID
-)
-```
-
-Configuration fields:
-
-| Tag | Description | Required |
-|------|-------------|----------|
-| `name` | Column name. | Yes |
-| `type` | ClickHouse data type: [UInt8](../../../sql-reference/data-types/int-uint.md), [UInt16](../../../sql-reference/data-types/int-uint.md), [UInt32](../../../sql-reference/data-types/int-uint.md), [UInt64](../../../sql-reference/data-types/int-uint.md), [Int8](../../../sql-reference/data-types/int-uint.md), [Int16](../../../sql-reference/data-types/int-uint.md), [Int32](../../../sql-reference/data-types/int-uint.md), [Int64](../../../sql-reference/data-types/int-uint.md), [Float32](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md), [UUID](../../../sql-reference/data-types/uuid.md), [Decimal32](../../../sql-reference/data-types/decimal.md), [Decimal64](../../../sql-reference/data-types/decimal.md), [Decimal128](../../../sql-reference/data-types/decimal.md), [Decimal256](../../../sql-reference/data-types/decimal.md), [Date](../../../sql-reference/data-types/date), [Date32](../../../sql-reference/data-types/date32.md), [DateTime](../../../sql-reference/data-types/datetime.md), [DateTime64](../../../sql-reference/data-types/datetime64.md), [String](../../../sql-reference/data-types/string.md), [Array](../../../sql-reference/data-types/array.md).<br/>ClickHouse tries to cast the value from the dictionary to the specified data type. For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.<br/>[Nullable](../../../sql-reference/data-types/nullable.md) is currently supported for [Flat](external-dicts-dict-layout.md#flat), [Hashed](external-dicts-dict-layout.md#dicts-external_dicts_dict_layout-hashed), [ComplexKeyHashed](external-dicts-dict-layout.md#complex-key-hashed), [Direct](external-dicts-dict-layout.md#direct), [ComplexKeyDirect](external-dicts-dict-layout.md#complex-key-direct), [RangeHashed](external-dicts-dict-layout.md#range-hashed), [Polygon](external-dicts-dict-polygon.md), [Cache](external-dicts-dict-layout.md#cache), [ComplexKeyCache](external-dicts-dict-layout.md#complex-key-cache), [SSDCache](external-dicts-dict-layout.md#ssd-cache), [SSDComplexKeyCache](external-dicts-dict-layout.md#complex-key-ssd-cache) dictionaries. In [IPTrie](external-dicts-dict-layout.md#ip-trie) dictionaries `Nullable` types are not supported. | Yes |
-| `null_value` | Default value for a non-existing element.<br/>In the example, it is an empty string. [NULL](../../syntax.md#null-literal) value can be used only for the `Nullable` types (see the previous line with types description). | Yes |
-| `expression` | [Expression](../../../sql-reference/syntax.md#syntax-expressions) that ClickHouse executes on the value.<br/>The expression can be a column name in the remote SQL database. Thus, you can use it to create an alias for the remote column.<br/><br/>Default value: no expression. | No |
-| `hierarchical` | If `true`, the attribute contains the value of a parent key for the current key. See [Hierarchical Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md).<br/><br/>Default value: `false`. | No |
-| `injective` | Flag that shows whether the `id -> attribute` image is [injective](https://en.wikipedia.org/wiki/Injective_function).<br/>If `true`, ClickHouse can automatically apply requests to dictionaries with injective attributes after the `GROUP BY` clause instead of computing them before it. Usually this significantly reduces the number of such requests.<br/><br/>Default value: `false`. | No |
-| `is_object_id` | Flag that shows whether the query is executed for a MongoDB document by `ObjectID`.<br/><br/>Default value: `false`. | No |
-
-**See Also**
-
-- [Functions for working with dictionaries](../../../sql-reference/functions/ext-dict-functions.md).
-
-## Related Content
-
-- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse)
\ No newline at end of file
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md
deleted file mode 100644
index a923511ca5e..00000000000
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md
+++ /dev/null
@@ -1,57 +0,0 @@
----
-slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict
-sidebar_position: 40
-sidebar_label: Configuring a Dictionary
----
-import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md';
-
-# Configuring a Dictionary
-
-<CloudDetails />
-
-If a dictionary is configured using an xml file, then the dictionary configuration has the following structure:
-
-``` xml
-<dictionary>
-    <name>dict_name</name>
-
-    <structure>
-        <!-- Complex key configuration -->
-    </structure>
-
-    <source>
-        <!-- Source configuration -->
-    </source>
-
-    <layout>
-        <!-- Memory layout configuration -->
-    </layout>
-
-    <lifetime>
-        <!-- Lifetime of dictionary in memory -->
-    </lifetime>
-</dictionary>
-```
-
-Corresponding [DDL-query](../../../sql-reference/statements/create/dictionary.md) has the following structure:
-
-``` sql
-CREATE DICTIONARY dict_name
-(
-    ... -- attributes
-)
-PRIMARY KEY ... -- complex or single key configuration
-SOURCE(...) -- Source configuration
-LAYOUT(...) -- Memory layout configuration
-LIFETIME(...) -- Lifetime of dictionary in memory
-```
-
-- `name` – The identifier that can be used to access the dictionary. Use the characters `[a-zA-Z0-9_\-]`.
-- [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) — Source of the dictionary.
-- [layout](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) — Dictionary layout in memory.
-- [structure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md) — Structure of the dictionary. A key and attributes that can be retrieved by this key.
-- [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) — Frequency of dictionary updates.
-
-## Related Content
-
-- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse)
\ No newline at end of file
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md
deleted file mode 100644
index 8621c68b428..00000000000
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md
+++ /dev/null
@@ -1,84 +0,0 @@
----
-slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts
-sidebar_position: 39
-sidebar_label: General Description
----
-import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md';
-
-# Dictionaries
-
-:::tip Tutorial
-If you are getting started with Dictionaries in ClickHouse we have a tutorial that covers that topic. Take a look [here](/docs/en/tutorial.md).
-:::
-
-You can add your own dictionaries from various data sources. The source for a dictionary can be a ClickHouse table, a local text or executable file, an HTTP(s) resource, or another DBMS. For more information, see “[Dictionary Sources](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md)”.
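-
-For example, a minimal DDL-created dictionary backed by a ClickHouse table might look like the sketch below (the dictionary, table, and column names are illustrative):
-
-``` sql
-CREATE DICTIONARY country_names
-(
-    country_id UInt64,
-    name String
-)
-PRIMARY KEY country_id
-SOURCE(CLICKHOUSE(TABLE 'countries'))
-LAYOUT(FLAT())
-LIFETIME(MIN 300 MAX 360);
-
--- Attribute values are then fetched with the dictGet family of functions:
-SELECT dictGet('country_names', 'name', toUInt64(42));
-```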
-
-ClickHouse:
-
-- Fully or partially stores dictionaries in RAM.
-- Periodically updates dictionaries and dynamically loads missing values. In other words, dictionaries can be loaded dynamically.
-- Allows creating dictionaries with xml files or [DDL queries](../../../sql-reference/statements/create/dictionary.md).
-
-The configuration of dictionaries can be located in one or more xml-files. The path to the configuration is specified in the [dictionaries_config](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_config) parameter.
-
-Dictionaries can be loaded at server startup or at first use, depending on the [dictionaries_lazy_load](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_lazy_load) setting.
-
-The [dictionaries](../../../operations/system-tables/dictionaries.md#system_tables-dictionaries) system table contains information about dictionaries configured on the server. For each dictionary you can find there:
-
-- Status of the dictionary.
-- Configuration parameters.
-- Metrics like the amount of RAM allocated for the dictionary or the number of queries since the dictionary was successfully loaded.
-
-<CloudDetails />
-
-## Creating a dictionary with a DDL query
-
-Dictionaries can be created with [DDL queries](../../../sql-reference/statements/create/dictionary.md), and this is the recommended method because with DDL created dictionaries:
-- No additional records are added to server configuration files
-- The dictionaries can be worked with as first-class entities, like tables or views
-- Data can be read directly, using familiar SELECT rather than dictionary table functions
-- The dictionaries can be easily renamed
-
-## Creating a dictionary with a configuration file
-
-:::note
-Creating a dictionary with a configuration file is not applicable to ClickHouse Cloud. Please use DDL (see above), and create your dictionary as user `default`.
-:::
-
-The dictionary configuration file has the following format:
-
-``` xml
-<clickhouse>
-    <comment>An optional element with any content. Ignored by the ClickHouse server.</comment>
-
-    <!-- Optional element. File name with substitutions -->
-    <include_from>/etc/metrika.xml</include_from>
-
-    <dictionary>
-        <!-- Dictionary configuration. -->
-        <!-- There can be any number of dictionary sections in a configuration file. -->
-    </dictionary>
-</clickhouse>
-```
-
-You can [configure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md) any number of dictionaries in the same file.
-
-
-:::note
-You can convert values for a small dictionary by describing it in a `SELECT` query (see the [transform](../../../sql-reference/functions/other-functions.md) function). This functionality is not related to dictionaries.
-:::
-
-## See Also
-
-- [Configuring a Dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md)
-- [Storing Dictionaries in Memory](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md)
-- [Dictionary Updates](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md)
-- [Dictionary Sources](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md)
-- [Dictionary Key and Fields](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md)
-- [Functions for Working with Dictionaries](../../../sql-reference/functions/ext-dict-functions.md)
-
-## Related Content
-
-- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse)
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/regexp-tree.md b/docs/en/sql-reference/dictionaries/external-dictionaries/regexp-tree.md
deleted file mode 100644
index 5ad15b11d07..00000000000
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/regexp-tree.md
+++ /dev/null
@@ -1,76 +0,0 @@
----
-slug: /en/sql-reference/dictionaries/external-dictionaries/regexp-tree
-sidebar_position: 47
-sidebar_label: RegExp Tree Dictionary
-title: "RegExp Tree Dictionary"
----
-import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md';
-
-A Regexp Tree dictionary stores multiple trees of regular expressions with attributes. Users can look up strings in the dictionary. If a string matches the root of a regexp tree, we collect the corresponding attributes of the matched root and continue to walk the children. If any of the children matches the string, we collect attributes and overwrite the old ones if conflicts occur, then continue the traversal until we reach the leaf nodes.
-
-Example of the DDL query for creating a Regexp Tree dictionary:
-
-<CloudDetails />
-
-```sql
-create dictionary regexp_dict
-(
-    regexp String,
-    name String,
-    version String
-)
-PRIMARY KEY(regexp)
-SOURCE(YAMLRegExpTree(PATH '/var/lib/clickhouse/user_files/regexp_tree.yaml'))
-LAYOUT(regexp_tree)
-...
-```
-
-Only the `YAMLRegExpTree` source is allowed to work with the regexp_tree dictionary layout. If you want to use other sources, set the variable `regexp_dict_allow_other_sources` to true.
-
-**Source**
-
-We introduce a type of source called `YAMLRegExpTree` representing the structure of a Regexp Tree dictionary. An example of a valid YAML config:
-
-```yaml
-- regexp: 'Linux/(\d+[\.\d]*).+tlinux'
-  name: 'TencentOS'
-  version: '\1'
-
-- regexp: '\d+/tclwebkit(?:\d+[\.\d]*)'
-  name: 'Android'
-  versions:
-    - regexp: '33/tclwebkit'
-      version: '13'
-    - regexp: '3[12]/tclwebkit'
-      version: '12'
-    - regexp: '30/tclwebkit'
-      version: '11'
-    - regexp: '29/tclwebkit'
-      version: '10'
-```
-
-The key `regexp` represents the regular expression of a tree node. The name of this key is the same as the dictionary key. `name` and `version` are user-defined attributes in the dictionary. `versions` (which can be any name that does not appear among the attributes or the key) indicates the children nodes of this tree.
-
-**Back Reference**
-
-The value of an attribute may contain a back reference which refers to a capture group of the matched regular expression. The reference number ranges from 1 to 9 and is written as `$1` or `\1`.
-
-During the query execution, the back reference in the value will be replaced by the matched capture group.
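-
-For instance, with the config above, a lookup like the following sketch (the input string is illustrative) captures `5.15.2` into group 1 of `'Linux/(\d+[\.\d]*).+tlinux'`, so the attribute value `'\1'` resolves to `'5.15.2'`:
-
-```sql
-SELECT dictGet('regexp_dict', ('name', 'version'), 'Linux/5.15.2-generic tlinux');
--- expected to return ('TencentOS', '5.15.2')
-```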
-
-**Query**
-
-Due to the specialty of the Regexp Tree dictionary, only the functions `dictGet`, `dictGetOrDefault`, and `dictGetOrNull` work with it.
-
-Example:
-
-```sql
-SELECT dictGet('regexp_dict', ('name', 'version'), '31/tclwebkit1024');
-```
-
-Result:
-
-```
-┌─dictGet('regexp_dict', ('name', 'version'), '31/tclwebkit1024')─┐
-│ ('Android','12')                                                │
-└─────────────────────────────────────────────────────────────────┘
-```
diff --git a/docs/en/sql-reference/dictionaries/index.md b/docs/en/sql-reference/dictionaries/index.md
index 9e6eed47d4a..f7b4be64851 100644
--- a/docs/en/sql-reference/dictionaries/index.md
+++ b/docs/en/sql-reference/dictionaries/index.md
@@ -1,9 +1,12 @@
 ---
-slug: /en/sql-reference/dictionaries/
-sidebar_label: Dictionaries
+slug: /en/sql-reference/dictionaries
+sidebar_label: Defining Dictionaries
 sidebar_position: 35
 ---
 
+import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
+import CloudDetails from '@site/docs/en/sql-reference/dictionaries/_snippet_dictionary_in_cloud.md';
+
 # Dictionaries
 
 A dictionary is a mapping (`key -> attributes`) that is convenient for various types of reference lists.
 
@@ -12,5 +15,2452 @@ ClickHouse supports special functions for working with dictionaries that can be
 ClickHouse supports:
 
-- [Dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md#dicts-external-dicts) with a [set of functions](../../sql-reference/functions/ext-dict-functions.md).
-- [Embedded dictionaries](../../sql-reference/dictionaries/internal-dicts.md#internal_dicts) with a specific [set of functions](../../sql-reference/functions/ym-dict-functions.md).
+- Dictionaries with a [set of functions](../../sql-reference/functions/ext-dict-functions.md).
+- [Embedded dictionaries](#embedded_dictionaries) with a specific [set of functions](../../sql-reference/functions/ym-dict-functions.md).
+
+
+:::tip Tutorial
+If you are getting started with Dictionaries in ClickHouse we have a tutorial that covers that topic. Take a look [here](/docs/en/tutorial.md).
+:::
+
+You can add your own dictionaries from various data sources. The source for a dictionary can be a ClickHouse table, a local text or executable file, an HTTP(s) resource, or another DBMS. For more information, see “[Dictionary Sources](#dictionary-sources)”.
+
+ClickHouse:
+
+- Fully or partially stores dictionaries in RAM.
+- Periodically updates dictionaries and dynamically loads missing values. In other words, dictionaries can be loaded dynamically.
+- Allows creating dictionaries with xml files or [DDL queries](../../sql-reference/statements/create/dictionary.md).
+
+The configuration of dictionaries can be located in one or more xml-files. The path to the configuration is specified in the [dictionaries_config](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_config) parameter.
+
+Dictionaries can be loaded at server startup or at first use, depending on the [dictionaries_lazy_load](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_lazy_load) setting.
+
+The [dictionaries](../../operations/system-tables/dictionaries.md#system_tables-dictionaries) system table contains information about dictionaries configured on the server. For each dictionary you can find there:
+
+- Status of the dictionary.
+- Configuration parameters.
+- Metrics like the amount of RAM allocated for the dictionary or the number of queries since the dictionary was successfully loaded.
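+
+For example, a quick way to inspect these from SQL is a sketch like the following (the selected columns all exist in `system.dictionaries`):
+
+``` sql
+SELECT name, status, element_count, bytes_allocated, query_count, last_exception
+FROM system.dictionaries;
+```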
+
+<CloudDetails />
+
+## Creating a dictionary with a DDL query {#creating-a-dictionary-with-a-ddl-query}
+
+Dictionaries can be created with [DDL queries](../../sql-reference/statements/create/dictionary.md), and this is the recommended method because with DDL created dictionaries:
+- No additional records are added to server configuration files
+- The dictionaries can be worked with as first-class entities, like tables or views
+- Data can be read directly, using familiar SELECT rather than dictionary table functions
+- The dictionaries can be easily renamed
+
+## Creating a dictionary with a configuration file
+
+:::note
+Creating a dictionary with a configuration file is not applicable to ClickHouse Cloud. Please use DDL (see above), and create your dictionary as user `default`.
+:::
+
+The dictionary configuration file has the following format:
+
+``` xml
+<clickhouse>
+    <comment>An optional element with any content. Ignored by the ClickHouse server.</comment>
+
+    <!-- Optional element. File name with substitutions -->
+    <include_from>/etc/metrika.xml</include_from>
+
+    <dictionary>
+        <!-- Dictionary configuration. -->
+        <!-- There can be any number of dictionary sections in a configuration file. -->
+    </dictionary>
+</clickhouse>
+```
+
+You can [configure](#configuring-a-dictionary) any number of dictionaries in the same file.
+
+
+:::note
+You can convert values for a small dictionary by describing it in a `SELECT` query (see the [transform](../../sql-reference/functions/other-functions.md) function). This functionality is not related to dictionaries.
+:::
+
+## Configuring a Dictionary {#configuring-a-dictionary}
+
+<CloudDetails />
+
+If a dictionary is configured using an xml file, then the dictionary configuration has the following structure:
+
+``` xml
+<dictionary>
+    <name>dict_name</name>
+
+    <structure>
+        <!-- Complex key configuration -->
+    </structure>
+
+    <source>
+        <!-- Source configuration -->
+    </source>
+
+    <layout>
+        <!-- Memory layout configuration -->
+    </layout>
+
+    <lifetime>
+        <!-- Lifetime of dictionary in memory -->
+    </lifetime>
+</dictionary>
+```
+
+Corresponding [DDL-query](../../sql-reference/statements/create/dictionary.md) has the following structure:
+
+``` sql
+CREATE DICTIONARY dict_name
+(
+    ... -- attributes
+)
+PRIMARY KEY ... -- complex or single key configuration
+SOURCE(...) -- Source configuration
+LAYOUT(...) -- Memory layout configuration
+LIFETIME(...) -- Lifetime of dictionary in memory
+```
+
+## Storing Dictionaries in Memory {#storig-dictionaries-in-memory}
+
+There are a variety of ways to store dictionaries in memory.
+
+We recommend [flat](#flat), [hashed](#hashed) and [complex_key_hashed](#complex_key_hashed), which provide optimal processing speed.
+
+Caching is not recommended because of potentially poor performance and difficulties in selecting optimal parameters. Read more in the section [cache](#cache).
+
+There are several ways to improve dictionary performance:
+
+- Call the function for working with the dictionary after `GROUP BY`.
+- Mark attributes to extract as injective. An attribute is called injective if different attribute values correspond to different keys. So when `GROUP BY` uses a function that fetches an attribute value by the key, this function is automatically taken out of `GROUP BY`.
+
+ClickHouse generates an exception for errors with dictionaries. Examples of errors:
+
+- The dictionary being accessed could not be loaded.
+- Error querying a `cached` dictionary.
+
+You can view the list of dictionaries and their statuses in the [system.dictionaries](../../operations/system-tables/dictionaries.md) table.
+
+<CloudDetails />
+
+The configuration looks like this:
+
+``` xml
+<clickhouse>
+    <dictionary>
+        ...
+        <layout>
+            <layout_type>
+                <!-- layout settings -->
+            </layout_type>
+        </layout>
+        ...
+    </dictionary>
+</clickhouse>
+```
+
+Corresponding [DDL-query](../../sql-reference/statements/create/dictionary.md):
+
+``` sql
+CREATE DICTIONARY (...)
+...
+LAYOUT(LAYOUT_TYPE(param value)) -- layout settings
+...
+``` + +Dictionaries without word `complex-key*` in a layout have a key with [UInt64](../../sql-reference/data-types/int-uint.md) type, `complex-key*` dictionaries have a composite key (complex, with arbitrary types). + +[UInt64](../../sql-reference/data-types/int-uint.md) keys in XML dictionaries are defined with `` tag. + +Configuration example (column key_column has UInt64 type): +```xml +... + + + key_column + +... +``` + +Composite `complex` keys XML dictionaries are defined `` tag. + +Configuration example of a composite key (key has one element with [String](../../sql-reference/data-types/string.md) type): +```xml +... + + + + country_code + String + + +... +``` + +## Ways to Store Dictionaries in Memory + +- [flat](#flat) +- [hashed](#hashed) +- [sparse_hashed](#sparse_hashed) +- [complex_key_hashed](#complex_key_hashed) +- [complex_key_sparse_hashed](#complex_key_sparse_hashed) +- [hashed_array](#hashed_array) +- [complex_key_hashed_array](#complex_key_hashed_array) +- [range_hashed](#range_hashed) +- [complex_key_range_hashed](#complex_key_range_hashed) +- [cache](#cache) +- [complex_key_cache](#complex_key_cache) +- [ssd_cache](#ssd_cache) +- [complex_key_ssd_cache](#complex_key_ssd_cache) +- [direct](#direct) +- [complex_key_direct](#complex_key_direct) +- [ip_trie](#ip_trie) + +### flat + +The dictionary is completely stored in memory in the form of flat arrays. How much memory does the dictionary use? The amount is proportional to the size of the largest key (in space used). + +The dictionary key has the [UInt64](../../sql-reference/data-types/int-uint.md) type and the value is limited to `max_array_size` (by default — 500,000). If a larger key is discovered when creating the dictionary, ClickHouse throws an exception and does not create the dictionary. Dictionary flat arrays initial size is controlled by `initial_array_size` setting (by default — 1024). + +All types of sources are supported. When updating, data (from a file or from a table) is read in it entirety. + +This method provides the best performance among all available methods of storing the dictionary. + +Configuration example: + +``` xml + + + 50000 + 5000000 + + +``` + +or + +``` sql +LAYOUT(FLAT(INITIAL_ARRAY_SIZE 50000 MAX_ARRAY_SIZE 5000000)) +``` + +### hashed + +The dictionary is completely stored in memory in the form of a hash table. The dictionary can contain any number of elements with any identifiers In practice, the number of keys can reach tens of millions of items. + +The dictionary key has the [UInt64](../../sql-reference/data-types/int-uint.md) type. + +All types of sources are supported. When updating, data (from a file or from a table) is read in its entirety. + +Configuration example: + +``` xml + + + +``` + +or + +``` sql +LAYOUT(HASHED()) +``` + +Configuration example: + +``` xml + + + + 10 + + + 10000 + + + 0.5 + + +``` + +or + +``` sql +LAYOUT(HASHED([SHARDS 1] [SHARD_LOAD_QUEUE_BACKLOG 10000] [MAX_LOAD_FACTOR 0.5])) +``` + +### sparse_hashed + +Similar to `hashed`, but uses less memory in favor more CPU usage. + +The dictionary key has the [UInt64](../../sql-reference/data-types/int-uint.md) type. + +Configuration example: + +``` xml + + + + + + + +``` + +or + +``` sql +LAYOUT(SPARSE_HASHED([SHARDS 1] [SHARD_LOAD_QUEUE_BACKLOG 10000] [MAX_LOAD_FACTOR 0.5])) +``` + +It is also possible to use `shards` for this type of dictionary, and again it is more important for `sparse_hashed` then for `hashed`, since `sparse_hashed` is slower. 
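+
+For example, a sharded sparse-hashed layout can be requested with a DDL sketch like the following (the dictionary and source table names are illustrative):
+
+``` sql
+CREATE DICTIONARY sharded_dict
+(
+    id UInt64,
+    value String
+)
+PRIMARY KEY id
+SOURCE(CLICKHOUSE(TABLE 'source_table'))
+LAYOUT(SPARSE_HASHED(SHARDS 4))
+LIFETIME(MIN 300 MAX 360);
+```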
+ +### complex_key_hashed + +This type of storage is for use with composite [keys](#dictionary-key-and-fields). Similar to `hashed`. + +Configuration example: + +``` xml + + + + + + + +``` + +or + +``` sql +LAYOUT(COMPLEX_KEY_HASHED([SHARDS 1] [SHARD_LOAD_QUEUE_BACKLOG 10000] [MAX_LOAD_FACTOR 0.5])) +``` + +### complex_key_sparse_hashed + +This type of storage is for use with composite [keys](#dictionary-key-and-fields). Similar to [sparse_hashed](#sparse_hashed). + +Configuration example: + +``` xml + + + + + + + +``` + +or + +``` sql +LAYOUT(COMPLEX_KEY_SPARSE_HASHED([SHARDS 1] [SHARD_LOAD_QUEUE_BACKLOG 10000] [MAX_LOAD_FACTOR 0.5])) +``` + +### hashed_array + +The dictionary is completely stored in memory. Each attribute is stored in an array. The key attribute is stored in the form of a hashed table where value is an index in the attributes array. The dictionary can contain any number of elements with any identifiers. In practice, the number of keys can reach tens of millions of items. + +The dictionary key has the [UInt64](../../sql-reference/data-types/int-uint.md) type. + +All types of sources are supported. When updating, data (from a file or from a table) is read in its entirety. + +Configuration example: + +``` xml + + + + +``` + +or + +``` sql +LAYOUT(HASHED_ARRAY()) +``` + +### complex_key_hashed_array + +This type of storage is for use with composite [keys](#dictionary-key-and-fields). Similar to [hashed_array](#hashed_array). + +Configuration example: + +``` xml + + + +``` + +or + +``` sql +LAYOUT(COMPLEX_KEY_HASHED_ARRAY()) +``` + +### range_hashed {#range_hashed} + +The dictionary is stored in memory in the form of a hash table with an ordered array of ranges and their corresponding values. + +The dictionary key has the [UInt64](../../sql-reference/data-types/int-uint.md) type. +This storage method works the same way as hashed and allows using date/time (arbitrary numeric type) ranges in addition to the key. + +Example: The table contains discounts for each advertiser in the format: + +``` text +┌─advertiser_id─┬─discount_start_date─┬─discount_end_date─┬─amount─┐ +│ 123 │ 2015-01-16 │ 2015-01-31 │ 0.25 │ +│ 123 │ 2015-01-01 │ 2015-01-15 │ 0.15 │ +│ 456 │ 2015-01-01 │ 2015-01-15 │ 0.05 │ +└───────────────┴─────────────────────┴───────────────────┴────────┘ +``` + +To use a sample for date ranges, define the `range_min` and `range_max` elements in the [structure](#dictionary-key-and-fields). These elements must contain elements `name` and `type` (if `type` is not specified, the default type will be used - Date). `type` can be any numeric type (Date / DateTime / UInt64 / Int32 / others). + +:::note +Values of `range_min` and `range_max` should fit in `Int64` type. +::: + +Example: + +``` xml + + + + min + + + + + advertiser_id + + + discount_start_date + Date + + + discount_end_date + Date + + ... 
+``` + +or + +``` sql +CREATE DICTIONARY discounts_dict ( + advertiser_id UInt64, + discount_start_date Date, + discount_end_date Date, + amount Float64 +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE 'discounts')) +LIFETIME(MIN 1 MAX 1000) +LAYOUT(RANGE_HASHED(range_lookup_strategy 'max')) +RANGE(MIN discount_start_date MAX discount_end_date) +``` + +To work with these dictionaries, you need to pass an additional argument to the `dictGet` function, for which a range is selected: + +``` sql +dictGet('dict_name', 'attr_name', id, date) +``` +Query example: + +``` sql +SELECT dictGet('discounts_dict', 'amount', 1, '2022-10-20'::Date); +``` + +This function returns the value for the specified `id`s and the date range that includes the passed date. + +Details of the algorithm: + +- If the `id` is not found or a range is not found for the `id`, it returns the default value of the attribute's type. +- If there are overlapping ranges and `range_lookup_strategy=min`, it returns a matching range with minimal `range_min`, if several ranges found, it returns a range with minimal `range_max`, if again several ranges found (several ranges had the same `range_min` and `range_max` it returns a random range of them. +- If there are overlapping ranges and `range_lookup_strategy=max`, it returns a matching range with maximal `range_min`, if several ranges found, it returns a range with maximal `range_max`, if again several ranges found (several ranges had the same `range_min` and `range_max` it returns a random range of them. +- If the `range_max` is `NULL`, the range is open. `NULL` is treated as maximal possible value. For the `range_min` `1970-01-01` or `0` (-MAX_INT) can be used as the open value. + +Configuration example: + +``` xml + + + ... + + + + + + + + Abcdef + + + StartTimeStamp + UInt64 + + + EndTimeStamp + UInt64 + + + XXXType + String + + + + + + +``` + +or + +``` sql +CREATE DICTIONARY somedict( + Abcdef UInt64, + StartTimeStamp UInt64, + EndTimeStamp UInt64, + XXXType String DEFAULT '' +) +PRIMARY KEY Abcdef +RANGE(MIN StartTimeStamp MAX EndTimeStamp) +``` + +Configuration example with overlapping ranges and open ranges: + +```sql +CREATE TABLE discounts +( + advertiser_id UInt64, + discount_start_date Date, + discount_end_date Nullable(Date), + amount Float64 +) +ENGINE = Memory; + +INSERT INTO discounts VALUES (1, '2015-01-01', Null, 0.1); +INSERT INTO discounts VALUES (1, '2015-01-15', Null, 0.2); +INSERT INTO discounts VALUES (2, '2015-01-01', '2015-01-15', 0.3); +INSERT INTO discounts VALUES (2, '2015-01-04', '2015-01-10', 0.4); +INSERT INTO discounts VALUES (3, '1970-01-01', '2015-01-15', 0.5); +INSERT INTO discounts VALUES (3, '1970-01-01', '2015-01-10', 0.6); + +SELECT * FROM discounts ORDER BY advertiser_id, discount_start_date; +┌─advertiser_id─┬─discount_start_date─┬─discount_end_date─┬─amount─┐ +│ 1 │ 2015-01-01 │ ᴺᵁᴸᴸ │ 0.1 │ +│ 1 │ 2015-01-15 │ ᴺᵁᴸᴸ │ 0.2 │ +│ 2 │ 2015-01-01 │ 2015-01-15 │ 0.3 │ +│ 2 │ 2015-01-04 │ 2015-01-10 │ 0.4 │ +│ 3 │ 1970-01-01 │ 2015-01-15 │ 0.5 │ +│ 3 │ 1970-01-01 │ 2015-01-10 │ 0.6 │ +└───────────────┴─────────────────────┴───────────────────┴────────┘ + +-- RANGE_LOOKUP_STRATEGY 'max' + +CREATE DICTIONARY discounts_dict +( + advertiser_id UInt64, + discount_start_date Date, + discount_end_date Nullable(Date), + amount Float64 +) +PRIMARY KEY advertiser_id +SOURCE(CLICKHOUSE(TABLE discounts)) +LIFETIME(MIN 600 MAX 900) +LAYOUT(RANGE_HASHED(RANGE_LOOKUP_STRATEGY 'max')) +RANGE(MIN discount_start_date MAX discount_end_date); + +select 
dictGet('discounts_dict', 'amount', 1, toDate('2015-01-14')) res; +┌─res─┐ +│ 0.1 │ -- the only one range is matching: 2015-01-01 - Null +└─────┘ + +select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-16')) res; +┌─res─┐ +│ 0.2 │ -- two ranges are matching, range_min 2015-01-15 (0.2) is bigger than 2015-01-01 (0.1) +└─────┘ + +select dictGet('discounts_dict', 'amount', 2, toDate('2015-01-06')) res; +┌─res─┐ +│ 0.4 │ -- two ranges are matching, range_min 2015-01-04 (0.4) is bigger than 2015-01-01 (0.3) +└─────┘ + +select dictGet('discounts_dict', 'amount', 3, toDate('2015-01-01')) res; +┌─res─┐ +│ 0.5 │ -- two ranges are matching, range_min are equal, 2015-01-15 (0.5) is bigger than 2015-01-10 (0.6) +└─────┘ + +DROP DICTIONARY discounts_dict; + +-- RANGE_LOOKUP_STRATEGY 'min' + +CREATE DICTIONARY discounts_dict +( + advertiser_id UInt64, + discount_start_date Date, + discount_end_date Nullable(Date), + amount Float64 +) +PRIMARY KEY advertiser_id +SOURCE(CLICKHOUSE(TABLE discounts)) +LIFETIME(MIN 600 MAX 900) +LAYOUT(RANGE_HASHED(RANGE_LOOKUP_STRATEGY 'min')) +RANGE(MIN discount_start_date MAX discount_end_date); + +select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-14')) res; +┌─res─┐ +│ 0.1 │ -- the only one range is matching: 2015-01-01 - Null +└─────┘ + +select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-16')) res; +┌─res─┐ +│ 0.1 │ -- two ranges are matching, range_min 2015-01-01 (0.1) is less than 2015-01-15 (0.2) +└─────┘ + +select dictGet('discounts_dict', 'amount', 2, toDate('2015-01-06')) res; +┌─res─┐ +│ 0.3 │ -- two ranges are matching, range_min 2015-01-01 (0.3) is less than 2015-01-04 (0.4) +└─────┘ + +select dictGet('discounts_dict', 'amount', 3, toDate('2015-01-01')) res; +┌─res─┐ +│ 0.6 │ -- two ranges are matching, range_min are equal, 2015-01-10 (0.6) is less than 2015-01-15 (0.5) +└─────┘ +``` + +### complex_key_range_hashed + +The dictionary is stored in memory in the form of a hash table with an ordered array of ranges and their corresponding values (see [range_hashed](#range_hashed)). This type of storage is for use with composite [keys](#dictionary-key-and-fields). + +Configuration example: + +``` sql +CREATE DICTIONARY range_dictionary +( + CountryID UInt64, + CountryKey String, + StartDate Date, + EndDate Date, + Tax Float64 DEFAULT 0.2 +) +PRIMARY KEY CountryID, CountryKey +SOURCE(CLICKHOUSE(TABLE 'date_table')) +LIFETIME(MIN 1 MAX 1000) +LAYOUT(COMPLEX_KEY_RANGE_HASHED()) +RANGE(MIN StartDate MAX EndDate); +``` + +### cache + +The dictionary is stored in a cache that has a fixed number of cells. These cells contain frequently used elements. + +The dictionary key has the [UInt64](../../sql-reference/data-types/int-uint.md) type. + +When searching for a dictionary, the cache is searched first. For each block of data, all keys that are not found in the cache or are outdated are requested from the source using `SELECT attrs... FROM db.table WHERE id IN (k1, k2, ...)`. The received data is then written to the cache. + +If keys are not found in dictionary, then update cache task is created and added into update queue. Update queue properties can be controlled with settings `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds`, `max_threads_for_updates`. + +For cache dictionaries, the expiration [lifetime](#dictionary-updates) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell’s value is not used and key becomes expired. 
The key is re-requested the next time it needs to be used. This behaviour can be configured with setting `allow_read_expired_keys`. + +This is the least effective of all the ways to store dictionaries. The speed of the cache depends strongly on correct settings and the usage scenario. A cache type dictionary performs well only when the hit rates are high enough (recommended 99% and higher). You can view the average hit rate in the [system.dictionaries](../../operations/system-tables/dictionaries.md) table. + +If setting `allow_read_expired_keys` is set to 1, by default 0. Then dictionary can support asynchronous updates. If a client requests keys and all of them are in cache, but some of them are expired, then dictionary will return expired keys for a client and request them asynchronously from the source. + +To improve cache performance, use a subquery with `LIMIT`, and call the function with the dictionary externally. + +All types of sources are supported. + +Example of settings: + +``` xml + + + + 1000000000 + + 0 + + 100000 + + 10 + + 60000 + + 4 + + +``` + +or + +``` sql +LAYOUT(CACHE(SIZE_IN_CELLS 1000000000)) +``` + +Set a large enough cache size. You need to experiment to select the number of cells: + +1. Set some value. +2. Run queries until the cache is completely full. +3. Assess memory consumption using the `system.dictionaries` table. +4. Increase or decrease the number of cells until the required memory consumption is reached. + +:::note +Do not use ClickHouse as a source, because it is slow to process queries with random reads. +::: + +### complex_key_cache + +This type of storage is for use with composite [keys](#dictionary-key-and-fields). Similar to `cache`. + +### ssd_cache + +Similar to `cache`, but stores data on SSD and index in RAM. All cache dictionary settings related to update queue can also be applied to SSD cache dictionaries. + +The dictionary key has the [UInt64](../../sql-reference/data-types/int-uint.md) type. + +``` xml + + + + 4096 + + 16777216 + + 131072 + + 1048576 + + /var/lib/clickhouse/user_files/test_dict + + +``` + +or + +``` sql +LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576 + PATH '/var/lib/clickhouse/user_files/test_dict')) +``` + +### complex_key_ssd_cache + +This type of storage is for use with composite [keys](#dictionary-key-and-fields). Similar to `ssd_cache`. + +### direct + +The dictionary is not stored in memory and directly goes to the source during the processing of a request. + +The dictionary key has the [UInt64](../../sql-reference/data-types/int-uint.md) type. + +All types of [sources](#dictionary-sources), except local files, are supported. + +Configuration example: + +``` xml + + + +``` + +or + +``` sql +LAYOUT(DIRECT()) +``` + +### complex_key_direct + +This type of storage is for use with composite [keys](#dictionary-key-and-fields). Similar to `direct`. + +### ip_trie + +This type of storage is for mapping network prefixes (IP addresses) to metadata such as ASN. + +**Example** + +Suppose we have a table in ClickHouse that contains our IP prefixes and mappings: + +```sql +CREATE TABLE my_ip_addresses ( + prefix String, + asn UInt32, + cca2 String +) +ENGINE = MergeTree +PRIMARY KEY prefix; +``` + +```sql +INSERT INTO my_ip_addresses VALUES + ('202.79.32.0/20', 17501, 'NP'), + ('2620:0:870::/48', 3856, 'US'), + ('2a02:6b8:1::/48', 13238, 'RU'), + ('2001:db8::/32', 65536, 'ZZ') +; +``` + +Let's define an `ip_trie` dictionary for this table. 
The `ip_trie` layout requires a composite key: + +``` xml + + + + prefix + String + + + + asn + UInt32 + + + + cca2 + String + ?? + + ... + + + + + + true + + +``` + +or + +``` sql +CREATE DICTIONARY my_ip_trie_dictionary ( + prefix String, + asn UInt32, + cca2 String DEFAULT '??' +) +PRIMARY KEY prefix +SOURCE(CLICKHOUSE(TABLE 'my_ip_addresses')) +LAYOUT(IP_TRIE) +LIFETIME(3600); +``` + +The key must have only one `String` type attribute that contains an allowed IP prefix. Other types are not supported yet. + +The syntax is: + +``` sql +dictGetT('dict_name', 'attr_name', ip) +``` + +The function takes either `UInt32` for IPv4, or `FixedString(16)` for IPv6. For example: + +``` sql +SELECT dictGet('my_ip_trie_dictionary', 'cca2', toIPv4('202.79.32.10')) AS result; + +┌─result─┐ +│ NP │ +└────────┘ + + +SELECT dictGet('my_ip_trie_dictionary', 'asn', IPv6StringToNum('2001:db8::1')) AS result; + +┌─result─┐ +│ 65536 │ +└────────┘ + + +SELECT dictGet('my_ip_trie_dictionary', ('asn', 'cca2'), IPv6StringToNum('2001:db8::1')) AS result; + +┌─result───────┐ +│ (65536,'ZZ') │ +└──────────────┘ +``` + +Other types are not supported yet. The function returns the attribute for the prefix that corresponds to this IP address. If there are overlapping prefixes, the most specific one is returned. + +Data must completely fit into RAM. + +## Dictionary Updates {#dictionary-updates} + +ClickHouse periodically updates the dictionaries. The update interval for fully downloaded dictionaries and the invalidation interval for cached dictionaries are defined in the `lifetime` tag in seconds. + +Dictionary updates (other than loading for first use) do not block queries. During updates, the old version of a dictionary is used. If an error occurs during an update, the error is written to the server log, and queries continue using the old version of dictionaries. + +Example of settings: + + + +``` xml + + ... + 300 + ... + +``` + +or + +``` sql +CREATE DICTIONARY (...) +... +LIFETIME(300) +... +``` + +Setting `0` (`LIFETIME(0)`) prevents dictionaries from updating. + +You can set a time interval for updates, and ClickHouse will choose a uniformly random time within this range. This is necessary in order to distribute the load on the dictionary source when updating on a large number of servers. + +Example of settings: + +``` xml + + ... + + 300 + 360 + + ... + +``` + +or + +``` sql +LIFETIME(MIN 300 MAX 360) +``` + +If `0` and `0`, ClickHouse does not reload the dictionary by timeout. +In this case, ClickHouse can reload the dictionary earlier if the dictionary configuration file was changed or the `SYSTEM RELOAD DICTIONARY` command was executed. + +When updating the dictionaries, the ClickHouse server applies different logic depending on the type of [source](#dictionary-sources): + +- For a text file, it checks the time of modification. If the time differs from the previously recorded time, the dictionary is updated. +- For MySQL source, the time of modification is checked using a `SHOW TABLE STATUS` query (in case of MySQL 8 you need to disable meta-information caching in MySQL by `set global information_schema_stats_expiry=0`). +- Dictionaries from other sources are updated every time by default. + +For other sources (ODBC, PostgreSQL, ClickHouse, etc), you can set up a query that will update the dictionaries only if they really changed, rather than each time. To do this, follow these steps: + +- The dictionary table must have a field that always changes when the source data is updated. 
+- The settings of the source must specify a query that retrieves the changing field. The ClickHouse server interprets the query result as a row, and if this row has changed relative to its previous state, the dictionary is updated. Specify the query in the `` field in the settings for the [source](#dictionary-sources). + +Example of settings: + +``` xml + + ... + + ... + SELECT update_time FROM dictionary_source where id = 1 + + ... + +``` + +or + +``` sql +... +SOURCE(ODBC(... invalidate_query 'SELECT update_time FROM dictionary_source where id = 1')) +... +``` + +For `Cache`, `ComplexKeyCache`, `SSDCache`, and `SSDComplexKeyCache` dictionaries both synchronious and asynchronous updates are supported. + +It is also possible for `Flat`, `Hashed`, `ComplexKeyHashed` dictionaries to only request data that was changed after the previous update. If `update_field` is specified as part of the dictionary source configuration, value of the previous update time in seconds will be added to the data request. Depends on source type (Executable, HTTP, MySQL, PostgreSQL, ClickHouse, or ODBC) different logic will be applied to `update_field` before request data from an external source. + +- If the source is HTTP then `update_field` will be added as a query parameter with the last update time as the parameter value. +- If the source is Executable then `update_field` will be added as an executable script argument with the last update time as the argument value. +- If the source is ClickHouse, MySQL, PostgreSQL, ODBC there will be an additional part of `WHERE`, where `update_field` is compared as greater or equal with the last update time. + - Per default, this `WHERE`-condition is checked at the highest level of the SQL-Query. Alternatively, the condition can be checked in any other `WHERE`-clause within the query using the `{condition}`-keyword. Example: + ```sql + ... + SOURCE(CLICKHOUSE(... + update_field 'added_time' + QUERY ' + SELECT my_arr.1 AS x, my_arr.2 AS y, creation_time + FROM ( + SELECT arrayZip(x_arr, y_arr) AS my_arr, creation_time + FROM dictionary_source + WHERE {condition} + )' + )) + ... + ``` + +If `update_field` option is set, additional option `update_lag` can be set. Value of `update_lag` option is subtracted from previous update time before request updated data. + +Example of settings: + +``` xml + + ... + + ... + added_time + 15 + + ... + +``` + +or + +``` sql +... +SOURCE(CLICKHOUSE(... update_field 'added_time' update_lag 15)) +... +``` + +## Dictionary Sources {#dictionary-sources} + + + +A dictionary can be connected to ClickHouse from many different sources. + +If the dictionary is configured using an xml-file, the configuration looks like this: + +``` xml + + + ... + + + + + + ... + + ... + +``` + +In case of [DDL-query](../../sql-reference/statements/create/dictionary.md), the configuration described above will look like: + +``` sql +CREATE DICTIONARY dict_name (...) +... +SOURCE(SOURCE_TYPE(param1 val1 ... paramN valN)) -- Source configuration +... +``` + +The source is configured in the `source` section. 
+
+For source types [Local file](#local_file), [Executable file](#executable), [HTTP(S)](#https), [ClickHouse](#clickhouse)
+optional settings are available:
+
+``` xml
+<source>
+  <file>
+    <path>/opt/dictionaries/os.tsv</path>
+    <format>TabSeparated</format>
+  </file>
+  <settings>
+      <format_csv_allow_single_quotes>0</format_csv_allow_single_quotes>
+  </settings>
+</source>
+```
+
+or
+
+``` sql
+SOURCE(FILE(path './user_files/os.tsv' format 'TabSeparated'))
+SETTINGS(format_csv_allow_single_quotes = 0)
+```
+
+Types of sources (`source_type`):
+
+- [Local file](#local_file)
+- [Executable File](#executable)
+- [Executable Pool](#executable_pool)
+- [HTTP(S)](#https)
+- DBMS
+    - [ODBC](#odbc)
+    - [MySQL](#mysql)
+    - [ClickHouse](#clickhouse)
+    - [MongoDB](#mongodb)
+    - [Redis](#redis)
+    - [Cassandra](#cassandra)
+    - [PostgreSQL](#postgresql)
+
+## Local File {#local_file}
+
+Example of settings:
+
+``` xml
+<source>
+  <file>
+    <path>/opt/dictionaries/os.tsv</path>
+    <format>TabSeparated</format>
+  </file>
+</source>
+```
+
+or
+
+``` sql
+SOURCE(FILE(path './user_files/os.tsv' format 'TabSeparated'))
+```
+
+Setting fields:
+
+- `path` – The absolute path to the file.
+- `format` – The file format. All the formats described in [Formats](../../interfaces/formats.md#formats) are supported.
+
+When a dictionary with source `FILE` is created via DDL command (`CREATE DICTIONARY ...`), the source file needs to be located in the `user_files` directory to prevent DB users from accessing arbitrary files on the ClickHouse node.
+
+**See Also**
+
+- [Dictionary function](../../sql-reference/table-functions/dictionary.md#dictionary-function)
+
+## Executable File {#executable}
+
+Working with executable files depends on [how the dictionary is stored in memory](#storig-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request to the executable file’s STDIN. Otherwise, ClickHouse starts the executable file and treats its output as dictionary data.
+
+Example of settings:
+
+``` xml
+<source>
+    <executable>
+        <command>cat /opt/dictionaries/os.tsv</command>
+        <format>TabSeparated</format>
+        <implicit_key>false</implicit_key>
+    </executable>
+</source>
+```
+
+Setting fields:
+
+- `command` — The absolute path to the executable file, or the file name (if the command's directory is in the `PATH`).
+- `format` — The file format. All the formats described in [Formats](../../interfaces/formats.md#formats) are supported.
+- `command_termination_timeout` — The executable script should contain a main read-write loop. After the dictionary is destroyed, the pipe is closed, and the executable file will have `command_termination_timeout` seconds to shut down before ClickHouse sends a SIGTERM signal to the child process. `command_termination_timeout` is specified in seconds. Default value is 10. Optional parameter.
+- `command_read_timeout` - Timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
+- `command_write_timeout` - Timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
+- `implicit_key` — The executable source file can return only values, and the correspondence to the requested keys is determined implicitly — by the order of rows in the result. Default value is false.
+- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside the user_scripts folder specified by [user_scripts_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using a whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as an argument for `bin/sh -c`. Default value is `0`. Optional parameter.
+- `send_chunk_header` - Controls whether to send a row count before sending a chunk of data to process. Optional. Default value is `false`.
+
+That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled; otherwise, the DB user would be able to execute arbitrary binaries on the ClickHouse node.
+
+## Executable Pool {#executable_pool}
+
+Executable pool allows loading data from a pool of processes. This source does not work with dictionary layouts that need to load all data from the source. Executable pool works if the dictionary [is stored](#storig-dictionaries-in-memory) using `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache`, `direct`, or `complex_key_direct` layouts.
+
+Executable pool will spawn a pool of processes with the specified command and keep them running until they exit. The program should read data from STDIN while it is available and output the result to STDOUT. It can wait for the next block of data on STDIN. ClickHouse will not close STDIN after processing a block of data, but will pipe another chunk of data when needed. The executable script should be ready for this way of data processing — it should poll STDIN and flush data to STDOUT early.
+
+Example of settings:
+
+``` xml
+<source>
+    <executable_pool>
+        <command><![CDATA[while read key; do printf "$key\tData for key $key\n"; done]]></command>
+        <format>TabSeparated</format>
+        <pool_size>10</pool_size>
+        <max_command_execution_time>10</max_command_execution_time>
+        <implicit_key>false</implicit_key>
+    </executable_pool>
+</source>
+```
+
+Setting fields:
+
+- `command` — The absolute path to the executable file, or the file name (if the program directory is written to `PATH`).
+- `format` — The file format. All the formats described in “[Formats](../../interfaces/formats.md#formats)” are supported.
+- `pool_size` — Size of the pool. If 0 is specified as `pool_size` then there are no pool size restrictions. Default value is `16`.
+- `command_termination_timeout` — The executable script should contain a main read-write loop. After the dictionary is destroyed, the pipe is closed, and the executable file will have `command_termination_timeout` seconds to shut down before ClickHouse sends a SIGTERM signal to the child process. Specified in seconds. Default value is 10. Optional parameter.
+- `max_command_execution_time` — Maximum executable script command execution time for processing a block of data. Specified in seconds. Default value is 10. Optional parameter.
+- `command_read_timeout` - Timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
+- `command_write_timeout` - Timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
+- `implicit_key` — The executable source file can return only values, and the correspondence to the requested keys is determined implicitly — by the order of rows in the result. Default value is false. Optional parameter.
+- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside the user_scripts folder specified by [user_scripts_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using a whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as an argument for `bin/sh -c`. Default value is `1`. Optional parameter.
+- `send_chunk_header` - Controls whether to send a row count before sending a chunk of data to process. Optional. Default value is `false`.
+
+That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled; otherwise, the DB user would be able to execute arbitrary binaries on the ClickHouse node.
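+Once such a dictionary is defined in the server configuration, it is queried like any other dictionary. A minimal sketch (the dictionary name `os_pool_dict` and its attribute `data` are assumed, not taken from the example above):
+
+``` sql
+-- looks up key 42 in the executable-pool-backed dictionary
+SELECT dictGet('os_pool_dict', 'data', toUInt64(42));
+```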
+## HTTP(S) {#https}
+
+Working with an HTTP(S) server depends on [how the dictionary is stored in memory](#storig-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method.
+
+Example of settings:
+
+``` xml
+<source>
+    <http>
+        <url>http://[::1]/os.tsv</url>
+        <format>TabSeparated</format>
+        <credentials>
+            <user>user</user>
+            <password>password</password>
+        </credentials>
+        <headers>
+            <header>
+                <name>API-KEY</name>
+                <value>key</value>
+            </header>
+        </headers>
+    </http>
+</source>
+```
+
+or
+
+``` sql
+SOURCE(HTTP(
+    url 'http://[::1]/os.tsv'
+    format 'TabSeparated'
+    credentials(user 'user' password 'password')
+    headers(header(name 'API-KEY' value 'key'))
+))
+```
+
+In order for ClickHouse to access an HTTPS resource, you must [configure openSSL](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-openssl) in the server configuration.
+
+Setting fields:
+
+- `url` – The source URL.
+- `format` – The file format. All the formats described in “[Formats](../../interfaces/formats.md#formats)” are supported.
+- `credentials` – Basic HTTP authentication. Optional parameter.
+- `user` – Username required for the authentication.
+- `password` – Password required for the authentication.
+- `headers` – All custom HTTP header entries used for the HTTP request. Optional parameter.
+- `header` – Single HTTP header entry.
+- `name` – Identifier name used for the header sent with the request.
+- `value` – Value set for a specific identifier name.
+
+When creating a dictionary using the DDL command (`CREATE DICTIONARY ...`), remote hosts for HTTP dictionaries are checked against the contents of the `remote_url_allow_hosts` section from the config to prevent database users from accessing an arbitrary HTTP server.
+
+### Known Vulnerability of the ODBC Dictionary Functionality
+
+:::note
+When connecting to the database through the ODBC driver, the connection parameter `Servername` can be substituted. In this case, the values of `USERNAME` and `PASSWORD` from `odbc.ini` are sent to the remote server and can be compromised.
+:::
+
+**Example of insecure use**
+
+Let’s configure unixODBC for PostgreSQL. Content of `/etc/odbc.ini`:
+
+``` text
+[gregtest]
+Driver = /usr/lib/psqlodbca.so
+Servername = localhost
+PORT = 5432
+DATABASE = test_db
+#OPTION = 3
+USERNAME = test
+PASSWORD = test
+```
+
+If you then make a query such as
+
+``` sql
+SELECT * FROM odbc('DSN=gregtest;Servername=some-server.com', 'test_db');
+```
+
+the ODBC driver will send the values of `USERNAME` and `PASSWORD` from `odbc.ini` to `some-server.com`.
+
+### Example of Connecting PostgreSQL
+
+Ubuntu OS.
+
+Installing unixODBC and the ODBC driver for PostgreSQL:
+
+``` bash
+$ sudo apt-get install -y unixodbc odbcinst odbc-postgresql
+```
+
+Configuring `/etc/odbc.ini` (or `~/.odbc.ini` if you signed in under a user that runs ClickHouse):
+
+``` text
+    [DEFAULT]
+    Driver = myconnection
+
+    [myconnection]
+    Description         = PostgreSQL connection to my_db
+    Driver              = PostgreSQL Unicode
+    Database            = my_db
+    Servername          = 127.0.0.1
+    UserName            = username
+    Password            = password
+    Port                = 5432
+    Protocol            = 9.3
+    ReadOnly            = No
+    RowVersioning       = No
+    ShowSystemTables    = No
+    ConnSettings        =
+```
+
+The dictionary configuration in ClickHouse:
+
+``` xml
+<clickhouse>
+    <dictionary>
+        <name>table_name</name>
+        <source>
+            <odbc>
+                <!-- You can specify the following parameters in connection_string: -->
+                <!-- DSN=myconnection;UID=username;PWD=password;HOST=127.0.0.1;PORT=5432;DATABASE=my_db -->
+                <connection_string>DSN=myconnection</connection_string>
+                <table>postgresql_table</table>
+            </odbc>
+        </source>
+        <lifetime>
+            <min>300</min>
+            <max>360</max>
+        </lifetime>
+        <layout>
+            <hashed/>
+        </layout>
+        <structure>
+            <id>
+                <name>id</name>
+            </id>
+            <attribute>
+                <name>some_column</name>
+                <type>UInt64</type>
+                <null_value>0</null_value>
+            </attribute>
+        </structure>
+    </dictionary>
+</clickhouse>
+```
+
+or
+
+``` sql
+CREATE DICTIONARY table_name (
+    id UInt64,
+    some_column UInt64 DEFAULT 0
+)
+PRIMARY KEY id
+SOURCE(ODBC(connection_string 'DSN=myconnection' table 'postgresql_table'))
+LAYOUT(HASHED())
+LIFETIME(MIN 300 MAX 360)
+```
+
+You may need to edit `odbc.ini` to specify the full path to the library with the driver `DRIVER=/usr/local/lib/psqlodbcw.so`.
+
+### Example of Connecting MS SQL Server
+
+Ubuntu OS.
+
+Installing the ODBC driver for connecting to MS SQL:
+
+``` bash
+$ sudo apt-get install tdsodbc freetds-bin sqsh
+```
+
+Configuring the driver:
+
+```bash
+    $ cat /etc/freetds/freetds.conf
+    ...
+
+    [MSSQL]
+    host = 192.168.56.101
+    port = 1433
+    tds version = 7.0
+    client charset = UTF-8
+
+    # test TDS connection
+    $ sqsh -S MSSQL -D database -U user -P password
+
+
+    $ cat /etc/odbcinst.ini
+
+    [FreeTDS]
+    Description     = FreeTDS
+    Driver          = /usr/lib/x86_64-linux-gnu/odbc/libtdsodbc.so
+    Setup           = /usr/lib/x86_64-linux-gnu/odbc/libtdsS.so
+    FileUsage       = 1
+    UsageCount      = 5
+
+    $ cat /etc/odbc.ini
+    # $ cat ~/.odbc.ini # if you signed in under a user that runs ClickHouse
+
+    [MSSQL]
+    Description     = FreeTDS
+    Driver          = FreeTDS
+    Servername      = MSSQL
+    Database        = test
+    UID             = test
+    PWD             = test
+    Port            = 1433
+
+
+    # (optional) test ODBC connection (to use isql-tool install the [unixodbc](https://packages.debian.org/sid/unixodbc)-package)
+    $ isql -v MSSQL "user" "password"
+```
+
+Remarks:
+- To determine the earliest TDS version that is supported by a particular SQL Server version, refer to the product documentation or look at [MS-TDS Product Behavior](https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-tds/135d0ebe-5c4c-4a94-99bf-1811eccb9f4a).
+
+Configuring the dictionary in ClickHouse:
+
+``` xml
+<clickhouse>
+    <dictionary>
+        <name>test</name>
+        <source>
+            <odbc>
+                <table>dict</table>
+                <connection_string>DSN=MSSQL;UID=test;PWD=test</connection_string>
+            </odbc>
+        </source>
+
+        <lifetime>
+            <min>300</min>
+            <max>360</max>
+        </lifetime>
+
+        <layout>
+            <flat />
+        </layout>
+
+        <structure>
+            <id>
+                <name>k</name>
+            </id>
+            <attribute>
+                <name>s</name>
+                <type>String</type>
+                <null_value></null_value>
+            </attribute>
+        </structure>
+    </dictionary>
+</clickhouse>
+```
+
+or
+
+``` sql
+CREATE DICTIONARY test (
+    k UInt64,
+    s String DEFAULT ''
+)
+PRIMARY KEY k
+SOURCE(ODBC(table 'dict' connection_string 'DSN=MSSQL;UID=test;PWD=test'))
+LAYOUT(FLAT())
+LIFETIME(MIN 300 MAX 360)
+```
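+As a quick sanity check, the dictionary can be queried directly (a sketch; it assumes the `test` dictionary above is loaded and the MS SQL table contains a row with key `1`):
+
+``` sql
+SELECT dictGet('test', 's', toUInt64(1));
+```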
+## DBMS
+
+### ODBC
+
+You can use this method to connect any database that has an ODBC driver.
+
+Example of settings:
+
+``` xml
+<source>
+    <odbc>
+        <db>DatabaseName</db>
+        <table>SchemaName.TableName</table>
+        <connection_string>DSN=some_parameters</connection_string>
+        <invalidate_query>SQL_QUERY</invalidate_query>
+        <query>SELECT id, value_1, value_2 FROM SchemaName.TableName</query>
+    </odbc>
+</source>
+```
+
+or
+
+``` sql
+SOURCE(ODBC(
+    db 'DatabaseName'
+    table 'SchemaName.TableName'
+    connection_string 'DSN=some_parameters'
+    invalidate_query 'SQL_QUERY'
+    query 'SELECT id, value_1, value_2 FROM SchemaName.TableName'
+))
+```
+
+Setting fields:
+
+- `db` – Name of the database. Omit it if the database name is set in the `<connection_string>` parameters.
+- `table` – Name of the table and schema if it exists.
+- `connection_string` – Connection string.
+- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates).
+- `query` – The custom query. Optional parameter.
+
+:::note
+The `table` and `query` fields cannot be used together. And either one of the `table` or `query` fields must be declared.
+:::
+
+ClickHouse receives quoting symbols from the ODBC driver and quotes all settings in queries to the driver, so it’s necessary to set the table name according to the table name case in the database.
+
+If you have problems with encodings when using Oracle, see the corresponding [FAQ](/knowledgebase/oracle-odbc) item.
+
+### MySQL
+
+Example of settings:
+
+``` xml
+<source>
+  <mysql>
+      <port>3306</port>
+      <user>clickhouse</user>
+      <password>qwerty</password>
+      <replica>
+          <host>example01-1</host>
+          <priority>1</priority>
+      </replica>
+      <replica>
+          <host>example01-2</host>
+          <priority>1</priority>
+      </replica>
+      <db>db_name</db>
+      <table>table_name</table>
+      <where>id=10</where>
+      <invalidate_query>SQL_QUERY</invalidate_query>
+      <fail_on_connection_loss>true</fail_on_connection_loss>
+      <query>SELECT id, value_1, value_2 FROM db_name.table_name</query>
+  </mysql>
+</source>
+```
+
+or
+
+``` sql
+SOURCE(MYSQL(
+    port 3306
+    user 'clickhouse'
+    password 'qwerty'
+    replica(host 'example01-1' priority 1)
+    replica(host 'example01-2' priority 1)
+    db 'db_name'
+    table 'table_name'
+    where 'id=10'
+    invalidate_query 'SQL_QUERY'
+    fail_on_connection_loss 'true'
+    query 'SELECT id, value_1, value_2 FROM db_name.table_name'
+))
+```
+
+Setting fields:
+
+- `port` – The port on the MySQL server. You can specify it for all replicas, or for each one individually (inside `<replica>`).
+
+- `user` – Name of the MySQL user. You can specify it for all replicas, or for each one individually (inside `<replica>`).
+
+- `password` – Password of the MySQL user. You can specify it for all replicas, or for each one individually (inside `<replica>`).
+
+- `replica` – Section of replica configurations. There can be multiple sections.
+
+    - `replica/host` – The MySQL host.
+    - `replica/priority` – The replica priority. When attempting to connect, ClickHouse traverses the replicas in order of priority. The lower the number, the higher the priority.
+
+- `db` – Name of the database.
+
+- `table` – Name of the table.
+
+- `where` – The selection criteria. The syntax for conditions is the same as for the `WHERE` clause in MySQL, for example, `id > 10 AND id < 20`. Optional parameter.
+
+- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates).
+
+- `fail_on_connection_loss` – The configuration parameter that controls the behavior of the server on connection loss. If `true`, an exception is thrown immediately if the connection between client and server was lost. If `false`, the ClickHouse server retries to execute the query three times before throwing an exception. Note that retrying leads to increased response times. Default value: `false`.
+
+- `query` – The custom query. Optional parameter.
+
+:::note
+The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared.
+:::
+
+:::note
+There is no explicit parameter `secure`. When establishing an SSL connection, security is mandatory.
+:::
+
+MySQL can be connected to on a local host via sockets. To do this, set `host` and `socket`.
+
+Example of settings:
+
+``` xml
+<source>
+  <mysql>
+      <host>localhost</host>
+      <socket>/path/to/socket/file.sock</socket>
+      <user>clickhouse</user>
+      <password>qwerty</password>
+      <db>db_name</db>
+      <table>table_name</table>
+      <where>id=10</where>
+      <invalidate_query>SQL_QUERY</invalidate_query>
+      <fail_on_connection_loss>true</fail_on_connection_loss>
+      <query>SELECT id, value_1, value_2 FROM db_name.table_name</query>
+  </mysql>
+</source>
+```
+
+or
+
+``` sql
+SOURCE(MYSQL(
+    host 'localhost'
+    socket '/path/to/socket/file.sock'
+    user 'clickhouse'
+    password 'qwerty'
+    db 'db_name'
+    table 'table_name'
+    where 'id=10'
+    invalidate_query 'SQL_QUERY'
+    fail_on_connection_loss 'true'
+    query 'SELECT id, value_1, value_2 FROM db_name.table_name'
+))
+```
+
+### ClickHouse
+
+Example of settings:
+
+``` xml
+<source>
+    <clickhouse>
+        <host>example01-01-1</host>
+        <port>9000</port>
+        <user>default</user>
+        <password></password>
+        <db>default</db>
+        <table>ids</table>
+        <where>id=10</where>
+        <secure>1</secure>
+        <query>SELECT id, value_1, value_2 FROM default.ids</query>
+    </clickhouse>
+</source>
+```
+
+or
+
+``` sql
+SOURCE(CLICKHOUSE(
+    host 'example01-01-1'
+    port 9000
+    user 'default'
+    password ''
+    db 'default'
+    table 'ids'
+    where 'id=10'
+    secure 1
+    query 'SELECT id, value_1, value_2 FROM default.ids'
+));
+```
+
+Setting fields:
+
+- `host` – The ClickHouse host. If it is a local host, the query is processed without any network activity. To improve fault tolerance, you can create a [Distributed](../../engines/table-engines/special/distributed.md) table and enter it in subsequent configurations.
+- `port` – The port on the ClickHouse server.
+- `user` – Name of the ClickHouse user.
+- `password` – Password of the ClickHouse user.
+- `db` – Name of the database.
+- `table` – Name of the table.
+- `where` – The selection criteria. May be omitted.
+- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates).
+- `secure` - Use SSL for the connection.
+- `query` – The custom query. Optional parameter.
+
+:::note
+The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared.
+:::
+
+### MongoDB
+
+Example of settings:
+
+``` xml
+<source>
+    <mongodb>
+        <host>localhost</host>
+        <port>27017</port>
+        <user></user>
+        <password></password>
+        <db>test</db>
+        <collection>dictionary_source</collection>
+        <options>ssl=true</options>
+    </mongodb>
+</source>
+```
+
+or
+
+``` sql
+SOURCE(MONGODB(
+    host 'localhost'
+    port 27017
+    user ''
+    password ''
+    db 'test'
+    collection 'dictionary_source'
+    options 'ssl=true'
+))
+```
+
+Setting fields:
+
+- `host` – The MongoDB host.
+- `port` – The port on the MongoDB server.
+- `user` – Name of the MongoDB user.
+- `password` – Password of the MongoDB user.
+- `db` – Name of the database.
+- `collection` – Name of the collection.
+- `options` - MongoDB connection string options (optional parameter).
+
+### Redis
+
+Example of settings:
+
+``` xml
+<source>
+    <redis>
+        <host>localhost</host>
+        <port>6379</port>
+        <storage_type>simple</storage_type>
+        <db_index>0</db_index>
+    </redis>
+</source>
+```
+
+or
+
+``` sql
+SOURCE(REDIS(
+    host 'localhost'
+    port 6379
+    storage_type 'simple'
+    db_index 0
+))
+```
+
+Setting fields:
+
+- `host` – The Redis host.
+- `port` – The port on the Redis server.
+- `storage_type` – The structure of internal Redis storage used for working with keys. `simple` is for simple sources and for hashed single-key sources, `hash_map` is for hashed sources with two keys. Ranged sources and cache sources with complex key are unsupported. May be omitted, default value is `simple`.
+- `db_index` – The specific numeric index of the Redis logical database. May be omitted, default value is 0.
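+For illustration, a dictionary over the Redis source above could be declared via DDL like this (a sketch; the column names `id` and `value`, the layout, and the lifetime are assumed, not prescribed):
+
+``` sql
+CREATE DICTIONARY redis_dict
+(
+    id UInt64,
+    value String
+)
+PRIMARY KEY id
+SOURCE(REDIS(host 'localhost' port 6379 storage_type 'simple' db_index 0))
+LAYOUT(FLAT())
+LIFETIME(MIN 300 MAX 360);
+```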
+### Cassandra
+
+Example of settings:
+
+``` xml
+<source>
+    <cassandra>
+        <host>localhost</host>
+        <port>9042</port>
+        <user>username</user>
+        <password>qwerty123</password>
+        <keyspace>database_name</keyspace>
+        <column_family>table_name</column_family>
+        <allow_filtering>1</allow_filtering>
+        <partition_key_prefix>1</partition_key_prefix>
+        <consistency>One</consistency>
+        <where>"SomeColumn" = 42</where>
+        <max_threads>8</max_threads>
+        <query>SELECT id, value_1, value_2 FROM database_name.table_name</query>
+    </cassandra>
+</source>
+```
+
+Setting fields:
+
+- `host` – The Cassandra host or comma-separated list of hosts.
+- `port` – The port on the Cassandra servers. If not specified, default port 9042 is used.
+- `user` – Name of the Cassandra user.
+- `password` – Password of the Cassandra user.
+- `keyspace` – Name of the keyspace (database).
+- `column_family` – Name of the column family (table).
+- `allow_filtering` – Flag to allow or not potentially expensive conditions on clustering key columns. Default value is 1.
+- `partition_key_prefix` – Number of partition key columns in the primary key of the Cassandra table. Required for composite key dictionaries. The order of key columns in the dictionary definition must be the same as in Cassandra. Default value is 1 (the first key column is a partition key and the other key columns are clustering keys).
+- `consistency` – Consistency level. Possible values: `One`, `Two`, `Three`, `All`, `EachQuorum`, `Quorum`, `LocalQuorum`, `LocalOne`, `Serial`, `LocalSerial`. Default value is `One`.
+- `where` – Optional selection criteria.
+- `max_threads` – The maximum number of threads to use for loading data from multiple partitions in composite key dictionaries.
+- `query` – The custom query. Optional parameter.
+
+:::note
+The `column_family` or `where` fields cannot be used together with the `query` field. And either one of the `column_family` or `query` fields must be declared.
+:::
+
+### PostgreSQL
+
+Example of settings:
+
+``` xml
+<source>
+  <postgresql>
+      <port>5432</port>
+      <user>clickhouse</user>
+      <password>qwerty</password>
+      <db>db_name</db>
+      <table>table_name</table>
+      <where>id=10</where>
+      <invalidate_query>SQL_QUERY</invalidate_query>
+      <query>SELECT id, value_1, value_2 FROM db_name.table_name</query>
+  </postgresql>
+</source>
+```
+
+or
+
+``` sql
+SOURCE(POSTGRESQL(
+    port 5432
+    host 'postgresql-hostname'
+    user 'postgres_user'
+    password 'postgres_password'
+    db 'db_name'
+    table 'table_name'
+    replica(host 'example01-1' port 5432 priority 1)
+    replica(host 'example01-2' port 5432 priority 2)
+    where 'id=10'
+    invalidate_query 'SQL_QUERY'
+    query 'SELECT id, value_1, value_2 FROM db_name.table_name'
+))
+```
+
+Setting fields:
+
+- `host` – The host of the PostgreSQL server. You can specify it for all replicas, or for each one individually (inside `<replica>`).
+- `port` – The port on the PostgreSQL server. You can specify it for all replicas, or for each one individually (inside `<replica>`).
+- `user` – Name of the PostgreSQL user. You can specify it for all replicas, or for each one individually (inside `<replica>`).
+- `password` – Password of the PostgreSQL user. You can specify it for all replicas, or for each one individually (inside `<replica>`).
+- `replica` – Section of replica configurations. There can be multiple sections:
+    - `replica/host` – The PostgreSQL host.
+    - `replica/port` – The PostgreSQL port.
+    - `replica/priority` – The replica priority. When attempting to connect, ClickHouse traverses the replicas in order of priority. The lower the number, the higher the priority.
+- `db` – Name of the database.
+- `table` – Name of the table.
+- `where` – The selection criteria. The syntax for conditions is the same as for the `WHERE` clause in PostgreSQL. For example, `id > 10 AND id < 20`. Optional parameter.
+- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates).
+- `query` – The custom query. Optional parameter.
+
+:::note
+The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared.
+:::
+
+## Null
+
+A special source that can be used to create dummy (empty) dictionaries. Such dictionaries can be useful for tests or in setups with separated data and query nodes at nodes with Distributed tables.
+
+``` sql
+CREATE DICTIONARY null_dict (
+    id              UInt64,
+    val             UInt8,
+    default_val     UInt8 DEFAULT 123,
+    nullable_val    Nullable(UInt8)
+)
+PRIMARY KEY id
+SOURCE(NULL())
+LAYOUT(FLAT())
+LIFETIME(0);
+```
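+Lookups against such a dictionary never find a key, so they fall back to the attribute defaults; for example (a sketch):
+
+``` sql
+-- expected to return 123, the declared default for `default_val`
+SELECT dictGet('null_dict', 'default_val', toUInt64(1));
+```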
+## Dictionary Key and Fields {#dictionary-key-and-fields}
+
+The `structure` clause describes the dictionary key and fields available for queries.
+
+XML description:
+
+``` xml
+<dictionary>
+    <structure>
+        <id>
+            <name>Id</name>
+        </id>
+
+        <attribute>
+            <!-- Attribute parameters -->
+        </attribute>
+
+        ...
+
+    </structure>
+</dictionary>
+```
+
+Attributes are described in the elements:
+
+- `<id>` — Key column
+- `<attribute>` — Data column: there can be a multiple number of attributes.
+
+DDL query:
+
+``` sql
+CREATE DICTIONARY dict_name (
+    Id UInt64,
+    -- attributes
+)
+PRIMARY KEY Id
+...
+```
+
+Attributes are described in the query body:
+
+- `PRIMARY KEY` — Key column
+- `AttrName AttrType` — Data column. There can be a multiple number of attributes.
+
+## Key
+
+ClickHouse supports the following types of keys:
+
+- Numeric key. `UInt64`. Defined in the `<id>` tag or using the `PRIMARY KEY` keyword.
+- Composite key. Set of values of different types. Defined in the tag `<key>` or by the `PRIMARY KEY` keyword.
+
+An xml structure can contain either `<id>` or `<key>`. The DDL query must contain a single `PRIMARY KEY`.
+
+:::note
+You must not describe the key as an attribute.
+:::
+
+### Numeric Key
+
+Type: `UInt64`.
+
+Configuration example:
+
+``` xml
+<id>
+    <name>Id</name>
+</id>
+```
+
+Configuration fields:
+
+- `name` – The name of the column with keys.
+
+For DDL-query:
+
+``` sql
+CREATE DICTIONARY (
+    Id UInt64,
+    ...
+)
+PRIMARY KEY Id
+...
+```
+
+- `PRIMARY KEY` – The name of the column with keys.
+
+### Composite Key
+
+The key can be a `tuple` of fields of any type. The [layout](#storig-dictionaries-in-memory) in this case must be `complex_key_hashed` or `complex_key_cache`.
+
+:::tip
+A composite key can consist of a single element. This makes it possible to use a string as the key, for instance.
+:::
+
+The key structure is set in the element `<key>`. Key fields are specified in the same format as the dictionary [attributes](#dictionary-key-and-fields). Example:
+
+``` xml
+<structure>
+    <key>
+        <attribute>
+            <name>field1</name>
+            <type>String</type>
+        </attribute>
+        <attribute>
+            <name>field2</name>
+            <type>UInt32</type>
+        </attribute>
+        ...
+    </key>
+...
+```
+
+or
+
+``` sql
+CREATE DICTIONARY (
+    field1 String,
+    field2 UInt32
+    ...
+)
+PRIMARY KEY field1, field2
+...
+```
+
+For a query to the `dictGet*` function, a tuple is passed as the key. Example: `dictGetString('dict_name', 'attr_name', tuple('string for field1', num_for_field2))`.
+
+## Attributes
+
+Configuration example:
+
+``` xml
+<structure>
+    ...
+    <attribute>
+        <name>Name</name>
+        <type>ClickHouseDataType</type>
+        <null_value></null_value>
+        <expression>rand64()</expression>
+        <hierarchical>true</hierarchical>
+        <injective>true</injective>
+        <is_object_id>true</is_object_id>
+    </attribute>
+</structure>
+```
+
+or
+
+``` sql
+CREATE DICTIONARY somename (
+    Name ClickHouseDataType DEFAULT '' EXPRESSION rand64() HIERARCHICAL INJECTIVE IS_OBJECT_ID
+)
+```
+
+Configuration fields:
+
+| Tag | Description | Required |
+|-----|-------------|----------|
+| `name` | Column name. | Yes |
+| `type` | ClickHouse data type: [UInt8](../../sql-reference/data-types/int-uint.md), [UInt16](../../sql-reference/data-types/int-uint.md), [UInt32](../../sql-reference/data-types/int-uint.md), [UInt64](../../sql-reference/data-types/int-uint.md), [Int8](../../sql-reference/data-types/int-uint.md), [Int16](../../sql-reference/data-types/int-uint.md), [Int32](../../sql-reference/data-types/int-uint.md), [Int64](../../sql-reference/data-types/int-uint.md), [Float32](../../sql-reference/data-types/float.md), [Float64](../../sql-reference/data-types/float.md), [UUID](../../sql-reference/data-types/uuid.md), [Decimal32](../../sql-reference/data-types/decimal.md), [Decimal64](../../sql-reference/data-types/decimal.md), [Decimal128](../../sql-reference/data-types/decimal.md), [Decimal256](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md), [DateTime64](../../sql-reference/data-types/datetime64.md), [String](../../sql-reference/data-types/string.md), [Array](../../sql-reference/data-types/array.md).<br/>ClickHouse tries to cast the value from the dictionary to the specified data type. For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.<br/>[Nullable](../../sql-reference/data-types/nullable.md) is currently supported for [Flat](#flat), [Hashed](#hashed), [ComplexKeyHashed](#complex_key_hashed), [Direct](#direct), [ComplexKeyDirect](#complex_key_direct), [RangeHashed](#range_hashed), Polygon, [Cache](#cache), [ComplexKeyCache](#complex_key_cache), [SSDCache](#ssd_cache), [SSDComplexKeyCache](#complex_key_ssd_cache) dictionaries. In [IPTrie](#ip_trie) dictionaries `Nullable` types are not supported. | Yes |
+| `null_value` | Default value for a non-existing element.<br/>In the example, it is an empty string. [NULL](../syntax.md#null) value can be used only for the `Nullable` types (see the previous line with types description). | Yes |
+| `expression` | [Expression](../../sql-reference/syntax.md#expressions) that ClickHouse executes on the value.<br/>The expression can be a column name in the remote SQL database. Thus, you can use it to create an alias for the remote column.<br/><br/>Default value: no expression. | No |
+| `hierarchical` | If `true`, the attribute contains the value of a parent key for the current key. See [Hierarchical Dictionaries](#hierarchical-dictionaries).<br/><br/>Default value: `false`. | No |
+| `injective` | Flag that shows whether the `id -> attribute` image is [injective](https://en.wikipedia.org/wiki/Injective_function).<br/>If `true`, ClickHouse can automatically place after the `GROUP BY` clause the requests to dictionaries with injection. Usually it significantly reduces the amount of such requests.<br/><br/>Default value: `false`. | No |
+| `is_object_id` | Flag that shows whether the query is executed for a MongoDB document by `ObjectID`.<br/><br/>Default value: `false`. | No |
+
+## Hierarchical Dictionaries {#hierarchical-dictionaries}
+
+ClickHouse supports hierarchical dictionaries with a [numeric key](#numeric-key).
+
+Look at the following hierarchical structure:
+
+``` text
+0 (Common parent)
+│
+├── 1 (Russia)
+│   │
+│   └── 2 (Moscow)
+│       │
+│       └── 3 (Center)
+│
+└── 4 (Great Britain)
+    │
+    └── 5 (London)
+```
+
+This hierarchy can be expressed as the following dictionary table.
+
+| region_id | parent_region | region_name   |
+|-----------|---------------|---------------|
+| 1         | 0             | Russia        |
+| 2         | 1             | Moscow        |
+| 3         | 2             | Center        |
+| 4         | 0             | Great Britain |
+| 5         | 4             | London        |
+
+This table contains a column `parent_region` that contains the key of the nearest parent for the element.
+
+ClickHouse supports the hierarchical property for external dictionary attributes. This property allows you to configure a hierarchical dictionary similar to the one described above.
+
+The [dictGetHierarchy](../../sql-reference/functions/ext-dict-functions.md#dictgethierarchy) function allows you to get the parent chain of an element.
+
+For our example, the structure of the dictionary can be the following:
+
+``` xml
+<dictionary>
+    <structure>
+        <id>
+            <name>region_id</name>
+        </id>
+
+        <attribute>
+            <name>parent_region</name>
+            <type>UInt64</type>
+            <null_value>0</null_value>
+            <hierarchical>true</hierarchical>
+        </attribute>
+
+        <attribute>
+            <name>region_name</name>
+            <type>String</type>
+            <null_value></null_value>
+        </attribute>
+
+    </structure>
+</dictionary>
+```
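+With such a dictionary in place (here named `regions_dict`, a hypothetical name), the parent chain can be retrieved like this; per the table above, the chain for key `3` (Center) would be `[3, 2, 1]`:
+
+``` sql
+SELECT dictGetHierarchy('regions_dict', toUInt64(3));
+```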
+## Polygon dictionaries {#polygon-dictionaries}
+
+Polygon dictionaries allow you to efficiently search for the polygon containing specified points.
+For example: defining a city area by geographical coordinates.
+
+Example of a polygon dictionary configuration:
+
+``` xml
+<dictionary>
+    <structure>
+        <key>
+            <attribute>
+                <name>key</name>
+                <type>Array(Array(Array(Array(Float64))))</type>
+            </attribute>
+        </key>
+
+        <attribute>
+            <name>name</name>
+            <type>String</type>
+            <null_value></null_value>
+        </attribute>
+
+        <attribute>
+            <name>value</name>
+            <type>UInt64</type>
+            <null_value>0</null_value>
+        </attribute>
+    </structure>
+
+    <layout>
+        <polygon>
+            <store_polygon_key_column>1</store_polygon_key_column>
+        </polygon>
+    </layout>
+
+    ...
+</dictionary>
+```
+
+The corresponding [DDL-query](../../sql-reference/statements/create/dictionary.md#create-dictionary-query):
+``` sql
+CREATE DICTIONARY polygon_dict_name (
+    key Array(Array(Array(Array(Float64)))),
+    name String,
+    value UInt64
+)
+PRIMARY KEY key
+LAYOUT(POLYGON(STORE_POLYGON_KEY_COLUMN 1))
+...
+```
+
+When configuring the polygon dictionary, the key must have one of two types:
+
+- A simple polygon. It is an array of points.
+- MultiPolygon. It is an array of polygons. Each polygon is a two-dimensional array of points. The first element of this array is the outer boundary of the polygon, and subsequent elements specify areas to be excluded from it.
+
+Points can be specified as an array or a tuple of their coordinates. In the current implementation, only two-dimensional points are supported.
+
+The user can upload their own data in all formats supported by ClickHouse.
+
+There are 3 types of [in-memory storage](#storig-dictionaries-in-memory) available:
+
+- `POLYGON_SIMPLE`. This is a naive implementation, where a linear pass through all polygons is made for each query, and membership is checked for each one without using additional indexes.
+
+- `POLYGON_INDEX_EACH`. A separate index is built for each polygon, which in most cases allows you to quickly check whether a point belongs to it (optimized for geographical regions).
+Also, a grid is superimposed on the area under consideration, which significantly narrows the number of polygons under consideration.
+The grid is created by recursively dividing the cell into 16 equal parts and is configured with two parameters.
+The division stops when the recursion depth reaches `MAX_DEPTH` or when the cell crosses no more than `MIN_INTERSECTIONS` polygons.
+To respond to a query, the corresponding cell is located, and the indexes of the polygons stored in it are accessed alternately.
+
+- `POLYGON_INDEX_CELL`. This placement also creates the grid described above. The same options are available. For each grid cell, an index is built on all pieces of polygons that fall into it, which allows you to quickly respond to a request.
+
+- `POLYGON`. Synonym to `POLYGON_INDEX_CELL`.
+
+Dictionary queries are carried out using standard [functions](../../sql-reference/functions/ext-dict-functions.md) for working with dictionaries.
+An important difference is that here the keys will be the points for which you want to find the polygon containing them.
+
+**Example**
+
+Example of working with the dictionary defined above:
+
+``` sql
+CREATE TABLE points (
+    x Float64,
+    y Float64
+)
+...
+SELECT tuple(x, y) AS key, dictGet(dict_name, 'name', key), dictGet(dict_name, 'value', key) FROM points ORDER BY x, y;
+```
+
+As a result of executing the last command, for each point in the `points` table, the minimum-area polygon containing the point will be found, and the requested attributes will be output.
+
+**Example**
+
+You can read columns from polygon dictionaries via a SELECT query; just enable `store_polygon_key_column = 1` in the dictionary configuration or the corresponding DDL query.
+
+Query:
+
+``` sql
+CREATE TABLE polygons_test_table
+(
+    key Array(Array(Array(Tuple(Float64, Float64)))),
+    name String
+) ENGINE = TinyLog;
+
+INSERT INTO polygons_test_table VALUES ([[[(3, 1), (0, 1), (0, -1), (3, -1)]]], 'Value');
+
+CREATE DICTIONARY polygons_test_dictionary
+(
+    key Array(Array(Array(Tuple(Float64, Float64)))),
+    name String
+)
+PRIMARY KEY key
+SOURCE(CLICKHOUSE(TABLE 'polygons_test_table'))
+LAYOUT(POLYGON(STORE_POLYGON_KEY_COLUMN 1))
+LIFETIME(0);
+
+SELECT * FROM polygons_test_dictionary;
+```
+
+Result:
+
+``` text
+┌─key─────────────────────────────┬─name──┐
+│ [[[(3,1),(0,1),(0,-1),(3,-1)]]] │ Value │
+└─────────────────────────────────┴───────┘
+```
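+A point lookup against the same dictionary also works; the point `(1, 0)` lies inside the polygon inserted above, so this is expected to return `'Value'`:
+
+``` sql
+SELECT dictGet('polygons_test_dictionary', 'name', tuple(1.0, 0.0));
+```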
+## Regular Expression Tree Dictionary {#regexp-tree-dictionary}
+
+Regular expression tree dictionaries are a special type of dictionary which represent the mapping from key to attributes using a tree of regular expressions. There are some use cases, e.g. parsing of [user agent](https://en.wikipedia.org/wiki/User_agent) strings, which can be expressed elegantly with regexp tree dictionaries.
+
+### Use Regular Expression Tree Dictionary in ClickHouse Open-Source
+
+Regular expression tree dictionaries are defined in ClickHouse open-source using the YAMLRegExpTree source, which is provided with the path to a YAML file containing the regular expression tree.
+
+```sql
+CREATE DICTIONARY regexp_dict
+(
+    regexp String,
+    name String,
+    version String
+)
+PRIMARY KEY(regexp)
+SOURCE(YAMLRegExpTree(PATH '/var/lib/clickhouse/user_files/regexp_tree.yaml'))
+LAYOUT(regexp_tree)
+...
+```
+
+The dictionary source `YAMLRegExpTree` represents the structure of a regexp tree. For example:
+
+```yaml
+- regexp: 'Linux/(\d+[\.\d]*).+tlinux'
+  name: 'TencentOS'
+  version: '\1'
+
+- regexp: '\d+/tclwebkit(?:\d+[\.\d]*)'
+  name: 'Android'
+  versions:
+    - regexp: '33/tclwebkit'
+      version: '13'
+    - regexp: '3[12]/tclwebkit'
+      version: '12'
+    - regexp: '30/tclwebkit'
+      version: '11'
+    - regexp: '29/tclwebkit'
+      version: '10'
+```
+
+This config consists of a list of regular expression tree nodes. Each node has the following structure:
+
+- **regexp**: the regular expression of the node.
+- **attributes**: a list of user-defined dictionary attributes. In this example, there are two attributes: `name` and `version`. The first node defines both attributes. The second node only defines attribute `name`. Attribute `version` is provided by the child nodes of the second node.
+  - The value of an attribute may contain **back references**, referring to capture groups of the matched regular expression. In the example, the value of attribute `version` in the first node consists of a back-reference `\1` to capture group `(\d+[\.\d]*)` in the regular expression. Back-reference numbers range from 1 to 9 and are written as `$1` or `\1` (for number 1). The back reference is replaced by the matched capture group during query execution.
+- **child nodes**: a list of children of a regexp tree node, each of which has its own attributes and (potentially) children nodes. String matching proceeds in a depth-first fashion. If a string matches a regexp node, the dictionary checks if it also matches the node's child nodes. If that is the case, the attributes of the deepest matching node are assigned. Attributes of a child node overwrite equally named attributes of parent nodes. The name of child nodes in YAML files can be arbitrary, e.g. `versions` in the above example.
+
+Regexp tree dictionaries only allow access using the functions `dictGet` and `dictGetOrDefault`.
+
+Example:
+
+```sql
+SELECT dictGet('regexp_dict', ('name', 'version'), '31/tclwebkit1024');
+```
+
+Result:
+
+```text
+┌─dictGet('regexp_dict', ('name', 'version'), '31/tclwebkit1024')─┐
+│ ('Android','12')                                                │
+└─────────────────────────────────────────────────────────────────┘
+```
+
+In this case, we first match the regular expression `\d+/tclwebkit(?:\d+[\.\d]*)` in the top layer's second node. The dictionary then continues to look into the child nodes and finds that the string also matches `3[12]/tclwebkit`. As a result, the value of attribute `name` is `Android` (defined in the first layer) and the value of attribute `version` is `12` (defined in the child node).
+
+With a powerful YAML configuration file, we can use regexp tree dictionaries as a user agent string parser. We support [uap-core](https://github.com/ua-parser/uap-core) and demonstrate how to use it in the functional test [02504_regexp_dictionary_ua_parser](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/02504_regexp_dictionary_ua_parser.sh).
+
+### Use Regular Expression Tree Dictionary in ClickHouse Cloud
+
+The `YAMLRegExpTree` source used above works in ClickHouse Open Source but not in ClickHouse Cloud. To use regexp tree dictionaries in ClickHouse Cloud, first create a regexp tree dictionary from a YAML file locally in ClickHouse Open Source, then dump this dictionary into a CSV file using the `dictionary` table function and the [INTO OUTFILE](../statements/select/into-outfile.md) clause.
+
+```sql
+SELECT * FROM dictionary(regexp_dict) INTO OUTFILE('regexp_dict.csv')
+```
+
+The content of the CSV file is:
+
+```text
+1,0,"Linux/(\d+[\.\d]*).+tlinux","['version','name']","['\\1','TencentOS']"
+2,0,"(\d+)/tclwebkit(\d+[\.\d]*)","['comment','version','name']","['test $1 and $2','$1','Android']"
+3,2,"33/tclwebkit","['version']","['13']"
+4,2,"3[12]/tclwebkit","['version']","['12']"
+5,2,"30/tclwebkit","['version']","['11']"
+6,2,"29/tclwebkit","['version']","['10']"
+```
+
+The schema of the dumped file is:
+
+- `id UInt64`: the id of the RegexpTree node.
+- `parent_id UInt64`: the id of the parent of a node.
+- `regexp String`: the regular expression string.
+- `keys Array(String)`: the names of user-defined attributes.
+- `values Array(String)`: the values of user-defined attributes.
+
+To create the dictionary in ClickHouse Cloud, first create a table `regexp_dictionary_source_table` with the below table structure:
+
+```sql
+CREATE TABLE regexp_dictionary_source_table
+(
+    id UInt64,
+    parent_id UInt64,
+    regexp String,
+    keys   Array(String),
+    values Array(String)
+) ENGINE=Memory;
+```
+
+Then upload the local CSV by:
+
+```bash
+clickhouse client \
+    --host MY_HOST \
+    --secure \
+    --password MY_PASSWORD \
+    --query "
+    INSERT INTO regexp_dictionary_source_table
+    SELECT * FROM input ('id UInt64, parent_id UInt64, regexp String, keys Array(String), values Array(String)')
+    FORMAT CSV" < regexp_dict.csv
+```
+
+See [Insert Local Files](https://clickhouse.com/docs/en/integrations/data-ingestion/insert-local-files) for more details. After the source table is initialized, we can create a RegexpTree dictionary from the table source:
+
+``` sql
+CREATE DICTIONARY regexp_dict
+(
+    regexp String,
+    name String,
+    version String
+)
+PRIMARY KEY(regexp)
+SOURCE(CLICKHOUSE(TABLE 'regexp_dictionary_source_table'))
+LIFETIME(0)
+LAYOUT(regexp_tree);
+```
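+Once created, the Cloud dictionary behaves exactly like the open-source variant; for example, the lookup shown earlier should produce the same result:
+
+``` sql
+SELECT dictGet('regexp_dict', ('name', 'version'), '31/tclwebkit1024');
+```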
+## Embedded Dictionaries {#embedded-dictionaries}
+
+<SelfManaged />
+
+ClickHouse contains a built-in feature for working with a geobase.
+
+This allows you to:
+
+- Use a region’s ID to get its name in the desired language.
+- Use a region’s ID to get the ID of a city, area, federal district, country, or continent.
+- Check whether a region is part of another region.
+- Get a chain of parent regions.
+
+All the functions support “translocality,” the ability to simultaneously use different perspectives on region ownership. For more information, see the section “Functions for working with web analytics dictionaries”.
+
+The internal dictionaries are disabled in the default package.
+To enable them, uncomment the parameters `path_to_regions_hierarchy_file` and `path_to_regions_names_files` in the server configuration file.
+
+The geobase is loaded from text files.
+
+Place the `regions_hierarchy*.txt` files into the `path_to_regions_hierarchy_file` directory. This configuration parameter must contain the path to the `regions_hierarchy.txt` file (the default regional hierarchy), and the other files (`regions_hierarchy_ua.txt`) must be located in the same directory.
+
+Put the `regions_names_*.txt` files in the `path_to_regions_names_files` directory.
+
+You can also create these files yourself. The file format is as follows:
+
+`regions_hierarchy*.txt`: TabSeparated (no header), columns:
+
+- region ID (`UInt32`)
+- parent region ID (`UInt32`)
+- region type (`UInt8`): 1 - continent, 3 - country, 4 - federal district, 5 - region, 6 - city; other types do not have values
+- population (`UInt32`) — optional column
+
+`regions_names_*.txt`: TabSeparated (no header), columns:
+
+- region ID (`UInt32`)
+- region name (`String`) — Can’t contain tabs or line feeds, even escaped ones.
+
+A flat array is used for storing in RAM. For this reason, IDs shouldn’t be more than a million.
+
+Dictionaries can be updated without restarting the server. However, the set of available dictionaries is not updated.
+For updates, the file modification times are checked. If a file has changed, the dictionary is updated.
+The interval to check for changes is configured in the `builtin_dictionaries_reload_interval` parameter.
+Dictionary updates (other than loading at first use) do not block queries. During updates, queries use the old versions of dictionaries. If an error occurs during an update, the error is written to the server log, and queries continue using the old version of dictionaries.
+
+We recommend periodically updating the dictionaries with the geobase. During an update, generate new files and write them to a separate location. When everything is ready, rename them to the files used by the server.
+
+There are also functions for working with OS identifiers and search engines, but they shouldn’t be used.
diff --git a/docs/en/sql-reference/dictionaries/internal-dicts.md b/docs/en/sql-reference/dictionaries/internal-dicts.md deleted file mode 100644 index 11c6ee93aa6..00000000000 --- a/docs/en/sql-reference/dictionaries/internal-dicts.md +++ /dev/null @@ -1,55 +0,0 @@ ---- -slug: /en/sql-reference/dictionaries/internal-dicts -sidebar_position: 39 -sidebar_label: Embedded Dictionaries ---- -import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md'; - -# Embedded Dictionaries - - - -ClickHouse contains a built-in feature for working with a geobase. - -This allows you to: - -- Use a region’s ID to get its name in the desired language. -- Use a region’s ID to get the ID of a city, area, federal district, country, or continent. -- Check whether a region is part of another region. -- Get a chain of parent regions. - -All the functions support “translocality,” the ability to simultaneously use different perspectives on region ownership. For more information, see the section “Functions for working with web analytics dictionaries”. - -The internal dictionaries are disabled in the default package. -To enable them, uncomment the parameters `path_to_regions_hierarchy_file` and `path_to_regions_names_files` in the server configuration file. - -The geobase is loaded from text files. - -Place the `regions_hierarchy*.txt` files into the `path_to_regions_hierarchy_file` directory. This configuration parameter must contain the path to the `regions_hierarchy.txt` file (the default regional hierarchy), and the other files (`regions_hierarchy_ua.txt`) must be located in the same directory. - -Put the `regions_names_*.txt` files in the `path_to_regions_names_files` directory. - -You can also create these files yourself. The file format is as follows: - -`regions_hierarchy*.txt`: TabSeparated (no header), columns: - -- region ID (`UInt32`) -- parent region ID (`UInt32`) -- region type (`UInt8`): 1 - continent, 3 - country, 4 - federal district, 5 - region, 6 - city; other types do not have values -- population (`UInt32`) — optional column - -`regions_names_*.txt`: TabSeparated (no header), columns: - -- region ID (`UInt32`) -- region name (`String`) — Can’t contain tabs or line feeds, even escaped ones. - -A flat array is used for storing in RAM. For this reason, IDs shouldn’t be more than a million. - -Dictionaries can be updated without restarting the server. However, the set of available dictionaries is not updated. -For updates, the file modification times are checked. If a file has changed, the dictionary is updated. -The interval to check for changes is configured in the `builtin_dictionaries_reload_interval` parameter. -Dictionary updates (other than loading at first use) do not block queries. During updates, queries use the old versions of dictionaries. If an error occurs during an update, the error is written to the server log, and queries continue using the old version of dictionaries. - -We recommend periodically updating the dictionaries with the geobase. During an update, generate new files and write them to a separate location. When everything is ready, rename them to the files used by the server. - -There are also functions for working with OS identifiers and search engines, but they shouldn’t be used. 
diff --git a/docs/en/sql-reference/distributed-ddl.md b/docs/en/sql-reference/distributed-ddl.md
index ff5155391be..d170f3765c2 100644
--- a/docs/en/sql-reference/distributed-ddl.md
+++ b/docs/en/sql-reference/distributed-ddl.md
@@ -18,6 +18,6 @@ In order to run these queries correctly, each host must have the same cluster de
 
 The local version of the query will eventually be executed on each host in the cluster, even if some hosts are currently not available.
 
-:::warning
+:::important
 The order for executing queries within a single host is guaranteed.
-:::
\ No newline at end of file
+:::
diff --git a/docs/en/sql-reference/functions/arithmetic-functions.md b/docs/en/sql-reference/functions/arithmetic-functions.md
index c5244cf62e3..64fae0e82f0 100644
--- a/docs/en/sql-reference/functions/arithmetic-functions.md
+++ b/docs/en/sql-reference/functions/arithmetic-functions.md
@@ -1,12 +1,14 @@
 ---
 slug: /en/sql-reference/functions/arithmetic-functions
-sidebar_position: 34
+sidebar_position: 5
 sidebar_label: Arithmetic
 ---
 
 # Arithmetic Functions
 
-For all arithmetic functions, the result type is calculated as the smallest number type that the result fits in, if there is such a type. The minimum is taken simultaneously based on the number of bits, whether it is signed, and whether it floats. If there are not enough bits, the highest bit type is taken.
+The result type of all arithmetic functions is the smallest type which can represent all possible results. Size promotion happens for integers up to 32 bit, e.g. `UInt8 + UInt16 = UInt32`. If one of the integers has 64 or more bits, the result is of the same type as the bigger of the input integers, e.g. `UInt16 + UInt128 = UInt128`. While this introduces a risk of overflows around the value range boundary, it ensures that calculations are performed quickly using the maximum native integer width of 64 bit.
+
+The result of addition or multiplication of two integers is unsigned unless one of the integers is signed.
 
 Example:
 
@@ -20,39 +22,78 @@ SELECT toTypeName(0), toTypeName(0 + 0), toTypeName(0 + 0 + 0), toTypeName(0 + 0
 └───────────────┴────────────────────────┴─────────────────────────────────┴──────────────────────────────────────────┘
 ```
 
-Arithmetic functions work for any pair of types from UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, or Float64.
+Arithmetic functions work for any pair of `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`, `Float32`, or `Float64` values.
 
-Overflow is produced the same way as in C++.
+Overflows are produced the same way as in C++.
 
-## plus(a, b), a + b operator
+## plus
 
-Calculates the sum of the numbers.
-You can also add integer numbers with a date or date and time. In the case of a date, adding an integer means adding the corresponding number of days. For a date with time, it means adding the corresponding number of seconds.
+Calculates the sum of two values `a` and `b`.
 
-## minus(a, b), a - b operator
+**Syntax**
 
-Calculates the difference. The result is always signed.
+```sql
+plus(a, b)
+```
 
-You can also calculate integer numbers from a date or date with time. The idea is the same – see above for ‘plus’.
+It is possible to add an integer and a date or date with time. The former operation increments the number of days in the date, the latter operation increments the number of seconds in the date with time.
 
-## multiply(a, b), a \* b operator
+Alias: `a + b` (operator)
 
-Calculates the product of the numbers.
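+For example, a small illustration of the date behavior described above (the date literal is arbitrary):
+
+```sql
+SELECT toDate('2023-03-01') + 10 AS d; -- adds 10 days, i.e. 2023-03-11
+```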
+## minus
 
-## divide(a, b), a / b operator
+Calculates the difference of two values `a` and `b`. The result is always signed.
 
-Calculates the quotient of the numbers. The result type is always a floating-point type.
-It is not integer division. For integer division, use the ‘intDiv’ function.
-When dividing by zero you get ‘inf’, ‘-inf’, or ‘nan’.
+Similar to `plus`, it is possible to subtract an integer from a date or date with time.
 
-## intDiv(a, b)
+**Syntax**
 
-Calculates the quotient of the numbers. Divides into integers, rounding down (by the absolute value).
+```sql
+minus(a, b)
+```
 
-Returns an integer of the type of the dividend (the first parameter).
+Alias: `a - b` (operator)
+
+## multiply
+
+Calculates the product of two values `a` and `b`.
+
+**Syntax**
+
+```sql
+multiply(a, b)
+```
+
+Alias: `a * b` (operator)
+
+## divide
+
+Calculates the quotient of two values `a` and `b`. The result is always a floating-point value. If you need integer division, you can use the `intDiv` function.
+
+Division by 0 returns `inf`, `-inf`, or `nan`.
+
+**Syntax**
+
+```sql
+divide(a, b)
+```
+
+Alias: `a / b` (operator)
+
+## intDiv
+
+Performs an integer division of two values `a` by `b`, i.e. computes the quotient rounded down to the next smallest integer.
+
+The result has the same type as the dividend (the first parameter).
 An exception is thrown when dividing by zero, when the quotient does not fit in the range of the dividend, or when dividing a minimal negative number by minus one.
 
+**Syntax**
+
+```sql
+intDiv(a, b)
+```
+
 **Example**
 
 Query:
 
@@ -62,6 +103,7 @@
 SELECT
     intDiv(toFloat64(1), 0.001) AS res,
     toTypeName(res)
 ```
+
 ```response
 ┌──res─┬─toTypeName(intDiv(toFloat64(1), 0.001))─┐
 │ 1000 │ Int64                                   │
@@ -73,30 +115,65 @@
 SELECT
     intDiv(1, 0.001) AS res,
     toTypeName(res)
 ```
+
 ```response
 Received exception from server (version 23.2.1):
 Code: 153. DB::Exception: Received from localhost:9000. DB::Exception: Cannot perform integer division, because it will produce infinite or too large number: While processing intDiv(1, 0.001) AS res, toTypeName(res). (ILLEGAL_DIVISION)
 ```
 
-## intDivOrZero(a, b)
+## intDivOrZero
 
-Differs from ‘intDiv’ in that it returns zero when dividing by zero or when dividing a minimal negative number by minus one.
+Same as `intDiv` but returns zero when dividing by zero or when dividing a minimal negative number by minus one.
 
-## modulo(a, b), a % b operator
+**Syntax**
+
+```sql
+intDivOrZero(a, b)
+```
+
+## modulo
+
+Calculates the remainder of the division of two values `a` and `b`.
 
-Calculates the remainder when dividing `a` by `b`. The result type is an integer if both inputs are integers. If one of the inputs is a floating-point number, the result is a floating-point number.
+The remainder is computed like in C++. Truncated division is used for negative numbers.
+An exception is thrown when dividing by zero or when dividing a minimal negative number by minus one.
 
-## moduloOrZero(a, b)
+**Syntax**
 
-Differs from [modulo](#modulo) in that it returns zero when the divisor is zero.
+```sql
+modulo(a, b)
+```
 
-## positiveModulo(a, b), positive_modulo(a, b), pmod(a, b)
-Calculates the remainder when dividing `a` by `b`. Similar to the function `modulo` except that `positive_modulo` always returns a non-negative number.
+Alias: `a % b` (operator)
 
-Notice that `positive_modulo` is 4-5 times slower than `modulo`. You should not use `positive_modulo` unless you want to get a positive result and don't care about performance too much.
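+A quick illustration of the truncation behavior for negative operands (values computed per the C++ rule stated above):
+
+```sql
+SELECT modulo(7, 3), modulo(-7, 3); -- returns 1 and -1 (truncated division, as in C++)
+```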
+## moduloOrZero
+
+Like [modulo](#modulo) but returns zero when the divisor is zero.
+
+**Syntax**
+
+```sql
+moduloOrZero(a, b)
+```
+
+## positiveModulo
+
+Like [modulo](#modulo) but always returns a non-negative number.
+
+This function is 4-5 times slower than `modulo`.
+
+**Syntax**
+
+```sql
+positiveModulo(a, b)
+```
+
+Alias:
+- `positive_modulo(a, b)`
+- `pmod(a, b)`
 
 **Example**
 
@@ -108,51 +185,67 @@ SELECT positiveModulo(-1, 10)
 
 Result:
 
-```text
-
+```result
 ┌─positiveModulo(-1, 10)─┐
 │                      9 │
 └────────────────────────┘
 ```
 
-## negate(a), -a operator
+## negate
 
-Calculates a number with the reverse sign. The result is always signed.
-
-## abs(a)
-
-Calculates the absolute value of the number (a). That is, if a \< 0, it returns -a. For unsigned types it does not do anything. For signed integer types, it returns an unsigned number.
-
-## gcd(a, b)
-
-Returns the greatest common divisor of the numbers.
-An exception is thrown when dividing by zero or when dividing a minimal negative number by minus one.
-
-## lcm(a, b)
-
-Returns the least common multiple of the numbers.
-An exception is thrown when dividing by zero or when dividing a minimal negative number by minus one.
-
-## max2
-
-Compares two values and returns the maximum. The returned value is converted to [Float64](../../sql-reference/data-types/float.md).
+Negates a value `a`. The result is always signed.
 
 **Syntax**
 
 ```sql
-max2(value1, value2)
+negate(a)
 ```
 
-**Arguments**
+Alias: `-a`
 
-- `value1` — First value. [Int/UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).
-- `value2` — Second value. [Int/UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).
+## abs
 
-**Returned value**
+Calculates the absolute value of `a`. Has no effect if `a` is of an unsigned type. If `a` is of a signed type, it returns an unsigned number.
 
-- The maximum of two values.
+**Syntax**
 
-Type: [Float](../../sql-reference/data-types/float.md).
+```sql
+abs(a)
+```
+
+## gcd
+
+Returns the greatest common divisor of two values `a` and `b`.
+
+An exception is thrown when dividing by zero or when dividing a minimal negative number by minus one.
+
+**Syntax**
+
+```sql
+gcd(a, b)
+```
+
+## lcm
+
+Returns the least common multiple of two values `a` and `b`.
+
+An exception is thrown when dividing by zero or when dividing a minimal negative number by minus one.
+
+**Syntax**
+
+```sql
+lcm(a, b)
+```
+
+## max2
+
+Returns the bigger of two values `a` and `b`. The returned value is of type [Float64](../../sql-reference/data-types/float.md).
+
+**Syntax**
+
+```sql
+max2(a, b)
+```
 
 **Example**
 
@@ -164,7 +257,7 @@ SELECT max2(-1, 2);
 
 Result:
 
-```text
+```result
 ┌─max2(-1, 2)─┐
 │           2 │
 └─────────────┘
@@ -172,25 +265,14 @@ Result:
 
 ## min2
 
-Compares two values and returns the minimum. The returned value is converted to [Float64](../../sql-reference/data-types/float.md).
+Returns the smaller of two values `a` and `b`. The returned value is of type [Float64](../../sql-reference/data-types/float.md).
 
 **Syntax**
 
 ```sql
-min2(value1, value2)
+min2(a, b)
 ```
 
-**Arguments**
-
-- `value1` — First value. [Int/UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).
-- `value2` — Second value. [Int/UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).
-
-**Returned value**
-
-- The minimum of two values.
-
-Type: [Float](../../sql-reference/data-types/float.md).
-
**Example**

Query:

@@ -201,21 +283,19 @@ SELECT min2(-1, 2);
```

Result:

-```text
+```result
┌─min2(-1, 2)─┐
│ -1 │
└─────────────┘
```

-## multiplyDecimal(a, b[, result_scale])
+## multiplyDecimal

-Performs multiplication on two decimals. Result value will be of type [Decimal256](../../sql-reference/data-types/decimal.md).
-Result scale can be explicitly specified by `result_scale` argument (const Integer in range `[0, 76]`). If not specified, the result scale is the max scale of given arguments.
+Multiplies two decimals `a` and `b`. The result value will be of type [Decimal256](../../sql-reference/data-types/decimal.md).

-:::note
-These functions work significantly slower than usual `multiply`.
-In case you don't really need controlled precision and/or need fast computation, consider using [multiply](#multiply)
-:::
+The scale of the result can be explicitly specified by `result_scale`. If `result_scale` is not specified, it is assumed to be the maximum scale of the input values.
+
+This function works significantly slower than regular `multiply`. If you do not need controlled precision and/or need fast computation, consider using `multiply`.

**Syntax**

@@ -225,31 +305,34 @@ multiplyDecimal(a, b[, result_scale])
```

**Arguments**

-- `a` — First value: [Decimal](../../sql-reference/data-types/decimal.md).
-- `b` — Second value: [Decimal](../../sql-reference/data-types/decimal.md).
-- `result_scale` — Scale of result: [Int/UInt](../../sql-reference/data-types/int-uint.md).
+- `a` — First value: [Decimal](../../sql-reference/data-types/decimal.md).
+- `b` — Second value: [Decimal](../../sql-reference/data-types/decimal.md).
+- `result_scale` — Scale of result: [Int/UInt](../../sql-reference/data-types/int-uint.md).

**Returned value**

-- The result of multiplication with given scale.
+- The result of multiplication with given scale.

Type: [Decimal256](../../sql-reference/data-types/decimal.md).

**Example**

-```text
+```result
┌─multiplyDecimal(toDecimal256(-12, 0), toDecimal32(-2.1, 1), 1)─┐
│ 25.2 │
└────────────────────────────────────────────────────────────────┘
```

-**Difference from regular multiplication:**
+**Differences compared to regular multiplication:**
+
```sql
SELECT toDecimal64(-12.647, 3) * toDecimal32(2.1239, 4);
SELECT toDecimal64(-12.647, 3) as a, toDecimal32(2.1239, 4) as b, multiplyDecimal(a, b);
```

-```text
+Result:
+
+```result
┌─multiply(toDecimal64(-12.647, 3), toDecimal32(2.1239, 4))─┐
│ -26.8609633 │
└───────────────────────────────────────────────────────────┘
@@ -270,7 +353,9 @@ SELECT
    a * b;
```

-```text
+Result:
+
+```result
┌─────────────a─┬─────────────b─┬─multiplyDecimal(toDecimal64(-12.647987876, 9), toDecimal64(123.967645643, 9))─┐
│ -12.647987876 │ 123.967645643 │ -1567.941279108 │
└───────────────┴───────────────┴───────────────────────────────────────────────────────────────────────────────┘

Received exception from server (version 22.11.1):
Code: 407. DB::Exception: Received from localhost:9000. DB::Exception: Decimal math overflow: While processing toDecimal64(-12.647987876, 9) AS a, toDecimal64(123.967645643, 9) AS b, a * b. (DECIMAL_OVERFLOW)
```

-## divideDecimal(a, b[, result_scale])
+## divideDecimal

-Performs division on two decimals. Result value will be of type [Decimal256](../../sql-reference/data-types/decimal.md).
-Result scale can be explicitly specified by `result_scale` argument (const Integer in range `[0, 76]`). If not specified, the result scale is the max scale of given arguments.
-:::note
-These function work significantly slower than usual `divide`.
-In case you don't really need controlled precision and/or need fast computation, consider using [divide](#divide).
-:::
+Divides two decimals `a` and `b`. The result value will be of type [Decimal256](../../sql-reference/data-types/decimal.md).
+
+The scale of the result can be explicitly specified by `result_scale`. If `result_scale` is not specified, it is assumed to be the maximum scale of the input values.
+
+This function works significantly slower than regular `divide`. If you do not need controlled precision and/or need fast computation, consider using `divide`.

**Syntax**

@@ -297,31 +381,34 @@ divideDecimal(a, b[, result_scale])
```

**Arguments**

-- `a` — First value: [Decimal](../../sql-reference/data-types/decimal.md).
-- `b` — Second value: [Decimal](../../sql-reference/data-types/decimal.md).
-- `result_scale` — Scale of result: [Int/UInt](../../sql-reference/data-types/int-uint.md).
+- `a` — First value: [Decimal](../../sql-reference/data-types/decimal.md).
+- `b` — Second value: [Decimal](../../sql-reference/data-types/decimal.md).
+- `result_scale` — Scale of result: [Int/UInt](../../sql-reference/data-types/int-uint.md).

**Returned value**

-- The result of division with given scale.
+- The result of division with given scale.

Type: [Decimal256](../../sql-reference/data-types/decimal.md).

**Example**

-```text
+```result
┌─divideDecimal(toDecimal256(-12, 0), toDecimal32(2.1, 1), 10)─┐
│ -5.7142857142 │
└──────────────────────────────────────────────────────────────┘
```

-**Difference from regular division:**
+**Differences compared to regular division:**
+
```sql
SELECT toDecimal64(-12, 1) / toDecimal32(2.1, 1);
SELECT toDecimal64(-12, 1) as a, toDecimal32(2.1, 1) as b, divideDecimal(a, b, 1), divideDecimal(a, b, 5);
```

-```text
+Result:
+
+```result
┌─divide(toDecimal64(-12, 1), toDecimal32(2.1, 1))─┐
│ -5.7 │
└──────────────────────────────────────────────────┘
@@ -336,7 +423,9 @@ SELECT toDecimal64(-12, 0) / toDecimal32(2.1, 1);
SELECT toDecimal64(-12, 0) as a, toDecimal32(2.1, 1) as b, divideDecimal(a, b, 1), divideDecimal(a, b, 5);
```

-```text
+Result:
+
+```result
DB::Exception: Decimal result's scale is less than argument's one: While processing toDecimal64(-12, 0) / toDecimal32(2.1, 1). (ARGUMENT_OUT_OF_BOUND)

┌───a─┬───b─┬─divideDecimal(toDecimal64(-12, 0), toDecimal32(2.1, 1), 1)─┬─divideDecimal(toDecimal64(-12, 0), toDecimal32(2.1, 1), 5)─┐
diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md
index 9d2f89c1837..3e70f94a0d2 100644
--- a/docs/en/sql-reference/functions/array-functions.md
+++ b/docs/en/sql-reference/functions/array-functions.md
@@ -1,6 +1,6 @@
---
slug: /en/sql-reference/functions/array-functions
-sidebar_position: 35
+sidebar_position: 10
sidebar_label: Arrays
---

@@ -18,7 +18,7 @@ empty([x])

An array is considered empty if it does not contain any elements.

-:::note
+:::note
Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT empty(arr) FROM TABLE;` transforms to `SELECT arr.size0 = 0 FROM TABLE;`.
:::

@@ -26,11 +26,11 @@ The function also works for [strings](string-functions.md#empty) or [UUID](uuid-

**Arguments**

-- `[x]` — Input array. [Array](../data-types/array.md).
+- `[x]` — Input array. [Array](../data-types/array.md).

**Returned value**

-- Returns `1` for an empty array or `0` for a non-empty array.
+- Returns `1` for an empty array or `0` for a non-empty array.

Type: [UInt8](../data-types/int-uint.md).

@@ -62,7 +62,7 @@ notEmpty([x])

An array is considered non-empty if it contains at least one element.

-:::note
+:::note
Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT notEmpty(arr) FROM table` transforms to `SELECT arr.size0 != 0 FROM TABLE`.
:::

@@ -70,11 +70,11 @@ The function also works for [strings](string-functions.md#notempty) or [UUID](uu

**Arguments**

-- `[x]` — Input array. [Array](../data-types/array.md).
+- `[x]` — Input array. [Array](../data-types/array.md).

**Returned value**

-- Returns `1` for a non-empty array or `0` for an empty array.
+- Returns `1` for a non-empty array or `0` for an empty array.

Type: [UInt8](../data-types/int-uint.md).

@@ -118,38 +118,41 @@ Accepts zero arguments and returns an empty array of the appropriate type.

Accepts an empty array and returns a one-element array that is equal to the default value.

-
## range(end), range(\[start, \] end \[, step\])

Returns an array of numbers from `start` to `end - 1` by `step`. The supported types are [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64](../data-types/int-uint.md).

**Syntax**
+
``` sql
range([start, ] end [, step])
```

**Arguments**

-- `start` — The first element of the array. Optional, required if `step` is used. Default value: 0.
-- `end` — The number before which the array is constructed. Required.
-- `step` — Determines the incremental step between each element in the array. Optional. Default value: 1.
+- `start` — The first element of the array. Optional, required if `step` is used. Default value: 0.
+- `end` — The number before which the array is constructed. Required.
+- `step` — Determines the incremental step between each element in the array. Optional. Default value: 1.

**Returned value**

-- Array of numbers from `start` to `end - 1` by `step`.
+- Array of numbers from `start` to `end - 1` by `step`.

**Implementation details**

-- All arguments `start`, `end`, `step` must be below data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`,`Int8`, `Int16`, `Int32`, `Int64`, as well as elements of the returned array, which's type is a super type of all arguments's.
-- An exception is thrown if query results in arrays with a total length of more than number of elements specified by the [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block) setting.
+- All arguments `start`, `end`, and `step` must have one of the following data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`. The elements of the returned array have the supertype of the argument types.
+- An exception is thrown if the query results in arrays with a total length of more than the number of elements specified by the [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block) setting.

**Examples**

Query:
+
``` sql
SELECT range(5), range(1, 5), range(1, 5, 2), range(-1, 5, 2);
```
+
Result:
+
```txt
┌─range(5)────┬─range(1, 5)─┬─range(1, 5, 2)─┬─range(-1, 5, 2)─┐
│ [0,1,2,3,4] │ [1,2,3,4]   │ [1,3]          │ [-1,1,3]        │
@@ -176,7 +179,7 @@ arrayConcat(arrays)

**Arguments**

-- `arrays` – Arbitrary number of arguments of [Array](../../sql-reference/data-types/array.md) type.
+- `arrays` – Arbitrary number of arguments of [Array](../../sql-reference/data-types/array.md) type.

**Example**

@@ -226,19 +229,19 @@ hasAll(set, subset)

**Arguments**

-- `set` – Array of any type with a set of elements.
-- `subset` – Array of any type with elements that should be tested to be a subset of `set`.
+- `set` – Array of any type with a set of elements.
+- `subset` – Array of any type with elements that should be tested to be a subset of `set`.

**Return values**

-- `1`, if `set` contains all of the elements from `subset`.
-- `0`, otherwise.
+- `1`, if `set` contains all of the elements from `subset`.
+- `0`, otherwise.

**Peculiar properties**

-- An empty array is a subset of any array.
-- `Null` processed as a value.
-- Order of values in both of arrays does not matter.
+- An empty array is a subset of any array.
+- `Null` is processed as a value.
+- Order of values in both arrays does not matter.

**Examples**

@@ -264,18 +267,18 @@ hasAny(array1, array2)

**Arguments**

-- `array1` – Array of any type with a set of elements.
-- `array2` – Array of any type with a set of elements.
+- `array1` – Array of any type with a set of elements.
+- `array2` – Array of any type with a set of elements.

**Return values**

-- `1`, if `array1` and `array2` have one similar element at least.
-- `0`, otherwise.
+- `1`, if `array1` and `array2` have at least one element in common.
+- `0`, otherwise.

**Peculiar properties**

-- `Null` processed as a value.
-- Order of values in both of arrays does not matter.
+- `Null` is processed as a value.
+- Order of values in both arrays does not matter.

**Examples**

@@ -301,24 +304,25 @@ In other words, the functions will check whether all the elements of `array2` ar
the `hasAll` function. In addition, it will check that the elements are observed in the same order in both `array1` and `array2`.

For Example:
+
- `hasSubstr([1,2,3,4], [2,3])` returns 1. However, `hasSubstr([1,2,3,4], [3,2])` will return `0`.
- `hasSubstr([1,2,3,4], [1,2,3])` returns 1. However, `hasSubstr([1,2,3,4], [1,2,4])` will return `0`.

**Arguments**

-- `array1` – Array of any type with a set of elements.
-- `array2` – Array of any type with a set of elements.
+- `array1` – Array of any type with a set of elements.
+- `array2` – Array of any type with a set of elements.

**Return values**

-- `1`, if `array1` contains `array2`.
-- `0`, otherwise.
+- `1`, if `array1` contains `array2`.
+- `0`, otherwise.

**Peculiar properties**

-- The function will return `1` if `array2` is empty.
-- `Null` processed as a value. In other words `hasSubstr([1, 2, NULL, 3, 4], [2,3])` will return `0`. However, `hasSubstr([1, 2, NULL, 3, 4], [2,NULL,3])` will return `1`
-- Order of values in both of arrays does matter.
+- The function will return `1` if `array2` is empty.
+- `Null` is processed as a value. In other words `hasSubstr([1, 2, NULL, 3, 4], [2,3])` will return `0`.
However, `hasSubstr([1, 2, NULL, 3, 4], [2,NULL,3])` will return `1` +- Order of values in both of arrays does matter. **Examples** @@ -484,7 +488,7 @@ arrayPopBack(array) **Arguments** -- `array` – Array. +- `array` – Array. **Example** @@ -508,7 +512,7 @@ arrayPopFront(array) **Arguments** -- `array` – Array. +- `array` – Array. **Example** @@ -532,8 +536,8 @@ arrayPushBack(array, single_value) **Arguments** -- `array` – Array. -- `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../../sql-reference/data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. +- `array` – Array. +- `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../../sql-reference/data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. **Example** @@ -557,8 +561,8 @@ arrayPushFront(array, single_value) **Arguments** -- `array` – Array. -- `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../../sql-reference/data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. +- `array` – Array. +- `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../../sql-reference/data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. **Example** @@ -582,11 +586,11 @@ arrayResize(array, size[, extender]) **Arguments:** -- `array` — Array. -- `size` — Required length of the array. - - If `size` is less than the original size of the array, the array is truncated from the right. -- If `size` is larger than the initial size of the array, the array is extended to the right with `extender` values or default values for the data type of the array items. -- `extender` — Value for extending an array. Can be `NULL`. +- `array` — Array. +- `size` — Required length of the array. + - If `size` is less than the original size of the array, the array is truncated from the right. +- If `size` is larger than the initial size of the array, the array is extended to the right with `extender` values or default values for the data type of the array items. +- `extender` — Value for extending an array. Can be `NULL`. 
**Returned value:** @@ -642,7 +646,7 @@ SELECT arraySlice([1, 2, NULL, 4, 5], 2, 3) AS res; Array elements set to `NULL` are handled as normal values. -## arraySort(\[func,\] arr, …) +## arraySort(\[func,\] arr, …) {#array_functions-sort} Sorts the elements of the `arr` array in ascending order. If the `func` function is specified, sorting order is determined by the result of the `func` function applied to the elements of the array. If `func` accepts multiple arguments, the `arraySort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arraySort` description. @@ -682,10 +686,10 @@ SELECT arraySort([1, nan, 2, NULL, 3, nan, -4, NULL, inf, -inf]); └───────────────────────────────────────────────────────────┘ ``` -- `-Inf` values are first in the array. -- `NULL` values are last in the array. -- `NaN` values are right before `NULL`. -- `Inf` values are right before `NaN`. +- `-Inf` values are first in the array. +- `NULL` values are last in the array. +- `NaN` values are right before `NULL`. +- `Inf` values are right before `NaN`. Note that `arraySort` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. In this case, sorting order is determined by the result of the lambda function applied to the elements of the array. @@ -739,11 +743,15 @@ SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) as res; └─────────┘ ``` -:::note +:::note To improve sorting efficiency, the [Schwartzian transform](https://en.wikipedia.org/wiki/Schwartzian_transform) is used. ::: -## arrayReverseSort(\[func,\] arr, …) +## arrayPartialSort(\[func,\] limit, arr, …) + +Same as `arraySort` with additional `limit` argument allowing partial sorting. Returns an array of the same size as the original array where elements in range `[1..limit]` are sorted in ascending order. Remaining elements `(limit..N]` shall contain elements in unspecified order. + +## arrayReverseSort(\[func,\] arr, …) {#array_functions-reverse-sort} Sorts the elements of the `arr` array in descending order. If the `func` function is specified, `arr` is sorted according to the result of the `func` function applied to the elements of the array, and then the sorted array is reversed. If `func` accepts multiple arguments, the `arrayReverseSort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arrayReverseSort` description. @@ -783,10 +791,10 @@ SELECT arrayReverseSort([1, nan, 2, NULL, 3, nan, -4, NULL, inf, -inf]) as res; └───────────────────────────────────────┘ ``` -- `Inf` values are first in the array. -- `NULL` values are last in the array. -- `NaN` values are right before `NULL`. -- `-Inf` values are right before `NaN`. +- `Inf` values are first in the array. +- `NULL` values are last in the array. +- `NaN` values are right before `NULL`. +- `-Inf` values are right before `NaN`. Note that the `arrayReverseSort` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. Example is shown below. @@ -802,8 +810,8 @@ SELECT arrayReverseSort((x) -> -x, [1, 2, 3]) as res; The array is sorted in the following way: -1. At first, the source array (\[1, 2, 3\]) is sorted according to the result of the lambda function applied to the elements of the array. The result is an array \[3, 2, 1\]. -2. 
Array that is obtained on the previous step, is reversed. So, the final result is \[1, 2, 3\]. +1. At first, the source array (\[1, 2, 3\]) is sorted according to the result of the lambda function applied to the elements of the array. The result is an array \[3, 2, 1\]. +2. Array that is obtained on the previous step, is reversed. So, the final result is \[1, 2, 3\]. The lambda function can accept multiple arguments. In this case, you need to pass the `arrayReverseSort` function several arrays of identical length that the arguments of lambda function will correspond to. The resulting array will consist of elements from the first input array; elements from the next input array(s) specify the sorting keys. For example: @@ -819,8 +827,8 @@ SELECT arrayReverseSort((x, y) -> y, ['hello', 'world'], [2, 1]) as res; In this example, the array is sorted in the following way: -1. At first, the source array (\[‘hello’, ‘world’\]) is sorted according to the result of the lambda function applied to the elements of the arrays. The elements that are passed in the second array (\[2, 1\]), define the sorting keys for corresponding elements from the source array. The result is an array \[‘world’, ‘hello’\]. -2. Array that was sorted on the previous step, is reversed. So, the final result is \[‘hello’, ‘world’\]. +1. At first, the source array (\[‘hello’, ‘world’\]) is sorted according to the result of the lambda function applied to the elements of the arrays. The elements that are passed in the second array (\[2, 1\]), define the sorting keys for corresponding elements from the source array. The result is an array \[‘world’, ‘hello’\]. +2. Array that was sorted on the previous step, is reversed. So, the final result is \[‘hello’, ‘world’\]. Other examples are shown below. @@ -844,6 +852,10 @@ SELECT arrayReverseSort((x, y) -> -y, [4, 3, 5], [1, 2, 3]) AS res; └─────────┘ ``` +## arrayPartialReverseSort(\[func,\] limit, arr, …) + +Same as `arrayReverseSort` with additional `limit` argument allowing partial sorting. Returns an array of the same size as the original array where elements in range `[1..limit]` are sorted in descending order. Remaining elements `(limit..N]` shall contain elements in unspecified order. + ## arrayUniq(arr, …) If one argument is passed, it counts the number of different elements in the array. @@ -857,7 +869,7 @@ A special function. See the section [“ArrayJoin function”](../../sql-referen ## arrayDifference -Calculates the difference between adjacent array elements. Returns an array where the first element will be 0, the second is the difference between `a[1] - a[0]`, etc. The type of elements in the resulting array is determined by the type inference rules for subtraction (e.g. `UInt8` - `UInt8` = `Int16`). +Calculates an array of differences between adjacent array elements. The first element of the result array will be 0, the second `a[1] - a[0]`, the third `a[2] - a[1]`, etc. The type of elements in the result array is determined by the type inference rules for subtraction (e.g. `UInt8` - `UInt8` = `Int16`). **Syntax** @@ -867,11 +879,11 @@ arrayDifference(array) **Arguments** -- `array` – [Array](https://clickhouse.com/docs/en/data_types/array/). +- `array` – [Array](https://clickhouse.com/docs/en/data_types/array/). **Returned values** -Returns an array of differences between adjacent elements. +Returns an array of differences between adjacent array elements. 
Type: [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). @@ -919,7 +931,7 @@ arrayDistinct(array) **Arguments** -- `array` – [Array](https://clickhouse.com/docs/en/data_types/array/). +- `array` – [Array](https://clickhouse.com/docs/en/data_types/array/). **Returned values** @@ -987,8 +999,8 @@ arrayReduce(agg_func, arr1, arr2, ..., arrN) **Arguments** -- `agg_func` — The name of an aggregate function which should be a constant [string](../../sql-reference/data-types/string.md). -- `arr` — Any number of [array](../../sql-reference/data-types/array.md) type columns as the parameters of the aggregation function. +- `agg_func` — The name of an aggregate function which should be a constant [string](../../sql-reference/data-types/string.md). +- `arr` — Any number of [array](../../sql-reference/data-types/array.md) type columns as the parameters of the aggregation function. **Returned value** @@ -1052,13 +1064,13 @@ arrayReduceInRanges(agg_func, ranges, arr1, arr2, ..., arrN) **Arguments** -- `agg_func` — The name of an aggregate function which should be a constant [string](../../sql-reference/data-types/string.md). -- `ranges` — The ranges to aggretate which should be an [array](../../sql-reference/data-types/array.md) of [tuples](../../sql-reference/data-types/tuple.md) which containing the index and the length of each range. -- `arr` — Any number of [Array](../../sql-reference/data-types/array.md) type columns as the parameters of the aggregation function. +- `agg_func` — The name of an aggregate function which should be a constant [string](../../sql-reference/data-types/string.md). +- `ranges` — The ranges to aggretate which should be an [array](../../sql-reference/data-types/array.md) of [tuples](../../sql-reference/data-types/tuple.md) which containing the index and the length of each range. +- `arr` — Any number of [Array](../../sql-reference/data-types/array.md) type columns as the parameters of the aggregation function. **Returned value** -- Array containing results of the aggregate function over specified ranges. +- Array containing results of the aggregate function over specified ranges. Type: [Array](../../sql-reference/data-types/array.md). @@ -1108,8 +1120,8 @@ Converts an array of arrays to a flat array. Function: -- Applies to any depth of nested arrays. -- Does not change arrays that are already flat. +- Applies to any depth of nested arrays. +- Does not change arrays that are already flat. The flattened array contains all the elements from all source arrays. @@ -1123,7 +1135,7 @@ Alias: `flatten`. **Arguments** -- `array_of_arrays` — [Array](../../sql-reference/data-types/array.md) of arrays. For example, `[[1,2,3], [4,5]]`. +- `array_of_arrays` — [Array](../../sql-reference/data-types/array.md) of arrays. For example, `[[1,2,3], [4,5]]`. **Examples** @@ -1185,13 +1197,13 @@ arrayZip(arr1, arr2, ..., arrN) **Arguments** -- `arrN` — [Array](../../sql-reference/data-types/array.md). +- `arrN` — [Array](../../sql-reference/data-types/array.md). The function can take any number of arrays of different types. All the input arrays must be of equal size. **Returned value** -- Array with elements from the source arrays grouped into [tuples](../../sql-reference/data-types/tuple.md). Data types in the tuple are the same as types of the input arrays and in the same order as arrays are passed. 
+- Array with elements from the source arrays grouped into [tuples](../../sql-reference/data-types/tuple.md). Data types in the tuple are the same as types of the input arrays and in the same order as arrays are passed. Type: [Array](../../sql-reference/data-types/array.md). @@ -1213,7 +1225,7 @@ Result: ## arrayAUC -Calculate AUC (Area Under the Curve, which is a concept in machine learning, see more details: https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve). +Calculate AUC (Area Under the Curve, which is a concept in machine learning, see more details: ). **Syntax** @@ -1433,12 +1445,12 @@ arrayMin([func,] arr) **Arguments** -- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). -- `arr` — Array. [Array](../../sql-reference/data-types/array.md). +- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../../sql-reference/data-types/array.md). **Returned value** -- The minimum of function values (or the array minimum). +- The minimum of function values (or the array minimum). Type: if `func` is specified, matches `func` return value type, else matches the array elements type. @@ -1488,12 +1500,12 @@ arrayMax([func,] arr) **Arguments** -- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). -- `arr` — Array. [Array](../../sql-reference/data-types/array.md). +- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../../sql-reference/data-types/array.md). **Returned value** -- The maximum of function values (or the array maximum). +- The maximum of function values (or the array maximum). Type: if `func` is specified, matches `func` return value type, else matches the array elements type. @@ -1543,12 +1555,12 @@ arraySum([func,] arr) **Arguments** -- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). -- `arr` — Array. [Array](../../sql-reference/data-types/array.md). +- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../../sql-reference/data-types/array.md). **Returned value** -- The sum of the function values (or the array sum). +- The sum of the function values (or the array sum). Type: for decimal numbers in source array (or for converted values, if `func` is specified) — [Decimal128](../../sql-reference/data-types/decimal.md), for floating point numbers — [Float64](../../sql-reference/data-types/float.md), for numeric unsigned — [UInt64](../../sql-reference/data-types/int-uint.md), and for numeric signed — [Int64](../../sql-reference/data-types/int-uint.md). @@ -1598,12 +1610,12 @@ arrayAvg([func,] arr) **Arguments** -- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). -- `arr` — Array. [Array](../../sql-reference/data-types/array.md). +- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../../sql-reference/data-types/array.md). **Returned value** -- The average of function values (or the array average). +- The average of function values (or the array average). Type: [Float64](../../sql-reference/data-types/float.md). @@ -1639,7 +1651,23 @@ Result: ## arrayCumSum(\[func,\] arr1, …) -Returns an array of partial sums of elements in the source array (a running sum). 
If the `func` function is specified, then the values of the array elements are converted by `func(arr1[i], …, arrN[i])` before summing. +Returns an array of the partial (running) sums of the elements in the source array `arr1`. If `func` is specified, then the sum is computed from applying `func` to `arr1`, `arr2`, ..., `arrN`, i.e. `func(arr1[i], …, arrN[i])`. + +**Syntax** + +``` sql +arrayCumSum(arr) +``` + +**Arguments** + +- `arr` — [Array](../../sql-reference/data-types/array.md) of numeric values. + +**Returned value** + +- Returns an array of the partial sums of the elements in the source array. + +Type: [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). Example: @@ -1655,9 +1683,25 @@ SELECT arrayCumSum([1, 1, 1, 1]) AS res Note that the `arrayCumSum` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. -## arrayCumSumNonNegative(arr) +## arrayCumSumNonNegative(\[func,\] arr1, …) -Same as `arrayCumSum`, returns an array of partial sums of elements in the source array (a running sum). Different `arrayCumSum`, when then returned value contains a value less than zero, the value is replace with zero and the subsequent calculation is performed with zero parameters. For example: +Same as `arrayCumSum`, returns an array of the partial (running) sums of the elements in the source array. If `func` is specified, then the sum is computed from applying `func` to `arr1`, `arr2`, ..., `arrN`, i.e. `func(arr1[i], …, arrN[i])`. Unlike `arrayCumSum`, if the current running sum is smaller than `0`, it is replaced by `0`. + +**Syntax** + +``` sql +arrayCumSumNonNegative(arr) +``` + +**Arguments** + +- `arr` — [Array](../../sql-reference/data-types/array.md) of numeric values. + +**Returned value** + +- Returns an array of non-negative partial sums of elements in the source array. + +Type: [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). ``` sql SELECT arrayCumSumNonNegative([1, 1, -4, 1]) AS res @@ -1668,6 +1712,7 @@ SELECT arrayCumSumNonNegative([1, 1, -4, 1]) AS res │ [1,2,0,1] │ └───────────┘ ``` + Note that the `arraySumNonNegative` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. ## arrayProduct @@ -1682,11 +1727,11 @@ arrayProduct(arr) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md) of numeric values. +- `arr` — [Array](../../sql-reference/data-types/array.md) of numeric values. **Returned value** -- A product of array's elements. +- A product of array's elements. Type: [Float64](../../sql-reference/data-types/float.md). diff --git a/docs/en/sql-reference/functions/array-join.md b/docs/en/sql-reference/functions/array-join.md index d6256ba2dc5..bbb9ca1ba04 100644 --- a/docs/en/sql-reference/functions/array-join.md +++ b/docs/en/sql-reference/functions/array-join.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/functions/array-join -sidebar_position: 61 +sidebar_position: 15 sidebar_label: arrayJoin --- @@ -78,6 +78,22 @@ GROUP BY │ 1 │ Bobruisk │ Firefox │ └─────────────┴──────────┴─────────┘ ``` +### Important note! 
+Using multiple `arrayJoin` with the same expression may not produce the expected results due to optimizations.
+In such cases, consider modifying the repeated array expression with extra operations that do not affect the join result, e.g. `arrayJoin(arraySort(arr))`, `arrayJoin(arrayConcat(arr, []))`.
+
+Example:
+```sql
+SELECT
+    arrayJoin(dice) as first_throw,
+    /* arrayJoin(dice) as second_throw */ -- is technically correct, but will annihilate result set
+    arrayJoin(arrayConcat(dice, [])) as second_throw -- intentionally changed expression to force re-evaluation
+FROM (
+    SELECT [1, 2, 3, 4, 5, 6] as dice
+);
+```
+
+
Note the [ARRAY JOIN](../statements/select/array-join.md) syntax in the SELECT query, which provides broader possibilities.
`ARRAY JOIN` allows you to convert multiple arrays with the same number of elements at a time.

diff --git a/docs/en/sql-reference/functions/bit-functions.md b/docs/en/sql-reference/functions/bit-functions.md
index 1648ce35056..5b342fe4f24 100644
--- a/docs/en/sql-reference/functions/bit-functions.md
+++ b/docs/en/sql-reference/functions/bit-functions.md
@@ -1,6 +1,6 @@
---
slug: /en/sql-reference/functions/bit-functions
-sidebar_position: 48
+sidebar_position: 20
sidebar_label: Bit
---

@@ -34,12 +34,12 @@ bitShiftLeft(a, b)

**Arguments**

-- `a` — A value to shift. [Integer types](../../sql-reference/data-types/int-uint.md), [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
-- `b` — The number of shift positions. [Unsigned integer types](../../sql-reference/data-types/int-uint.md), 64 bit types or less are allowed.
+- `a` — A value to shift. [Integer types](../../sql-reference/data-types/int-uint.md), [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
+- `b` — The number of shift positions. [Unsigned integer types](../../sql-reference/data-types/int-uint.md), 64 bit types or less are allowed.

**Returned value**

-- Shifted value.
+- Shifted value.

The type of the returned value is the same as the type of the input value.

@@ -81,12 +81,12 @@ bitShiftRight(a, b)

**Arguments**

-- `a` — A value to shift. [Integer types](../../sql-reference/data-types/int-uint.md), [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
-- `b` — The number of shift positions. [Unsigned integer types](../../sql-reference/data-types/int-uint.md), 64 bit types or less are allowed.
+- `a` — A value to shift. [Integer types](../../sql-reference/data-types/int-uint.md), [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
+- `b` — The number of shift positions. [Unsigned integer types](../../sql-reference/data-types/int-uint.md), 64 bit types or less are allowed.

**Returned value**

-- Shifted value.
+- Shifted value.

The type of the returned value is the same as the type of the input value.

@@ -179,8 +179,8 @@ SELECT bitTest(number, index)

**Arguments**

-- `number` – Integer number.
-- `index` – Position of bit.
+- `number` – Integer number.
+- `index` – Position of bit.

**Returned values**

@@ -244,8 +244,8 @@ SELECT bitTestAll(number, index1, index2, index3, index4, ...)

**Arguments**

-- `number` – Integer number.
-- `index1`, `index2`, `index3`, `index4` – Positions of bit.
For example, for set of positions (`index1`, `index2`, `index3`, `index4`) is true if and only if all of its positions are true (`index1` ⋀ `index2`, ⋀ `index3` ⋀ `index4`). +- `number` – Integer number. +- `index1`, `index2`, `index3`, `index4` – Positions of bit. For example, for set of positions (`index1`, `index2`, `index3`, `index4`) is true if and only if all of its positions are true (`index1` ⋀ `index2`, ⋀ `index3` ⋀ `index4`). **Returned values** @@ -309,12 +309,12 @@ SELECT bitTestAny(number, index1, index2, index3, index4, ...) **Arguments** -- `number` – Integer number. -- `index1`, `index2`, `index3`, `index4` – Positions of bit. +- `number` – Integer number. +- `index1`, `index2`, `index3`, `index4` – Positions of bit. **Returned values** -Returns result of logical disjuction. +Returns result of logical disjunction. Type: `UInt8`. @@ -364,11 +364,11 @@ bitCount(x) **Arguments** -- `x` — [Integer](../../sql-reference/data-types/int-uint.md) or [floating-point](../../sql-reference/data-types/float.md) number. The function uses the value representation in memory. It allows supporting floating-point numbers. +- `x` — [Integer](../../sql-reference/data-types/int-uint.md) or [floating-point](../../sql-reference/data-types/float.md) number. The function uses the value representation in memory. It allows supporting floating-point numbers. **Returned value** -- Number of bits set to one in the input number. +- Number of bits set to one in the input number. The function does not convert input value to a larger type ([sign extension](https://en.wikipedia.org/wiki/Sign_extension)). So, for example, `bitCount(toUInt8(-1)) = 8`. @@ -404,12 +404,12 @@ bitHammingDistance(int1, int2) **Arguments** -- `int1` — First integer value. [Int64](../../sql-reference/data-types/int-uint.md). -- `int2` — Second integer value. [Int64](../../sql-reference/data-types/int-uint.md). +- `int1` — First integer value. [Int64](../../sql-reference/data-types/int-uint.md). +- `int2` — Second integer value. [Int64](../../sql-reference/data-types/int-uint.md). **Returned value** -- The Hamming distance. +- The Hamming distance. Type: [UInt8](../../sql-reference/data-types/int-uint.md). diff --git a/docs/en/sql-reference/functions/bitmap-functions.md b/docs/en/sql-reference/functions/bitmap-functions.md index 9751de8abed..9b66d00656b 100644 --- a/docs/en/sql-reference/functions/bitmap-functions.md +++ b/docs/en/sql-reference/functions/bitmap-functions.md @@ -1,22 +1,18 @@ --- slug: /en/sql-reference/functions/bitmap-functions -sidebar_position: 49 +sidebar_position: 25 sidebar_label: Bitmap --- # Bitmap Functions -Bitmap functions work for two bitmaps Object value calculation, it is to return new bitmap or cardinality while using formula calculation, such as and, or, xor, and not, etc. - -There are 2 kinds of construction methods for Bitmap Object. One is to be constructed by aggregation function groupBitmap with -State, the other is to be constructed by Array Object. It is also to convert Bitmap Object to Array Object. - -RoaringBitmap is wrapped into a data structure while actual storage of Bitmap objects. When the cardinality is less than or equal to 32, it uses Set objet. When the cardinality is greater than 32, it uses RoaringBitmap object. That is why storage of low cardinality set is faster. - -For more information on RoaringBitmap, see: [CRoaring](https://github.com/RoaringBitmap/CRoaring). +Bitmaps can be constructed in two ways. 
The first way is to use the aggregation function groupBitmap with the `-State` suffix; the other way is to construct a bitmap from an Array object.

## bitmapBuild

-Build a bitmap from unsigned integer array.
+Builds a bitmap from an unsigned integer array.
+
+**Syntax**

``` sql
bitmapBuild(array)
@@ -24,7 +20,7 @@ bitmapBuild(array)

**Arguments**

-- `array` – Unsigned integer array.
+- `array` – Unsigned integer array.

**Example**

``` sql
SELECT bitmapBuild([1, 2, 3, 4, 5]) AS res, toTypeName(res);
```

@@ -40,7 +36,9 @@

## bitmapToArray

-Convert bitmap to integer array.
+Converts a bitmap to an integer array.
+
+**Syntax**

``` sql
bitmapToArray(bitmap)
@@ -48,7 +46,7 @@ bitmapToArray(bitmap)

**Arguments**

-- `bitmap` – Bitmap object.
+- `bitmap` – Bitmap object.

**Example**

``` sql
SELECT bitmapToArray(bitmapBuild([1, 2, 3, 4, 5])) AS res;
```

+Result:
+
``` text
┌─res─────────┐
│ [1,2,3,4,5] │
@@ -64,7 +64,9 @@ SELECT bitmapToArray(bitmapBuild([1, 2, 3, 4, 5])) AS res;

## bitmapSubsetInRange

-Return subset in specified range (not include the range_end).
+Returns the subset of a bitmap with bits within a value interval.
+
+**Syntax**

``` sql
bitmapSubsetInRange(bitmap, range_start, range_end)
@@ -72,9 +74,9 @@ bitmapSubsetInRange(bitmap, range_start, range_end)

**Arguments**

-- `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild).
-- `range_start` – Range start point. Type: [UInt32](../../sql-reference/data-types/int-uint.md).
-- `range_end` – Range end point (excluded). Type: [UInt32](../../sql-reference/data-types/int-uint.md).
+- `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild).
+- `range_start` – Start of the range (inclusive). Type: [UInt32](../../sql-reference/data-types/int-uint.md).
+- `range_end` – End of the range (exclusive). Type: [UInt32](../../sql-reference/data-types/int-uint.md).

**Example**

``` sql
SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]), toUInt32(30), toUInt32(200))) AS res;
```

+Result:
+
``` text
┌─res───────────────┐
│ [30,31,32,33,100] │
@@ -90,7 +94,7 @@ SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,

## bitmapSubsetLimit

-Creates a subset of bitmap with n elements taken between `range_start` and `cardinality_limit`.
+Returns a subset of a bitmap with smallest bit value `range_start` and at most `cardinality_limit` elements.

**Syntax**

``` sql
bitmapSubsetLimit(bitmap, range_start, cardinality_limit)
```

**Arguments**

-- `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild).
-- `range_start` – The subset starting point. Type: [UInt32](../../sql-reference/data-types/int-uint.md).
-- `cardinality_limit` – The subset cardinality upper limit. Type: [UInt32](../../sql-reference/data-types/int-uint.md).
-
-**Returned value**
-
-The subset.
-
-Type: [Bitmap object](#bitmap_functions-bitmapbuild).
+- `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild).
+- `range_start` – Start of the range (inclusive). Type: [UInt32](../../sql-reference/data-types/int-uint.md).
+- `cardinality_limit` – Maximum cardinality of the subset. Type: [UInt32](../../sql-reference/data-types/int-uint.md).
**Example** -Query: - ``` sql SELECT bitmapToArray(bitmapSubsetLimit(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]), toUInt32(30), toUInt32(200))) AS res; ``` @@ -128,7 +124,7 @@ Result: ## subBitmap -Returns the bitmap elements, starting from the `offset` position. The number of returned elements is limited by the `cardinality_limit` parameter. Analog of the [substring](string-functions.md#substring)) string function, but for bitmap. +Returns a subset of the bitmap, starting from position `offset`. The maximum cardinality of the returned bitmap is `cardinality_limit`. **Syntax** @@ -138,20 +134,12 @@ subBitmap(bitmap, offset, cardinality_limit) **Arguments** -- `bitmap` – The bitmap. Type: [Bitmap object](#bitmap_functions-bitmapbuild). -- `offset` – The position of the first element of the subset. Type: [UInt32](../../sql-reference/data-types/int-uint.md). -- `cardinality_limit` – The maximum number of elements in the subset. Type: [UInt32](../../sql-reference/data-types/int-uint.md). - -**Returned value** - -The subset. - -Type: [Bitmap object](#bitmap_functions-bitmapbuild). +- `bitmap` – The bitmap. Type: [Bitmap object](#bitmap_functions-bitmapbuild). +- `offset` – The position of the first element of the subset. Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- `cardinality_limit` – The maximum number of elements in the subset. Type: [UInt32](../../sql-reference/data-types/int-uint.md). **Example** -Query: - ``` sql SELECT bitmapToArray(subBitmap(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]), toUInt32(10), toUInt32(10))) AS res; ``` @@ -169,18 +157,18 @@ Result: Checks whether the bitmap contains an element. ``` sql -bitmapContains(haystack, needle) +bitmapContains(bitmap, needle) ``` **Arguments** -- `haystack` – [Bitmap object](#bitmap_functions-bitmapbuild), where the function searches. -- `needle` – Value that the function searches. Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). +- `needle` – Searched bit value. Type: [UInt32](../../sql-reference/data-types/int-uint.md). **Returned values** -- 0 — If `haystack` does not contain `needle`. -- 1 — If `haystack` contains `needle`. +- 0 — If `bitmap` does not contain `needle`. +- 1 — If `bitmap` contains `needle`. Type: `UInt8`. @@ -190,6 +178,8 @@ Type: `UInt8`. SELECT bitmapContains(bitmapBuild([1,5,7,9]), toUInt32(9)) AS res; ``` +Result: + ``` text ┌─res─┐ │ 1 │ @@ -198,22 +188,25 @@ SELECT bitmapContains(bitmapBuild([1,5,7,9]), toUInt32(9)) AS res; ## bitmapHasAny -Checks whether two bitmaps have intersection by some elements. +Checks whether two bitmaps intersect. + +If `bitmap2` contains exactly one element, consider using [bitmapContains](#bitmap_functions-bitmapcontains) instead as it works more efficiently. + +**Syntax** ``` sql bitmapHasAny(bitmap1, bitmap2) ``` -If you are sure that `bitmap2` contains strictly one element, consider using the [bitmapContains](#bitmap_functions-bitmapcontains) function. It works more efficiently. - **Arguments** -- `bitmap*` – Bitmap object. +- `bitmap1` – Bitmap object 1. +- `bitmap2` – Bitmap object 2. **Return values** -- `1`, if `bitmap1` and `bitmap2` have one similar element at least. -- `0`, otherwise. +- `1`, if `bitmap1` and `bitmap2` have at least one shared element. +- `0`, otherwise. 
**Example**

@@ -221,6 +214,8 @@ SELECT bitmapHasAny(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res;
```

+Result:
+
``` text
┌─res─┐
│ 1 │
@@ -229,16 +224,21 @@ SELECT bitmapHasAny(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res;

## bitmapHasAll

-Analogous to `hasAll(array, array)` returns 1 if the first bitmap contains all the elements of the second one, 0 otherwise.
-If the second argument is an empty bitmap then returns 1.
+Returns 1 if the first bitmap contains all elements of the second bitmap, otherwise 0.
+If the second bitmap is empty, returns 1.
+
+Also see `hasAll(array, array)`.
+
+**Syntax**

``` sql
-bitmapHasAll(bitmap,bitmap)
+bitmapHasAll(bitmap1, bitmap2)
```

**Arguments**

-- `bitmap` – Bitmap object.
+- `bitmap1` – Bitmap object 1.
+- `bitmap2` – Bitmap object 2.

**Example**

``` sql
SELECT bitmapHasAll(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res;
```

+Result:
+
``` text
┌─res─┐
│ 0 │
@@ -254,7 +256,9 @@ SELECT bitmapHasAll(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res;

## bitmapCardinality

-Retrun bitmap cardinality of type UInt64.
+Returns the cardinality of a bitmap.
+
+**Syntax**

``` sql
bitmapCardinality(bitmap)
@@ -262,7 +266,7 @@ bitmapCardinality(bitmap)

**Arguments**

-- `bitmap` – Bitmap object.
+- `bitmap` – Bitmap object.

**Example**

``` sql
SELECT bitmapCardinality(bitmapBuild([1, 2, 3, 4, 5])) AS res;
```

+Result:
+
``` text
┌─res─┐
│ 5 │
@@ -278,13 +284,17 @@ SELECT bitmapCardinality(bitmapBuild([1, 2, 3, 4, 5])) AS res;

## bitmapMin

-Retrun the smallest value of type UInt64 in the set, UINT32_MAX if the set is empty.
+Computes the smallest bit set in a bitmap, or UINT32_MAX if the bitmap is empty.

-    bitmapMin(bitmap)
+**Syntax**
+
+```sql
+bitmapMin(bitmap)
+```

**Arguments**

-- `bitmap` – Bitmap object.
+- `bitmap` – Bitmap object.

**Example**

``` sql
SELECT bitmapMin(bitmapBuild([1, 2, 3, 4, 5])) AS res;
```

+Result:
+
``` text
┌─res─┐
│ 1 │
@@ -300,13 +312,17 @@ SELECT bitmapMin(bitmapBuild([1, 2, 3, 4, 5])) AS res;

## bitmapMax

-Retrun the greatest value of type UInt64 in the set, 0 if the set is empty.
+Computes the greatest bit set in a bitmap, or 0 if the bitmap is empty.

-    bitmapMax(bitmap)
+**Syntax**
+
+```sql
+bitmapMax(bitmap)
+```

**Arguments**

-- `bitmap` – Bitmap object.
+- `bitmap` – Bitmap object.

**Example**

``` sql
SELECT bitmapMax(bitmapBuild([1, 2, 3, 4, 5])) AS res;
```

+Result:
+
``` text
┌─res─┐
│ 5 │
@@ -322,15 +340,21 @@ SELECT bitmapMax(bitmapBuild([1, 2, 3, 4, 5])) AS res;

## bitmapTransform

-Transform an array of values in a bitmap to another array of values, the result is a new bitmap.
+Replaces at most N bits in a bitmap. The old and new value of the i-th replaced bit is given by `from_array[i]` and `to_array[i]`.

-    bitmapTransform(bitmap, from_array, to_array)
+The result depends on the array ordering of `from_array` and `to_array`.
+
+**Syntax**
+
+``` sql
+bitmapTransform(bitmap, from_array, to_array)
+```

**Arguments**

-- `bitmap` – Bitmap object.
-- `from_array` – UInt32 array. For idx in range \[0, from_array.size()), if bitmap contains from_array\[idx\], then replace it with to_array\[idx\]. Note that the result depends on array ordering if there are common elements between from_array and to_array.
-- `to_array` – UInt32 array, its size shall be the same to from_array.
+- `bitmap` – Bitmap object.
+- `from_array` – UInt32 array. For idx in range \[0, from_array.size()), if bitmap contains from_array\[idx\], then replace it with to_array\[idx\].
+- `to_array` – UInt32 array with the same size as `from_array`.

**Example**

``` sql
SELECT bitmapToArray(bitmapTransform(bitmapBuild([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), cast([5,999,2] as Array(UInt32)), cast([2,888,20] as Array(UInt32)))) AS res;
```

+Result:
+
``` text
┌─res───────────────────┐
│ [1,3,4,6,7,8,9,10,20] │
@@ -346,7 +372,9 @@ SELECT bitmapToArray(bitmapTransform(bitmapBuild([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

## bitmapAnd

-Two bitmap and calculation, the result is a new bitmap.
+Computes the logical conjunction of two bitmaps.
+
+**Syntax**

``` sql
bitmapAnd(bitmap,bitmap)
@@ -354,7 +382,7 @@ bitmapAnd(bitmap,bitmap)

**Arguments**

-- `bitmap` – Bitmap object.
+- `bitmap` – Bitmap object.

**Example**

``` sql
SELECT bitmapToArray(bitmapAnd(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS res;
```

+Result:
+
``` text
┌─res─┐
│ [3] │
@@ -370,7 +400,9 @@ SELECT bitmapToArray(bitmapAnd(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS re

## bitmapOr

-Two bitmap or calculation, the result is a new bitmap.
+Computes the logical disjunction of two bitmaps.
+
+**Syntax**

``` sql
bitmapOr(bitmap,bitmap)
@@ -378,7 +410,7 @@ bitmapOr(bitmap,bitmap)

**Arguments**

-- `bitmap` – Bitmap object.
+- `bitmap` – Bitmap object.

**Example**

``` sql
SELECT bitmapToArray(bitmapOr(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS res;
```

+Result:
+
``` text
┌─res─────────┐
│ [1,2,3,4,5] │
@@ -394,7 +428,9 @@ SELECT bitmapToArray(bitmapOr(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS res

## bitmapXor

-Two bitmap xor calculation, the result is a new bitmap.
+Computes the symmetric difference (XOR) of two bitmaps.
+
+**Syntax**

``` sql
bitmapXor(bitmap,bitmap)
@@ -402,7 +438,7 @@ bitmapXor(bitmap,bitmap)

**Arguments**

-- `bitmap` – Bitmap object.
+- `bitmap` – Bitmap object.

**Example**

``` sql
SELECT bitmapToArray(bitmapXor(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS res;
```

+Result:
+
``` text
┌─res───────┐
│ [1,2,4,5] │
@@ -418,7 +456,9 @@ SELECT bitmapToArray(bitmapXor(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS re

## bitmapAndnot

-Two bitmap andnot calculation, the result is a new bitmap.
+Computes the AND-NOT of two bitmaps, i.e. returns the elements of the first bitmap that are not contained in the second.
+
+**Syntax**

``` sql
bitmapAndnot(bitmap,bitmap)
@@ -426,7 +466,7 @@ bitmapAndnot(bitmap,bitmap)

**Arguments**

-- `bitmap` – Bitmap object.
+- `bitmap` – Bitmap object.

**Example**

``` sql
SELECT bitmapToArray(bitmapAndnot(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS res;
```

+Result:
+
``` text
┌─res───┐
│ [1,2] │
@@ -442,7 +484,9 @@ SELECT bitmapToArray(bitmapAndnot(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS

## bitmapAndCardinality

-Two bitmap and calculation, return cardinality of type UInt64.
+Returns the cardinality of the logical conjunction of two bitmaps.
+
+**Syntax**

``` sql
bitmapAndCardinality(bitmap,bitmap)
@@ -450,7 +494,7 @@ bitmapAndCardinality(bitmap,bitmap)

**Arguments**

-- `bitmap` – Bitmap object.
+- `bitmap` – Bitmap object.
**Example**

@@ -458,6 +502,8 @@ SELECT bitmapAndCardinality(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res;
```

+Result:
+
``` text
┌─res─┐
│ 1 │
@@ -466,7 +512,7 @@ SELECT bitmapAndCardinality(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res;

## bitmapOrCardinality

-Two bitmap or calculation, return cardinality of type UInt64.
+Returns the cardinality of the logical disjunction of two bitmaps.

``` sql
bitmapOrCardinality(bitmap,bitmap)
@@ -474,7 +520,7 @@ bitmapOrCardinality(bitmap,bitmap)

**Arguments**

-- `bitmap` – Bitmap object.
+- `bitmap` – Bitmap object.

**Example**

``` sql
SELECT bitmapOrCardinality(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res;
```

+Result:
+
``` text
┌─res─┐
│ 5 │
@@ -490,7 +538,7 @@ SELECT bitmapOrCardinality(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res;

## bitmapXorCardinality

-Two bitmap xor calculation, return cardinality of type UInt64.
+Returns the cardinality of the XOR of two bitmaps.

``` sql
bitmapXorCardinality(bitmap,bitmap)
@@ -498,7 +546,7 @@ bitmapXorCardinality(bitmap,bitmap)

**Arguments**

-- `bitmap` – Bitmap object.
+- `bitmap` – Bitmap object.

**Example**

``` sql
SELECT bitmapXorCardinality(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res;
```

+Result:
+
``` text
┌─res─┐
│ 4 │
@@ -514,7 +564,7 @@ SELECT bitmapXorCardinality(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res;

## bitmapAndnotCardinality

-Two bitmap andnot calculation, return cardinality of type UInt64.
+Returns the cardinality of the AND-NOT operation of two bitmaps.

``` sql
bitmapAndnotCardinality(bitmap,bitmap)
@@ -522,7 +572,7 @@ bitmapAndnotCardinality(bitmap,bitmap)

**Arguments**

-- `bitmap` – Bitmap object.
+- `bitmap` – Bitmap object.

**Example**

``` sql
SELECT bitmapAndnotCardinality(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res;
```

+Result:
+
``` text
┌─res─┐
│ 2 │
diff --git a/docs/en/sql-reference/functions/comparison-functions.md b/docs/en/sql-reference/functions/comparison-functions.md
index 586c0dc54e6..297d84eb8a5 100644
--- a/docs/en/sql-reference/functions/comparison-functions.md
+++ b/docs/en/sql-reference/functions/comparison-functions.md
@@ -1,34 +1,89 @@
---
slug: /en/sql-reference/functions/comparison-functions
-sidebar_position: 36
+sidebar_position: 35
sidebar_label: Comparison
---

# Comparison Functions

-Comparison functions always return 0 or 1 (Uint8).
+The comparison functions below return 0 or 1 as UInt8.

The following types can be compared:
+- numbers
+- strings and fixed strings
+- dates
+- dates with times

-- numbers
-- strings and fixed strings
-- dates
-- dates with times
+Only values within the same group can be compared (e.g. UInt16 and UInt64) but not across groups (e.g. UInt16 and DateTime).

-within each group, but not between different groups.
+Strings are compared byte-by-byte. Note that this may lead to unexpected results if one of the strings contains UTF-8 encoded multi-byte characters.

-For example, you can’t compare a date with a string. You have to use a function to convert the string to a date, or vice versa.
+A string S1 that has another string S2 as its prefix is considered longer than S2.

-Strings are compared by bytes. A shorter string is smaller than all strings that start with it and that contain at least one more character.
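+For illustration, the two byte-wise rules above can be checked with a query such as the following (illustrative example; `1` means the comparison holds):
+
+```sql
+SELECT 'abc' < 'abd', 'abc' < 'abcd';
+```
+
+```result
+┌─less('abc', 'abd')─┬─less('abc', 'abcd')─┐
+│                  1 │                   1 │
+└────────────────────┴─────────────────────┘
+```
+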
+## equals, `=`, `==` operators -### equals, a `=` b and a `==` b operator +**Syntax** -### notEquals, a `!=` b and a `<>` b operator +```sql +equals(a, b) +``` -### less, `<` operator +Alias: +- `a = b` (operator) +- `a == b` (operator) -### greater, `>` operator +## notEquals, `!=`, `<>` operators -### lessOrEquals, `<=` operator +**Syntax** -### greaterOrEquals, `>=` operator +```sql +notEquals(a, b) +``` + +Alias: +- `a != b` (operator) +- `a <> b` (operator) + +## less, `<` operator + +**Syntax** + +```sql +less(a, b) +``` + +Alias: +- `a < b` (operator) + +## greater, `>` operator + +**Syntax** + +```sql +greater(a, b) +``` + +Alias: +- `a > b` (operator) + +## lessOrEquals, `<=` operator + +**Syntax** + +```sql +lessOrEquals(a, b) +``` + +Alias: +- `a <= b` (operator) + +## greaterOrEquals, `>=` operator + +**Syntax** + +```sql +greaterOrEquals(a, b) +``` + +Alias: +- `a >= b` (operator) diff --git a/docs/en/sql-reference/functions/conditional-functions.md b/docs/en/sql-reference/functions/conditional-functions.md index ff1ac237025..eb4e98961f1 100644 --- a/docs/en/sql-reference/functions/conditional-functions.md +++ b/docs/en/sql-reference/functions/conditional-functions.md @@ -1,39 +1,40 @@ --- slug: /en/sql-reference/functions/conditional-functions -sidebar_position: 43 -sidebar_label: 'Conditional ' +sidebar_position: 40 +sidebar_label: Conditional --- # Conditional Functions ## if -Controls conditional branching. Unlike most systems, ClickHouse always evaluate both expressions `then` and `else`. +Performs conditional branching. + +If the condition `cond` evaluates to a non-zero value, the function returns the result of the expression `then`. If `cond` evaluates to zero or `NULL`, then the result of the `else` expression is returned. + +Setting [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) controls whether short-circuit evaluation is used. If enabled, the `then` expression is evaluated only on rows where `cond` is `true` and the `else` expression where `cond` is `false`. For example, with short-circuit evaluation, no division-by-zero exception is thrown when executing the query `SELECT if(number = 0, 0, intDiv(42, number)) FROM numbers(10)`. + +`then` and `else` must be of a similar type. **Syntax** ``` sql if(cond, then, else) ``` - -If the condition `cond` evaluates to a non-zero value, returns the result of the expression `then`, and the result of the expression `else`, if present, is skipped. If the `cond` is zero or `NULL`, then the result of the `then` expression is skipped and the result of the `else` expression, if present, is returned. - -You can use the [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) setting to calculate the `if` function according to a short scheme. If this setting is enabled, `then` expression is evaluated only on rows where `cond` is true, `else` expression – where `cond` is false. For example, an exception about division by zero is not thrown when executing the query `SELECT if(number = 0, 0, intDiv(42, number)) FROM numbers(10)`, because `intDiv(42, number)` will be evaluated only for numbers that doesn't satisfy condition `number = 0`. +Alias: `cond ? then : else` (ternary operator) **Arguments** -- `cond` – The condition for evaluation that can be zero or not. The type is UInt8, Nullable(UInt8) or NULL. -- `then` – The expression to return if condition is met. -- `else` – The expression to return if condition is not met. 
+- `cond` – The evaluated condition. UInt8, Nullable(UInt8) or NULL. +- `then` – The expression returned if `cond` is true. +- `else` – The expression returned if `cond` is `false` or NULL. **Returned values** -The function executes `then` and `else` expressions and returns its result, depending on whether the condition `cond` ended up being zero or not. +The result of either the `then` or `else` expression, depending on the condition `cond`. **Example** -Query: - ``` sql SELECT if(1, plus(2, 2), plus(2, 6)); ``` @@ -46,30 +47,34 @@ Result: └────────────┘ ``` -Query: +## multiIf + +Allows writing the [CASE](../../sql-reference/operators/index.md#operator_case) operator more compactly in the query. + +**Syntax** ``` sql -SELECT if(0, plus(2, 2), plus(2, 6)); +multiIf(cond_1, then_1, cond_2, then_2, ..., else) ``` -Result: +Setting [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) controls whether short-circuit evaluation is used. If enabled, the `then_i` expression is evaluated only on rows where `((NOT cond_1) AND (NOT cond_2) AND ... AND (NOT cond_{i-1}) AND cond_i)` is `true`, `cond_i` will be evaluated only on rows where `((NOT cond_1) AND (NOT cond_2) AND ... AND (NOT cond_{i-1}))` is `true`. For example, with short-circuit evaluation, no division-by-zero exception is thrown when executing the query `SELECT multiIf(number = 2, intDiv(1, number), number = 5) FROM numbers(10)`. + +**Arguments** + +The function accepts `2N+1` parameters: +- `cond_N` — The N-th evaluated condition which controls if `then_N` is returned. +- `then_N` — The result of the function when `cond_N` is true. +- `else` — The result of the function if none of the conditions is true. + +**Returned values** + +The result of one of the `then_N` or `else` expressions, depending on the conditions `cond_N`. + +**Example** + +Assuming this table: ``` text -┌─plus(2, 6)─┐ -│ 8 │ -└────────────┘ -``` - -- `then` and `else` must have the lowest common type. - -**Example:** - -Take this `LEFT_RIGHT` table: - -``` sql -SELECT * -FROM LEFT_RIGHT - ┌─left─┬─right─┐ │ ᴺᵁᴸᴸ │ 4 │ │ 1 │ 3 │ @@ -79,69 +84,6 @@ FROM LEFT_RIGHT └──────┴───────┘ ``` -The following query compares `left` and `right` values: - -``` sql -SELECT - left, - right, - if(left < right, 'left is smaller than right', 'right is greater or equal than left') AS is_smaller -FROM LEFT_RIGHT -WHERE isNotNull(left) AND isNotNull(right) - -┌─left─┬─right─┬─is_smaller──────────────────────────┐ -│ 1 │ 3 │ left is smaller than right │ -│ 2 │ 2 │ right is greater or equal than left │ -│ 3 │ 1 │ right is greater or equal than left │ -└──────┴───────┴─────────────────────────────────────┘ -``` - -Note: `NULL` values are not used in this example, check [NULL values in conditionals](#null-values-in-conditionals) section. - -## Ternary Operator - -It works same as `if` function. - -Syntax: `cond ? then : else` - -Returns `then` if the `cond` evaluates to be true (greater than zero), otherwise returns `else`. - -- `cond` must be of type of `UInt8`, and `then` and `else` must have the lowest common type. - -- `then` and `else` can be `NULL` - -**See also** - -- [ifNotFinite](../../sql-reference/functions/other-functions.md#ifnotfinite). - -## multiIf - -Allows you to write the [CASE](../../sql-reference/operators/index.md#operator_case) operator more compactly in the query.
- -**Syntax** - -``` sql -multiIf(cond_1, then_1, cond_2, then_2, ..., else) -``` - -You can use the [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) setting to calculate the `multiIf` function according to a short scheme. If this setting is enabled, `then_i` expression is evaluated only on rows where `((NOT cond_1) AND (NOT cond_2) AND ... AND (NOT cond_{i-1}) AND cond_i)` is true, `cond_i` will be evaluated only on rows where `((NOT cond_1) AND (NOT cond_2) AND ... AND (NOT cond_{i-1}))` is true. For example, an exception about division by zero is not thrown when executing the query `SELECT multiIf(number = 2, intDiv(1, number), number = 5) FROM numbers(10)`. - -**Arguments** - -- `cond_N` — The condition for the function to return `then_N`. -- `then_N` — The result of the function when executed. -- `else` — The result of the function if none of the conditions is met. - -The function accepts `2N+1` parameters. - -**Returned values** - -The function returns one of the values `then_N` or `else`, depending on the conditions `cond_N`. - -**Example** - -Again using `LEFT_RIGHT` table. - ``` sql SELECT left, @@ -210,3 +152,85 @@ FROM LEFT_RIGHT │ 4 │ ᴺᵁᴸᴸ │ Both equal │ └──────┴───────┴──────────────────┘ ``` + +## greatest + +Returns the greatest across a list of values. All of the list members must be of comparable types. + +Examples: + +```sql +SELECT greatest(1, 2, toUInt8(3), 3.) result, toTypeName(result) type; +``` +```response +┌─result─┬─type────┐ +│ 3 │ Float64 │ +└────────┴─────────┘ +``` + +:::note +The type returned is a Float64 as the UInt8 must be promoted to 64 bit for the comparison. +::: + +```sql +SELECT greatest(['hello'], ['there'], ['world']) +``` +```response +┌─greatest(['hello'], ['there'], ['world'])─┐ +│ ['world'] │ +└───────────────────────────────────────────┘ +``` + +```sql +SELECT greatest(toDateTime32(now() + toIntervalDay(1)), toDateTime64(now(), 3)) +``` +```response +┌─greatest(toDateTime32(plus(now(), toIntervalDay(1))), toDateTime64(now(), 3))─┐ +│ 2023-05-12 01:16:59.000 │ +└───────────────────────────────────────────────────────────────────────────────┘ +``` + +:::note +The type returned is a DateTime64 as the DateTime32 must be promoted to 64 bit for the comparison. +::: + +## least + +Returns the least across a list of values. All of the list members must be of comparable types. + +Examples: + +```sql +SELECT least(1, 2, toUInt8(3), 3.) result, toTypeName(result) type; +``` +```response +┌─result─┬─type────┐ +│ 1 │ Float64 │ +└────────┴─────────┘ +``` + +:::note +The type returned is a Float64 as the UInt8 must be promoted to 64 bit for the comparison. +::: + +```sql +SELECT least(['hello'], ['there'], ['world']) +``` +```response +┌─least(['hello'], ['there'], ['world'])─┐ +│ ['hello'] │ +└────────────────────────────────────────┘ +``` + +```sql +SELECT least(toDateTime32(now() + toIntervalDay(1)), toDateTime64(now(), 3)) +``` +```response +┌─least(toDateTime32(plus(now(), toIntervalDay(1))), toDateTime64(now(), 3))─┐ +│ 2023-05-12 01:16:59.000 │ +└────────────────────────────────────────────────────────────────────────────┘ +``` + +:::note
+The type returned is a DateTime64 as the DateTime32 must be promoted to 64 bit for the comparison.
+::: diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 49b0bc25edd..668864c49af 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1,14 +1,14 @@ --- slug: /en/sql-reference/functions/date-time-functions -sidebar_position: 39 +sidebar_position: 45 sidebar_label: Dates and Times --- # Functions for Working with Dates and Times -Support for time zones. +Most functions in this section accept an optional time zone argument, e.g. `Europe/Amsterdam`. In this case, the time zone is the specified one instead of the local (default) one. -All functions for working with the date and time that have a logical use for the time zone can accept a second optional time zone argument. Example: Asia/Yekaterinburg. In this case, they use the specified time zone instead of the local (default) one. +**Example** ``` sql SELECT @@ -24,14 +24,127 @@ SELECT └─────────────────────┴────────────┴────────────┴─────────────────────┘ ``` -## timeZone +## makeDate -Returns the timezone of the server. -If it is executed in the context of a distributed table, then it generates a normal column with values relevant to each shard. Otherwise it produces a constant value. +Creates a [Date](../../sql-reference/data-types/date.md) +- from a year, month and day argument, or +- from a year and day of year argument. **Syntax** ``` sql +makeDate(year, month, day); +makeDate(year, day_of_year); +``` + +Alias: +- `MAKEDATE(year, month, day);` +- `MAKEDATE(year, day_of_year);` + +**Arguments** + +- `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `month` — Month. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `day_of_year` — Day of the year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). + +**Returned value** + +- A date created from the arguments. + +Type: [Date](../../sql-reference/data-types/date.md). + +**Example** + +Create a Date from a year, month and day: + +``` sql +SELECT makeDate(2023, 2, 28) AS Date; +``` + +Result: + +``` text +┌───────date─┐ +│ 2023-02-28 │ +└────────────┘ +``` + +Create a Date from a year and day of year argument: + +``` sql +SELECT makeDate(2023, 42) AS Date; +``` + +Result: + +``` text +┌───────date─┐ +│ 2023-02-11 │ +└────────────┘ +``` +## makeDate32 + +Like [makeDate](#makeDate) but produces a [Date32](../../sql-reference/data-types/date32.md). + +## makeDateTime + +Creates a [DateTime](../../sql-reference/data-types/datetime.md) from a year, month, day, hour, minute and second argument. + +**Syntax** + +``` sql +makeDateTime(year, month, day, hour, minute, second[, timezone]) +``` + +**Arguments** + +- `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `month` — Month. 
[Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `hour` — Hour. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `minute` — Minute. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `second` — Second. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). **Returned value** +- A date with time created from the arguments. + +Type: [DateTime](../../sql-reference/data-types/datetime.md). + +**Example** + +``` sql +SELECT makeDateTime(2023, 2, 28, 17, 12, 33) AS DateTime; +``` + +Result: + +``` text +┌────────────DateTime─┐ +│ 2023-02-28 17:12:33 │ +└─────────────────────┘ +``` + +## makeDateTime64 + +Like [makeDateTime](#makedatetime) but produces a [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Syntax** + +``` sql +makeDateTime64(year, month, day, hour, minute, second[, fraction[, precision[, timezone]]]) +``` + +## timeZone + +Returns the timezone of the server. +If the function is executed in the context of a distributed table, it generates a normal column with values relevant to each shard, otherwise it produces a constant value. + +**Syntax** + +```sql timeZone() ``` @@ -39,13 +152,13 @@ Alias: `timezone`. **Returned value** -- Timezone. +- Timezone. Type: [String](../../sql-reference/data-types/string.md). ## toTimeZone -Converts time or date and time to the specified time zone. The time zone is an attribute of the `Date` and `DateTime` data types. The internal value (number of seconds) of the table field or of the resultset's column does not change, the column's type changes and its string representation changes accordingly. +Converts a date or date with time to the specified time zone. Does not change the internal value (the number of Unix seconds) of the data; only the value's time zone attribute and its string representation change. **Syntax** @@ -57,19 +170,17 @@ Alias: `toTimezone`. **Arguments** -- `value` — Time or date and time. [DateTime64](../../sql-reference/data-types/datetime64.md). -- `timezone` — Timezone for the returned value. [String](../../sql-reference/data-types/string.md). This argument is a constant, because `toTimezone` changes the timezone of a column (timezone is an attribute of `DateTime*` types). +- `value` — Time or date and time. [DateTime64](../../sql-reference/data-types/datetime64.md). +- `timezone` — Timezone for the returned value. [String](../../sql-reference/data-types/string.md). This argument is a constant, because `toTimezone` changes the timezone of a column (timezone is an attribute of `DateTime*` types). **Returned value** -- Date and time. +- Date and time. Type: [DateTime](../../sql-reference/data-types/datetime.md).
**Example** -Query: - ```sql SELECT toDateTime('2019-01-01 00:00:00', 'UTC') AS time_utc, toTypeName(time_utc) AS type_utc, @@ -99,8 +210,6 @@ type_samoa: DateTime('US/Samoa') int32samoa: 1546300800 ``` -`toTimeZone(time_utc, 'Asia/Yekaterinburg')` changes the `DateTime('UTC')` type to `DateTime('Asia/Yekaterinburg')`. The value (Unixtimestamp) 1546300800 stays the same, but the string representation (the result of the toString() function) changes from `time_utc: 2019-01-01 00:00:00` to `time_yekat: 2019-01-01 05:00:00`. - ## timeZoneOf Returns the timezone name of [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md) data types. @@ -115,17 +224,16 @@ Alias: `timezoneOf`. **Arguments** -- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). **Returned value** -- Timezone name. +- Timezone name. Type: [String](../../sql-reference/data-types/string.md). **Example** -Query: ``` sql SELECT timezoneOf(now()); ``` @@ -139,8 +247,9 @@ Result: ## timeZoneOffset -Returns a timezone offset in seconds from [UTC](https://en.wikipedia.org/wiki/Coordinated_Universal_Time). The function takes into account [daylight saving time](https://en.wikipedia.org/wiki/Daylight_saving_time) and historical timezone changes at the specified date and time. -[IANA timezone database](https://www.iana.org/time-zones) is used to calculate the offset. +Returns the timezone offset in seconds from [UTC](https://en.wikipedia.org/wiki/Coordinated_Universal_Time). +The function takes [daylight saving time](https://en.wikipedia.org/wiki/Daylight_saving_time) and historical timezone changes at the specified date and time into account. +The [IANA timezone database](https://www.iana.org/time-zones) is used to calculate the offset. **Syntax** @@ -152,18 +261,16 @@ Alias: `timezoneOffset`. **Arguments** -- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). **Returned value** -- Offset from UTC in seconds. +- Offset from UTC in seconds. Type: [Int32](../../sql-reference/data-types/int-uint.md). **Example** -Query: - ``` sql SELECT toDateTime('2021-04-21 10:20:30', 'America/New_York') AS Time, toTypeName(Time) AS Type, timeZoneOffset(Time) AS Offset_in_seconds, (Offset_in_seconds / 3600) AS Offset_in_hours; @@ -179,39 +286,39 @@ Result: ## toYear -Converts a date or date with time to a UInt16 number containing the year number (AD). +Converts a date or date with time to the year number (AD) as UInt16 value. Alias: `YEAR`. ## toQuarter -Converts a date or date with time to a UInt8 number containing the quarter number. +Converts a date or date with time to the quarter number as UInt8 value. Alias: `QUARTER`. ## toMonth -Converts a date or date with time to a UInt8 number containing the month number (1-12). +Converts a date or date with time to the month number (1-12) as UInt8 value. Alias: `MONTH`. ## toDayOfYear -Converts a date or date with time to a UInt16 number containing the number of the day of the year (1-366). +Converts a date or date with time to the number of the day of the year (1-366) as UInt16 value.
Alias: `DAYOFYEAR`. ## toDayOfMonth -Converts a date or date with time to a UInt8 number containing the number of the day of the month (1-31). +Converts a date or date with time to the number of the day in the month (1-31) as UInt8 value. Aliases: `DAYOFMONTH`, `DAY`. ## toDayOfWeek -Converts a date or date with time to a UInt8 number containing the number of the day of the week. +Converts a date or date with time to the number of the day in the week as UInt8 value. -The two-argument form of `toDayOfWeek()` enables you to specify whether the week starts on Monday or Sunday, and whether the return value should be in the range from 0 to 6 or 1 to 7. If the mode argument is ommited, the default mode is 0. The time zone of the date can be specified as the third argument. +The two-argument form of `toDayOfWeek()` enables you to specify whether the week starts on Monday or Sunday, and whether the return value should be in the range from 0 to 6 or 1 to 7. If the mode argument is omitted, the default mode is 0. The time zone of the date can be specified as the third argument. | Mode | First day of week | Range | |------|-------------------|------------------------------------------------| @@ -230,60 +337,73 @@ toDayOfWeek(t[, mode[, timezone]]) ## toHour -Converts a date with time to a UInt8 number containing the number of the hour in 24-hour time (0-23). -This function assumes that if clocks are moved ahead, it is by one hour and occurs at 2 a.m., and if clocks are moved back, it is by one hour and occurs at 3 a.m. (which is not always true – even in Moscow the clocks were twice changed at a different time). +Converts a date with time to the number of the hour in 24-hour time (0-23) as UInt8 value. + +Assumes that if clocks are moved ahead, it is by one hour and occurs at 2 a.m., and if clocks are moved back, it is by one hour and occurs at 3 a.m. (which is not always true – even in Moscow the clocks were twice changed at a different time). Alias: `HOUR`. ## toMinute -Converts a date with time to a UInt8 number containing the number of the minute of the hour (0-59). +Converts a date with time to the number of the minute of the hour (0-59) as UInt8 value. Alias: `MINUTE`. ## toSecond -Converts a date with time to a UInt8 number containing the number of the second in the minute (0-59). -Leap seconds are not accounted for. +Converts a date with time to the second in the minute (0-59) as UInt8 value. Leap seconds are not considered. Alias: `SECOND`. ## toUnixTimestamp -For DateTime argument: converts value to the number with type UInt32 -- Unix Timestamp (https://en.wikipedia.org/wiki/Unix_time). -For String argument: converts the input string to the datetime according to the timezone (optional second argument, server timezone is used by default) and returns the corresponding unix timestamp. +Converts a string, a date or a date with time to the [Unix Timestamp](https://en.wikipedia.org/wiki/Unix_time) in `UInt32` representation. + +If the function is called with a string, it accepts an optional timezone argument. **Syntax** ``` sql -toUnixTimestamp(datetime) +toUnixTimestamp(date) toUnixTimestamp(str, [timezone]) ``` **Returned value** -- Returns the unix timestamp. +- Returns the unix timestamp. Type: `UInt32`.
**Example** -Query: - ``` sql -SELECT toUnixTimestamp('2017-11-05 08:07:47', 'Asia/Tokyo') AS unix_timestamp +SELECT + '2017-11-05 08:07:47' AS dt_str, + toUnixTimestamp(dt_str) AS from_str, + toUnixTimestamp(dt_str, 'Asia/Tokyo') AS from_str_tokyo, + toUnixTimestamp(toDateTime(dt_str)) AS from_datetime, + toUnixTimestamp(toDateTime64(dt_str, 0)) AS from_datetime64, + toUnixTimestamp(toDate(dt_str)) AS from_date, + toUnixTimestamp(toDate32(dt_str)) AS from_date32 +FORMAT Vertical; ``` Result: ``` text -┌─unix_timestamp─┐ -│ 1509836867 │ -└────────────────┘ +Row 1: +────── +dt_str: 2017-11-05 08:07:47 +from_str: 1509869267 +from_str_tokyo: 1509836867 +from_datetime: 1509869267 +from_datetime64: 1509869267 +from_date: 1509840000 +from_date32: 1509840000 ``` :::note -The return type of `toStartOf*`, `toLastDayOfMonth`, `toMonday`, `timeSlot` functions described below is determined by the configuration parameter [enable_extended_results_for_datetime_functions](../../operations/settings/settings#enable-extended-results-for-datetime-functions) which is `0` by default. +The return type of `toStartOf*`, `toLastDayOfMonth`, `toMonday`, `timeSlot` functions described below is determined by the configuration parameter [enable_extended_results_for_datetime_functions](../../operations/settings/settings.md#enable-extended-results-for-datetime-functions) which is `0` by default. Behavior for * `enable_extended_results_for_datetime_functions = 0`: Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime`. Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime`. Though these functions can take values of the extended types `Date32` and `DateTime64` as an argument, passing them a time outside the normal range (year 1970 to 2149 for `Date` / 2106 for `DateTime`) will produce wrong results. @@ -367,12 +487,12 @@ toStartOfSecond(value, [timezone]) **Arguments** -- `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md). -- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md). +- `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md). +- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md). **Returned value** -- Input value without sub-seconds. +- Input value without sub-seconds. Type: [DateTime64](../../sql-reference/data-types/datetime64.md). @@ -410,7 +530,7 @@ Result: **See also** -- [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) server configuration parameter. +- [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) server configuration parameter. ## toStartOfFiveMinutes @@ -499,9 +619,9 @@ The following table describes how the mode argument works. 
For mode values with a meaning of “with 4 or more days this year,” weeks are numbered according to ISO 8601:1988: -- If the week containing January 1 has 4 or more days in the new year, it is week 1. +- If the week containing January 1 has 4 or more days in the new year, it is week 1. -- Otherwise, it is the last week of the previous year, and the next week is week 1. +- Otherwise, it is the last week of the previous year, and the next week is week 1. For mode values with a meaning of “contains January 1”, the week contains January 1 is week 1. It does not matter how many days in the new year the week contained, even if it contained only one day. @@ -513,9 +633,9 @@ toWeek(t[, mode[, time_zone]]) **Arguments** -- `t` – Date or DateTime. -- `mode` – Optional parameter, Range of values is \[0,9\], default is 0. -- `Timezone` – Optional parameter, it behaves like any other conversion function. +- `t` – Date or DateTime. +- `mode` – Optional parameter, Range of values is \[0,9\], default is 0. +- `Timezone` – Optional parameter, it behaves like any other conversion function. **Example** @@ -570,7 +690,7 @@ age('unit', startdate, enddate, [timezone]) **Arguments** -- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). +- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). Possible values: - `second` (possible abbreviations: `ss`, `s`) @@ -582,11 +702,11 @@ age('unit', startdate, enddate, [timezone]) - `quarter` (possible abbreviations: `qq`, `q`) - `year` (possible abbreviations: `yyyy`, `yy`) -- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md). 
**Returned value** @@ -596,8 +716,6 @@ Type: [Int](../../sql-reference/data-types/int-uint.md). **Example** -Query: - ``` sql SELECT age('hour', toDateTime('2018-01-01 22:30:00'), toDateTime('2018-01-02 23:00:00')); ``` @@ -610,8 +728,6 @@ Result: └───────────────────────────────────────────────────────────────────────────────────┘ ``` -Query: - ``` sql SELECT toDate('2022-01-01') AS e, @@ -645,11 +761,11 @@ For an alternative to `date\_diff`, see function `age`. date_diff('unit', startdate, enddate, [timezone]) ``` -Aliases: `dateDiff`, `DATE_DIFF`. +Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_DIFF`. **Arguments** -- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). +- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). Possible values: - `second` (possible abbreviations: `ss`, `s`) @@ -661,11 +777,11 @@ Aliases: `dateDiff`, `DATE_DIFF`. - `quarter` (possible abbreviations: `qq`, `q`) - `year` (possible abbreviations: `yyyy`, `yy`) -- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md). **Returned value** @@ -675,8 +791,6 @@ Type: [Int](../../sql-reference/data-types/int-uint.md). **Example** -Query: - ``` sql SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00')); ``` @@ -689,8 +803,6 @@ Result: └────────────────────────────────────────────────────────────────────────────────────────┘ ``` -Query: - ``` sql SELECT toDate('2022-01-01') AS e, @@ -722,7 +834,7 @@ Alias: `dateTrunc`. **Arguments** -- `unit` — The type of interval to truncate the result. [String Literal](../syntax.md#syntax-string-literal). 
+- `unit` — The type of interval to truncate the result. [String Literal](../syntax.md#syntax-string-literal). Possible values: - `second` @@ -734,12 +846,12 @@ Alias: `dateTrunc`. - `quarter` - `year` -- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md). +- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md). **Returned value** -- Value, truncated to the specified part of date. +- Value, truncated to the specified part of date. Type: [DateTime](../../sql-reference/data-types/datetime.md). @@ -775,7 +887,7 @@ Result: **See Also** -- [toStartOfInterval](#tostartofintervaltime-or-data-interval-x-unit-time-zone) +- [toStartOfInterval](#tostartofintervaltime-or-data-interval-x-unit-time-zone) ## date\_add @@ -791,7 +903,7 @@ Aliases: `dateAdd`, `DATE_ADD`. **Arguments** -- `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md). +- `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md). Possible values: - `second` @@ -803,8 +915,8 @@ Aliases: `dateAdd`, `DATE_ADD`. - `quarter` - `year` -- `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md). -- `date` — The date or date with time to which `value` is added. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md). +- `date` — The date or date with time to which `value` is added. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). **Returned value** @@ -814,8 +926,6 @@ Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-ref **Example** -Query: - ```sql SELECT date_add(YEAR, 3, toDate('2018-01-01')); ``` @@ -842,7 +952,7 @@ Aliases: `dateSub`, `DATE_SUB`. **Arguments** -- `unit` — The type of interval to subtract. Note: The unit should be unquoted. +- `unit` — The type of interval to subtract. Note: The unit should be unquoted. Possible values: @@ -855,8 +965,8 @@ Aliases: `dateSub`, `DATE_SUB`. - `quarter` - `year` -- `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md). -- `date` — The date or date with time from which `value` is subtracted. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md). +- `date` — The date or date with time from which `value` is subtracted. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). 
**Returned value** @@ -866,8 +976,6 @@ Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-ref **Example** -Query: - ``` sql SELECT date_sub(YEAR, 3, toDate('2018-01-01')); ``` @@ -894,9 +1002,9 @@ Aliases: `timeStampAdd`, `TIMESTAMP_ADD`. **Arguments** -- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). -- `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md). -- `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md). +- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md). +- `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md). Possible values: - `second` @@ -916,8 +1024,6 @@ Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-ref **Example** -Query: - ```sql select timestamp_add(toDate('2018-01-01'), INTERVAL 3 MONTH); ``` @@ -944,7 +1050,7 @@ Aliases: `timeStampSub`, `TIMESTAMP_SUB`. **Arguments** -- `unit` — The type of interval to subtract. [String](../../sql-reference/data-types/string.md). +- `unit` — The type of interval to subtract. [String](../../sql-reference/data-types/string.md). Possible values: - `second` @@ -956,8 +1062,8 @@ Aliases: `timeStampSub`, `TIMESTAMP_SUB`. - `quarter` - `year` -- `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md). -- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md). +- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). **Returned value** @@ -967,8 +1073,6 @@ Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-ref **Example** -Query: - ```sql select timestamp_sub(MONTH, 5, toDateTime('2018-12-18 01:02:03')); ``` @@ -993,11 +1097,11 @@ now([timezone]) **Arguments** -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). **Returned value** -- Current date and time. +- Current date and time. Type: [DateTime](../../sql-reference/data-types/datetime.md). @@ -1043,12 +1147,12 @@ now64([scale], [timezone]) **Arguments** -- `scale` - Tick size (precision): 10-precision seconds. Valid range: [ 0 : 9 ]. Typically are used - 3 (default) (milliseconds), 6 (microseconds), 9 (nanoseconds). -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). +- `scale` - Tick size (precision): 10-precision seconds. Valid range: [ 0 : 9 ]. Typically are used - 3 (default) (milliseconds), 6 (microseconds), 9 (nanoseconds). 
+- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). **Returned value** -- Current date and time with sub-second precision. +- Current date and time with sub-second precision. Type: [DateTime64](../../sql-reference/data-types/datetime64.md). @@ -1080,11 +1184,11 @@ nowInBlock([timezone]) **Arguments** -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). **Returned value** -- Current date and time at the moment of processing of each block of data. +- Current date and time at the moment of processing of each block of data. Type: [DateTime](../../sql-reference/data-types/datetime.md). @@ -1126,15 +1230,60 @@ Rounds the time to the half hour. ## toYYYYMM -Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 100 + MM). +Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 100 + MM). Accepts a second optional timezone argument. If provided, the timezone must be a string constant. + +**Example** + +``` sql +SELECT + toYYYYMM(now(), 'US/Eastern') +``` + +Result: + +``` text +┌─toYYYYMM(now(), 'US/Eastern')─┐ +│ 202303 │ +└───────────────────────────────┘ +``` ## toYYYYMMDD -Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 10000 + MM \* 100 + DD). +Converts a date or date with time to a UInt32 number containing the year, month and day number (YYYY \* 10000 + MM \* 100 + DD). Accepts a second optional timezone argument. If provided, the timezone must be a string constant. + +**Example** + +```sql +SELECT + toYYYYMMDD(now(), 'US/Eastern') +``` + +Result: + +```response +┌─toYYYYMMDD(now(), 'US/Eastern')─┐ +│ 20230302 │ +└─────────────────────────────────┘ +``` ## toYYYYMMDDhhmmss -Converts a date or date with time to a UInt64 number containing the year and month number (YYYY \* 10000000000 + MM \* 100000000 + DD \* 1000000 + hh \* 10000 + mm \* 100 + ss). +Converts a date or date with time to a UInt64 number containing the year, month, day, hour, minute and second (YYYY \* 10000000000 + MM \* 100000000 + DD \* 1000000 + hh \* 10000 + mm \* 100 + ss). Accepts a second optional timezone argument. If provided, the timezone must be a string constant. + +**Example** + +```sql +SELECT + toYYYYMMDDhhmmss(now(), 'US/Eastern') +``` + +Result: + +```response +┌─toYYYYMMDDhhmmss(now(), 'US/Eastern')─┐ +│ 20230302112209 │ +└───────────────────────────────────────┘ +``` ## addYears, addMonths, addWeeks, addDays, addHours, addMinutes, addSeconds, addQuarters @@ -1199,12 +1348,14 @@ SELECT timeSlots(toDateTime64('1980-12-12 21:01:02.1234', 4, 'UTC'), toDecimal64 └───────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` -## formatDateTime +## formatDateTime {#date_time_functions-formatDateTime} Formats a Time according to the given Format string. Format is a constant expression, so you cannot have multiple formats for a single result column.
formatDateTime uses MySQL datetime format style, refer to https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format. +The opposite operation of this function is [parseDateTime](/docs/en/sql-reference/functions/type-conversion-functions.md#type_conversion_functions-parseDateTime). + Alias: `DATE_FORMAT`. **Syntax** @@ -1224,12 +1375,12 @@ Using replacement fields, you can define a pattern for the resulting string. “ |----------|---------------------------------------------------------|------------| | %a | abbreviated weekday name (Mon-Sun) | Mon | | %b | abbreviated month name (Jan-Dec) | Jan | -| %c | month as a decimal number (01-12) | 01 | +| %c | month as an integer number (01-12) | 01 | | %C | year divided by 100 and truncated to integer (00-99) | 20 | | %d | day of the month, zero-padded (01-31) | 02 | | %D | Short MM/DD/YY date, equivalent to %m/%d/%y | 01/02/18 | | %e | day of the month, space-padded (1-31) |   2 | -| %f | fractional second from the fractional part of DateTime64 | 1234560 | +| %f | fractional second, see 'Note 1' below | 1234560 | | %F | short YYYY-MM-DD date, equivalent to %Y-%m-%d | 2018-01-02 | | %g | two-digit year format, aligned to ISO 8601, abbreviated from four-digit notation | 18 | | %G | four-digit year format for ISO week number, calculated from the week-based year [defined by the ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates) standard, normally useful only with %V | 2018 | @@ -1240,29 +1391,31 @@ Using replacement fields, you can define a pattern for the resulting string. “ | %j | day of the year (001-366) | 002 | | %k | hour in 24h format (00-23) | 22 | | %l | hour in 12h format (01-12) | 09 | -| %m | month as a decimal number (01-12) | 01 | -| %M | minute (00-59) | 33 | +| %m | month as an integer number (01-12) | 01 | +| %M | full month name (January-December), see 'Note 2' below | January | | %n | new-line character (‘’) | | | %p | AM or PM designation | PM | | %Q | Quarter (1-4) | 1 | -| %r | 12-hour HH:MM AM/PM time, equivalent to %H:%M %p | 10:30 PM | -| %R | 24-hour HH:MM time, equivalent to %H:%M | 22:33 | +| %r | 12-hour HH:MM AM/PM time, equivalent to %h:%i %p | 10:30 PM | +| %R | 24-hour HH:MM time, equivalent to %H:%i | 22:33 | | %s | second (00-59) | 44 | | %S | second (00-59) | 44 | | %t | horizontal-tab character (’) | | -| %T | ISO 8601 time format (HH:MM:SS), equivalent to %H:%M:%S | 22:33:44 | +| %T | ISO 8601 time format (HH:MM:SS), equivalent to %H:%i:%S | 22:33:44 | | %u | ISO 8601 weekday as number with Monday as 1 (1-7) | 2 | | %V | ISO 8601 week number (01-53) | 01 | -| %w | weekday as a decimal number with Sunday as 0 (0-6) | 2 | +| %w | weekday as an integer number with Sunday as 0 (0-6) | 2 | | %W | full weekday name (Monday-Sunday) | Monday | | %y | Year, last two digits (00-99) | 18 | | %Y | Year | 2018 | | %z | Time offset from UTC as +HHMM or -HHMM | -0500 | | %% | a % sign | % | -**Example** +Note 1: In ClickHouse versions earlier than v23.4, `%f` prints a single zero (0) if the formatted value is a Date, Date32 or DateTime (which have no fractional seconds) or a DateTime64 with a precision of 0. The previous behavior can be restored using setting `formatdatetime_f_prints_single_zero = 1`. -Query: +Note 2: In ClickHouse versions earlier than v23.4, `%M` prints the minute (00-59) instead of the full month name (January-December). The previous behavior can be restored using setting `formatdatetime_parsedatetime_m_is_month_name = 0`.
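+
+For example, with the month-name semantics of `%M`, the following sketch (assuming default settings on v23.4 or newer; the `month_name` alias is only illustrative) returns the full month name:
+
+``` sql
+SELECT formatDateTime(toDate('2010-01-04'), '%M') AS month_name;
+```
+
+``` text
+┌─month_name─┐
+│ January    │
+└────────────┘
+```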
+ +**Example** ``` sql SELECT formatDateTime(toDate('2010-01-04'), '%g') @@ -1276,8 +1429,6 @@ Result: └────────────────────────────────────────────┘ ``` -Query: - ``` sql SELECT formatDateTime(toDateTime64('2010-01-04 12:34:56.123456', 7), '%f') ``` @@ -1292,17 +1443,18 @@ Result: **See Also** -- [formatDateTimeInJodaSyntax](##formatDateTimeInJodaSyntax) +- [formatDateTimeInJodaSyntax](##formatDateTimeInJodaSyntax) -## formatDateTimeInJodaSyntax +## formatDateTimeInJodaSyntax {#date_time_functions-formatDateTimeInJodaSyntax} Similar to formatDateTime, except that it formats datetime in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html. +The opposite operation of this function is [parseDateTimeInJodaSyntax](/docs/en/sql-reference/functions/type-conversion-functions.md#type_conversion_functions-parseDateTimeInJodaSyntax). **Replacement fields** -Using replacement fields, you can define a pattern for the resulting string. +Using replacement fields, you can define a pattern for the resulting string. | Placeholder | Description | Presentation | Examples | @@ -1333,8 +1485,6 @@ Using replacement fields, you can define a pattern for the resulting string. **Example** -Query: - ``` sql SELECT formatDateTimeInJodaSyntax(toDateTime('2010-01-04 12:34:56'), 'yyyy-MM-dd HH:mm:ss') ``` @@ -1360,20 +1510,18 @@ dateName(date_part, date) **Arguments** -- `date_part` — Date part. Possible values: 'year', 'quarter', 'month', 'week', 'dayofyear', 'day', 'weekday', 'hour', 'minute', 'second'. [String](../../sql-reference/data-types/string.md). -- `date` — Date. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `timezone` — Timezone. Optional. [String](../../sql-reference/data-types/string.md). +- `date_part` — Date part. Possible values: 'year', 'quarter', 'month', 'week', 'dayofyear', 'day', 'weekday', 'hour', 'minute', 'second'. [String](../../sql-reference/data-types/string.md). +- `date` — Date. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `timezone` — Timezone. Optional. [String](../../sql-reference/data-types/string.md). **Returned value** -- The specified part of date. +- The specified part of date. Type: [String](../../sql-reference/data-types/string.md#string) **Example** -Query: - ```sql WITH toDateTime('2021-04-14 11:22:33') AS date_value SELECT @@ -1402,18 +1550,16 @@ monthName(date) **Arguments** -- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). **Returned value** -- The name of the month. +- The name of the month. Type: [String](../../sql-reference/data-types/string.md#string) **Example** -Query: - ```sql WITH toDateTime('2021-04-14 11:22:33') AS date_value SELECT monthName(date_value); @@ -1427,28 +1573,26 @@ Result: └───────────────────────┘ ``` -## FROM\_UNIXTIME +## fromUnixTimestamp Function converts Unix timestamp to a calendar date and a time of a day. 
When there is only a single argument of [Integer](../../sql-reference/data-types/int-uint.md) type, it acts in the same way as [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime) and return [DateTime](../../sql-reference/data-types/datetime.md) type. -FROM_UNIXTIME uses MySQL datetime format style, refer to https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format. +fromUnixTimestamp uses MySQL datetime format style, refer to https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format. -Alias: `fromUnixTimestamp`. +Alias: `FROM_UNIXTIME`. **Example:** -Query: - ```sql -SELECT FROM_UNIXTIME(423543535); +SELECT fromUnixTimestamp(423543535); ``` Result: ```text -┌─FROM_UNIXTIME(423543535)─┐ -│ 1983-06-04 10:58:55 │ -└──────────────────────────┘ +┌─fromUnixTimestamp(423543535)─┐ +│ 1983-06-04 10:58:55 │ +└──────────────────────────────┘ ``` When there are two or three arguments, the first an [Integer](../../sql-reference/data-types/int-uint.md), [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md), the second a constant format string and the third an optional constant time zone string — it acts in the same way as [formatDateTime](#formatdatetime) and return [String](../../sql-reference/data-types/string.md#string) type. @@ -1456,7 +1600,7 @@ When there are two or three arguments, the first an [Integer](../../sql-referenc For example: ```sql -SELECT FROM_UNIXTIME(1234334543, '%Y-%m-%d %R:%S') AS DateTime; +SELECT fromUnixTimestamp(1234334543, '%Y-%m-%d %R:%S') AS DateTime; ``` ```text @@ -1467,26 +1611,25 @@ SELECT FROM_UNIXTIME(1234334543, '%Y-%m-%d %R:%S') AS DateTime; **See Also** -- [fromUnixTimestampInJodaSyntax](##fromUnixTimestampInJodaSyntax) - +- [fromUnixTimestampInJodaSyntax](##fromUnixTimestampInJodaSyntax) ## fromUnixTimestampInJodaSyntax -Similar to FROM_UNIXTIME, except that it formats time in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html. + +Similar to fromUnixTimestamp, except that it formats time in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html. **Example:** -Query: + ``` sql SELECT fromUnixTimestampInJodaSyntax(1669804872, 'yyyy-MM-dd HH:mm:ss', 'UTC'); ``` Result: ``` -┌─fromUnixTimestampInJodaSyntax(1669804872, 'yyyy-MM-dd HH:mm:ss', 'UTC')─┐ +┌─fromUnixTimestampInJodaSyntax(1669804872, 'yyyy-MM-dd HH:mm:ss', 'UTC')────┐ │ 2022-11-30 10:41:12 │ └────────────────────────────────────────────────────────────────────────────┘ ``` - ## toModifiedJulianDay Converts a [Proleptic Gregorian calendar](https://en.wikipedia.org/wiki/Proleptic_Gregorian_calendar) date in text form `YYYY-MM-DD` to a [Modified Julian Day](https://en.wikipedia.org/wiki/Julian_day#Variants) number in Int32. This function supports date from `0000-01-01` to `9999-12-31`. It raises an exception if the argument cannot be parsed as a date, or the date is invalid. @@ -1499,18 +1642,16 @@ toModifiedJulianDay(date) **Arguments** -- `date` — Date in text form. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `date` — Date in text form. 
[String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). **Returned value** -- Modified Julian Day number. +- Modified Julian Day number. Type: [Int32](../../sql-reference/data-types/int-uint.md). **Example** -Query: - ``` sql SELECT toModifiedJulianDay('2020-01-01'); ``` @@ -1535,18 +1676,16 @@ toModifiedJulianDayOrNull(date) **Arguments** -- `date` — Date in text form. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `date` — Date in text form. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). **Returned value** -- Modified Julian Day number. +- Modified Julian Day number. Type: [Nullable(Int32)](../../sql-reference/data-types/int-uint.md). **Example** -Query: - ``` sql SELECT toModifiedJulianDayOrNull('2020-01-01'); ``` @@ -1571,18 +1710,16 @@ fromModifiedJulianDay(day) **Arguments** -- `day` — Modified Julian Day number. [Any integral types](../../sql-reference/data-types/int-uint.md). +- `day` — Modified Julian Day number. [Any integral types](../../sql-reference/data-types/int-uint.md). **Returned value** -- Date in text form. +- Date in text form. Type: [String](../../sql-reference/data-types/string.md) **Example** -Query: - ``` sql SELECT fromModifiedJulianDay(58849); ``` @@ -1607,18 +1744,16 @@ fromModifiedJulianDayOrNull(day) **Arguments** -- `day` — Modified Julian Day number. [Any integral types](../../sql-reference/data-types/int-uint.md). +- `day` — Modified Julian Day number. [Any integral types](../../sql-reference/data-types/int-uint.md). **Returned value** -- Date in text form. +- Date in text form. Type: [Nullable(String)](../../sql-reference/data-types/string.md) **Example** -Query: - ``` sql SELECT fromModifiedJulianDayOrNull(58849); ``` diff --git a/docs/en/sql-reference/functions/distance-functions.md b/docs/en/sql-reference/functions/distance-functions.md index 293e02f8a54..660af6912b0 100644 --- a/docs/en/sql-reference/functions/distance-functions.md +++ b/docs/en/sql-reference/functions/distance-functions.md @@ -1,7 +1,10 @@ --- slug: /en/sql-reference/functions/distance-functions +sidebar_position: 55 +sidebar_label: Distance --- -# Distance functions + +# Distance Functions ## L1Norm @@ -17,11 +20,11 @@ Alias: `normL1`. **Arguments** -- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). **Returned value** -- L1-norm or [taxicab geometry](https://en.wikipedia.org/wiki/Taxicab_geometry) distance. +- L1-norm or [taxicab geometry](https://en.wikipedia.org/wiki/Taxicab_geometry) distance. Type: [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). @@ -55,11 +58,11 @@ Alias: `normL2`. **Arguments** -- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). **Returned value** -- L2-norm or [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance). +- L2-norm or [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance). Type: [Float](../../sql-reference/data-types/float.md). 
@@ -93,11 +96,11 @@ Alias: `normLinf`.

**Arguments**

-- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
+- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).

**Returned value**

-- Linf-norm or the maximum absolute value.
+- Linf-norm or the maximum absolute value.

Type: [Float](../../sql-reference/data-types/float.md).

@@ -131,12 +134,12 @@ Alias: `normLp`.

**Arguments**

-- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
-- `p` — The power. Possible values: real number in `[1; inf)`. [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).
+- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
+- `p` — The power. Possible values: real number in `[1; inf)`. [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).

**Returned value**

-- [Lp-norm](https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm)
+- [Lp-norm](https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm)

Type: [Float](../../sql-reference/data-types/float.md).

@@ -170,12 +173,12 @@ Alias: `distanceL1`.

**Arguments**

-- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
-- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
+- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
+- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).

**Returned value**

-- 1-norm distance.
+- 1-norm distance.

Type: [Float](../../sql-reference/data-types/float.md).

@@ -209,12 +212,12 @@ Alias: `distanceL2`.

**Arguments**

-- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
-- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
+- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
+- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).

**Returned value**

-- 2-norm distance.
+- 2-norm distance.

Type: [Float](../../sql-reference/data-types/float.md).

@@ -248,12 +251,12 @@ Alias: `distanceLinf`.

**Arguments**

-- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
-- `vector1` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
+- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
+- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).

**Returned value**

-- Infinity-norm distance.
+- Infinity-norm distance.

Type: [Float](../../sql-reference/data-types/float.md).

@@ -287,13 +290,13 @@ Alias: `distanceLp`.

**Arguments**

-- `vector1` — First vector.
[Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
-- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
-- `p` — The power. Possible values: real number from `[1; inf)`. [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).
+- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
+- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
+- `p` — The power. Possible values: real number from `[1; inf)`. [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).

**Returned value**

-- p-norm distance.
+- p-norm distance.

Type: [Float](../../sql-reference/data-types/float.md).

@@ -328,11 +331,11 @@ Alias: `normalizeL1`.

**Arguments**

-- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
+- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).

**Returned value**

-- Unit vector.
+- Unit vector.

Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md).

@@ -366,11 +369,11 @@ Alias: `normalizeL1`.

**Arguments**

-- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
+- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).

**Returned value**

-- Unit vector.
+- Unit vector.

Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md).

@@ -404,11 +407,11 @@ Alias: `normalizeLinf `.

**Arguments**

-- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
+- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).

**Returned value**

-- Unit vector.
+- Unit vector.

Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md).

@@ -442,12 +445,12 @@ Alias: `normalizeLp `.

**Arguments**

-- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
-- `p` — The power. Possible values: any number from [1;inf). [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).
+- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
+- `p` — The power. Possible values: any number from [1;inf). [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).

**Returned value**

-- Unit vector.
+- Unit vector.

Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md).

@@ -479,12 +482,12 @@ cosineDistance(vector1, vector2)

**Arguments**

-- `vector1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
-- `vector2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
+- `vector1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
+- `vector2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).

**Returned value**

-- Cosine of the angle between two vectors substracted from one.
+- Cosine of the angle between two vectors subtracted from one.

Type: [Float](../../sql-reference/data-types/float.md).
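As a quick illustration of the definition just given (the cosine of the angle subtracted from one), a minimal sketch using the `cosineDistance` function documented above (an illustrative query, not part of the patch):

``` sql
-- Orthogonal vectors: cosine 0, so the distance is 1 - 0 = 1.
-- Collinear vectors: cosine 1, so the distance is 1 - 1 = 0.
SELECT cosineDistance((1, 0), (0, 1)) AS orthogonal,
       cosineDistance((1, 0), (2, 0)) AS collinear;
```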
diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index cccc02c2553..618dd3f4b4f 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/functions/encoding-functions -sidebar_position: 52 +sidebar_position: 65 sidebar_label: Encoding --- @@ -18,11 +18,11 @@ char(number_1, [number_2, ..., number_n]); **Arguments** -- `number_1, number_2, ..., number_n` — Numerical arguments interpreted as integers. Types: [Int](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md). +- `number_1, number_2, ..., number_n` — Numerical arguments interpreted as integers. Types: [Int](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md). **Returned value** -- a string of given bytes. +- a string of given bytes. Type: `String`. @@ -98,11 +98,11 @@ Values of [UUID](../data-types/uuid.md) type are encoded as big-endian order str **Arguments** -- `arg` — A value to convert to hexadecimal. Types: [String](../../sql-reference/data-types/string.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `arg` — A value to convert to hexadecimal. Types: [String](../../sql-reference/data-types/string.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). **Returned value** -- A string with the hexadecimal representation of the argument. +- A string with the hexadecimal representation of the argument. Type: [String](../../sql-reference/data-types/string.md). @@ -185,13 +185,13 @@ unhex(arg) **Arguments** -- `arg` — A string containing any number of hexadecimal digits. Type: [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md). +- `arg` — A string containing any number of hexadecimal digits. Type: [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md). Supports both uppercase and lowercase letters `A-F`. The number of hexadecimal digits does not have to be even. If it is odd, the last digit is interpreted as the least significant half of the `00-0F` byte. If the argument string contains anything other than hexadecimal digits, some implementation-defined result is returned (an exception isn’t thrown). For a numeric argument the inverse of hex(N) is not performed by unhex(). **Returned value** -- A binary string (BLOB). +- A binary string (BLOB). Type: [String](../../sql-reference/data-types/string.md). @@ -247,11 +247,11 @@ Values of [UUID](../data-types/uuid.md) type are encoded as big-endian order str **Arguments** -- `arg` — A value to convert to binary. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md), or [DateTime](../../sql-reference/data-types/datetime.md). 
+- `arg` — A value to convert to binary. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md), or [DateTime](../../sql-reference/data-types/datetime.md). **Returned value** -- A string with the binary representation of the argument. +- A string with the binary representation of the argument. Type: [String](../../sql-reference/data-types/string.md). @@ -338,11 +338,11 @@ Supports binary digits `0` and `1`. The number of binary digits does not have to **Arguments** -- `arg` — A string containing any number of binary digits. [String](../../sql-reference/data-types/string.md). +- `arg` — A string containing any number of binary digits. [String](../../sql-reference/data-types/string.md). **Returned value** -- A binary string (BLOB). +- A binary string (BLOB). Type: [String](../../sql-reference/data-types/string.md). @@ -396,11 +396,11 @@ bitPositionsToArray(arg) **Arguments** -- `arg` — Integer value. [Int/UInt](../../sql-reference/data-types/int-uint.md). +- `arg` — Integer value. [Int/UInt](../../sql-reference/data-types/int-uint.md). **Returned value** -- An array containing a list of positions of bits that equal `1`, in ascending order. +- An array containing a list of positions of bits that equal `1`, in ascending order. Type: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). diff --git a/docs/en/sql-reference/functions/encryption-functions.md b/docs/en/sql-reference/functions/encryption-functions.md index 1edb20fd9ed..1224b7bc92b 100644 --- a/docs/en/sql-reference/functions/encryption-functions.md +++ b/docs/en/sql-reference/functions/encryption-functions.md @@ -1,8 +1,7 @@ --- slug: /en/sql-reference/functions/encryption-functions -sidebar_position: 67 +sidebar_position: 70 sidebar_label: Encryption -title: "Encryption functions" --- These functions implement encryption and decryption of data with AES (Advanced Encryption Standard) algorithm. @@ -17,11 +16,11 @@ Note that these functions work slowly until ClickHouse 21.1. This function encrypts data using these modes: -- aes-128-ecb, aes-192-ecb, aes-256-ecb -- aes-128-cbc, aes-192-cbc, aes-256-cbc -- aes-128-ofb, aes-192-ofb, aes-256-ofb -- aes-128-gcm, aes-192-gcm, aes-256-gcm -- aes-128-ctr, aes-192-ctr, aes-256-ctr +- aes-128-ecb, aes-192-ecb, aes-256-ecb +- aes-128-cbc, aes-192-cbc, aes-256-cbc +- aes-128-ofb, aes-192-ofb, aes-256-ofb +- aes-128-gcm, aes-192-gcm, aes-256-gcm +- aes-128-ctr, aes-192-ctr, aes-256-ctr **Syntax** @@ -31,15 +30,15 @@ encrypt('mode', 'plaintext', 'key' [, iv, aad]) **Arguments** -- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string). -- `plaintext` — Text thats need to be encrypted. [String](../../sql-reference/data-types/string.md#string). -- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string). -- `iv` — Initialization vector. Required for `-gcm` modes, optinal for others. [String](../../sql-reference/data-types/string.md#string). -- `aad` — Additional authenticated data. It isn't encrypted, but it affects decryption. Works only in `-gcm` modes, for others would throw an exception. [String](../../sql-reference/data-types/string.md#string). +- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string). 
+- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
+- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string).
+- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../../sql-reference/data-types/string.md#string).
+- `aad` — Additional authenticated data. It isn't encrypted, but it affects decryption. Works only in `-gcm` modes; for other modes an exception is thrown. [String](../../sql-reference/data-types/string.md#string).

**Returned value**

-- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string).
+- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string).

**Examples**

@@ -112,9 +111,9 @@ Will produce the same ciphertext as `encrypt` on equal inputs. But when `key` or

Supported encryption modes:

-- aes-128-ecb, aes-192-ecb, aes-256-ecb
-- aes-128-cbc, aes-192-cbc, aes-256-cbc
-- aes-128-ofb, aes-192-ofb, aes-256-ofb
+- aes-128-ecb, aes-192-ecb, aes-256-ecb
+- aes-128-cbc, aes-192-cbc, aes-256-cbc
+- aes-128-ofb, aes-192-ofb, aes-256-ofb

**Syntax**

@@ -124,10 +123,10 @@ aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])

**Arguments**

-- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
-- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
-- `key` — Encryption key. If key is longer than required by mode, MySQL-specific key folding is performed. [String](../../sql-reference/data-types/string.md#string).
-- `iv` — Initialization vector. Optional, only first 16 bytes are taken into account [String](../../sql-reference/data-types/string.md#string).
+- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
+- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
+- `key` — Encryption key. If the key is longer than required by the mode, MySQL-specific key folding is performed. [String](../../sql-reference/data-types/string.md#string).
+- `iv` — Initialization vector. Optional; only the first 16 bytes are taken into account. [String](../../sql-reference/data-types/string.md#string).

**Returned value**

@@ -217,11 +216,11 @@ mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviv

This function decrypts ciphertext into a plaintext using these modes:

-- aes-128-ecb, aes-192-ecb, aes-256-ecb
-- aes-128-cbc, aes-192-cbc, aes-256-cbc
-- aes-128-ofb, aes-192-ofb, aes-256-ofb
-- aes-128-gcm, aes-192-gcm, aes-256-gcm
-- aes-128-ctr, aes-192-ctr, aes-256-ctr
+- aes-128-ecb, aes-192-ecb, aes-256-ecb
+- aes-128-cbc, aes-192-cbc, aes-256-cbc
+- aes-128-ofb, aes-192-ofb, aes-256-ofb
+- aes-128-gcm, aes-192-gcm, aes-256-gcm
+- aes-128-ctr, aes-192-ctr, aes-256-ctr

**Syntax**

@@ -231,15 +230,15 @@ decrypt('mode', 'ciphertext', 'key' [, iv, aad])

**Arguments**

-- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
-- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
-- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string).
-- `iv` — Initialization vector. Required for `-gcm` modes, optinal for others. [String](../../sql-reference/data-types/string.md#string).
-- `aad` — Additional authenticated data. Won't decrypt if this value is incorrect. Works only in `-gcm` modes, for others would throw an exception.
[String](../../sql-reference/data-types/string.md#string).
+- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
+- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
+- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string).
+- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../../sql-reference/data-types/string.md#string).
+- `aad` — Additional authenticated data. Won't decrypt if this value is incorrect. Works only in `-gcm` modes; for other modes an exception is thrown. [String](../../sql-reference/data-types/string.md#string).

**Returned value**

-- Decrypted String. [String](../../sql-reference/data-types/string.md#string).
+- Decrypted String. [String](../../sql-reference/data-types/string.md#string).

**Examples**

@@ -349,10 +348,10 @@ Will produce same plaintext as `decrypt` on equal inputs. But when `key` or `iv`

Supported decryption modes:

-- aes-128-ecb, aes-192-ecb, aes-256-ecb
-- aes-128-cbc, aes-192-cbc, aes-256-cbc
-- aes-128-cfb128
-- aes-128-ofb, aes-192-ofb, aes-256-ofb
+- aes-128-ecb, aes-192-ecb, aes-256-ecb
+- aes-128-cbc, aes-192-cbc, aes-256-cbc
+- aes-128-cfb128
+- aes-128-ofb, aes-192-ofb, aes-256-ofb

**Syntax**

@@ -362,14 +361,14 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])

**Arguments**

-- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
-- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
-- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string).
-- `iv` — Initialization vector. Optinal. [String](../../sql-reference/data-types/string.md#string).
+- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
+- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
+- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string).
+- `iv` — Initialization vector. Optional. [String](../../sql-reference/data-types/string.md#string).

**Returned value**

-- Decrypted String. [String](../../sql-reference/data-types/string.md#string).
+- Decrypted String. [String](../../sql-reference/data-types/string.md#string).

**Examples**

diff --git a/docs/en/sql-reference/functions/ext-dict-functions.md b/docs/en/sql-reference/functions/ext-dict-functions.md
index b4b7ec5ab21..7d8aa2c0390 100644
--- a/docs/en/sql-reference/functions/ext-dict-functions.md
+++ b/docs/en/sql-reference/functions/ext-dict-functions.md
@@ -1,16 +1,16 @@
---
slug: /en/sql-reference/functions/ext-dict-functions
-sidebar_position: 58
+sidebar_position: 50
sidebar_label: Dictionaries
---

# Functions for Working with Dictionaries

-:::note
+:::note
For dictionaries created with [DDL queries](../../sql-reference/statements/create/dictionary.md), the `dict_name` parameter must be fully specified, like `.`. Otherwise, the current database is used.
:::

-For information on connecting and configuring dictionaries, see [Dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md).
+For information on connecting and configuring dictionaries, see [Dictionaries](../../sql-reference/dictionaries/index.md).

## dictGet, dictGetOrDefault, dictGetOrNull

@@ -24,16 +24,16 @@ dictGetOrNull('dict_name', attr_name, id_expr)

**Arguments**

-- `dict_name` — Name of the dictionary.
[String literal](../../sql-reference/syntax.md#syntax-string-literal).
-- `attr_names` — Name of the column of the dictionary, [String literal](../../sql-reference/syntax.md#syntax-string-literal), or tuple of column names, [Tuple](../../sql-reference/data-types/tuple.md)([String literal](../../sql-reference/syntax.md#syntax-string-literal)).
-- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration.
-- `default_value_expr` — Values returned if the dictionary does not contain a row with the `id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) or [Tuple](../../sql-reference/data-types/tuple.md)([Expression](../../sql-reference/syntax.md#syntax-expressions)), returning the value (or values) in the data types configured for the `attr_names` attribute.
+- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal).
+- `attr_names` — Name of the column of the dictionary, [String literal](../../sql-reference/syntax.md#syntax-string-literal), or tuple of column names, [Tuple](../../sql-reference/data-types/tuple.md)([String literal](../../sql-reference/syntax.md#syntax-string-literal)).
+- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration.
+- `default_value_expr` — Values returned if the dictionary does not contain a row with the `id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) or [Tuple](../../sql-reference/data-types/tuple.md)([Expression](../../sql-reference/syntax.md#syntax-expressions)), returning the value (or values) in the data types configured for the `attr_names` attribute.

**Returned value**

-- If ClickHouse parses the attribute successfully in the [attribute’s data type](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes), functions return the value of the dictionary attribute that corresponds to `id_expr`.
+- If ClickHouse parses the attribute successfully in the [attribute’s data type](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields), functions return the value of the dictionary attribute that corresponds to `id_expr`.

-- If there is no the key, corresponding to `id_expr`, in the dictionary, then:
+- If there is no key corresponding to `id_expr` in the dictionary, then:

    - `dictGet` returns the content of the `` element specified for the attribute in the dictionary configuration.
    - `dictGetOrDefault` returns the value passed as the `default_value_expr` parameter.

@@ -226,7 +226,7 @@ Result:

**See Also**

-- [Dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md)
+- [Dictionaries](../../sql-reference/dictionaries/index.md)

## dictHas

@@ -238,19 +238,19 @@ dictHas('dict_name', id_expr)

**Arguments**

-- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal).
-- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration.
+- `dict_name` — Name of the dictionary.
[String literal](../../sql-reference/syntax.md#syntax-string-literal). +- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. **Returned value** -- 0, if there is no key. -- 1, if there is a key. +- 0, if there is no key. +- 1, if there is a key. Type: `UInt8`. ## dictGetHierarchy -Creates an array, containing all the parents of a key in the [hierarchical dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md). +Creates an array, containing all the parents of a key in the [hierarchical dictionary](../../sql-reference/dictionaries/index.md#hierarchical-dictionaries). **Syntax** @@ -260,12 +260,12 @@ dictGetHierarchy('dict_name', key) **Arguments** -- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. +- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). +- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. **Returned value** -- Parents for the key. +- Parents for the key. Type: [Array(UInt64)](../../sql-reference/data-types/array.md). @@ -279,14 +279,14 @@ dictIsIn('dict_name', child_id_expr, ancestor_id_expr) **Arguments** -- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `child_id_expr` — Key to be checked. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. -- `ancestor_id_expr` — Alleged ancestor of the `child_id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. +- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). +- `child_id_expr` — Key to be checked. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. +- `ancestor_id_expr` — Alleged ancestor of the `child_id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. **Returned value** -- 0, if `child_id_expr` is not a child of `ancestor_id_expr`. -- 1, if `child_id_expr` is a child of `ancestor_id_expr` or if `child_id_expr` is an `ancestor_id_expr`. +- 0, if `child_id_expr` is not a child of `ancestor_id_expr`. +- 1, if `child_id_expr` is a child of `ancestor_id_expr` or if `child_id_expr` is an `ancestor_id_expr`. Type: `UInt8`. @@ -302,12 +302,12 @@ dictGetChildren(dict_name, key) **Arguments** -- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. +- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). +- `key` — Key value. 
[Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. **Returned values** -- First-level descendants for the key. +- First-level descendants for the key. Type: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). @@ -351,13 +351,13 @@ dictGetDescendants(dict_name, key, level) **Arguments** -- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. -- `level` — Hierarchy level. If `level = 0` returns all descendants to the end. [UInt8](../../sql-reference/data-types/int-uint.md). +- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). +- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. +- `level` — Hierarchy level. If `level = 0` returns all descendants to the end. [UInt8](../../sql-reference/data-types/int-uint.md). **Returned values** -- Descendants for the key. +- Descendants for the key. Type: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). @@ -409,14 +409,14 @@ ClickHouse supports specialized functions that convert dictionary attribute valu Functions: -- `dictGetInt8`, `dictGetInt16`, `dictGetInt32`, `dictGetInt64` -- `dictGetUInt8`, `dictGetUInt16`, `dictGetUInt32`, `dictGetUInt64` -- `dictGetFloat32`, `dictGetFloat64` -- `dictGetDate` -- `dictGetDateTime` -- `dictGetUUID` -- `dictGetString` -- `dictGetIPv4`, `dictGetIPv6` +- `dictGetInt8`, `dictGetInt16`, `dictGetInt32`, `dictGetInt64` +- `dictGetUInt8`, `dictGetUInt16`, `dictGetUInt32`, `dictGetUInt64` +- `dictGetFloat32`, `dictGetFloat64` +- `dictGetDate` +- `dictGetDateTime` +- `dictGetUUID` +- `dictGetString` +- `dictGetIPv4`, `dictGetIPv6` All these functions have the `OrDefault` modification. For example, `dictGetDateOrDefault`. @@ -429,16 +429,16 @@ dictGet[Type]OrDefault('dict_name', 'attr_name', id_expr, default_value_expr) **Arguments** -- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `attr_name` — Name of the column of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md) or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. -- `default_value_expr` — Value returned if the dictionary does not contain a row with the `id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning the value in the data type configured for the `attr_name` attribute. +- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). +- `attr_name` — Name of the column of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). +- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md) or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. 
+- `default_value_expr` — Value returned if the dictionary does not contain a row with the `id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning the value in the data type configured for the `attr_name` attribute.

**Returned value**

-- If ClickHouse parses the attribute successfully in the [attribute’s data type](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes), functions return the value of the dictionary attribute that corresponds to `id_expr`.
+- If ClickHouse parses the attribute successfully in the [attribute’s data type](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields), functions return the value of the dictionary attribute that corresponds to `id_expr`.

-- If there is no requested `id_expr` in the dictionary then:
+- If there is no requested `id_expr` in the dictionary, then:

    - `dictGet[Type]` returns the content of the `` element specified for the attribute in the dictionary configuration.
    - `dictGet[Type]OrDefault` returns the value passed as the `default_value_expr` parameter.

diff --git a/docs/en/sql-reference/functions/files.md b/docs/en/sql-reference/functions/files.md
index 91279cd991a..5cd2d8e0a74 100644
--- a/docs/en/sql-reference/functions/files.md
+++ b/docs/en/sql-reference/functions/files.md
@@ -1,13 +1,14 @@
---
slug: /en/sql-reference/functions/files
-sidebar_position: 43
+sidebar_position: 75
sidebar_label: Files
-title: "Functions for Working with Files"
---

## file

-Reads file as a String. The file content is not parsed, so any information is read as one string and placed into the specified column.
+Reads a file as a string and loads the data into the specified column. The actual file content is not interpreted.
+
+See also the table function [file](../table-functions/file.md).

**Syntax**

@@ -17,8 +18,8 @@ file(path[, default])

**Arguments**

-- `path` — The relative path to the file from [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following wildcards: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings.
-- `default` — The value that will be returned in the case when a file does not exist or cannot be accessed. Data types supported: [String](../../sql-reference/data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal).
+- `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports the following wildcards: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings.
+- `default` — The value that will be returned in case the file does not exist or cannot be accessed. Supported data types: [String](../../sql-reference/data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal).
**Example** @@ -29,8 +30,3 @@ Query: ``` sql INSERT INTO table SELECT file('a.txt'), file('b.txt'); ``` - -**See Also** - -- [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path) -- [file](../table-functions/file.md) diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md index 7dc798aa6c1..0b7df54b776 100644 --- a/docs/en/sql-reference/functions/functions-for-nulls.md +++ b/docs/en/sql-reference/functions/functions-for-nulls.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/functions/functions-for-nulls -sidebar_position: 63 +sidebar_position: 135 sidebar_label: Nullable --- @@ -18,12 +18,12 @@ Alias: `ISNULL`. **Arguments** -- `x` — A value with a non-compound data type. +- `x` — A value with a non-compound data type. **Returned value** -- `1` if `x` is `NULL`. -- `0` if `x` is not `NULL`. +- `1` if `x` is `NULL`. +- `0` if `x` is not `NULL`. **Example** @@ -58,12 +58,12 @@ isNotNull(x) **Arguments:** -- `x` — A value with a non-compound data type. +- `x` — A value with a non-compound data type. **Returned value** -- `0` if `x` is `NULL`. -- `1` if `x` is not `NULL`. +- `0` if `x` is `NULL`. +- `1` if `x` is not `NULL`. **Example** @@ -98,12 +98,12 @@ coalesce(x,...) **Arguments:** -- Any number of parameters of a non-compound type. All parameters must be compatible by data type. +- Any number of parameters of a non-compound type. All parameters must be compatible by data type. **Returned values** -- The first non-`NULL` argument. -- `NULL`, if all arguments are `NULL`. +- The first non-`NULL` argument. +- `NULL`, if all arguments are `NULL`. **Example** @@ -141,13 +141,13 @@ ifNull(x,alt) **Arguments:** -- `x` — The value to check for `NULL`. -- `alt` — The value that the function returns if `x` is `NULL`. +- `x` — The value to check for `NULL`. +- `alt` — The value that the function returns if `x` is `NULL`. **Returned values** -- The value `x`, if `x` is not `NULL`. -- The value `alt`, if `x` is `NULL`. +- The value `x`, if `x` is not `NULL`. +- The value `alt`, if `x` is `NULL`. **Example** @@ -185,8 +185,8 @@ nullIf(x, y) **Returned values** -- `NULL`, if the arguments are equal. -- The `x` value, if the arguments are not equal. +- `NULL`, if the arguments are equal. +- The `x` value, if the arguments are not equal. **Example** @@ -220,12 +220,12 @@ assumeNotNull(x) **Arguments:** -- `x` — The original value. +- `x` — The original value. **Returned values** -- The original value from the non-`Nullable` type, if it is not `NULL`. -- Implementation specific result if the original value was `NULL`. +- The original value from the non-`Nullable` type, if it is not `NULL`. +- Implementation specific result if the original value was `NULL`. **Example** @@ -282,11 +282,11 @@ toNullable(x) **Arguments:** -- `x` — The value of any non-compound type. +- `x` — The value of any non-compound type. **Returned value** -- The input value with a `Nullable` type. +- The input value with a `Nullable` type. **Example** diff --git a/docs/en/sql-reference/functions/geo/coordinates.md b/docs/en/sql-reference/functions/geo/coordinates.md index 1e023415890..1cbc1933206 100644 --- a/docs/en/sql-reference/functions/geo/coordinates.md +++ b/docs/en/sql-reference/functions/geo/coordinates.md @@ -15,10 +15,10 @@ greatCircleDistance(lon1Deg, lat1Deg, lon2Deg, lat2Deg) **Input parameters** -- `lon1Deg` — Longitude of the first point in degrees. Range: `[-180°, 180°]`. 
-- `lat1Deg` — Latitude of the first point in degrees. Range: `[-90°, 90°]`.
-- `lon2Deg` — Longitude of the second point in degrees. Range: `[-180°, 180°]`.
-- `lat2Deg` — Latitude of the second point in degrees. Range: `[-90°, 90°]`.
+- `lon1Deg` — Longitude of the first point in degrees. Range: `[-180°, 180°]`.
+- `lat1Deg` — Latitude of the first point in degrees. Range: `[-90°, 90°]`.
+- `lon2Deg` — Longitude of the second point in degrees. Range: `[-180°, 180°]`.
+- `lat2Deg` — Latitude of the second point in degrees. Range: `[-90°, 90°]`.

Positive values correspond to North latitude and East longitude, and negative values correspond to South latitude and West longitude.

@@ -31,13 +31,13 @@ Generates an exception when the input parameter values fall outside of the range

**Example**

``` sql
-SELECT greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673)
+SELECT greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673) AS greatCircleDistance
```

``` text
-┌─greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673)─┐
-│                                                 14132374.194975413 │
-└───────────────────────────────────────────────────────────────────┘
+┌─greatCircleDistance─┐
+│            14128352 │
+└─────────────────────┘
```

## geoDistance

@@ -47,6 +47,37 @@ The performance is the same as for `greatCircleDistance` (no performance drawbac

Technical note: for close enough points we calculate the distance using planar approximation with the metric on the tangent plane at the midpoint of the coordinates.

+``` sql
+geoDistance(lon1Deg, lat1Deg, lon2Deg, lat2Deg)
+```
+
+**Input parameters**
+
+- `lon1Deg` — Longitude of the first point in degrees. Range: `[-180°, 180°]`.
+- `lat1Deg` — Latitude of the first point in degrees. Range: `[-90°, 90°]`.
+- `lon2Deg` — Longitude of the second point in degrees. Range: `[-180°, 180°]`.
+- `lat2Deg` — Latitude of the second point in degrees. Range: `[-90°, 90°]`.
+
+Positive values correspond to North latitude and East longitude, and negative values correspond to South latitude and West longitude.
+
+**Returned value**
+
+The distance between two points on the Earth’s surface, in meters.
+
+Generates an exception when the input parameter values fall outside of the range.
+
+**Example**
+
+``` sql
+SELECT geoDistance(38.8976, -77.0366, 39.9496, -75.1503) AS geoDistance
+```
+
+``` text
+┌─geoDistance─┐
+│   212458.73 │
+└─────────────┘
+```
+
## greatCircleAngle

Calculates the central angle between two points on the Earth’s surface using [the great-circle formula](https://en.wikipedia.org/wiki/Great-circle_distance).

@@ -57,10 +88,10 @@ greatCircleAngle(lon1Deg, lat1Deg, lon2Deg, lat2Deg)

**Input parameters**

-- `lon1Deg` — Longitude of the first point in degrees.
-- `lat1Deg` — Latitude of the first point in degrees.
-- `lon2Deg` — Longitude of the second point in degrees.
-- `lat2Deg` — Latitude of the second point in degrees.
+- `lon1Deg` — Longitude of the first point in degrees.
+- `lat1Deg` — Latitude of the first point in degrees.
+- `lon2Deg` — Longitude of the second point in degrees.
+- `lat2Deg` — Latitude of the second point in degrees.

**Returned value**

@@ -89,9 +120,9 @@ pointInEllipses(x, y, x₀, y₀, a₀, b₀,...,xₙ, yₙ, aₙ, bₙ)

**Input parameters**

-- `x, y` — Coordinates of a point on the plane.
-- `xᵢ, yᵢ` — Coordinates of the center of the `i`-th ellipsis.
-- `aᵢ, bᵢ` — Axes of the `i`-th ellipsis in units of x, y coordinates.
+- `x, y` — Coordinates of a point on the plane.
+- `xᵢ, yᵢ` — Coordinates of the center of the `i`-th ellipse.
+- `aᵢ, bᵢ` — Axes of the `i`-th ellipse in units of x, y coordinates.

The input parameters must be `2+4⋅n`, where `n` is the number of ellipses.

@@ -121,9 +152,9 @@ pointInPolygon((x, y), [(a, b), (c, d) ...], ...)

**Input values**

-- `(x, y)` — Coordinates of a point on the plane. Data type — [Tuple](../../../sql-reference/data-types/tuple.md) — A tuple of two numbers.
-- `[(a, b), (c, d) ...]` — Polygon vertices. Data type — [Array](../../../sql-reference/data-types/array.md). Each vertex is represented by a pair of coordinates `(a, b)`. Vertices should be specified in a clockwise or counterclockwise order. The minimum number of vertices is 3. The polygon must be constant.
-- The function also supports polygons with holes (cut out sections). In this case, add polygons that define the cut out sections using additional arguments of the function. The function does not support non-simply-connected polygons.
+- `(x, y)` — Coordinates of a point on the plane. Data type — [Tuple](../../../sql-reference/data-types/tuple.md) — A tuple of two numbers.
+- `[(a, b), (c, d) ...]` — Polygon vertices. Data type — [Array](../../../sql-reference/data-types/array.md). Each vertex is represented by a pair of coordinates `(a, b)`. Vertices should be specified in a clockwise or counterclockwise order. The minimum number of vertices is 3. The polygon must be constant.
+- The function also supports polygons with holes (cut out sections). In this case, add polygons that define the cut out sections using additional arguments of the function. The function does not support non-simply-connected polygons.

**Returned values**

diff --git a/docs/en/sql-reference/functions/geo/geohash.md b/docs/en/sql-reference/functions/geo/geohash.md
index baed8243935..ce16af44e90 100644
--- a/docs/en/sql-reference/functions/geo/geohash.md
+++ b/docs/en/sql-reference/functions/geo/geohash.md
@@ -18,13 +18,13 @@ geohashEncode(longitude, latitude, [precision])

**Input values**

-- longitude - longitude part of the coordinate you want to encode. Floating in range`[-180°, 180°]`
-- latitude - latitude part of the coordinate you want to encode. Floating in range `[-90°, 90°]`
-- precision - Optional, length of the resulting encoded string, defaults to `12`. Integer in range `[1, 12]`. Any value less than `1` or greater than `12` is silently converted to `12`.
+- longitude - longitude part of the coordinate you want to encode. Floating in range `[-180°, 180°]`
+- latitude - latitude part of the coordinate you want to encode. Floating in range `[-90°, 90°]`
+- precision - Optional, length of the resulting encoded string, defaults to `12`. Integer in range `[1, 12]`. Any value less than `1` or greater than `12` is silently converted to `12`.

**Returned values**

-- alphanumeric `String` of encoded coordinate (modified version of the base32-encoding alphabet is used).
+- alphanumeric `String` of the encoded coordinate (a modified version of the base32-encoding alphabet is used).

**Example**

@@ -44,11 +44,11 @@ Decodes any [geohash](#geohash)-encoded string into longitude and latitude.

**Input values**

-- encoded string - geohash-encoded string.
+- encoded string - geohash-encoded string.

**Returned values**

-- (longitude, latitude) - 2-tuple of `Float64` values of longitude and latitude.
+- (longitude, latitude) - 2-tuple of `Float64` values of longitude and latitude.

**Example**

@@ -74,11 +74,11 @@ geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precisi

**Arguments**

-- `longitude_min` — Minimum longitude.
Range: `[-180°, 180°]`. Type: [Float](../../../sql-reference/data-types/float.md).
-- `latitude_min` — Minimum latitude. Range: `[-90°, 90°]`. Type: [Float](../../../sql-reference/data-types/float.md).
-- `longitude_max` — Maximum longitude. Range: `[-180°, 180°]`. Type: [Float](../../../sql-reference/data-types/float.md).
-- `latitude_max` — Maximum latitude. Range: `[-90°, 90°]`. Type: [Float](../../../sql-reference/data-types/float.md).
-- `precision` — Geohash precision. Range: `[1, 12]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
+- `longitude_min` — Minimum longitude. Range: `[-180°, 180°]`. Type: [Float](../../../sql-reference/data-types/float.md).
+- `latitude_min` — Minimum latitude. Range: `[-90°, 90°]`. Type: [Float](../../../sql-reference/data-types/float.md).
+- `longitude_max` — Maximum longitude. Range: `[-180°, 180°]`. Type: [Float](../../../sql-reference/data-types/float.md).
+- `latitude_max` — Maximum latitude. Range: `[-90°, 90°]`. Type: [Float](../../../sql-reference/data-types/float.md).
+- `precision` — Geohash precision. Range: `[1, 12]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md).

:::note
All coordinate parameters must be of the same type: either `Float32` or `Float64`.

@@ -86,8 +86,8 @@ All coordinate parameters must be of the same type: either `Float32` or `Float64

**Returned values**

-- Array of precision-long strings of geohash-boxes covering provided area, you should not rely on order of items.
-- `[]` - Empty array if minimum latitude and longitude values aren’t less than corresponding maximum values.
+- Array of precision-long strings of geohash-boxes covering the provided area; you should not rely on the order of items.
+- `[]` - Empty array if the minimum latitude and longitude values aren’t less than the corresponding maximum values.

Type: [Array](../../../sql-reference/data-types/array.md)([String](../../../sql-reference/data-types/string.md)).

diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md
index ce15fed0f21..1f695a13598 100644
--- a/docs/en/sql-reference/functions/geo/h3.md
+++ b/docs/en/sql-reference/functions/geo/h3.md
@@ -26,12 +26,12 @@ h3IsValid(h3index)

**Parameter**

-- `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).

**Returned values**

-- 1 — The number is a valid H3 index.
-- 0 — The number is not a valid H3 index.
+- 1 — The number is a valid H3 index.
+- 0 — The number is not a valid H3 index.

Type: [UInt8](../../../sql-reference/data-types/int-uint.md).

@@ -63,12 +63,12 @@ h3GetResolution(h3index)

**Parameter**

-- `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).

**Returned values**

-- Index resolution. Range: `[0, 15]`.
-- If the index is not valid, the function returns a random value. Use [h3IsValid](#h3isvalid) to verify the index.
+- Index resolution. Range: `[0, 15]`.
+- If the index is not valid, the function returns a random value. Use [h3IsValid](#h3isvalid) to verify the index.

Type: [UInt8](../../../sql-reference/data-types/int-uint.md).

@@ -100,11 +100,11 @@ h3EdgeAngle(resolution)

**Parameter**

-- `resolution` — Index resolution.
Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. **Returned values** -- The average length of the [H3](#h3index) hexagon edge in grades. Type: [Float64](../../../sql-reference/data-types/float.md). +- The average length of the [H3](#h3index) hexagon edge in grades. Type: [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -134,11 +134,11 @@ h3EdgeLengthM(resolution) **Parameter** -- `resolution` — Index resolution. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. +- `resolution` — Index resolution. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. **Returned values** -- The average length of the [H3](#h3index) hexagon edge in meters. Type: [Float64](../../../sql-reference/data-types/float.md). +- The average length of the [H3](#h3index) hexagon edge in meters. Type: [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -168,11 +168,11 @@ h3EdgeLengthKm(resolution) **Parameter** -- `resolution` — Index resolution. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. +- `resolution` — Index resolution. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. **Returned values** -- The average length of the [H3](#h3index) hexagon edge in kilometers. Type: [Float64](../../../sql-reference/data-types/float.md). +- The average length of the [H3](#h3index) hexagon edge in kilometers. Type: [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -202,14 +202,14 @@ geoToH3(lon, lat, resolution) **Arguments** -- `lon` — Longitude. Type: [Float64](../../../sql-reference/data-types/float.md). -- `lat` — Latitude. Type: [Float64](../../../sql-reference/data-types/float.md). -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `lon` — Longitude. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat` — Latitude. Type: [Float64](../../../sql-reference/data-types/float.md). +- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned values** -- Hexagon index number. -- 0 in case of error. +- Hexagon index number. +- 0 in case of error. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). @@ -241,11 +241,11 @@ h3ToGeo(h3Index) **Arguments** -- `h3Index` — H3 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3Index` — H3 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- A tuple consisting of two values: `tuple(lon,lat)`. `lon` — Longitude. [Float64](../../../sql-reference/data-types/float.md). `lat` — Latitude. [Float64](../../../sql-reference/data-types/float.md). +- A tuple consisting of two values: `tuple(lon,lat)`. `lon` — Longitude. [Float64](../../../sql-reference/data-types/float.md). `lat` — Latitude. [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -275,11 +275,11 @@ h3ToGeoBoundary(h3Index) **Arguments** -- `h3Index` — H3 Index. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3Index` — H3 Index. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- Array of pairs '(lon, lat)'. +- Array of pairs '(lon, lat)'. Type: [Array](../../../sql-reference/data-types/array.md)([Float64](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md)). 
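Since `h3ToGeo` returns the center of the cell produced by `geoToH3`, a round trip makes a handy sanity check of the two functions documented above. A minimal sketch (an illustrative query; the coordinates are arbitrary, and the result is the cell center, not the exact input point):

``` sql
-- Encode a (lon, lat) pair into a resolution-10 H3 cell, then recover the cell's center.
SELECT h3ToGeo(geoToH3(37.617673, 55.755831, 10)) AS cell_center;
```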
@@ -311,12 +311,12 @@ h3kRing(h3index, k) **Arguments** -- `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `k` — Radius. Type: [integer](../../../sql-reference/data-types/int-uint.md) +- `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `k` — Radius. Type: [integer](../../../sql-reference/data-types/int-uint.md) **Returned values** -- Array of H3 indexes. +- Array of H3 indexes. Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). @@ -354,11 +354,11 @@ h3GetBaseCell(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Hexagon base cell number. +- Hexagon base cell number. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). @@ -390,11 +390,11 @@ h3HexAreaM2(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Area in square meters. +- Area in square meters. Type: [Float64](../../../sql-reference/data-types/float.md). @@ -426,11 +426,11 @@ h3HexAreaKm2(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Area in square kilometers. +- Area in square kilometers. Type: [Float64](../../../sql-reference/data-types/float.md). @@ -462,13 +462,13 @@ h3IndexesAreNeighbors(index1, index2) **Arguments** -- `index1` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `index2` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index1` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index2` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- `1` — Indexes are neighbours. -- `0` — Indexes are not neighbours. +- `1` — Indexes are neighbours. +- `0` — Indexes are not neighbours. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). @@ -500,12 +500,12 @@ h3ToChildren(index, resolution) **Arguments** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned values** -- Array of the child H3-indexes. +- Array of the child H3-indexes. Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). @@ -537,12 +537,12 @@ h3ToParent(index, resolution) **Arguments** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. 
Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Parent H3 index. +- Parent H3 index. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). @@ -572,11 +572,11 @@ h3ToString(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- String representation of the H3 index. +- String representation of the H3 index. Type: [String](../../../sql-reference/data-types/string.md). @@ -608,11 +608,11 @@ stringToH3(index_str) **Parameter** -- `index_str` — String representation of the H3 index. Type: [String](../../../sql-reference/data-types/string.md). +- `index_str` — String representation of the H3 index. Type: [String](../../../sql-reference/data-types/string.md). **Returned value** -- Hexagon index number. Returns 0 on error. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- Hexagon index number. Returns 0 on error. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Example** @@ -642,11 +642,11 @@ h3GetResolution(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). **Example** @@ -676,12 +676,12 @@ h3IsResClassIII(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- `1` — Index has a resolution with Class III orientation. -- `0` — Index doesn't have a resolution with Class III orientation. +- `1` — Index has a resolution with Class III orientation. +- `0` — Index doesn't have a resolution with Class III orientation. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). @@ -713,12 +713,12 @@ h3IsPentagon(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- `1` — Index represents a pentagonal cell. -- `0` — Index doesn't represent a pentagonal cell. +- `1` — Index represents a pentagonal cell. +- `0` — Index doesn't represent a pentagonal cell. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). @@ -750,11 +750,11 @@ h3GetFaces(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- Array containing icosahedron faces intersected by a given H3 index. +- Array containing icosahedron faces intersected by a given H3 index. Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). @@ -786,11 +786,11 @@ h3CellAreaM2(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). 
+- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Cell area in square meters. +- Cell area in square meters. Type: [Float64](../../../sql-reference/data-types/float.md). @@ -822,11 +822,11 @@ h3CellAreaRads2(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Cell area in square radians. +- Cell area in square radians. Type: [Float64](../../../sql-reference/data-types/float.md). @@ -858,8 +858,8 @@ h3ToCenterChild(index, resolution) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned values** @@ -895,11 +895,11 @@ h3ExactEdgeLengthM(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Exact edge length in meters. +- Exact edge length in meters. Type: [Float64](../../../sql-reference/data-types/float.md). @@ -931,11 +931,11 @@ h3ExactEdgeLengthKm(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Exact edge length in kilometers. +- Exact edge length in kilometers. Type: [Float64](../../../sql-reference/data-types/float.md). @@ -967,11 +967,11 @@ h3ExactEdgeLengthRads(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Exact edge length in radians. +- Exact edge length in radians. Type: [Float64](../../../sql-reference/data-types/float.md). @@ -1003,11 +1003,11 @@ h3NumHexagons(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Number of H3 indices. +- Number of H3 indices. Type: [Int64](../../../sql-reference/data-types/int-uint.md). @@ -1039,12 +1039,12 @@ h3PointDistM(lat1, lon1, lat2, lon2) **Arguments** -- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). -- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). **Returned values** -- Haversine or great circle distance in meters. +- Haversine or great circle distance in meters. 
Type: [Float64](../../../sql-reference/data-types/float.md). @@ -1076,12 +1076,12 @@ h3PointDistKm(lat1, lon1, lat2, lon2) **Arguments** -- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). -- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). **Returned values** -- Haversine or great circle distance in kilometers. +- Haversine or great circle distance in kilometers. Type: [Float64](../../../sql-reference/data-types/float.md). @@ -1113,12 +1113,12 @@ h3PointDistRads(lat1, lon1, lat2, lon2) **Arguments** -- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). -- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). **Returned values** -- Haversine or great circle distance in radians. +- Haversine or great circle distance in radians. Type: [Float64](../../../sql-reference/data-types/float.md). @@ -1150,7 +1150,7 @@ h3GetRes0Indexes() **Returned values** -- Array of all the resolution 0 H3 indexes. +- Array of all the resolution 0 H3 indexes. Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). @@ -1183,11 +1183,11 @@ h3GetPentagonIndexes(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Array of all pentagon H3 indexes. +- Array of all pentagon H3 indexes. Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). @@ -1219,8 +1219,8 @@ h3Line(start,end) **Parameter** -- `start` — Hexagon index number that represents a starting point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `end` — Hexagon index number that represents an ending point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `start` — Hexagon index number that represents a starting point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `end` — Hexagon index number that represents an ending point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** @@ -1256,12 +1256,12 @@ h3Distance(start,end) **Parameter** -- `start` — Hexagon index number that represents a starting point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `end` — Hexagon index number that represents an ending point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `start` — Hexagon index number that represents a starting point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `end` — Hexagon index number that represents an ending point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). 
**Returned value** -- Number of grid cells. +- Number of grid cells. Type: [Int64](../../../sql-reference/data-types/int-uint.md). @@ -1297,12 +1297,12 @@ h3HexRing(index, k) **Parameter** -- `index` — Hexagon index number that represents the origin. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `k` — Distance. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number that represents the origin. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `k` — Distance. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- Array of H3 indexes. +- Array of H3 indexes. Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). @@ -1334,12 +1334,12 @@ h3GetUnidirectionalEdge(originIndex, destinationIndex) **Parameter** -- `originIndex` — Origin Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `destinationIndex` — Destination Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `originIndex` — Origin Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `destinationIndex` — Destination Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Unidirectional Edge Hexagon Index number. +- Unidirectional Edge Hexagon Index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). @@ -1371,12 +1371,12 @@ h3UnidirectionalEdgeisValid(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- 1 — The H3 index is a valid unidirectional edge. -- 0 — The H3 index is not a valid unidirectional edge. +- 1 — The H3 index is a valid unidirectional edge. +- 0 — The H3 index is not a valid unidirectional edge. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). @@ -1408,11 +1408,11 @@ h3GetOriginIndexFromUnidirectionalEdge(edge) **Parameter** -- `edge` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `edge` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Origin Hexagon Index number. +- Origin Hexagon Index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). @@ -1444,11 +1444,11 @@ h3GetDestinationIndexFromUnidirectionalEdge(edge) **Parameter** -- `edge` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `edge` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Destination Hexagon Index number. +- Destination Hexagon Index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). @@ -1480,7 +1480,7 @@ h3GetIndexesFromUnidirectionalEdge(edge) **Parameter** -- `edge` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `edge` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). 
**Returned value**

@@ -1519,7 +1519,7 @@ h3GetUnidirectionalEdgesFromHexagon(index)

**Parameter**

-- `index` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `index` — Hexagon index number that represents the origin hexagon. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).

**Returned value**

@@ -1555,11 +1555,11 @@ h3GetUnidirectionalEdgeBoundary(index)

**Parameter**

-- `index` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `index` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).

**Returned value**

-- Array of pairs '(lon, lat)'.
+- Array of pairs '(lon, lat)'.

Type: [Array](../../../sql-reference/data-types/array.md)([Float64](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md)).

diff --git a/docs/en/sql-reference/functions/geo/index.md b/docs/en/sql-reference/functions/geo/index.md
index 3d5ca806c7e..d46e60281e2 100644
--- a/docs/en/sql-reference/functions/geo/index.md
+++ b/docs/en/sql-reference/functions/geo/index.md
@@ -8,67 +8,67 @@ title: "Geo Functions"

## Geographical Coordinates Functions

-- [greatCircleDistance](./coordinates.md#greatcircledistance)
-- [geoDistance](./coordinates.md#geodistance)
-- [greatCircleAngle](./coordinates.md#greatcircleangle)
-- [pointInEllipses](./coordinates.md#pointinellipses)
-- [pointInPolygon](./coordinates.md#pointinpolygon)
+- [greatCircleDistance](./coordinates.md#greatcircledistance)
+- [geoDistance](./coordinates.md#geodistance)
+- [greatCircleAngle](./coordinates.md#greatcircleangle)
+- [pointInEllipses](./coordinates.md#pointinellipses)
+- [pointInPolygon](./coordinates.md#pointinpolygon)

## Geohash Functions

-- [geohashEncode](./geohash.md#geohashencode)
-- [geohashDecode](./geohash.md#geohashdecode)
-- [geohashesInBox](./geohash.md#geohashesinbox)
+- [geohashEncode](./geohash.md#geohashencode)
+- [geohashDecode](./geohash.md#geohashdecode)
+- [geohashesInBox](./geohash.md#geohashesinbox)

## H3 Indexes Functions

-- [h3IsValid](./h3.md#h3isvalid)
-- [h3GetResolution](./h3.md#h3getresolution)
-- [h3EdgeAngle](./h3.md#h3edgeangle)
-- [h3EdgeLengthM](./h3.md#h3edgelengthm)
-- [h3EdgeLengthKm](./h3.md#h3edgelengthkm)
-- [geoToH3](./h3.md#geotoh3)
-- [h3ToGeo](./h3.md#h3togeo)
-- [h3ToGeoBoundary](./h3.md#h3togeoboundary)
-- [h3kRing](./h3.md#h3kring)
-- [h3GetBaseCell](./h3.md#h3getbasecell)
-- [h3HexAreaM2](./h3.md#h3hexaream2)
-- [h3HexAreaKm2](./h3.md#h3hexareakm2)
-- [h3IndexesAreNeighbors](./h3.md#h3indexesareneighbors)
-- [h3ToChildren](./h3.md#h3tochildren)
-- [h3ToParent](./h3.md#h3toparent)
-- [h3ToString](./h3.md#h3tostring)
-- [stringToH3](./h3.md#stringtoh3)
-- [h3GetResolution](./h3.md#h3getresolution)
-- [h3IsResClassIII](./h3.md#h3isresclassiii)
-- [h3IsPentagon](./h3.md#h3ispentagon)
-- [h3GetFaces](./h3.md#h3getfaces)
-- [h3CellAreaM2](./h3.md#h3cellaream2)
-- [h3CellAreaRads2](./h3.md#h3cellarearads2)
-- [h3ToCenterChild](./h3.md#h3tocenterchild)
-- [h3ExactEdgeLengthM](./h3.md#h3exactedgelengthm)
-- [h3ExactEdgeLengthKm](./h3.md#h3exactedgelengthkm)
-- [h3ExactEdgeLengthRads](./h3.md#h3exactedgelengthrads)
-- [h3NumHexagons](./h3.md#h3numhexagons)
-- [h3Line](./h3.md#h3line)
-- [h3Distance](./h3.md#h3distance)
-- [h3HexRing](./h3.md#h3hexring)
-- [h3GetUnidirectionalEdge](./h3.md#h3getunidirectionaledge)
--
[h3UnidirectionalEdgeIsValid](./h3.md#h3unidirectionaledgeisvalid) -- [h3GetOriginIndexFromUnidirectionalEdge](./h3.md#h3getoriginindexfromunidirectionaledge) -- [h3GetDestinationIndexFromUnidirectionalEdge](./h3.md#h3getdestinationindexfromunidirectionaledge) -- [h3GetIndexesFromUnidirectionalEdge](./h3.md#h3getindexesfromunidirectionaledge) -- [h3GetUnidirectionalEdgesFromHexagon](./h3.md#h3getunidirectionaledgesfromhexagon) -- [h3GetUnidirectionalEdgeBoundary](./h3.md#h3getunidirectionaledgeboundary) +- [h3IsValid](./h3.md#h3isvalid) +- [h3GetResolution](./h3.md#h3getresolution) +- [h3EdgeAngle](./h3.md#h3edgeangle) +- [h3EdgeLengthM](./h3.md#h3edgelengthm) +- [h3EdgeLengthKm](./h3.md#h3edgelengthkm) +- [geoToH3](./h3.md#geotoh3) +- [h3ToGeo](./h3.md#h3togeo) +- [h3ToGeoBoundary](./h3.md#h3togeoboundary) +- [h3kRing](./h3.md#h3kring) +- [h3GetBaseCell](./h3.md#h3getbasecell) +- [h3HexAreaM2](./h3.md#h3hexaream2) +- [h3HexAreaKm2](./h3.md#h3hexareakm2) +- [h3IndexesAreNeighbors](./h3.md#h3indexesareneighbors) +- [h3ToChildren](./h3.md#h3tochildren) +- [h3ToParent](./h3.md#h3toparent) +- [h3ToString](./h3.md#h3tostring) +- [stringToH3](./h3.md#stringtoh3) +- [h3GetResolution](./h3.md#h3getresolution) +- [h3IsResClassIII](./h3.md#h3isresclassiii) +- [h3IsPentagon](./h3.md#h3ispentagon) +- [h3GetFaces](./h3.md#h3getfaces) +- [h3CellAreaM2](./h3.md#h3cellaream2) +- [h3CellAreaRads2](./h3.md#h3cellarearads2) +- [h3ToCenterChild](./h3.md#h3tocenterchild) +- [h3ExactEdgeLengthM](./h3.md#h3exactedgelengthm) +- [h3ExactEdgeLengthKm](./h3.md#h3exactedgelengthkm) +- [h3ExactEdgeLengthRads](./h3.md#h3exactedgelengthrads) +- [h3NumHexagons](./h3.md#h3numhexagons) +- [h3Line](./h3.md#h3line) +- [h3Distance](./h3.md#h3distance) +- [h3HexRing](./h3.md#h3hexring) +- [h3GetUnidirectionalEdge](./h3.md#h3getunidirectionaledge) +- [h3UnidirectionalEdgeIsValid](./h3.md#h3unidirectionaledgeisvalid) +- [h3GetOriginIndexFromUnidirectionalEdge](./h3.md#h3getoriginindexfromunidirectionaledge) +- [h3GetDestinationIndexFromUnidirectionalEdge](./h3.md#h3getdestinationindexfromunidirectionaledge) +- [h3GetIndexesFromUnidirectionalEdge](./h3.md#h3getindexesfromunidirectionaledge) +- [h3GetUnidirectionalEdgesFromHexagon](./h3.md#h3getunidirectionaledgesfromhexagon) +- [h3GetUnidirectionalEdgeBoundary](./h3.md#h3getunidirectionaledgeboundary) ## S2 Index Functions -- [geoToS2](./s2.md#geotos2) -- [s2ToGeo](./s2.md#s2togeo) -- [s2GetNeighbors](./s2.md#s2getneighbors) -- [s2CellsIntersect](./s2.md#s2cellsintersect) -- [s2CapContains](./s2.md#s2capcontains) -- [s2CapUnion](./s2.md#s2capunion) -- [s2RectAdd](./s2.md#s2rectadd) -- [s2RectContains](./s2.md#s2rectcontains) -- [s2RectUnion](./s2.md#s2rectunion) -- [s2RectIntersection](./s2.md#s2rectintersection) +- [geoToS2](./s2.md#geotos2) +- [s2ToGeo](./s2.md#s2togeo) +- [s2GetNeighbors](./s2.md#s2getneighbors) +- [s2CellsIntersect](./s2.md#s2cellsintersect) +- [s2CapContains](./s2.md#s2capcontains) +- [s2CapUnion](./s2.md#s2capunion) +- [s2RectAdd](./s2.md#s2rectadd) +- [s2RectContains](./s2.md#s2rectcontains) +- [s2RectUnion](./s2.md#s2rectunion) +- [s2RectIntersection](./s2.md#s2rectintersection) diff --git a/docs/en/sql-reference/functions/geo/polygon.md b/docs/en/sql-reference/functions/geo/polygon.md new file mode 100644 index 00000000000..4a8653965c2 --- /dev/null +++ b/docs/en/sql-reference/functions/geo/polygon.md @@ -0,0 +1,396 @@ +--- +slug: /en/sql-reference/functions/geo/polygons +sidebar_label: Polygons +title: "Functions for Working with Polygons" +--- + +## 
readWKTMultiPolygon
+
+Converts a WKT (Well Known Text) MultiPolygon into a MultiPolygon type.
+
+### Example
+
+``` sql
+SELECT
+    toTypeName(readWKTMultiPolygon('MULTIPOLYGON(((2 0,10 0,10 10,0 10,2 0),(4 4,5 4,5 5,4 5,4 4)),((-10 -10,-10 -9,-9 10,-10 -10)))')) AS type,
+    readWKTMultiPolygon('MULTIPOLYGON(((2 0,10 0,10 10,0 10,2 0),(4 4,5 4,5 5,4 5,4 4)),((-10 -10,-10 -9,-9 10,-10 -10)))') AS output FORMAT Markdown
+```
+| type | output |
+|:-|:-|
+| MultiPolygon | [[[(2,0),(10,0),(10,10),(0,10),(2,0)],[(4,4),(5,4),(5,5),(4,5),(4,4)]],[[(-10,-10),(-10,-9),(-9,10),(-10,-10)]]] |
+
+### Input parameters
+
+String starting with `MULTIPOLYGON`
+
+### Returned value
+
+MultiPolygon
+
+## readWKTPolygon
+
+Converts a WKT (Well Known Text) Polygon into a Polygon type.
+
+### Example
+
+``` sql
+SELECT
+    toTypeName(readWKTPolygon('POLYGON((2 0,10 0,10 10,0 10,2 0))')) AS type,
+    readWKTPolygon('POLYGON((2 0,10 0,10 10,0 10,2 0))') AS output
+FORMAT Markdown
+```
+| type | output |
+|:-|:-|
+| Polygon | [[(2,0),(10,0),(10,10),(0,10),(2,0)]] |
+
+### Input parameters
+
+String starting with `POLYGON`
+
+### Returned value
+
+Polygon
+
+## polygonsWithinSpherical
+
+Returns true or false depending on whether the first polygon lies completely inside the second polygon. [Reference](https://www.boost.org/doc/libs/1_62_0/libs/geometry/doc/html/geometry/reference/algorithms/within/within_2.html).
+
+### Example
+
+``` sql
+SELECT polygonsWithinSpherical([[[(4.3613577, 50.8651821), (4.349556, 50.8535879), (4.3602419, 50.8435626), (4.3830299, 50.8428851), (4.3904543, 50.8564867), (4.3613148, 50.8651279)]]], [[[(4.346693, 50.858306), (4.367945, 50.852455), (4.366227, 50.840809), (4.344961, 50.833264), (4.338074, 50.848677), (4.346693, 50.858306)]]]);
+```
+```response
+0
+```
+
+### Input parameters
+
+Two polygons
+
+### Returned value
+
+UInt8, 0 for false, 1 for true
+
+## polygonsDistanceSpherical
+
+Calculates the minimal distance between two points, where one point belongs to the first polygon and the other to the second polygon. Spherical means that coordinates are interpreted as coordinates on a pure and ideal sphere, which is not true for the Earth. Using this type of coordinate system speeds up execution, but it is of course not precise.
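+
+The distance appears to be returned as a great-circle angle in radians on the unit sphere: the example below returns about 0.2437, consistent with the roughly 14 degree angular separation of the two input squares. This is an interpretation of the example, not documented behavior. A hypothetical conversion sketch, assuming a mean Earth radius of 6371 km:
+
+``` sql
+-- Hypothetical sketch: scale the radian result by a mean Earth radius (6371 km)
+-- to get an approximate distance in kilometers.
+SELECT polygonsDistanceSpherical([[[(0, 0), (0, 0.1), (0.1, 0.1), (0.1, 0)]]], [[[(10., 10.), (10., 40.), (40., 40.), (40., 10.), (10., 10.)]]]) * 6371 AS distance_km
+```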
+
+### Example
+
+``` sql
+SELECT polygonsDistanceSpherical([[[(0, 0), (0, 0.1), (0.1, 0.1), (0.1, 0)]]], [[[(10., 10.), (10., 40.), (40., 40.), (40., 10.), (10., 10.)]]])
+```
+```response
+0.24372872211133834
+```
+
+### Input parameters
+
+Two polygons
+
+### Returned value
+
+Float64
+
+## polygonsDistanceCartesian
+
+Calculates the distance between two polygons.
+
+### Example
+
+``` sql
+SELECT polygonsDistanceCartesian([[[(0, 0), (0, 0.1), (0.1, 0.1), (0.1, 0)]]], [[[(10., 10.), (10., 40.), (40., 40.), (40., 10.), (10., 10.)]]])
+```
+```response
+14.000714267493642
+```
+
+### Input parameters
+
+Two polygons
+
+### Returned value
+
+Float64
+
+## polygonsEqualsCartesian
+
+Returns true if two polygons are equal. As the example below shows, a ring may be written with or without repeating its closing point.
+
+### Example
+
+``` sql
+SELECT polygonsEqualsCartesian([[[(1., 1.), (1., 4.), (4., 4.), (4., 1.)]]], [[[(1., 1.), (1., 4.), (4., 4.), (4., 1.), (1., 1.)]]])
+```
+```response
+1
+```
+
+### Input parameters
+
+Two polygons
+
+### Returned value
+
+UInt8, 0 for false, 1 for true
+
+## polygonsSymDifferenceSpherical
+
+Calculates the spatial set-theoretic symmetric difference (XOR) between two polygons.
+
+### Example
+
+``` sql
+SELECT wkt(arraySort(polygonsSymDifferenceSpherical([[(50., 50.), (50., -50.), (-50., -50.), (-50., 50.), (50., 50.)], [(10., 10.), (10., 40.), (40., 40.), (40., 10.), (10., 10.)], [(-10., -10.), (-10., -40.), (-40., -40.), (-40., -10.), (-10., -10.)]], [[(-20., -20.), (-20., 20.), (20., 20.), (20., -20.), (-20., -20.)]])));
+```
+```response
+MULTIPOLYGON(((-20 -10.3067,-10 -10,-10 -20.8791,-20 -20,-20 -10.3067)),((10 20.8791,20 20,20 10.3067,10 10,10 20.8791)),((50 50,50 -50,-50 -50,-50 50,50 50),(20 10.3067,40 10,40 40,10 40,10 20.8791,-20 20,-20 -10.3067,-40 -10,-40 -40,-10 -40,-10 -20.8791,20 -20,20 10.3067)))
+```
+
+### Input parameters
+
+Polygons
+
+### Returned value
+
+MultiPolygon
+
+## polygonsSymDifferenceCartesian
+
+The same as `polygonsSymDifferenceSpherical`, but the coordinates are in the Cartesian coordinate system, which is closer to the model of the real Earth.
+
+### Example
+
+``` sql
+SELECT wkt(polygonsSymDifferenceCartesian([[[(0, 0), (0, 3), (1, 2.9), (2, 2.6), (2.6, 2), (2.9, 1), (3, 0), (0, 0)]]], [[[(1., 1.), (1., 4.), (4., 4.), (4., 1.), (1., 1.)]]]))
+```
+```response
+MULTIPOLYGON(((1 2.9,1 1,2.9 1,3 0,0 0,0 3,1 2.9)),((1 2.9,1 4,4 4,4 1,2.9 1,2.6 2,2 2.6,1 2.9)))
+```
+
+### Input parameters
+
+Polygons
+
+### Returned value
+
+MultiPolygon
+
+## polygonsIntersectionSpherical
+
+Calculates the intersection (AND) between polygons; coordinates are spherical.
+
+### Example
+
+``` sql
+SELECT wkt(arrayMap(a -> arrayMap(b -> arrayMap(c -> (round(c.1, 6), round(c.2, 6)), b), a), polygonsIntersectionSpherical([[[(4.3613577, 50.8651821), (4.349556, 50.8535879), (4.3602419, 50.8435626), (4.3830299, 50.8428851), (4.3904543, 50.8564867), (4.3613148, 50.8651279)]]], [[[(4.346693, 50.858306), (4.367945, 50.852455), (4.366227, 50.840809), (4.344961, 50.833264), (4.338074, 50.848677), (4.346693, 50.858306)]]])))
+```
+```response
+MULTIPOLYGON(((4.3666 50.8434,4.36024 50.8436,4.34956 50.8536,4.35268 50.8567,4.36794 50.8525,4.3666 50.8434)))
+```
+
+### Input parameters
+
+Polygons
+
+### Returned value
+
+MultiPolygon
+
+## polygonsWithinCartesian
+
+Returns true if the first polygon is within the second polygon.
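+
+The argument order appears to match `boost::geometry::within(first, second)`: swapping the two polygons from the example below should return `0`, since the larger polygon is not contained in the smaller one. An illustrative sketch, not an official example:
+
+``` sql
+-- Hypothetical counter-check: the containing polygon is passed first,
+-- so it is not "within" the smaller one and the expected result is 0.
+SELECT polygonsWithinCartesian([[[(1., 1.), (1., 4.), (4., 4.), (4., 1.), (1., 1.)]]], [[[(2., 2.), (2., 3.), (3., 3.), (3., 2.)]]])
+```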
+ +### Example + +``` sql +SELECT polygonsWithinCartesian([[[(2., 2.), (2., 3.), (3., 3.), (3., 2.)]]], [[[(1., 1.), (1., 4.), (4., 4.), (4., 1.), (1., 1.)]]]) +``` +```response +1 +``` + +### Input parameters + +Two polygons + +### Returned value + +UInt8, 0 for false, 1 for true + +## polygonConvexHullCartesian + +Calculates a convex hull. [Reference](https://www.boost.org/doc/libs/1_61_0/libs/geometry/doc/html/geometry/reference/algorithms/convex_hull.html) + +Coordinates are in Cartesian coordinate system. + +### Example + +``` sql +SELECT wkt(polygonConvexHullCartesian([[[(0., 0.), (0., 5.), (5., 5.), (5., 0.), (2., 3.)]]])) +``` +```response +POLYGON((0 0,0 5,5 5,5 0,0 0)) +``` + +### Input parameters + +MultiPolygon + +### Returned value + +Polygon + +## polygonAreaSpherical + +Calculates the surface area of a polygon. + +### Example + +``` sql +SELECT round(polygonAreaSpherical([[[(4.346693, 50.858306), (4.367945, 50.852455), (4.366227, 50.840809), (4.344961, 50.833264), (4.338074, 50.848677), (4.346693, 50.858306)]]]), 14) +``` +```response +9.387704e-8 +``` + +### Input parameters + +Polygon + +### Returned value + +Float + +## polygonsUnionSpherical + +Calculates a union (OR). + +### Example + +``` sql +SELECT wkt(polygonsUnionSpherical([[[(4.3613577, 50.8651821), (4.349556, 50.8535879), (4.3602419, 50.8435626), (4.3830299, 50.8428851), (4.3904543, 50.8564867), (4.3613148, 50.8651279)]]], [[[(4.346693, 50.858306), (4.367945, 50.852455), (4.366227, 50.840809), (4.344961, 50.833264), (4.338074, 50.848677), (4.346693, 50.858306)]]])) +``` +```response +MULTIPOLYGON(((4.36661 50.8434,4.36623 50.8408,4.34496 50.8333,4.33807 50.8487,4.34669 50.8583,4.35268 50.8567,4.36136 50.8652,4.36131 50.8651,4.39045 50.8565,4.38303 50.8429,4.36661 50.8434))) +``` + +### Input parameters + +Polygons + +### Returned value + +MultiPolygon + +## polygonPerimeterSpherical + +Calculates the perimeter of the polygon. 
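+
+As with the other spherical functions, the perimeter appears to be an angle in radians on the unit sphere; the Zimbabwe example below returns about 0.4554, i.e. roughly 2900 km when scaled by a mean Earth radius of 6371 km. This is an interpretation of the example, not documented behavior. A minimal hypothetical sketch, passing a ring literal in the same form as the example below:
+
+``` sql
+-- Hypothetical sketch: perimeter of a small square ring, scaled from radians
+-- to kilometers with a mean Earth radius of 6371 km.
+SELECT polygonPerimeterSpherical([(0., 0.), (0., 1.), (1., 1.), (1., 0.)]) * 6371 AS perimeter_km
+```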
+ +### Example + +This is the polygon representing Zimbabwe: + + +``` +POLYGON((30.0107 -15.6462,30.0502 -15.6401,30.09 -15.6294,30.1301 -15.6237,30.1699 -15.6322,30.1956 -15.6491,30.2072 -15.6532,30.2231 -15.6497,30.231 -15.6447,30.2461 -15.6321,30.2549 -15.6289,30.2801 -15.6323,30.2962 -15.639,30.3281 -15.6524,30.3567 -15.6515,30.3963 -15.636,30.3977 -15.7168,30.3993 -15.812,30.4013 -15.9317,30.4026 -16.0012,30.5148 -16.0004,30.5866 -16,30.7497 -15.9989,30.8574 -15.9981,30.9019 -16.0071,30.9422 -16.0345,30.9583 -16.0511,30.9731 -16.062,30.9898 -16.0643,31.012 -16.0549,31.0237 -16.0452,31.0422 -16.0249,31.0569 -16.0176,31.0654 -16.0196,31.0733 -16.0255,31.0809 -16.0259,31.089 -16.0119,31.1141 -15.9969,31.1585 -16.0002,31.26 -16.0235,31.2789 -16.0303,31.2953 -16.0417,31.3096 -16.059,31.3284 -16.0928,31.3409 -16.1067,31.3603 -16.1169,31.3703 -16.1237,31.3746 -16.1329,31.3778 -16.1422,31.384 -16.1488,31.3877 -16.1496,31.3956 -16.1477,31.3996 -16.1473,31.4043 -16.1499,31.4041 -16.1545,31.4027 -16.1594,31.4046 -16.1623,31.4241 -16.1647,31.4457 -16.165,31.4657 -16.1677,31.4806 -16.178,31.5192 -16.1965,31.6861 -16.2072,31.7107 -16.2179,31.7382 -16.2398,31.7988 -16.3037,31.8181 -16.3196,31.8601 -16.3408,31.8719 -16.3504,31.8807 -16.368,31.8856 -16.4063,31.8944 -16.4215,31.9103 -16.4289,32.0141 -16.4449,32.2118 -16.4402,32.2905 -16.4518,32.3937 -16.4918,32.5521 -16.5534,32.6718 -16.5998,32.6831 -16.6099,32.6879 -16.6243,32.6886 -16.6473,32.6987 -16.6868,32.7252 -16.7064,32.7309 -16.7087,32.7313 -16.7088,32.7399 -16.7032,32.7538 -16.6979,32.7693 -16.6955,32.8007 -16.6973,32.862 -16.7105,32.8934 -16.7124,32.9096 -16.7081,32.9396 -16.6898,32.9562 -16.6831,32.9685 -16.6816,32.9616 -16.7103,32.9334 -16.8158,32.9162 -16.8479,32.9005 -16.8678,32.8288 -16.9351,32.8301 -16.9415,32.8868 -17.0382,32.9285 -17.1095,32.9541 -17.1672,32.9678 -17.2289,32.9691 -17.2661,32.9694 -17.2761,32.9732 -17.2979,32.9836 -17.3178,32.9924 -17.3247,33.0147 -17.3367,33.0216 -17.3456,33.0225 -17.3615,33.0163 -17.3772,33.0117 -17.384,32.9974 -17.405,32.9582 -17.4785,32.9517 -17.4862,32.943 -17.4916,32.9366 -17.4983,32.9367 -17.5094,32.9472 -17.5432,32.9517 -17.5514,32.9691 -17.5646,33.0066 -17.581,33.0204 -17.5986,33.0245 -17.6192,33.0206 -17.6385,33.0041 -17.6756,33.0002 -17.7139,33.0032 -17.7577,32.9991 -17.7943,32.9736 -17.8106,32.957 -17.818,32.9461 -17.8347,32.9397 -17.8555,32.9369 -17.875,32.9384 -17.8946,32.9503 -17.9226,32.9521 -17.9402,32.9481 -17.9533,32.9404 -17.96,32.9324 -17.9649,32.9274 -17.9729,32.929 -17.9823,32.9412 -17.9963,32.9403 -18.0048,32.9349 -18.0246,32.9371 -18.0471,32.9723 -18.1503,32.9755 -18.1833,32.9749 -18.1908,32.9659 -18.2122,32.9582 -18.2254,32.9523 -18.233,32.9505 -18.2413,32.955 -18.2563,32.9702 -18.2775,33.0169 -18.3137,33.035 -18.3329,33.0428 -18.352,33.0381 -18.3631,33.0092 -18.3839,32.9882 -18.4132,32.9854 -18.4125,32.9868 -18.4223,32.9995 -18.4367,33.003 -18.4469,32.9964 -18.4671,32.9786 -18.4801,32.9566 -18.4899,32.9371 -18.501,32.9193 -18.51,32.9003 -18.5153,32.8831 -18.5221,32.8707 -18.5358,32.8683 -18.5526,32.8717 -18.5732,32.8845 -18.609,32.9146 -18.6659,32.9223 -18.6932,32.9202 -18.7262,32.9133 -18.753,32.9025 -18.7745,32.8852 -18.7878,32.8589 -18.79,32.8179 -18.787,32.7876 -18.7913,32.6914 -18.8343,32.6899 -18.8432,32.6968 -18.8972,32.7032 -18.9119,32.7158 -18.9198,32.7051 -18.9275,32.6922 -18.9343,32.6825 -18.9427,32.6811 -18.955,32.6886 -18.9773,32.6903 -18.9882,32.6886 -19.001,32.6911 -19.0143,32.699 -19.0222,32.7103 -19.026,32.7239 -19.0266,32.786 -19.0177,32.8034 
-19.0196,32.8142 -19.0238,32.82 -19.0283,32.823 -19.0352,32.8253 -19.0468,32.8302 -19.0591,32.8381 -19.0669,32.8475 -19.0739,32.8559 -19.0837,32.8623 -19.1181,32.8332 -19.242,32.8322 -19.2667,32.8287 -19.2846,32.8207 -19.3013,32.8061 -19.3234,32.7688 -19.3636,32.7665 -19.3734,32.7685 -19.4028,32.7622 -19.4434,32.7634 -19.464,32.7739 -19.4759,32.7931 -19.4767,32.8113 -19.4745,32.8254 -19.4792,32.8322 -19.5009,32.8325 -19.5193,32.8254 -19.5916,32.8257 -19.6008,32.8282 -19.6106,32.8296 -19.6237,32.8254 -19.6333,32.8195 -19.642,32.8163 -19.6521,32.8196 -19.6743,32.831 -19.6852,32.8491 -19.6891,32.8722 -19.6902,32.8947 -19.6843,32.9246 -19.6553,32.9432 -19.6493,32.961 -19.6588,32.9624 -19.6791,32.9541 -19.7178,32.9624 -19.7354,32.9791 -19.7514,33.0006 -19.7643,33.0228 -19.7731,33.0328 -19.7842,33.0296 -19.8034,33.0229 -19.8269,33.0213 -19.8681,33.002 -19.927,32.9984 -20.0009,33.0044 -20.0243,33.0073 -20.032,32.9537 -20.0302,32.9401 -20.0415,32.9343 -20.0721,32.9265 -20.0865,32.9107 -20.0911,32.8944 -20.094,32.8853 -20.103,32.8779 -20.1517,32.8729 -20.1672,32.8593 -20.1909,32.8571 -20.2006,32.8583 -20.2075,32.8651 -20.2209,32.8656 -20.2289,32.8584 -20.2595,32.853 -20.2739,32.8452 -20.2867,32.8008 -20.3386,32.7359 -20.4142,32.7044 -20.4718,32.6718 -20.5318,32.6465 -20.558,32.6037 -20.5648,32.5565 -20.5593,32.5131 -20.5646,32.4816 -20.603,32.4711 -20.6455,32.4691 -20.6868,32.4835 -20.7942,32.4972 -20.8981,32.491 -20.9363,32.4677 -20.9802,32.4171 -21.0409,32.3398 -21.1341,32.3453 -21.1428,32.3599 -21.1514,32.3689 -21.163,32.3734 -21.1636,32.3777 -21.1634,32.3806 -21.1655,32.3805 -21.1722,32.3769 -21.1785,32.373 -21.184,32.3717 -21.1879,32.4446 -21.3047,32.4458 -21.309,32.4472 -21.3137,32.4085 -21.2903,32.373 -21.3279,32.3245 -21.3782,32.2722 -21.4325,32.2197 -21.4869,32.1673 -21.5413,32.1148 -21.5956,32.0624 -21.65,32.01 -21.7045,31.9576 -21.7588,31.9052 -21.8132,31.8527 -21.8676,31.8003 -21.922,31.7478 -21.9764,31.6955 -22.0307,31.6431 -22.0852,31.5907 -22.1396,31.5382 -22.1939,31.4858 -22.2483,31.4338 -22.302,31.3687 -22.345,31.2889 -22.3973,31.2656 -22.3655,31.2556 -22.358,31.2457 -22.3575,31.2296 -22.364,31.2215 -22.3649,31.2135 -22.3619,31.1979 -22.3526,31.1907 -22.3506,31.1837 -22.3456,31.1633 -22.3226,31.1526 -22.3164,31.1377 -22.3185,31.1045 -22.3334,31.097 -22.3349,31.0876 -22.3369,31.0703 -22.3337,31.0361 -22.3196,30.9272 -22.2957,30.8671 -22.2896,30.8379 -22.2823,30.8053 -22.2945,30.6939 -22.3028,30.6743 -22.3086,30.6474 -22.3264,30.6324 -22.3307,30.6256 -22.3286,30.6103 -22.3187,30.6011 -22.3164,30.5722 -22.3166,30.5074 -22.3096,30.4885 -22.3102,30.4692 -22.3151,30.4317 -22.3312,30.4127 -22.3369,30.3721 -22.3435,30.335 -22.3447,30.3008 -22.337,30.2693 -22.3164,30.2553 -22.3047,30.2404 -22.2962,30.2217 -22.2909,30.197 -22.2891,30.1527 -22.2948,30.1351 -22.2936,30.1111 -22.2823,30.0826 -22.2629,30.0679 -22.2571,30.0381 -22.2538,30.0359 -22.2506,30.0345 -22.2461,30.0155 -22.227,30.0053 -22.2223,29.9838 -22.2177,29.974 -22.214,29.9467 -22.1983,29.9321 -22.1944,29.896 -22.1914,29.8715 -22.1793,29.8373 -22.1724,29.7792 -22.1364,29.7589 -22.1309,29.6914 -22.1341,29.6796 -22.1383,29.6614 -22.1265,29.6411 -22.1292,29.604 -22.1451,29.5702 -22.142,29.551 -22.146,29.5425 -22.1625,29.5318 -22.1724,29.5069 -22.1701,29.4569 -22.1588,29.4361 -22.1631,29.3995 -22.1822,29.378 -22.1929,29.3633 -22.1923,29.3569 -22.1909,29.3501 -22.1867,29.2736 -22.1251,29.2673 -22.1158,29.2596 -22.0961,29.2541 -22.0871,29.2444 -22.0757,29.2393 -22.0726,29.1449 -22.0753,29.108 -22.0692,29.0708 -22.051,29.0405 
-22.0209,29.0216 -21.9828,29.0138 -21.9404,29.0179 -21.8981,29.0289 -21.8766,29.0454 -21.8526,29.0576 -21.8292,29.0553 -21.81,29.0387 -21.7979,28.9987 -21.786,28.9808 -21.7748,28.9519 -21.7683,28.891 -21.7649,28.8609 -21.7574,28.7142 -21.6935,28.6684 -21.68,28.6297 -21.6513,28.6157 -21.6471,28.5859 -21.6444,28.554 -21.6366,28.5429 -21.6383,28.5325 -21.6431,28.4973 -21.6515,28.4814 -21.6574,28.4646 -21.6603,28.4431 -21.6558,28.3618 -21.6163,28.3219 -21.6035,28.2849 -21.5969,28.1657 -21.5952,28.0908 -21.5813,28.0329 -21.5779,28.0166 -21.5729,28.0026 -21.5642,27.9904 -21.5519,27.9847 -21.5429,27.9757 -21.5226,27.9706 -21.5144,27.9637 -21.5105,27.9581 -21.5115,27.9532 -21.5105,27.9493 -21.5008,27.9544 -21.4878,27.9504 -21.482,27.9433 -21.4799,27.9399 -21.478,27.9419 -21.4685,27.9496 -21.4565,27.953 -21.4487,27.9502 -21.4383,27.9205 -21.3812,27.9042 -21.3647,27.8978 -21.3554,27.8962 -21.3479,27.8967 -21.3324,27.8944 -21.3243,27.885 -21.3102,27.8491 -21.2697,27.8236 -21.2317,27.7938 -21.1974,27.7244 -21.1497,27.7092 -21.1345,27.6748 -21.0901,27.6666 -21.0712,27.6668 -21.0538,27.679 -21.0007,27.6804 -20.9796,27.6727 -20.9235,27.6726 -20.9137,27.6751 -20.8913,27.6748 -20.8799,27.676 -20.8667,27.6818 -20.8576,27.689 -20.849,27.6944 -20.8377,27.7096 -20.7567,27.7073 -20.7167,27.6825 -20.6373,27.6904 -20.6015,27.7026 -20.5661,27.7056 -20.5267,27.6981 -20.5091,27.6838 -20.4961,27.666 -20.4891,27.6258 -20.4886,27.5909 -20.4733,27.5341 -20.483,27.4539 -20.4733,27.3407 -20.473,27.306 -20.4774,27.2684 -20.4958,27.284 -20.3515,27.266 -20.2342,27.2149 -20.1105,27.2018 -20.093,27.1837 -20.0823,27.1629 -20.0766,27.1419 -20.0733,27.1297 -20.0729,27.1198 -20.0739,27.1096 -20.0732,27.0973 -20.0689,27.0865 -20.0605,27.0692 -20.0374,27.0601 -20.0276,27.0267 -20.0101,26.9943 -20.0068,26.9611 -20.0072,26.9251 -20.0009,26.8119 -19.9464,26.7745 -19.9398,26.7508 -19.9396,26.731 -19.9359,26.7139 -19.9274,26.6986 -19.9125,26.6848 -19.8945,26.6772 -19.8868,26.6738 -19.8834,26.6594 -19.8757,26.6141 -19.8634,26.5956 -19.8556,26.5819 -19.8421,26.5748 -19.8195,26.5663 -19.8008,26.5493 -19.7841,26.5089 -19.7593,26.4897 -19.7519,26.4503 -19.7433,26.4319 -19.7365,26.4128 -19.7196,26.3852 -19.6791,26.3627 -19.6676,26.3323 -19.6624,26.3244 -19.6591,26.3122 -19.6514,26.3125 -19.6496,26.3191 -19.6463,26.3263 -19.6339,26.3335 -19.613,26.331 -19.605,26.3211 -19.592,26.3132 -19.5842,26.3035 -19.5773,26.2926 -19.5725,26.2391 -19.5715,26.1945 -19.5602,26.1555 -19.5372,26.1303 -19.5011,26.0344 -19.2437,26.0114 -19.1998,25.9811 -19.1618,25.9565 -19.1221,25.9486 -19.1033,25.9449 -19.0792,25.9481 -19.0587,25.9644 -19.0216,25.9678 -19.001,25.9674 -18.9999,25.9407 -18.9213,25.8153 -18.814,25.7795 -18.7388,25.7734 -18.6656,25.7619 -18.6303,25.7369 -18.6087,25.6983 -18.5902,25.6695 -18.566,25.6221 -18.5011,25.6084 -18.4877,25.5744 -18.4657,25.5085 -18.3991,25.4956 -18.3789,25.4905 -18.3655,25.4812 -18.3234,25.4732 -18.3034,25.4409 -18.2532,25.4088 -18.176,25.3875 -18.139,25.3574 -18.1158,25.3234 -18.0966,25.2964 -18.0686,25.255 -18.0011,25.2261 -17.9319,25.2194 -17.908,25.2194 -17.8798,25.2598 -17.7941,25.2667 -17.8009,25.2854 -17.8093,25.3159 -17.8321,25.3355 -17.8412,25.3453 -17.8426,25.3765 -17.8412,25.4095 -17.853,25.4203 -17.8549,25.4956 -17.8549,25.5007 -17.856,25.5102 -17.8612,25.5165 -17.8623,25.5221 -17.8601,25.5309 -17.851,25.5368 -17.8487,25.604 -17.8362,25.657 -17.8139,25.6814 -17.8115,25.6942 -17.8194,25.7064 -17.8299,25.7438 -17.8394,25.766 -17.8498,25.786 -17.8622,25.7947 -17.8727,25.8044 -17.8882,25.8497 -17.9067,25.8636 
-17.9238,25.8475 -17.9294,25.8462 -17.9437,25.8535 -17.96,25.8636 -17.9716,25.9245 -17.999,25.967 -18.0005,25.9785 -17.999,26.0337 -17.9716,26.0406 -17.9785,26.0466 -17.9663,26.0625 -17.9629,26.0812 -17.9624,26.0952 -17.9585,26.0962 -17.9546,26.0942 -17.9419,26.0952 -17.9381,26.1012 -17.9358,26.1186 -17.9316,26.1354 -17.9226,26.1586 -17.9183,26.1675 -17.9136,26.203 -17.8872,26.2119 -17.8828,26.2211 -17.8863,26.2282 -17.8947,26.2339 -17.904,26.2392 -17.9102,26.2483 -17.9134,26.2943 -17.9185,26.3038 -17.9228,26.312 -17.9284,26.3183 -17.9344,26.3255 -17.936,26.3627 -17.9306,26.4086 -17.939,26.4855 -17.9793,26.5271 -17.992,26.5536 -17.9965,26.5702 -18.0029,26.5834 -18.0132,26.5989 -18.03,26.6127 -18.0412,26.6288 -18.0492,26.6857 -18.0668,26.7 -18.0692,26.7119 -18.0658,26.7406 -18.0405,26.7536 -18.033,26.7697 -18.029,26.794 -18.0262,26.8883 -17.9846,26.912 -17.992,26.9487 -17.9689,26.9592 -17.9647,27.0063 -17.9627,27.0213 -17.9585,27.0485 -17.9443,27.0782 -17.917,27.1154 -17.8822,27.149 -17.8425,27.1465 -17.8189,27.1453 -17.7941,27.147 -17.7839,27.1571 -17.7693,27.4221 -17.5048,27.5243 -17.4151,27.5773 -17.3631,27.6045 -17.3128,27.6249 -17.2333,27.6412 -17.1985,27.7773 -17.0012,27.8169 -16.9596,27.8686 -16.9297,28.023 -16.8654,28.1139 -16.8276,28.2125 -16.7486,28.2801 -16.7065,28.6433 -16.5688,28.6907 -16.5603,28.7188 -16.5603,28.7328 -16.5581,28.7414 -16.5507,28.7611 -16.5323,28.7693 -16.5152,28.8089 -16.4863,28.8225 -16.4708,28.8291 -16.4346,28.8331 -16.4264,28.8572 -16.3882,28.857 -16.3655,28.8405 -16.3236,28.8368 -16.3063,28.8403 -16.2847,28.8642 -16.2312,28.8471 -16.2027,28.8525 -16.1628,28.8654 -16.1212,28.871 -16.0872,28.8685 -16.0822,28.8638 -16.0766,28.8593 -16.0696,28.8572 -16.0605,28.8603 -16.0494,28.8741 -16.0289,28.8772 -16.022,28.8989 -15.9955,28.9324 -15.9637,28.9469 -15.9572,28.9513 -15.9553,28.9728 -15.9514,29.0181 -15.9506,29.0423 -15.9463,29.0551 -15.9344,29.0763 -15.8954,29.0862 -15.8846,29.1022 -15.8709,29.1217 -15.8593,29.1419 -15.8545,29.151 -15.8488,29.1863 -15.8128,29.407 -15.7142,29.4221 -15.711,29.5085 -15.7036,29.5262 -15.6928,29.5634 -15.6621,29.5872 -15.6557,29.6086 -15.6584,29.628 -15.6636,29.6485 -15.6666,29.6728 -15.6633,29.73 -15.6447,29.7733 -15.6381,29.8143 -15.6197,29.8373 -15.6148,29.8818 -15.6188,29.9675 -15.6415,30.0107 -15.6462)) +``` + +``` sql +SELECT round(polygonPerimeterSpherical([(30.010654, -15.646227), (30.050238, -15.640129), (30.090029, -15.629381), (30.130129, -15.623696), (30.16992, -15.632171), (30.195552, -15.649121), (30.207231, -15.653152), (30.223147, -15.649741), (30.231002, -15.644677), (30.246091, -15.632068), (30.254876, -15.628864), (30.280094, -15.632275), (30.296196, -15.639042), (30.32805, -15.652428), (30.356679, -15.651498), (30.396263, -15.635995), (30.39771, -15.716817), (30.39926, -15.812005), (30.401327, -15.931688), (30.402568, -16.001244), (30.514809, -16.000418), (30.586587, -16.000004), (30.74973, -15.998867), (30.857424, -15.998144), (30.901865, -16.007136), (30.942173, -16.034524), (30.958296, -16.05106), (30.973075, -16.062016), (30.989767, -16.06429), (31.012039, -16.054885), (31.023718, -16.045169), (31.042218, -16.024912), (31.056895, -16.017574), (31.065421, -16.019641), (31.073328, -16.025532), (31.080872, -16.025946), (31.089037, -16.01189), (31.1141, -15.996904), (31.15849, -16.000211), (31.259983, -16.023465), (31.278897, -16.030287), (31.29533, -16.041655), (31.309592, -16.059019), (31.328351, -16.092815), (31.340908, -16.106664), (31.360339, -16.116896), (31.37026, -16.123718), (31.374601, -16.132916), 
(31.377754, -16.142218), (31.384006, -16.148832), (31.387727, -16.149556), (31.395582, -16.147695), (31.399613, -16.147282), (31.404315, -16.149866), (31.404057, -16.154517), (31.402713, -16.159374), (31.404574, -16.162268), (31.424107, -16.164749), (31.445708, -16.164955), (31.465655, -16.167746), (31.480641, -16.177978), (31.519192, -16.196478), (31.686107, -16.207227), (31.710705, -16.217872), (31.738197, -16.239783), (31.798761, -16.303655), (31.818088, -16.319571), (31.86005, -16.340759), (31.871935, -16.35037), (31.88072, -16.368044), (31.88563, -16.406284), (31.894363, -16.421477), (31.910279, -16.428919), (32.014149, -16.444938), (32.211759, -16.440184), (32.290463, -16.45176), (32.393661, -16.491757), (32.5521, -16.553355), (32.671783, -16.599761), (32.6831, -16.609889), (32.687906, -16.624255), (32.68863, -16.647303), (32.698655, -16.686784), (32.725217, -16.706421), (32.73095, -16.708656), (32.731314, -16.708798), (32.739893, -16.703217), (32.753845, -16.697946), (32.769348, -16.695466), (32.800664, -16.697326), (32.862004, -16.710452), (32.893372, -16.712415), (32.909598, -16.708075), (32.93957, -16.689781), (32.95621, -16.683063), (32.968509, -16.681615999999998), (32.961585, -16.710348), (32.933369, -16.815768), (32.916213, -16.847911), (32.900503, -16.867755), (32.828776, -16.935141), (32.83012, -16.941549), (32.886757, -17.038184), (32.928512, -17.109497), (32.954143, -17.167168), (32.967786, -17.22887), (32.96909, -17.266115), (32.969439, -17.276102), (32.973212, -17.297909), (32.983599, -17.317753), (32.992384, -17.324678), (33.014656, -17.336667), (33.021633, -17.345555), (33.022459, -17.361471), (33.016258, -17.377181), (33.011651, -17.383991), (32.997448, -17.404983), (32.958174, -17.478467), (32.951663, -17.486218), (32.942981, -17.491593), (32.936573, -17.498311), (32.936676, -17.509369), (32.947218, -17.543166), (32.951663, -17.551434), (32.969129, -17.56456), (33.006646, -17.580993), (33.020392, -17.598563), (33.024526, -17.619233), (33.020599, -17.638457), (33.004063, -17.675561), (33.000238, -17.713905), (33.003184, -17.757726), (32.999102, -17.794313), (32.973573, -17.810643), (32.957037, -17.817981), (32.946082, -17.834724), (32.939674, -17.855498), (32.936883, -17.875032), (32.938433, -17.894566), (32.950267, -17.922574), (32.952128, -17.940247), (32.948149, -17.95327), (32.940397, -17.959988), (32.932439, -17.964949), (32.927375, -17.972907), (32.928977, -17.982312), (32.941224, -17.996265), (32.940294, -18.004843), (32.934919, -18.024583), (32.93709, -18.047114), (32.972282, -18.150261), (32.975537, -18.183333), (32.974865, -18.190775), (32.965925, -18.212169), (32.958174, -18.225398), (32.952283, -18.233046), (32.950525999999996, -18.241314), (32.95497, -18.256301), (32.970163, -18.277488), (33.016878, -18.313661), (33.034965, -18.332885), (33.042768, -18.352005), (33.038066, -18.363064), (33.00923, -18.383941), (32.988198, -18.41319), (32.985356, -18.412467), (32.986803, -18.422285), (32.999515, -18.436651), (33.003029, -18.446883), (32.996414, -18.46714), (32.978586, -18.48006), (32.956624, -18.489878), (32.937142, -18.50104), (32.919313, -18.510032), (32.900296, -18.515303), (32.88314, -18.522124), (32.870737, -18.535767), (32.868257, -18.552613), (32.871668, -18.57318), (32.884483, -18.609044), (32.914559, -18.665888), (32.92231, -18.693173), (32.920243, -18.726246), (32.913267, -18.753014), (32.902518, -18.774512), (32.885207, -18.787844), (32.858852, -18.790015), (32.817924, -18.787018), (32.787642, -18.791255), (32.69142, -18.83425), (32.68987, 
-18.843241), (32.696794, -18.897192), (32.703202, -18.911868), (32.71576, -18.919826), (32.705063, -18.927474), (32.692247, -18.934295), (32.682532, -18.942667), (32.681085, -18.954966), (32.68863, -18.97729), (32.690283, -18.988246), (32.68863, -19.000958), (32.691058, -19.01429), (32.698965, -19.022249), (32.710282, -19.025969), (32.723873, -19.026589), (32.785988, -19.017701), (32.803351, -19.019561), (32.814203, -19.023799), (32.819991, -19.028346), (32.822988, -19.035168), (32.825262, -19.046847), (32.830223, -19.059146), (32.83813, -19.066897), (32.847483, -19.073925), (32.855906, -19.083744), (32.862262, -19.118057), (32.83322, -19.241977), (32.832187, -19.266678), (32.828673, -19.284558), (32.820715, -19.301301), (32.806142, -19.323419), (32.768831, -19.363623), (32.766454, -19.373442), (32.768521, -19.402794), (32.762217, -19.443412), (32.763354, -19.463979), (32.773947, -19.475864), (32.793119, -19.476691), (32.811309, -19.474521), (32.825365, -19.479172), (32.832187, -19.500876), (32.832497000000004, -19.519273), (32.825365, -19.59162), (32.825675, -19.600818), (32.828156, -19.610636), (32.829603, -19.623659), (32.825365, -19.633271), (32.819474, -19.641952), (32.81627, -19.652081), (32.819629, -19.674302), (32.83105, -19.685154), (32.849137, -19.689081), (32.872184, -19.690218), (32.894715, -19.684327), (32.924584, -19.655285), (32.943188, -19.64929), (32.960964, -19.658799), (32.962411, -19.679056), (32.954143, -19.717813), (32.962411, -19.735383), (32.979051, -19.751403), (33.0006, -19.764322), (33.022769, -19.773107), (33.032795, -19.784166), (33.029642, -19.80339), (33.022873, -19.826851), (33.021322, -19.868088), (33.001995, -19.927), (32.998378, -20.000897), (33.004373, -20.024255), (33.007266, -20.032006), (32.95373, -20.030249), (32.940087, -20.041515), (32.934299, -20.072107), (32.926548, -20.086473), (32.910683, -20.091124), (32.894405, -20.094018), (32.88531, -20.10301), (32.877869, -20.151689), (32.872908, -20.167192), (32.859265, -20.190859), (32.857095, -20.200575), (32.858335, -20.207499), (32.865053, -20.220935), (32.86557, -20.228893), (32.858438, -20.259486), (32.852961, -20.273852), (32.845209, -20.286668), (32.800767, -20.338551), (32.735862, -20.414205), (32.704443, -20.471773), (32.671783, -20.531821), (32.646462, -20.557969), (32.603674, -20.56479), (32.556545, -20.559312), (32.513136, -20.564583), (32.481614, -20.603031), (32.471072, -20.645509), (32.469108, -20.68685), (32.483474, -20.794233), (32.49722, -20.898103), (32.491019, -20.936344), (32.467661, -20.980165), (32.417122, -21.040937), (32.339814, -21.134058), (32.345343, -21.142843), (32.359864, -21.151421), (32.368856, -21.162997), (32.373352, -21.163617), (32.377744, -21.16341), (32.380638, -21.165477), (32.380535, -21.172195), (32.376866, -21.178499), (32.37299, -21.183977), (32.37175, -21.187905), (32.444613, -21.304693), (32.445849, -21.308994), (32.447197, -21.313685), (32.408543, -21.290327), (32.37299, -21.327948), (32.324517, -21.378177), (32.272221, -21.432541), (32.219718, -21.486904), (32.167318, -21.541268), (32.114814, -21.595632), (32.062415, -21.649995), (32.010015, -21.704462), (31.957615, -21.758826), (31.905215, -21.813189), (31.852712, -21.867553), (31.800312, -21.92202), (31.747808, -21.976384), (31.695512, -22.030747), (31.643112, -22.085214), (31.590712, -22.139578), (31.538209, -22.193941), (31.485809, -22.248305), (31.433822, -22.302048), (31.36871, -22.345043), (31.288922, -22.39734), (31.265616, -22.365507), (31.255642, -22.357962), (31.24572, -22.357549), (31.229597, 
-22.363957), (31.221536, -22.364887), (31.213474, -22.36189), (31.197868, -22.352588), (31.190685, -22.350624), (31.183657, -22.34556), (31.163348, -22.322616), (31.152599, -22.316414), (31.137717, -22.318482), (31.10454, -22.333364), (31.097048, -22.334922), (31.087642, -22.336878), (31.07033, -22.333674), (31.036121, -22.319618), (30.927187, -22.295744), (30.867087, -22.289646), (30.83789, -22.282308), (30.805282, -22.294504), (30.693919, -22.302772), (30.674282, -22.30856), (30.647410999999998, -22.32644), (30.632424, -22.330677), (30.625551, -22.32861), (30.610307, -22.318688), (30.601108, -22.316414), (30.57217, -22.316621), (30.507367, -22.309593), (30.488454, -22.310213), (30.46923, -22.315071), (30.431713, -22.331194), (30.412696, -22.336878), (30.372078, -22.343493), (30.334975, -22.344733), (30.300765, -22.336982), (30.269346, -22.316414), (30.25529, -22.304736), (30.240407, -22.296157), (30.2217, -22.290886), (30.196999, -22.289129), (30.15266, -22.294814), (30.13509, -22.293574), (30.111113, -22.282308), (30.082587, -22.262878), (30.067911, -22.25709), (30.038145, -22.253783), (30.035872, -22.250579), (30.034528, -22.246135), (30.015511, -22.227014), (30.005279, -22.22226), (29.983782, -22.217713), (29.973963, -22.213992), (29.946678, -22.198282), (29.932105, -22.194355), (29.896035, -22.191358), (29.871489, -22.179265), (29.837331, -22.172444), (29.779246, -22.136374), (29.758886, -22.130896), (29.691448, -22.1341), (29.679614, -22.138338), (29.661424, -22.126452), (29.641064, -22.129242), (29.60396, -22.145055), (29.570164, -22.141955), (29.551043, -22.145986), (29.542517, -22.162522), (29.53182, -22.172444), (29.506912, -22.170067), (29.456889, -22.158801), (29.436115, -22.163142), (29.399528, -22.182159), (29.378031, -22.192908), (29.363250999999998, -22.192288), (29.356947, -22.190944000000002), (29.350074, -22.186707), (29.273644, -22.125108), (29.26734, -22.115807), (29.259588, -22.096066), (29.254111, -22.087074), (29.244395, -22.075706), (29.239331, -22.072605), (29.144867, -22.075292), (29.10797, -22.069194), (29.070763, -22.051004), (29.040532, -22.020929), (29.021567, -21.982791), (29.013815, -21.940417), (29.017949, -21.898145), (29.028905, -21.876648), (29.045441, -21.852567), (29.057637, -21.829209), (29.05526, -21.809985), (29.038723, -21.797893), (28.998726, -21.786008), (28.980846, -21.774845), (28.951907, -21.768334), (28.891032, -21.764924), (28.860853, -21.757379), (28.714195, -21.693507), (28.66841, -21.679968), (28.629704, -21.651339), (28.6157, -21.647101), (28.585934, -21.644414), (28.553998, -21.636559), (28.542939, -21.638316), (28.532501, -21.643071), (28.497309, -21.651546), (28.481393, -21.657437), (28.464598, -21.660331), (28.443101, -21.655783), (28.361762, -21.616302), (28.321919, -21.603486), (28.284867, -21.596872), (28.165702, -21.595218), (28.090771, -21.581266), (28.032893, -21.577855), (28.016563, -21.572894), (28.002559, -21.564212), (27.990415, -21.551913), (27.984731, -21.542922), (27.975739, -21.522561), (27.970571, -21.514396), (27.963698, -21.510469), (27.958066, -21.511502), (27.953208, -21.510469), (27.949281, -21.500754), (27.954448, -21.487835), (27.950418, -21.482047), (27.943338, -21.479876), (27.939876, -21.478016), (27.941943, -21.468508), (27.949642, -21.456519), (27.953001, -21.448664), (27.950211, -21.438329), (27.920549, -21.381174), (27.904219, -21.364741), (27.897811, -21.35544), (27.896157, -21.347895), (27.896674, -21.332392), (27.8944, -21.32433), (27.884995, -21.310171), (27.849132, -21.269657), (27.823604, 
-21.231726), (27.793838, -21.197413), (27.724385, -21.149664), (27.709192, -21.134471), (27.674775, -21.090133), (27.666611, -21.071219), (27.666817, -21.053753), (27.678961, -21.000733), (27.680356, -20.979649), (27.672657, -20.923528), (27.672605, -20.913709), (27.675085, -20.891282), (27.674775, -20.879913), (27.676016, -20.866684), (27.681803, -20.857589), (27.689038, -20.849011), (27.694412, -20.837744999999998), (27.709605, -20.756716), (27.707332, -20.716719), (27.682475, -20.637344), (27.690382, -20.60148), (27.702629, -20.566134), (27.705575, -20.526653), (27.698133, -20.509083), (27.683767, -20.49606), (27.66599, -20.489136), (27.625786, -20.488619), (27.590853, -20.473323), (27.534112, -20.483038), (27.45391, -20.473323), (27.340739, -20.473013), (27.306012, -20.477354), (27.268392, -20.49575), (27.283998, -20.35147), (27.266015, -20.234164), (27.214907, -20.110451), (27.201781, -20.092984), (27.183746, -20.082339), (27.16292, -20.076551), (27.141888, -20.073347), (27.129692, -20.072934), (27.119771, -20.073864), (27.109642, -20.073244), (27.097343, -20.068903), (27.086491, -20.060532), (27.069231, -20.03738), (27.060136, -20.027562), (27.02665, -20.010095), (26.9943, -20.006788), (26.961072, -20.007201), (26.925054, -20.000897), (26.811882, -19.94643), (26.774469, -19.939815), (26.750801, -19.939609), (26.730957, -19.935888), (26.713904, -19.927413), (26.698608, -19.91253), (26.684758, -19.894547), (26.67717, -19.886815), (26.673803, -19.883385), (26.659437, -19.875737), (26.614065, -19.863438), (26.595565, -19.855583), (26.581922, -19.842147), (26.574791, -19.819513), (26.566316, -19.800806), (26.549263, -19.784063), (26.508852, -19.759258), (26.489731, -19.75192), (26.450251, -19.743342), (26.431854, -19.73652), (26.412837, -19.71957), (26.385242, -19.679056), (26.362711, -19.667584), (26.332325, -19.662416), (26.324367, -19.659109), (26.312171, -19.651358), (26.312481, -19.649601), (26.319096, -19.646293), (26.326331, -19.633891), (26.333462, -19.613014), (26.330981, -19.604952), (26.32106, -19.592033), (26.313205, -19.584178), (26.30349, -19.577254), (26.292638, -19.572499), (26.239101, -19.571466), (26.194452, -19.560200000000002), (26.155488, -19.537153), (26.13027, -19.501082), (26.034359, -19.243734), (26.011414, -19.199809), (25.981132, -19.161775), (25.956534, -19.122088), (25.948576, -19.103277), (25.944855, -19.079196), (25.948059, -19.058732), (25.964389, -19.021629), (25.9678, -19.000958), (25.967449, -18.999925), (25.940721, -18.921273), (25.815251, -18.813993), (25.779491, -18.738752), (25.773393, -18.665578), (25.761921, -18.630335), (25.736909, -18.608734), (25.698255, -18.590234), (25.669523, -18.566049), (25.622084, -18.501143), (25.608442, -18.487708), (25.574439, -18.465693), (25.508499, -18.399134), (25.49558, -18.378877), (25.490516, -18.365545), (25.481163, -18.323377), (25.473204, -18.303429), (25.440855, -18.2532), (25.408816, -18.175995), (25.387525, -18.138995), (25.357449, -18.115844), (25.323446, -18.09662), (25.296368, -18.068612), (25.255026, -18.001122), (25.226088, -17.931876), (25.21937, -17.908001), (25.21937, -17.879786), (25.259781, -17.794107), (25.266705, -17.800928), (25.285412, -17.809299), (25.315901, -17.83214), (25.335538, -17.841235), (25.345254, -17.842579), (25.376466, -17.841235), (25.409539, -17.853018), (25.420288, -17.854878), (25.49558, -17.854878), (25.500748, -17.856015), (25.510153, -17.861183), (25.516458, -17.862319), (25.522142, -17.860149), (25.530927, -17.850951), (25.536818, -17.848677), (25.603997, -17.836171), 
(25.657017, -17.81395), (25.681409, -17.81147), (25.694224, -17.819428), (25.70642, -17.829867), (25.743834, -17.839375), (25.765951, -17.849814), (25.786002, -17.862216), (25.794683, -17.872655), (25.804399, -17.888158), (25.849667, -17.906658), (25.86362, -17.923814), (25.847497, -17.929395), (25.846153, -17.943658), (25.853490999999998, -17.959988), (25.86362, -17.971563), (25.924495, -17.998952), (25.966973, -18.000502), (25.978548, -17.998952), (26.033739, -17.971563), (26.04056, -17.978488), (26.046554, -17.966292), (26.062471, -17.962882), (26.081178, -17.962365), (26.095234, -17.958541), (26.096164, -17.954614), (26.0942, -17.941901), (26.095234, -17.938077), (26.101228, -17.935803), (26.118591, -17.931566), (26.135438, -17.922574), (26.158589, -17.918337), (26.167477, -17.913582), (26.203031, -17.887227), (26.211919, -17.882783), (26.221117, -17.886297), (26.228249, -17.894669), (26.233933, -17.903971), (26.239204, -17.910172), (26.248299, -17.913376), (26.294291, -17.918543), (26.3038, -17.922781), (26.311965, -17.928362), (26.318269, -17.934356), (26.325504, -17.93601), (26.362711, -17.930636), (26.408599, -17.939007), (26.485494, -17.979315), (26.527145, -17.992027), (26.553604, -17.996471), (26.570243, -18.002879), (26.583369, -18.013215), (26.598872, -18.029958), (26.612721, -18.041223), (26.628844, -18.049181), (26.685689, -18.066751), (26.700003, -18.069232), (26.71194, -18.065821), (26.740569, -18.0405), (26.753591, -18.032955), (26.769714, -18.029028), (26.794002, -18.026237), (26.88826, -17.984586), (26.912031, -17.992027), (26.94867, -17.968876), (26.95916, -17.964742), (27.006289, -17.962675), (27.021275, -17.958541), (27.048457, -17.944278), (27.078171, -17.916993), (27.11543, -17.882163), (27.149019, -17.842476), (27.146539, -17.818911), (27.145299, -17.794107), (27.146952, -17.783875), (27.157081, -17.769302), (27.422078, -17.504822), (27.524294, -17.415112), (27.577314, -17.363125), (27.604495, -17.312792), (27.624856, -17.233314), (27.641186, -17.198484), (27.777301, -17.001183), (27.816886, -16.959636), (27.868562, -16.929663), (28.022993, -16.865393), (28.113922, -16.827551), (28.21252, -16.748589), (28.280113, -16.706524), (28.643295, -16.568755), (28.690734, -16.56028), (28.718794, -16.56028), (28.73285, -16.55811), (28.741377, -16.550668), (28.761117, -16.532271), (28.769282, -16.515218), (28.808866, -16.486279), (28.822509, -16.470776), (28.829124, -16.434603), (28.833051, -16.426438), (28.857236, -16.388198), (28.857029, -16.36546), (28.840492, -16.323602), (28.836772, -16.306342), (28.840286, -16.284741), (28.86416, -16.231205), (28.847107, -16.202679), (28.852481, -16.162785), (28.8654, -16.121237), (28.870981, -16.087234), (28.868501, -16.08217), (28.86385, -16.076589), (28.859303, -16.069561), (28.857236, -16.060466), (28.860336, -16.049407), (28.874082, -16.028943), (28.877183, -16.022018), (28.898887, -15.995457), (28.932373, -15.963727), (28.946862, -15.957235), (28.951287, -15.955252), (28.972784, -15.951428), (29.018053, -15.950602), (29.042341, -15.946261), (29.055053, -15.934375), (29.076344, -15.895411), (29.086162, -15.884559), (29.102182, -15.870916), (29.121716, -15.859341), (29.141869, -15.854483), (29.150964, -15.848799), (29.186311, -15.812832), (29.406969, -15.714233), (29.422059, -15.711030000000001), (29.508462, -15.703588), (29.526239, -15.692839), (29.563446, -15.662144), (29.587217, -15.655736), (29.608559, -15.658422999999999), (29.62799, -15.663591), (29.648505, -15.666588), (29.672793, -15.663281), (29.73005, -15.644677), 
(29.773252, -15.638062), (29.814283, -15.619666), (29.837331, -15.614808), (29.881773, -15.618839), (29.967504, -15.641473), (30.010654, -15.646227)]), 6) +``` ```response 0.45539 ``` + +### Input parameters + +### Returned value + +## polygonsIntersectionCartesian + +Calculates the intersection of polygons. + +### Example + +``` sql +SELECT wkt(polygonsIntersectionCartesian([[[(0., 0.), (0., 3.), (1., 2.9), (2., 2.6), (2.6, 2.), (2.9, 1.), (3., 0.), (0., 0.)]]], [[[(1., 1.), (1., 4.), (4., 4.), (4., 1.), (1., 1.)]]])) +``` +```response +MULTIPOLYGON(((1 2.9,2 2.6,2.6 2,2.9 1,1 1,1 2.9))) +``` + +### Input parameters + +Polygons + +### Returned value + +MultiPolygon + +## polygonAreaCartesian + +Calculates the area of a polygon. + +### Example + +``` sql +SELECT polygonAreaCartesian([[[(0., 0.), (0., 5.), (5., 5.), (5., 0.)]]]) +``` +```response +25 +``` + +### Input parameters + +Polygon + +### Returned value + +Float64 + +## polygonPerimeterCartesian + +Calculates the perimeter of a polygon. + +### Example + +``` sql +SELECT polygonPerimeterCartesian([[[(0., 0.), (0., 5.), (5., 5.), (5., 0.)]]]) +``` +```response +15 +``` + +### Input parameters + +Polygon + +### Returned value + +Float64 + +## polygonsUnionCartesian + +Calculates the union of polygons. + +### Example + +``` sql +SELECT wkt(polygonsUnionCartesian([[[(0., 0.), (0., 3.), (1., 2.9), (2., 2.6), (2.6, 2.), (2.9, 1), (3., 0.), (0., 0.)]]], [[[(1., 1.), (1., 4.), (4., 4.), (4., 1.), (1., 1.)]]])) +``` +```response +MULTIPOLYGON(((1 2.9,1 4,4 4,4 1,2.9 1,3 0,0 0,0 3,1 2.9))) +``` + +### Input parameters + +Polygons + +### Returned value + +MultiPolygon + +For more information on geometry systems, see this [presentation](https://archive.fosdem.org/2020/schedule/event/working_with_spatial_trajectories_in_boost_geometry/attachments/slides/3988/export/events/attachments/working_with_spatial_trajectories_in_boost_geometry/slides/3988/FOSDEM20_vissarion.pdf) about the Boost library, which is what ClickHouse uses. + diff --git a/docs/en/sql-reference/functions/geo/s2.md b/docs/en/sql-reference/functions/geo/s2.md index 3cd66cfaaeb..63fe5ca8530 100644 --- a/docs/en/sql-reference/functions/geo/s2.md +++ b/docs/en/sql-reference/functions/geo/s2.md @@ -21,12 +21,12 @@ geoToS2(lon, lat) **Arguments** -- `lon` — Longitude. [Float64](../../../sql-reference/data-types/float.md). -- `lat` — Latitude. [Float64](../../../sql-reference/data-types/float.md). +- `lon` — Longitude. [Float64](../../../sql-reference/data-types/float.md). +- `lat` — Latitude. [Float64](../../../sql-reference/data-types/float.md). **Returned values** -- S2 point index. +- S2 point index. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). @@ -58,11 +58,11 @@ s2ToGeo(s2index) **Arguments** -- `s2index` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2index` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- A tuple consisting of two values: `tuple(lon,lat)`. +- A tuple consisting of two values: `tuple(lon,lat)`. Type: `lon` — [Float64](../../../sql-reference/data-types/float.md). `lat` — [Float64](../../../sql-reference/data-types/float.md). @@ -84,7 +84,7 @@ Result: ## s2GetNeighbors -Returns S2 neighbor indixes corresponding to the provided [S2](#s2index). Each cell in the S2 system is a quadrilateral bounded by four geodesics. So, each cell has 4 neighbors. +Returns S2 neighbor indexes corresponding to the provided [S2](#s2index).
Each cell in the S2 system is a quadrilateral bounded by four geodesics. So, each cell has 4 neighbors. **Syntax** ``` sql s2GetNeighbors(s2index) ``` **Arguments** -- `s2index` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2index` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- An array consisting of 4 neighbor indexes: `array[s2index1, s2index3, s2index2, s2index4]`. +- An array consisting of 4 neighbor indexes: `array[s2index1, s2index3, s2index2, s2index4]`. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). @@ -130,12 +130,12 @@ s2CellsIntersect(s2index1, s2index2) **Arguments** -- `siIndex1`, `s2index2` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2index1`, `s2index2` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- 1 — If the cells intersect. -- 0 — If the cells don't intersect. +- 1 — If the cells intersect. +- 0 — If the cells don't intersect. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). @@ -167,14 +167,14 @@ s2CapContains(center, degrees, point) **Arguments** -- `center` — S2 point index corresponding to the cap. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `degrees` — Radius of the cap in degrees. [Float64](../../../sql-reference/data-types/float.md). -- `point` — S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `center` — S2 point index corresponding to the cap. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `degrees` — Radius of the cap in degrees. [Float64](../../../sql-reference/data-types/float.md). +- `point` — S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- 1 — If the cap contains the S2 point index. -- 0 — If the cap doesn't contain the S2 point index. +- 1 — If the cap contains the S2 point index. +- 0 — If the cap doesn't contain the S2 point index. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). @@ -206,13 +206,13 @@ s2CapUnion(center1, radius1, center2, radius2) **Arguments** -- `center1`, `center2` — S2 point indixes corresponding to the two input caps. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `radius1`, `radius2` — Radius of the two input caps in degrees. [Float64](../../../sql-reference/data-types/float.md). +- `center1`, `center2` — S2 point indexes corresponding to the two input caps. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `radius1`, `radius2` — Radius of the two input caps in degrees. [Float64](../../../sql-reference/data-types/float.md). **Returned values** -- `center` — S2 point index corresponding the center of the smallest cap containing the two input caps. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `radius` — Radius of the smallest cap containing the two input caps. Type: [Float64](../../../sql-reference/data-types/float.md). +- `center` — S2 point index corresponding to the center of the smallest cap containing the two input caps. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `radius` — Radius of the smallest cap containing the two input caps. Type: [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -242,14 +242,14 @@ s2RectAdd(s2pointLow, s2pointHigh, s2Point) **Arguments** -- `s2PointLow` — Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
-- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2Point` — Target S2 point index that the bound rectangle should be grown to include. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2PointLow` — Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2Point` — Target S2 point index that the bound rectangle should be grown to include. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- `s2PointLow` — Low S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2PointHigh` — Hight S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/float.md). +- `s2PointLow` — Low S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2PointHigh` — High S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Example** @@ -279,14 +279,14 @@ s2RectContains(s2PointLow, s2PointHi, s2Point) **Arguments** -- `s2PointLow` — Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2Point` — Target S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2PointLow` — Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2Point` — Target S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- 1 — If the rectangle contains the given S2 point. -- 0 — If the rectangle doesn't contain the given S2 point. +- 1 — If the rectangle contains the given S2 point. +- 0 — If the rectangle doesn't contain the given S2 point. **Example** @@ -316,13 +316,13 @@ s2RectUnion(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2PointHi) **Arguments** -- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2UnionRect2PointHi` — High S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2UnionRect2PointHi` — High S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Example** @@ -352,13 +352,13 @@ s2RectIntersection(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2Poin **Arguments** -- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2UnionRect2PointHi` — High S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2UnionRect2PointHi` — High S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Example** diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 69dc73e2fb0..635c8f4e0ec 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/functions/hash-functions -sidebar_position: 50 +sidebar_position: 85 sidebar_label: Hash --- @@ -64,7 +64,7 @@ This is a cryptographic hash function. It works at least three times faster than The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm: 1. The first and the second hash value are concatenated to an array which is hashed. -2. The previously calculated hash value and the hash of the third input paramter are hashed in a similar way. +2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way. 3. This calculation is repeated for all remaining hash values of the original input. **Arguments** @@ -125,7 +125,7 @@ SELECT sipHash64Keyed((506097522914230528, 1084818905618843912), array('e','x',' Like [sipHash64](#hash_functions-siphash64) but produces a 128-bit hash value, i.e. the final xor-folding state is done up to 128 bits. -:::warning +:::note This 128-bit variant differs from the reference implementation and it's weaker. This version exists because, when it was written, there was no official 128-bit extension for SipHash. New projects should probably use [sipHash128Reference](#hash_functions-siphash128reference). 
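The practical effect of this note can be checked directly. The following query is an illustrative sketch; it assumes both `sipHash128` and `sipHash128Reference` are available in the target build. Because the two variants are different algorithms, their digests for the same input are expected to differ, so the comparison returns 0:

```sql
-- Hedged check: the legacy 128-bit SipHash and the reference variant
-- compute different digests, so equality should not hold.
SELECT sipHash128('foo') = sipHash128Reference('foo') AS digests_match;
```
```response
┌─digests_match─┐
│             0 │
└───────────────┘
```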
@@ -165,7 +165,7 @@ Result: Same as [sipHash128](#hash_functions-siphash128) but additionally takes an explicit key argument instead of using a fixed key. -:::warning +:::note This 128-bit variant differs from the reference implementation and it's weaker. This version exists because, when it was written, there was no official 128-bit extension for SipHash. New projects should probably use [sipHash128ReferenceKeyed](#hash_functions-siphash128referencekeyed). @@ -279,6 +279,8 @@ cityHash64(par1,...) This is a fast non-cryptographic hash function. It uses the CityHash algorithm for string parameters and implementation-specific fast non-cryptographic hash function for parameters with other data types. The function uses the CityHash combinator to get the final results. +Note that Google changed the algorithm of CityHash after it was added to ClickHouse. In other words, ClickHouse's cityHash64 and Google's upstream CityHash now produce different results. ClickHouse's cityHash64 corresponds to CityHash v1.0.2. + **Arguments** The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). @@ -335,11 +337,11 @@ Even in these cases, we recommend applying the function offline and pre-calculat **Arguments** -- `s` — Input string for SHA hash calculation. [String](/docs/en/sql-reference/data-types/string.md). +- `s` — Input string for SHA hash calculation. [String](/docs/en/sql-reference/data-types/string.md). **Returned value** -- SHA hash as a hex-unencoded FixedString. SHA-1 returns as FixedString(20), SHA-224 as FixedString(28), SHA-256 — FixedString(32), SHA-512 — FixedString(64). +- SHA hash as a hex-unencoded FixedString. SHA-1 returns as FixedString(20), SHA-224 as FixedString(28), SHA-256 — FixedString(32), SHA-512 — FixedString(64). Type: [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). @@ -441,11 +443,11 @@ SELECT farmHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:0 ## javaHash -Calculates JavaHash from a [string](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452), -[Byte](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Byte.java#l405), -[Short](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Short.java#l410), -[Integer](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Integer.java#l959), -[Long](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Long.java#l1060). +Calculates JavaHash from a [string](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452), +[Byte](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Byte.java#l405), +[Short](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Short.java#l410), +[Integer](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Integer.java#l959), +[Long](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Long.java#l1060).
This hash function is neither fast nor of good quality. The only reason to use it is when this algorithm is already used in another system and you have to calculate exactly the same result. Note that Java only supports calculating the hash of signed integers, so if you want to calculate the hash of unsigned integers you must cast them to the proper signed ClickHouse types. @@ -502,7 +504,7 @@ javaHashUTF16LE(stringUtf16le) **Arguments** -- `stringUtf16le` — a string in UTF-16LE encoding. +- `stringUtf16le` — a string in UTF-16LE encoding. **Returned value** @@ -607,8 +609,8 @@ Both functions take a variable number of input parameters. Arguments can be any **Returned Value** -- The `murmurHash2_32` function returns hash value having the [UInt32](/docs/en/sql-reference/data-types/int-uint.md) data type. -- The `murmurHash2_64` function returns hash value having the [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type. +- The `murmurHash2_32` function returns hash value having the [UInt32](/docs/en/sql-reference/data-types/int-uint.md) data type. +- The `murmurHash2_64` function returns hash value having the [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type. **Example** @@ -634,11 +636,11 @@ gccMurmurHash(par1, ...) **Arguments** -- `par1, ...` — A variable number of parameters that can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types). +- `par1, ...` — A variable number of parameters that can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types). **Returned value** -- Calculated hash value. +- Calculated hash value. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). @@ -660,6 +662,45 @@ Result: └──────────────────────┴─────────────────────┘ ``` + +## kafkaMurmurHash + +Calculates a 32-bit [MurmurHash2](https://github.com/aappleby/smhasher) hash value using the same hash seed as [Kafka](https://github.com/apache/kafka/blob/461c5cfe056db0951d9b74f5adc45973670404d7/clients/src/main/java/org/apache/kafka/common/utils/Utils.java#L482) and with the highest bit cleared, to be compatible with the [Default Partitioner](https://github.com/apache/kafka/blob/139f7709bd3f5926901a21e55043388728ccca78/clients/src/main/java/org/apache/kafka/clients/producer/internals/BuiltInPartitioner.java#L328). + +**Syntax** + +```sql +kafkaMurmurHash(par1, ...) +``` + +**Arguments** + +- `par1, ...` — A variable number of parameters that can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types). + +**Returned value** + +- Calculated hash value. + +Type: [UInt32](/docs/en/sql-reference/data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT + kafkaMurmurHash('foobar') AS res1, + kafkaMurmurHash(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS res2 +``` + +Result: + +```response +┌───────res1─┬─────res2─┐ +│ 1357151166 │ 85479775 │ +└────────────┴──────────┘ +``` + ## murmurHash3_32, murmurHash3_64 Produces a [MurmurHash3](https://github.com/aappleby/smhasher) hash value. @@ -675,8 +716,8 @@ Both functions take a variable number of input parameters. Arguments can be any **Returned Value** -- The `murmurHash3_32` function returns a [UInt32](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. -- The `murmurHash3_64` function returns a [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +- The `murmurHash3_32` function returns a [UInt32](/docs/en/sql-reference/data-types/int-uint.md) data type hash value.
+- The `murmurHash3_64` function returns a [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. **Example** @@ -702,7 +743,7 @@ murmurHash3_128(expr) **Arguments** -- `expr` — A list of [expressions](/docs/en/sql-reference/syntax.md/#syntax-expressions). [String](/docs/en/sql-reference/data-types/string.md). +- `expr` — A list of [expressions](/docs/en/sql-reference/syntax.md/#syntax-expressions). [String](/docs/en/sql-reference/data-types/string.md). **Returned value** @@ -738,7 +779,7 @@ xxh3(expr) **Arguments** -- `expr` — A list of [expressions](/docs/en/sql-reference/syntax.md/#syntax-expressions) of any data type. +- `expr` — A list of [expressions](/docs/en/sql-reference/syntax.md/#syntax-expressions) of any data type. **Returned value** @@ -798,7 +839,7 @@ Result: **See Also** -- [xxHash](http://cyan4973.github.io/xxHash/). +- [xxHash](http://cyan4973.github.io/xxHash/). ## ngramSimHash @@ -814,12 +855,12 @@ ngramSimHash(string[, ngramsize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Hash value. +- Hash value. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). @@ -853,12 +894,12 @@ ngramSimHashCaseInsensitive(string[, ngramsize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Hash value. +- Hash value. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). @@ -892,12 +933,12 @@ ngramSimHashUTF8(string[, ngramsize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Hash value. +- Hash value. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). @@ -931,12 +972,12 @@ ngramSimHashCaseInsensitiveUTF8(string[, ngramsize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. 
[UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Hash value. +- Hash value. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). @@ -970,12 +1011,12 @@ wordShingleSimHash(string[, shinglesize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Hash value. +- Hash value. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). @@ -1009,12 +1050,12 @@ wordShingleSimHashCaseInsensitive(string[, shinglesize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Hash value. +- Hash value. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). @@ -1048,12 +1089,12 @@ wordShingleSimHashUTF8(string[, shinglesize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optinal. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Hash value. +- Hash value. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). @@ -1087,12 +1128,12 @@ wordShingleSimHashCaseInsensitiveUTF8(string[, shinglesize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Hash value. +- Hash value. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). @@ -1126,13 +1167,13 @@ ngramMinHash(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`.
[UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. +- Tuple with two hashes — the minimum and the maximum. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). @@ -1166,13 +1207,13 @@ ngramMinHashCaseInsensitive(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. +- Tuple with two hashes — the minimum and the maximum. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). @@ -1206,13 +1247,13 @@ ngramMinHashUTF8(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. +- Tuple with two hashes — the minimum and the maximum. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). 
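The practical difference between the case-sensitive and case-insensitive variants can be checked directly. A minimal sketch, assuming the default `ngramsize` and `hashnum`: strings that differ only in letter case should produce identical MinHash tuples under the case-insensitive variant, while the case-sensitive function hashes different n-gram sets and is expected to produce different tuples.

```sql
-- 'ClickHouse' and 'clickhouse' differ only in case: the case-insensitive
-- MinHash tuples should match (1), the case-sensitive ones should not (0).
SELECT
    ngramMinHashCaseInsensitive('ClickHouse') = ngramMinHashCaseInsensitive('clickhouse') AS insensitive_match,
    ngramMinHash('ClickHouse') = ngramMinHash('clickhouse') AS sensitive_match;
```
```response
┌─insensitive_match─┬─sensitive_match─┐
│                 1 │               0 │
└───────────────────┴─────────────────┘
```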
@@ -1246,13 +1287,13 @@ ngramMinHashCaseInsensitiveUTF8(string [, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. +- Tuple with two hashes — the minimum and the maximum. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). @@ -1284,13 +1325,13 @@ ngramMinHashArg(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. +- Tuple with two tuples with `hashnum` n-grams each. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). @@ -1322,13 +1363,13 @@ ngramMinHashArgCaseInsensitive(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. 
[UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. +- Tuple with two tuples with `hashnum` n-grams each. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). @@ -1360,13 +1401,13 @@ ngramMinHashArgUTF8(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. +- Tuple with two tuples with `hashnum` n-grams each. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). @@ -1398,13 +1439,13 @@ ngramMinHashArgCaseInsensitiveUTF8(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. +- Tuple with two tuples with `hashnum` n-grams each. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). 
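A typical use of these MinHash tuples is detecting semi-duplicate strings: the smaller the Hamming distance between two tuples, the more the underlying n-gram sets overlap. The sketch below pairs `ngramMinHash` with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md); the two input texts are arbitrary examples, and the exact distance depends on the inputs and parameters, so no result is shown.

```sql
-- Distance 0 means the minimum and maximum n-gram hashes both agree;
-- larger values indicate less overlap between the two texts' n-grams.
SELECT tupleHammingDistance(
    ngramMinHash('ClickHouse is a column-oriented DBMS'),
    ngramMinHash('ClickHouse is a column-oriented database')
) AS distance;
```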
@@ -1438,13 +1479,13 @@ wordShingleMinHash(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. +- Tuple with two hashes — the minimum and the maximum. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). @@ -1478,13 +1519,13 @@ wordShingleMinHashCaseInsensitive(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. +- Tuple with two hashes — the minimum and the maximum. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). @@ -1518,13 +1559,13 @@ wordShingleMinHashUTF8(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). 
+- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. +- Tuple with two hashes — the minimum and the maximum. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). @@ -1558,13 +1599,13 @@ wordShingleMinHashCaseInsensitiveUTF8(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. +- Tuple with two hashes — the minimum and the maximum. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). @@ -1596,13 +1637,13 @@ wordShingleMinHashArg(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. +- Tuple with two tuples with `hashnum` word shingles each. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). @@ -1634,13 +1675,13 @@ wordShingleMinHashArgCaseInsensitive(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). 
-- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. +- Tuple with two tuples with `hashnum` word shingles each. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). @@ -1672,13 +1713,13 @@ wordShingleMinHashArgUTF8(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. +- Tuple with two tuples with `hashnum` word shingles each. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). @@ -1710,13 +1751,13 @@ wordShingleMinHashArgCaseInsensitiveUTF8(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. 
[UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. +- Tuple with two tuples with `hashnum` word shingles each. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). diff --git a/docs/en/sql-reference/functions/in-functions.md b/docs/en/sql-reference/functions/in-functions.md index 185672227da..193c54cea44 100644 --- a/docs/en/sql-reference/functions/in-functions.md +++ b/docs/en/sql-reference/functions/in-functions.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/functions/in-functions -sidebar_position: 60 +sidebar_position: 90 sidebar_label: IN Operator --- diff --git a/docs/en/sql-reference/functions/index.md b/docs/en/sql-reference/functions/index.md index 22e79ec6623..42d402e9d44 100644 --- a/docs/en/sql-reference/functions/index.md +++ b/docs/en/sql-reference/functions/index.md @@ -1,10 +1,10 @@ --- slug: /en/sql-reference/functions/ -sidebar_position: 32 -sidebar_label: Functions +sidebar_position: 1 +sidebar_label: Overview --- -# Functions +# Regular Functions There are at least\* two types of functions - regular functions (they are just called “functions”) and aggregate functions. These are completely different concepts. Regular functions work as if they are applied to each row separately (for each row, the result of the function does not depend on the other rows). Aggregate functions accumulate a set of values from various rows (i.e. they depend on the entire set of rows). @@ -37,8 +37,8 @@ Functions can be implemented in different ways for constant and non-constant arg Functions have the following behaviors: -- If at least one of the arguments of the function is `NULL`, the function result is also `NULL`. -- Special behavior that is specified individually in the description of each function. In the ClickHouse source code, these functions have `UseDefaultImplementationForNulls=false`. +- If at least one of the arguments of the function is `NULL`, the function result is also `NULL`. +- Special behavior that is specified individually in the description of each function. In the ClickHouse source code, these functions have `UseDefaultImplementationForNulls=false`. ## Constancy @@ -59,244 +59,6 @@ A lambda function that accepts multiple arguments can also be passed to a higher For some functions the first argument (the lambda function) can be omitted. In this case, identical mapping is assumed. -## SQL User Defined Functions +## User Defined Functions (UDFs) -Custom functions from lambda expressions can be created using the [CREATE FUNCTION](../statements/create/function.md) statement. To delete these functions use the [DROP FUNCTION](../statements/drop.md#drop-function) statement. - -## Executable User Defined Functions -ClickHouse can call any external executable program or script to process data. - -The configuration of executable user defined functions can be located in one or more xml-files. 
The path to the configuration is specified in the [user_defined_executable_functions_config](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_defined_executable_functions_config) parameter. - -A function configuration contains the following settings: - -- `name` - a function name. -- `command` - script name to execute or command if `execute_direct` is false. -- `argument` - argument description with the `type`, and optional `name` of an argument. Each argument is described in a separate setting. Specifying name is necessary if argument names are part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Default argument name value is `c` + argument_number. -- `format` - a [format](../../interfaces/formats.md) in which arguments are passed to the command. -- `return_type` - the type of a returned value. -- `return_name` - name of retuned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`. -- `type` - an executable type. If `type` is set to `executable` then single command is started. If it is set to `executable_pool` then a pool of commands is created. -- `max_command_execution_time` - maximum execution time in seconds for processing block of data. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`. -- `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. Optional. Default value is `10`. -- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter. -- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter. -- `pool_size` - the size of a command pool. Optional. Default value is `16`. -- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`. -- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder specified by [user_scripts_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `1`. Optional parameter. -- `lifetime` - the reload interval of a function in seconds. If it is set to `0` then the function is not reloaded. Default value is `0`. Optional parameter. - -The command must read arguments from `STDIN` and must output the result to `STDOUT`. The command must process arguments iteratively. That is after processing a chunk of arguments it must wait for the next chunk. - -**Example** - -Creating `test_function` using XML configuration. -File test_function.xml. -```xml - - - executable - test_function_python - String - - UInt64 - value - - TabSeparated - test_function.py - - -``` - -Script file inside `user_scripts` folder `test_function.py`. 
- -```python -#!/usr/bin/python3 - -import sys - -if __name__ == '__main__': - for line in sys.stdin: - print("Value " + line, end='') - sys.stdout.flush() -``` - -Query: - -``` sql -SELECT test_function_python(toUInt64(2)); -``` - -Result: - -``` text -┌─test_function_python(2)─┐ -│ Value 2 │ -└─────────────────────────┘ -``` - -Creating `test_function_sum` manually specifying `execute_direct` to `0` using XML configuration. -File test_function.xml. -```xml - - - executable - test_function_sum - UInt64 - - UInt64 - lhs - - - UInt64 - rhs - - TabSeparated - cd /; clickhouse-local --input-format TabSeparated --output-format TabSeparated --structure 'x UInt64, y UInt64' --query "SELECT x + y FROM table" - 0 - - -``` - -Query: - -``` sql -SELECT test_function_sum(2, 2); -``` - -Result: - -``` text -┌─test_function_sum(2, 2)─┐ -│ 4 │ -└─────────────────────────┘ -``` - -Creating `test_function_sum_json` with named arguments and format [JSONEachRow](../../interfaces/formats.md#jsoneachrow) using XML configuration. -File test_function.xml. -```xml - - - executable - test_function_sum_json - UInt64 - result_name - - UInt64 - argument_1 - - - UInt64 - argument_2 - - JSONEachRow - test_function_sum_json.py - - -``` - -Script file inside `user_scripts` folder `test_function_sum_json.py`. - -```python -#!/usr/bin/python3 - -import sys -import json - -if __name__ == '__main__': - for line in sys.stdin: - value = json.loads(line) - first_arg = int(value['argument_1']) - second_arg = int(value['argument_2']) - result = {'result_name': first_arg + second_arg} - print(json.dumps(result), end='\n') - sys.stdout.flush() -``` - -Query: - -``` sql -SELECT test_function_sum_json(2, 2); -``` - -Result: - -``` text -┌─test_function_sum_json(2, 2)─┐ -│ 4 │ -└──────────────────────────────┘ -``` - -Executable user defined functions can take constant parameters configured in `command` setting (works only for user defined functions with `executable` type). -File test_function_parameter_python.xml. -```xml - - - executable - test_function_parameter_python - String - - UInt64 - - TabSeparated - test_function_parameter_python.py {test_parameter:UInt64} - - -``` - -Script file inside `user_scripts` folder `test_function_parameter_python.py`. - -```python -#!/usr/bin/python3 - -import sys - -if __name__ == "__main__": - for line in sys.stdin: - print("Parameter " + str(sys.argv[1]) + " value " + str(line), end="") - sys.stdout.flush() -``` - -Query: - -``` sql -SELECT test_function_parameter_python(1)(2); -``` - -Result: - -``` text -┌─test_function_parameter_python(1)(2)─┐ -│ Parameter 1 value 2 │ -└──────────────────────────────────────┘ -``` - -## Error Handling - -Some functions might throw an exception if the data is invalid. In this case, the query is canceled and an error text is returned to the client. For distributed processing, when an exception occurs on one of the servers, the other servers also attempt to abort the query. - -## Evaluation of Argument Expressions - -In almost all programming languages, one of the arguments might not be evaluated for certain operators. This is usually the operators `&&`, `||`, and `?:`. -But in ClickHouse, arguments of functions (operators) are always evaluated. This is because entire parts of columns are evaluated at once, instead of calculating each row separately. 
- -## Performing Functions for Distributed Query Processing - -For distributed query processing, as many stages of query processing as possible are performed on remote servers, and the rest of the stages (merging intermediate results and everything after that) are performed on the requestor server. - -This means that functions can be performed on different servers. -For example, in the query `SELECT f(sum(g(x))) FROM distributed_table GROUP BY h(y),` - -- if a `distributed_table` has at least two shards, the functions ‘g’ and ‘h’ are performed on remote servers, and the function ‘f’ is performed on the requestor server. -- if a `distributed_table` has only one shard, all the ‘f’, ‘g’, and ‘h’ functions are performed on this shard’s server. - -The result of a function usually does not depend on which server it is performed on. However, sometimes this is important. -For example, functions that work with dictionaries use the dictionary that exists on the server they are running on. -Another example is the `hostName` function, which returns the name of the server it is running on in order to make `GROUP BY` by servers in a `SELECT` query. - -If a function in a query is performed on the requestor server, but you need to perform it on remote servers, you can wrap it in an ‘any’ aggregate function or add it to a key in `GROUP BY`. - - -## Related Content - -- [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs) +ClickHouse supports user-defined functions. See [UDFs](/docs/en/sql-reference/functions/udf.md). diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index 9357f75b8e6..8cb35483555 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/functions/introspection -sidebar_position: 65 +sidebar_position: 100 sidebar_label: Introspection --- @@ -8,15 +8,15 @@ sidebar_label: Introspection You can use functions described in this chapter to introspect [ELF](https://en.wikipedia.org/wiki/Executable_and_Linkable_Format) and [DWARF](https://en.wikipedia.org/wiki/DWARF) for query profiling. -:::warning +:::note These functions are slow and may impose security considerations. ::: For proper operation of introspection functions: -- Install the `clickhouse-common-static-dbg` package. +- Install the `clickhouse-common-static-dbg` package. -- Set the [allow_introspection_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting to 1. +- Set the [allow_introspection_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting to 1. For security reasons introspection functions are disabled by default. @@ -36,17 +36,17 @@ addressToLine(address_of_binary_instruction) **Arguments** -- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process. +- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process. **Returned value** -- Source code filename and the line number in this file delimited by colon. +- Source code filename and the line number in this file delimited by colon. For example, `/build/obj-x86_64-linux-gnu/../src/Common/ThreadPool.cpp:199`, where `199` is a line number. -- Name of a binary, if the function couldn’t find the debug information. 
+- Name of a binary, if the function couldn’t find the debug information. -- Empty string, if the address is not valid. +- Empty string, if the address is not valid. Type: [String](../../sql-reference/data-types/string.md). @@ -129,15 +129,15 @@ addressToLineWithInlines(address_of_binary_instruction) **Arguments** -- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process. +- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process. **Returned value** -- Array which first element is source code filename and the line number in this file delimited by colon. And from second element, inline functions' source code filename and line number and function name are listed. +- Array which first element is source code filename and the line number in this file delimited by colon. And from second element, inline functions' source code filename and line number and function name are listed. -- Array with single element which is name of a binary, if the function couldn’t find the debug information. +- Array with single element which is name of a binary, if the function couldn’t find the debug information. -- Empty array, if the address is not valid. +- Empty array, if the address is not valid. Type: [Array(String)](../../sql-reference/data-types/array.md). @@ -232,12 +232,12 @@ addressToSymbol(address_of_binary_instruction) **Arguments** -- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process. +- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process. **Returned value** -- Symbol from ClickHouse object files. -- Empty string, if the address is not valid. +- Symbol from ClickHouse object files. +- Empty string, if the address is not valid. Type: [String](../../sql-reference/data-types/string.md). @@ -329,12 +329,12 @@ demangle(symbol) **Arguments** -- `symbol` ([String](../../sql-reference/data-types/string.md)) — Symbol from an object file. +- `symbol` ([String](../../sql-reference/data-types/string.md)) — Symbol from an object file. **Returned value** -- Name of the C++ function. -- Empty string if a symbol is not valid. +- Name of the C++ function. +- Empty string if a symbol is not valid. Type: [String](../../sql-reference/data-types/string.md). @@ -425,7 +425,7 @@ tid() **Returned value** -- Current thread id. [Uint64](../../sql-reference/data-types/int-uint.md#uint-ranges). +- Current thread id. [Uint64](../../sql-reference/data-types/int-uint.md#uint-ranges). **Example** @@ -455,11 +455,11 @@ logTrace('message') **Arguments** -- `message` — Message that is emitted to server log. [String](../../sql-reference/data-types/string.md#string). +- `message` — Message that is emitted to server log. [String](../../sql-reference/data-types/string.md#string). **Returned value** -- Always returns 0. +- Always returns 0. 
**Example** diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md index 3843ca0fc36..0dc1db1161b 100644 --- a/docs/en/sql-reference/functions/ip-address-functions.md +++ b/docs/en/sql-reference/functions/ip-address-functions.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/functions/ip-address-functions -sidebar_position: 55 +sidebar_position: 95 sidebar_label: IP Addresses --- @@ -147,11 +147,11 @@ IPv6StringToNum(string) **Argument** -- `string` — IP address. [String](../../sql-reference/data-types/string.md). +- `string` — IP address. [String](../../sql-reference/data-types/string.md). **Returned value** -- IPv6 address in binary format. +- IPv6 address in binary format. Type: [FixedString(16)](../../sql-reference/data-types/fixedstring.md). @@ -175,7 +175,7 @@ Result: **See Also** -- [cutIPv6](#cutipv6x-bytestocutforipv6-bytestocutforipv4). +- [cutIPv6](#cutipv6x-bytestocutforipv6-bytestocutforipv4). ## IPv6StringToNumOrDefault(s) @@ -280,12 +280,20 @@ SELECT ## toIPv4OrDefault(string) -Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns 0. +Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns `0.0.0.0` (0 IPv4). ## toIPv4OrNull(string) Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns null. +## toIPv6OrDefault(string) + +Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns `::` (0 IPv6). + +## toIPv6OrNull(string) + +Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns null. + ## toIPv6 Converts a string form of IPv6 address to [IPv6](../../sql-reference/data-types/domains/ipv6.md) type. If the IPv6 address has an invalid format, returns an empty value. @@ -301,11 +309,11 @@ toIPv6(string) **Argument** -- `string` — IP address. [String](../../sql-reference/data-types/string.md) +- `string` — IP address. [String](../../sql-reference/data-types/string.md) **Returned value** -- IP address. +- IP address. Type: [IPv6](../../sql-reference/data-types/domains/ipv6.md). @@ -362,11 +370,11 @@ isIPv4String(string) **Arguments** -- `string` — IP address. [String](../../sql-reference/data-types/string.md). +- `string` — IP address. [String](../../sql-reference/data-types/string.md). **Returned value** -- `1` if `string` is IPv4 address, `0` otherwise. +- `1` if `string` is IPv4 address, `0` otherwise. Type: [UInt8](../../sql-reference/data-types/int-uint.md). @@ -400,11 +408,11 @@ isIPv6String(string) **Arguments** -- `string` — IP address. [String](../../sql-reference/data-types/string.md). +- `string` — IP address. [String](../../sql-reference/data-types/string.md). **Returned value** -- `1` if `string` is IPv6 address, `0` otherwise. +- `1` if `string` is IPv6 address, `0` otherwise. Type: [UInt8](../../sql-reference/data-types/int-uint.md). @@ -441,12 +449,12 @@ This function accepts both IPv4 and IPv6 addresses (and networks) represented as **Arguments** -- `address` — An IPv4 or IPv6 address. [String](../../sql-reference/data-types/string.md). -- `prefix` — An IPv4 or IPv6 network prefix in CIDR. [String](../../sql-reference/data-types/string.md). +- `address` — An IPv4 or IPv6 address. [String](../../sql-reference/data-types/string.md). +- `prefix` — An IPv4 or IPv6 network prefix in CIDR. [String](../../sql-reference/data-types/string.md). **Returned value** -- `1` or `0`. +- `1` or `0`. Type: [UInt8](../../sql-reference/data-types/int-uint.md). 
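A usage sketch for `isIPAddressInRange`, based on the argument description above (the outputs in the comments follow from the documented semantics):

``` sql
-- 127.0.0.1 lies inside the 127.0.0.0/8 network, 10.0.0.1 does not:
SELECT
    isIPAddressInRange('127.0.0.1', '127.0.0.0/8'),  -- returns 1
    isIPAddressInRange('10.0.0.1', '127.0.0.0/8');   -- returns 0
```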
@@ -508,11 +516,11 @@ This function performs reverse DNS resolutions on both IPv4 and IPv6. **Arguments** -- `address` — An IPv4 or IPv6 address. [String](../../sql-reference/data-types/string.md). +- `address` — An IPv4 or IPv6 address. [String](../../sql-reference/data-types/string.md). **Returned value** -- Associated domains (PTR records). +- Associated domains (PTR records). Type: Type: [Array(String)](../../sql-reference/data-types/array.md). diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index bfe2a541647..31d53ba0359 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/functions/json-functions -sidebar_position: 56 +sidebar_position: 105 sidebar_label: JSON --- @@ -109,9 +109,9 @@ SELECT JSONHas('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 4) = 0 `indices_or_keys` is a list of zero or more arguments each of them can be either string or integer. -- String = access object member by key. -- Positive integer = access the n-th member/key from the beginning. -- Negative integer = access the n-th member/key from the end. +- String = access object member by key. +- Positive integer = access the n-th member/key from the beginning. +- Negative integer = access the n-th member/key from the end. Minimum index of the element is 1. Thus the element 0 does not exist. @@ -206,6 +206,7 @@ Examples: ``` sql SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'Tuple(String, Array(Float64))') = ('hello',[-100,200,300]) SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'Tuple(b Array(Float64), a String)') = ([-100,200,300],'hello') +SELECT JSONExtract('{"a": "hello", "b": "world"}', 'Map(String, String)') = map('a', 'hello', 'b', 'world'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(Nullable(Int8))') = [-100, NULL, NULL] SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 4, 'Nullable(Int64)') = NULL SELECT JSONExtract('{"passed": true}', 'passed', 'UInt8') = 1 @@ -235,8 +236,8 @@ JSONExtractKeys(json[, a, b, c...]) **Arguments** -- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. -- `a, b, c...` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [String](../../sql-reference/data-types/string.md) to get the field by the key or an [Integer](../../sql-reference/data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. +- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. +- `a, b, c...` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [String](../../sql-reference/data-types/string.md) to get the field by the key or an [Integer](../../sql-reference/data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. **Returned value** @@ -297,13 +298,13 @@ JSONExtractKeysAndValuesRaw(json[, p, a, t, h]) **Arguments** -- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. -- `p, a, t, h` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. 
Each argument can be either a [string](../../sql-reference/data-types/string.md) to get the field by the key or an [integer](../../sql-reference/data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter.
+- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON.
+- `p, a, t, h` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [string](../../sql-reference/data-types/string.md) to get the field by the key or an [integer](../../sql-reference/data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter.

**Returned values**

-- Array with `('key', 'value')` tuples. Both tuple members are strings.
-- Empty array if the requested object does not exist, or input JSON is invalid.
+- Array with `('key', 'value')` tuples. Both tuple members are strings.
+- Empty array if the requested object does not exist, or input JSON is invalid.

Type: [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), [String](../../sql-reference/data-types/string.md)).

@@ -401,7 +402,7 @@ Before version 21.11 the order of arguments was wrong, i.e. JSON_QUERY(path, jso

Parses a JSON and extract a value as JSON scalar.

-If the value does not exist, an empty string will be returned.
+If the value does not exist, an empty string will be returned by default. With the setting `function_json_value_return_type_allow_nullable` = `true`, `NULL` will be returned instead. If the value is of a complex type (such as struct, array, or map), an empty string will be returned by default; with the setting `function_json_value_return_type_allow_complex` = `true`, the complex value will be returned.

Example:

@@ -410,6 +411,8 @@ SELECT JSON_VALUE('{"hello":"world"}', '$.hello');
SELECT JSON_VALUE('{"array":[[0, 1, 2, 3, 4, 5], [0, -1, -2, -3, -4, -5]]}', '$.array[*][0 to 2, 4]');
SELECT JSON_VALUE('{"hello":2}', '$.hello');
SELECT toTypeName(JSON_VALUE('{"hello":2}', '$.hello'));
+SELECT JSON_VALUE('{"hello":"world"}', '$.b') SETTINGS function_json_value_return_type_allow_nullable = true;
+SELECT JSON_VALUE('{"hello":{"world":"!"}}', '$.hello') SETTINGS function_json_value_return_type_allow_complex = true;
```

Result:

@@ -440,11 +443,11 @@ toJSONString(value)

**Arguments**

-- `value` — Value to serialize. Value may be of any data type.
+- `value` — Value to serialize. Value may be of any data type.

**Returned value**

-- JSON representation of the value.
+- JSON representation of the value.

Type: [String](../../sql-reference/data-types/string.md).

@@ -469,8 +472,8 @@ Result:

**See Also**

-- [output_format_json_quote_64bit_integers](../../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers)
-- [output_format_json_quote_denormals](../../operations/settings/settings.md#settings-output_format_json_quote_denormals)
+- [output_format_json_quote_64bit_integers](../../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers)
+- [output_format_json_quote_denormals](../../operations/settings/settings.md#settings-output_format_json_quote_denormals)

## JSONArrayLength

Returns the number of elements in the outermost JSON array. The function returns NULL if input JSON string is invalid.

**Syntax**

``` sql
JSONArrayLength(json)
```

@@ -487,11 +490,11 @@ Alias: `JSON_ARRAY_LENGTH(json)`.
**Arguments** -- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. +- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. **Returned value** -- If `json` is a valid JSON array string, returns the number of array elements, otherwise returns NULL. +- If `json` is a valid JSON array string, returns the number of array elements, otherwise returns NULL. Type: [Nullable(UInt64)](../../sql-reference/data-types/int-uint.md). diff --git a/docs/en/sql-reference/functions/logical-functions.md b/docs/en/sql-reference/functions/logical-functions.md index 137753d12c9..f5a1a6aac12 100644 --- a/docs/en/sql-reference/functions/logical-functions.md +++ b/docs/en/sql-reference/functions/logical-functions.md @@ -1,18 +1,18 @@ --- slug: /en/sql-reference/functions/logical-functions -sidebar_position: 37 +sidebar_position: 110 sidebar_label: Logical --- # Logical Functions -Performs logical operations on arguments of any numeric types, but returns a [UInt8](../../sql-reference/data-types/int-uint.md) number equal to 0, 1 or `NULL` in some cases. +Below functions perform logical operations on arguments of arbitrary numeric types. They return either 0 or 1 as [UInt8](../../sql-reference/data-types/int-uint.md) or in some cases `NULL`. -Zero as an argument is considered `false`, while any non-zero value is considered `true`. +Zero as an argument is considered `false`, non-zero values are considered `true`. ## and -Calculates the result of the logical conjunction between two or more values. Corresponds to [Logical AND Operator](../../sql-reference/operators/index.md#logical-and-operator). +Calculates the logical conjunction between two or more values. **Syntax** @@ -20,24 +20,24 @@ Calculates the result of the logical conjunction between two or more values. Cor and(val1, val2...) ``` -You can use the [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) setting to calculate the `and` function according to a short scheme. If this setting is enabled, `vali` is evaluated only on rows where `(val1 AND val2 AND ... AND val{i-1})` is true. For example, an exception about division by zero is not thrown when executing the query `SELECT and(number = 2, intDiv(1, number)) FROM numbers(10)`. +Setting [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) controls whether short-circuit evaluation is used. If enabled, `val_i` is evaluated only if `(val_1 AND val_2 AND ... AND val_{i-1})` is `true`. For example, with short-circuit evaluation, no division-by-zero exception is thrown when executing the query `SELECT and(number = 2, intDiv(1, number)) FROM numbers(5)`. + +Alias: The [AND Operator](../../sql-reference/operators/index.md#logical-and-operator). **Arguments** -- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). +- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). **Returned value** -- `0`, if there is at least one zero value argument. -- `NULL`, if there are no zero values arguments and there is at least one `NULL` argument. -- `1`, otherwise. 
+- `0`, if at least one argument evaluates to `false`,
+- `NULL`, if no argument evaluates to `false` and at least one argument is `NULL`,
+- `1`, otherwise.

Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)).

**Example**

-Query:
-
``` sql
SELECT and(0, 1, -2);
```
@@ -66,7 +66,7 @@ Result:

## or

-Calculates the result of the logical disjunction between two or more values. Corresponds to [Logical OR Operator](../../sql-reference/operators/index.md#logical-or-operator).
+Calculates the logical disjunction between two or more values.

**Syntax**

@@ -74,24 +74,24 @@ Calculates the result of the logical disjunction between two or more values. Cor
or(val1, val2...)
```

-You can use the [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) setting to calculate the `or` function according to a short scheme. If this setting is enabled, `vali` is evaluated only on rows where `((NOT val1) AND (NOT val2) AND ... AND (NOT val{i-1}))` is true. For example, an exception about division by zero is not thrown when executing the query `SELECT or(number = 0, intDiv(1, number) != 0) FROM numbers(10)`.
+Setting [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) controls whether short-circuit evaluation is used. If enabled, `val_i` is evaluated only if `((NOT val_1) AND (NOT val_2) AND ... AND (NOT val_{i-1}))` is `true`. For example, with short-circuit evaluation, no division-by-zero exception is thrown when executing the query `SELECT or(number = 0, intDiv(1, number) != 0) FROM numbers(5)`.
+
+Alias: The [OR Operator](../../sql-reference/operators/index.md#logical-or-operator).

**Arguments**

-- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md).
+- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md).

**Returned value**

-- `1`, if there is at least one non-zero value.
-- `0`, if there are only zero values.
-- `NULL`, if there are only zero values and `NULL`.
+- `1`, if at least one argument evaluates to `true`,
+- `0`, if all arguments evaluate to `false`,
+- `NULL`, if all arguments evaluate to `false` and at least one argument is `NULL`.

Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)).

**Example**

-Query:
-
``` sql
SELECT or(1, 0, 0, 2, NULL);
```
@@ -120,7 +120,7 @@ Result:

## not

-Calculates the result of the logical negation of the value. Corresponds to [Logical Negation Operator](../../sql-reference/operators/index.md#logical-negation-operator).
+Calculates the logical negation of a value.

**Syntax**

@@ -128,22 +128,22 @@ Calculates the result of the logical negation of the value. Corresponds to [Logi
not(val);
```

+Alias: The [Negation Operator](../../sql-reference/operators/index.md#logical-negation-operator).
+
**Arguments**

-- `val` — The value.
[Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md).
+- `val` — The value. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md).

**Returned value**

-- `1`, if the `val` is `0`.
-- `0`, if the `val` is a non-zero value.
-- `NULL`, if the `val` is a `NULL` value.
+- `1`, if `val` evaluates to `false`,
+- `0`, if `val` evaluates to `true`,
+- `NULL`, if `val` is `NULL`.

Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)).

**Example**

-Query:
-
``` sql
SELECT NOT(1);
```
@@ -158,7 +158,7 @@ Result:

## xor

-Calculates the result of the logical exclusive disjunction between two or more values. For more than two values the function works as if it calculates `XOR` of the first two values and then uses the result with the next value to calculate `XOR` and so on.
+Calculates the logical exclusive disjunction between two or more values. For more than two values, the function first XORs the first two values, then XORs the result with the third value, and so on.

**Syntax**

@@ -168,20 +168,18 @@ xor(val1, val2...)
```

**Arguments**

-- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md).
+- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md).

**Returned value**

-- `1`, for two values: if one of the values is zero and other is not.
-- `0`, for two values: if both values are zero or non-zero at the same time.
-- `NULL`, if there is at least one `NULL` value.
+- `1`, for two values: if one of the values evaluates to `false` and the other does not,
+- `0`, for two values: if both values evaluate to `false` or both evaluate to `true`,
+- `NULL`, if at least one of the inputs is `NULL`.

Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)).

**Example**

-Query:
-
``` sql
SELECT xor(0, 1, 1);
```
diff --git a/docs/en/sql-reference/functions/machine-learning-functions.md b/docs/en/sql-reference/functions/machine-learning-functions.md
index 98408ef459c..44ce4dcd211 100644
--- a/docs/en/sql-reference/functions/machine-learning-functions.md
+++ b/docs/en/sql-reference/functions/machine-learning-functions.md
@@ -1,6 +1,6 @@
---
slug: /en/sql-reference/functions/machine-learning-functions
-sidebar_position: 64
+sidebar_position: 115
sidebar_label: Machine Learning
---

@@ -16,4 +16,4 @@ The [stochasticLinearRegression](../../sql-reference/aggregate-functions/referen

## stochasticLogisticRegression

-The [stochasticLogisticRegression](../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md#agg_functions-stochasticlogisticregression) aggregate function implements stochastic gradient descent method for binary classification problem. Uses `evalMLMethod` to predict on new data.
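The train-then-predict pattern shared by both functions can be sketched as follows (table and column names are hypothetical; the learning rate, regularization, batch size, and method parameters are as described in the aggregate-function reference):

``` sql
-- Train: fold the training rows into a serialized model state.
CREATE TABLE your_model ENGINE = Memory AS
SELECT stochasticLinearRegressionState(0.1, 0.0, 5, 'SGD')(target, param1, param2) AS state
FROM train_data;

-- Predict: pass the stored state and new feature values to evalMLMethod.
WITH (SELECT state FROM your_model) AS model
SELECT evalMLMethod(model, param1, param2) AS prediction
FROM test_data;
```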
\ No newline at end of file
+The [stochasticLogisticRegression](../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md#agg_functions-stochasticlogisticregression) aggregate function implements stochastic gradient descent method for binary classification problem. Uses `evalMLMethod` to predict on new data.
diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md
index 10bc73c4a72..9851378d4fd 100644
--- a/docs/en/sql-reference/functions/math-functions.md
+++ b/docs/en/sql-reference/functions/math-functions.md
@@ -1,120 +1,260 @@
---
slug: /en/sql-reference/functions/math-functions
-sidebar_position: 44
+sidebar_position: 125
sidebar_label: Mathematical
---

# Mathematical Functions

-All the functions return a Float64 number. The accuracy of the result is close to the maximum precision possible, but the result might not coincide with the machine representable number nearest to the corresponding real number.
+All the functions return a Float64 number. Results are generally as close to the actual result as possible, but they may in some cases differ from the machine-representable number nearest to the true result.

-## e()
+## e
-Returns a Float64 number that is close to the number e.
+Returns e.
-## pi()
+**Syntax**
-Returns a Float64 number that is close to the number π.
+```sql
+e()
+```
-## exp(x)
+## pi
-Accepts a numeric argument and returns a Float64 number close to the exponent of the argument.
+Returns π.
-## log(x), ln(x)
+**Syntax**
-Accepts a numeric argument and returns a Float64 number close to the natural logarithm of the argument.
+```sql
+pi()
+```
-## exp2(x)
+## exp
-Accepts a numeric argument and returns a Float64 number close to 2 to the power of x.
+Returns e to the power of the given argument.
-## log2(x)
+**Syntax**
-Accepts a numeric argument and returns a Float64 number close to the binary logarithm of the argument.
+```sql
+exp(x)
+```
-## exp10(x)
+## log
-Accepts a numeric argument and returns a Float64 number close to 10 to the power of x.
+Returns the natural logarithm of the argument.
-## log10(x)
+**Syntax**
-Accepts a numeric argument and returns a Float64 number close to the decimal logarithm of the argument.
+```sql
+log(x)
+```
-## sqrt(x)
+Alias: `ln(x)`
-Accepts a numeric argument and returns a Float64 number close to the square root of the argument.
+## exp2
-## cbrt(x)
+Returns 2 to the power of the given argument.
-Accepts a numeric argument and returns a Float64 number close to the cubic root of the argument.
+**Syntax**
-## erf(x)
+```sql
+exp2(x)
+```
-If ‘x’ is non-negative, then `erf(x / σ√2)` is the probability that a random variable having a normal distribution with standard deviation ‘σ’ takes the value that is separated from the expected value by more than ‘x’.
+## intExp2
-Example (three sigma rule):
+Like `exp2` but returns a UInt64.
+
+**Syntax**
+
+```sql
+intExp2(x)
+```
+
+## log2
+
+Returns the binary logarithm of the argument.
+
+**Syntax**
+
+```sql
+log2(x)
+```
+
+## exp10
+
+Returns 10 to the power of the given argument.
+
+**Syntax**
+
+```sql
+exp10(x)
+```
+
+## intExp10
+
+Like `exp10` but returns a UInt64.
+
+**Syntax**
+
+```sql
+intExp10(x)
+```
+
+## log10
+
+Returns the decimal logarithm of the argument.
+
+**Syntax**
+
+```sql
+log10(x)
+```
+
+## sqrt
+
+Returns the square root of the argument.
+
+```sql
+sqrt(x)
+```
+
+## cbrt
+
+Returns the cubic root of the argument.
+
+```sql
+cbrt(x)
+```
+
+## erf
+
+If `x` is non-negative, then `erf(x / σ√2)` is the probability that a random variable having a normal distribution with standard deviation `σ` takes the value that is separated from the expected value by more than `x`.
+
+**Syntax**
+
+```sql
+erf(x)
+```
+
+**Example**
+
+(three sigma rule)

``` sql
SELECT erf(3 / sqrt(2));
```

-``` text
+```result
┌─erf(divide(3, sqrt(2)))─┐
│      0.9973002039367398 │
└─────────────────────────┘
```

-## erfc(x)
+## erfc
-Accepts a numeric argument and returns a Float64 number close to 1 - erf(x), but without loss of precision for large ‘x’ values.
+Returns a number close to `1 - erf(x)` without loss of precision for large `x` values.
-## lgamma(x)
+**Syntax**
-The logarithm of the gamma function.
+```sql
+erfc(x)
+```
-## tgamma(x)
+## lgamma
-Gamma function.
+Returns the logarithm of the gamma function.
-## sin(x)
+**Syntax**
-The sine.
+```sql
+lgamma(x)
+```
-## cos(x)
+## tgamma
-The cosine.
+Returns the gamma function.
-## tan(x)
+**Syntax**
-The tangent.
+```sql
+tgamma(x)
+```
-## asin(x)
+## sin
-The arc sine.
+Returns the sine of the argument.
-## acos(x)
+**Syntax**
-The arc cosine.
+```sql
+sin(x)
+```
-## atan(x)
+## cos
-The arc tangent.
+Returns the cosine of the argument.
-## pow(x, y), power(x, y)
+**Syntax**
-Takes two numeric arguments x and y. Returns a Float64 number close to x to the power of y.
+```sql
+cos(x)
+```
-## intExp2
+## tan
-Accepts a numeric argument and returns a UInt64 number close to 2 to the power of x.
+Returns the tangent of the argument.
-## intExp10
+**Syntax**
-Accepts a numeric argument and returns a UInt64 number close to 10 to the power of x.
+```sql
+tan(x)
+```
-## cosh(x)
+## asin
-[Hyperbolic cosine](https://in.mathworks.com/help/matlab/ref/cosh.html).
+Returns the arc sine of the argument.
+
+**Syntax**
+
+```sql
+asin(x)
+```
+
+## acos
+
+Returns the arc cosine of the argument.
+
+**Syntax**
+
+```sql
+acos(x)
+```
+
+## atan
+
+Returns the arc tangent of the argument.
+
+**Syntax**
+
+```sql
+atan(x)
+```
+
+## pow
+
+Returns `x` to the power of `y`.
+
+**Syntax**
+
+```sql
+pow(x, y)
+```
+
+Alias: `power(x, y)`
+
+## cosh
+
+Returns the [hyperbolic cosine](https://in.mathworks.com/help/matlab/ref/cosh.html) of the argument.

**Syntax**

@@ -124,33 +264,31 @@ cosh(x)
```

**Arguments**

-- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64).
+- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64).

**Returned value**

-- Values from the interval: `1 <= cosh(x) < +∞`.
+- Values from the interval: `1 <= cosh(x) < +∞`.

Type: [Float64](../../sql-reference/data-types/float.md#float32-float64).

**Example**

-Query:
-
``` sql
SELECT cosh(0);
```

Result:

-``` text
+```result
┌─cosh(0)──┐
│        1 │
└──────────┘
```

-## acosh(x)
+## acosh

-[Inverse hyperbolic cosine](https://www.mathworks.com/help/matlab/ref/acosh.html).
+Returns the [inverse hyperbolic cosine](https://www.mathworks.com/help/matlab/ref/acosh.html).

**Syntax**

@@ -160,37 +298,31 @@ acosh(x)
```

**Arguments**

-- `x` — Hyperbolic cosine of angle. Values from the interval: `1 <= x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64).
+- `x` — Hyperbolic cosine of angle. Values from the interval: `1 <= x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64).

**Returned value**

-- The angle, in radians.
Values from the interval: `0 <= acosh(x) < +∞`. +- The angle, in radians. Values from the interval: `0 <= acosh(x) < +∞`. Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). **Example** -Query: - ``` sql SELECT acosh(1); ``` Result: -``` text +```result ┌─acosh(1)─┐ │ 0 │ └──────────┘ ``` -**See Also** +## sinh -- [cosh(x)](../../sql-reference/functions/math-functions.md#coshx) - -## sinh(x) - -[Hyperbolic sine](https://www.mathworks.com/help/matlab/ref/sinh.html). +Returns the [hyperbolic sine](https://www.mathworks.com/help/matlab/ref/sinh.html). **Syntax** @@ -200,33 +332,31 @@ sinh(x) **Arguments** -- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). +- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). **Returned value** -- Values from the interval: `-∞ < sinh(x) < +∞`. +- Values from the interval: `-∞ < sinh(x) < +∞`. Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). **Example** -Query: - ``` sql SELECT sinh(0); ``` Result: -``` text +```result ┌─sinh(0)──┐ │ 0 │ └──────────┘ ``` -## asinh(x) +## asinh -[Inverse hyperbolic sine](https://www.mathworks.com/help/matlab/ref/asinh.html). +Returns the [inverse hyperbolic sine](https://www.mathworks.com/help/matlab/ref/asinh.html). **Syntax** @@ -236,37 +366,31 @@ asinh(x) **Arguments** -- `x` — Hyperbolic sine of angle. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). +- `x` — Hyperbolic sine of angle. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). **Returned value** -- The angle, in radians. Values from the interval: `-∞ < asinh(x) < +∞`. +- The angle, in radians. Values from the interval: `-∞ < asinh(x) < +∞`. Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). **Example** -Query: - ``` sql SELECT asinh(0); ``` Result: -``` text +```result ┌─asinh(0)─┐ │ 0 │ └──────────┘ ``` -**See Also** +## atanh -- [sinh(x)](../../sql-reference/functions/math-functions.md#sinhx) - -## atanh(x) - -[Inverse hyperbolic tangent](https://www.mathworks.com/help/matlab/ref/atanh.html). +Returns the [inverse hyperbolic tangent](https://www.mathworks.com/help/matlab/ref/atanh.html). **Syntax** @@ -276,33 +400,31 @@ atanh(x) **Arguments** -- `x` — Hyperbolic tangent of angle. Values from the interval: `–1 < x < 1`. [Float64](../../sql-reference/data-types/float.md#float32-float64). +- `x` — Hyperbolic tangent of angle. Values from the interval: `–1 < x < 1`. [Float64](../../sql-reference/data-types/float.md#float32-float64). **Returned value** -- The angle, in radians. Values from the interval: `-∞ < atanh(x) < +∞`. +- The angle, in radians. Values from the interval: `-∞ < atanh(x) < +∞`. Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). **Example** -Query: - ``` sql SELECT atanh(0); ``` Result: -``` text +```result ┌─atanh(0)─┐ │ 0 │ └──────────┘ ``` -## atan2(y, x) +## atan2 -The [function](https://en.wikipedia.org/wiki/Atan2) calculates the angle in the Euclidean plane, given in radians, between the positive x axis and the ray to the point `(x, y) ≠ (0, 0)`. +Returns the [atan2](https://en.wikipedia.org/wiki/Atan2) as the angle in the Euclidean plane, given in radians, between the positive x axis and the ray to the point `(x, y) ≠ (0, 0)`. 
**Syntax** @@ -312,34 +434,32 @@ atan2(y, x) **Arguments** -- `y` — y-coordinate of the point through which the ray passes. [Float64](../../sql-reference/data-types/float.md#float32-float64). -- `x` — x-coordinate of the point through which the ray passes. [Float64](../../sql-reference/data-types/float.md#float32-float64). +- `y` — y-coordinate of the point through which the ray passes. [Float64](../../sql-reference/data-types/float.md#float32-float64). +- `x` — x-coordinate of the point through which the ray passes. [Float64](../../sql-reference/data-types/float.md#float32-float64). **Returned value** -- The angle `θ` such that `−π < θ ≤ π`, in radians. +- The angle `θ` such that `−π < θ ≤ π`, in radians. Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). **Example** -Query: - ``` sql SELECT atan2(1, 1); ``` Result: -``` text +```result ┌────────atan2(1, 1)─┐ │ 0.7853981633974483 │ └────────────────────┘ ``` -## hypot(x, y) +## hypot -Calculates the length of the hypotenuse of a right-angle triangle. The [function](https://en.wikipedia.org/wiki/Hypot) avoids problems that occur when squaring very large or very small numbers. +Returns the length of the hypotenuse of a right-angle triangle. [Hypot](https://en.wikipedia.org/wiki/Hypot) avoids problems that occur when squaring very large or very small numbers. **Syntax** @@ -349,34 +469,32 @@ hypot(x, y) **Arguments** -- `x` — The first cathetus of a right-angle triangle. [Float64](../../sql-reference/data-types/float.md#float32-float64). -- `y` — The second cathetus of a right-angle triangle. [Float64](../../sql-reference/data-types/float.md#float32-float64). +- `x` — The first cathetus of a right-angle triangle. [Float64](../../sql-reference/data-types/float.md#float32-float64). +- `y` — The second cathetus of a right-angle triangle. [Float64](../../sql-reference/data-types/float.md#float32-float64). **Returned value** -- The length of the hypotenuse of a right-angle triangle. +- The length of the hypotenuse of a right-angle triangle. Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). **Example** -Query: - ``` sql SELECT hypot(1, 1); ``` Result: -``` text +```result ┌────────hypot(1, 1)─┐ │ 1.4142135623730951 │ └────────────────────┘ ``` -## log1p(x) +## log1p -Calculates `log(1+x)`. The [function](https://en.wikipedia.org/wiki/Natural_logarithm#lnp1) `log1p(x)` is more accurate than `log(1+x)` for small values of x. +Calculates `log(1+x)`. The [calculation](https://en.wikipedia.org/wiki/Natural_logarithm#lnp1) `log1p(x)` is more accurate than `log(1+x)` for small values of x. **Syntax** @@ -386,35 +504,29 @@ log1p(x) **Arguments** -- `x` — Values from the interval: `-1 < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). +- `x` — Values from the interval: `-1 < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). **Returned value** -- Values from the interval: `-∞ < log1p(x) < +∞`. +- Values from the interval: `-∞ < log1p(x) < +∞`. Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). **Example** -Query: - ``` sql SELECT log1p(0); ``` Result: -``` text +```result ┌─log1p(0)─┐ │ 0 │ └──────────┘ ``` -**See Also** - -- [log(x)](../../sql-reference/functions/math-functions.md#logx-lnx) - -## sign(x) +## sign Returns the sign of a real number. @@ -426,13 +538,13 @@ sign(x) **Arguments** -- `x` — Values from `-∞` to `+∞`. Support all numeric types in ClickHouse. +- `x` — Values from `-∞` to `+∞`. 
Support all numeric types in ClickHouse. **Returned value** - -1 for `x < 0` -- 0 for `x = 0` -- 1 for `x > 0` +- 0 for `x = 0` +- 1 for `x > 0` **Examples** @@ -444,7 +556,7 @@ SELECT sign(0); Result: -``` text +```result ┌─sign(0)─┐ │ 0 │ └─────────┘ @@ -458,7 +570,7 @@ SELECT sign(1); Result: -``` text +```result ┌─sign(1)─┐ │ 1 │ └─────────┘ @@ -472,15 +584,15 @@ SELECT sign(-1); Result: -``` text +```result ┌─sign(-1)─┐ │ -1 │ └──────────┘ ``` -## degrees(x) +## degrees -Converts the input value in radians to degrees. +Converts radians to degrees. **Syntax** @@ -490,33 +602,31 @@ degrees(x) **Arguments** -- `x` — Input in radians. [Float64](../../sql-reference/data-types/float.md#float32-float64). +- `x` — Input in radians. [Float64](../../sql-reference/data-types/float.md#float32-float64). **Returned value** -- Value in degrees. +- Value in degrees. Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). **Example** -Query: - ``` sql SELECT degrees(3.141592653589793); ``` Result: -``` text +```result ┌─degrees(3.141592653589793)─┐ │ 180 │ └────────────────────────────┘ ``` -## radians(x) +## radians -Converts the input value in degrees to radians. +Converts degrees to radians. **Syntax** @@ -526,34 +636,31 @@ radians(x) **Arguments** -- `x` — Input in degrees. [Float64](../../sql-reference/data-types/float.md#float32-float64). +- `x` — Input in degrees. [Float64](../../sql-reference/data-types/float.md#float32-float64). **Returned value** -- Value in radians. +- Value in radians. Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). **Example** -Query: - ``` sql SELECT radians(180); ``` Result: -``` text +```result ┌──────radians(180)─┐ │ 3.141592653589793 │ └───────────────────┘ ``` +## factorial -## factorial(n) - -Computes the factorial of an integer value. It works with any native integer type including UInt(8|16|32|64) and Int(8|16|32|64). The return type is UInt64. +Computes the factorial of an integer value. Works with any native integer type including UInt(8|16|32|64) and Int(8|16|32|64). The return type is UInt64. The factorial of 0 is 1. Likewise, the factorial() function returns 1 for any negative value. The maximum positive value for the input argument is 20, a value of 21 or greater will cause exception throw. @@ -566,21 +673,19 @@ factorial(n) **Example** -Query: - ``` sql SELECT factorial(10); ``` Result: -``` text +```result ┌─factorial(10)─┐ │ 3628800 │ └───────────────┘ ``` -## width_bucket(operand, low, high, count) +## width_bucket Returns the number of the bucket in which `operand` falls in a histogram having `count` equal-width buckets spanning the range `low` to `high`. Returns `0` if `operand < low`, and returns `count+1` if `operand >= high`. @@ -591,21 +696,18 @@ Returns the number of the bucket in which `operand` falls in a histogram having ```sql widthBucket(operand, low, high, count) ``` - -There is also a case insensitive alias called `WIDTH_BUCKET` to provide compatibility with other databases. 
+Alias: `WIDTH_BUCKET` **Example** -Query: - ``` sql SELECT widthBucket(10.15, -8.6, 23, 18); ``` Result: -``` text +```result ┌─widthBucket(10.15, -8.6, 23, 18)─┐ │ 11 │ └──────────────────────────────────┘ -``` \ No newline at end of file +``` diff --git a/docs/en/sql-reference/functions/nlp-functions.md b/docs/en/sql-reference/functions/nlp-functions.md index f68448af2be..f10415783a5 100644 --- a/docs/en/sql-reference/functions/nlp-functions.md +++ b/docs/en/sql-reference/functions/nlp-functions.md @@ -1,11 +1,10 @@ --- slug: /en/sql-reference/functions/nlp-functions -sidebar_position: 67 -sidebar_label: NLP -title: "[experimental] Natural Language Processing functions" +sidebar_position: 130 +sidebar_label: NLP (experimental) --- -:::warning +:::note This is an experimental feature that is currently in development and is not ready for general use. It will change in unpredictable backwards-incompatible ways in future releases. Set `allow_experimental_nlp_functions = 1` to enable it. ::: @@ -13,18 +12,18 @@ This is an experimental feature that is currently in development and is not read Performs stemming on a given word. -**Syntax** +### Syntax ``` sql stem('language', word) ``` -**Arguments** +### Arguments -- `language` — Language which rules will be applied. Must be in lowercase. [String](../../sql-reference/data-types/string.md#string). -- `word` — word that needs to be stemmed. Must be in lowercase. [String](../../sql-reference/data-types/string.md#string). +- `language` — Language which rules will be applied. Use the two letter [ISO 639-1 code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). +- `word` — word that needs to be stemmed. Must be in lowercase. [String](../../sql-reference/data-types/string.md#string). -**Examples** +### Examples Query: @@ -39,23 +38,58 @@ Result: │ ['I','think','it','is','a','bless','in','disguis'] │ └────────────────────────────────────────────────────┘ ``` +### Supported languages for stem() + +:::note +The stem() function uses the [Snowball stemming](https://snowballstem.org/) library, see the Snowball website for updated languages etc. +::: + +- Arabic +- Armenian +- Basque +- Catalan +- Danish +- Dutch +- English +- Finnish +- French +- German +- Greek +- Hindi +- Hungarian +- Indonesian +- Irish +- Italian +- Lithuanian +- Nepali +- Norwegian +- Porter +- Portuguese +- Romanian +- Russian +- Serbian +- Spanish +- Swedish +- Tamil +- Turkish +- Yiddish ## lemmatize Performs lemmatization on a given word. Needs dictionaries to operate, which can be obtained [here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models). -**Syntax** +### Syntax ``` sql lemmatize('language', word) ``` -**Arguments** +### Arguments -- `language` — Language which rules will be applied. [String](../../sql-reference/data-types/string.md#string). -- `word` — Word that needs to be lemmatized. Must be lowercase. [String](../../sql-reference/data-types/string.md#string). +- `language` — Language which rules will be applied. [String](../../sql-reference/data-types/string.md#string). +- `word` — Word that needs to be lemmatized. Must be lowercase. [String](../../sql-reference/data-types/string.md#string). -**Examples** +### Examples Query: @@ -71,12 +105,18 @@ Result: └─────────────────────┘ ``` -Configuration: +### Configuration + +This configuration specifies that the dictionary `en.bin` should be used for lemmatization of English (`en`) words. 
The `.bin` files can be downloaded from +[here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models). + ``` xml + en en.bin + ``` @@ -89,18 +129,18 @@ With the `plain` extension type we need to provide a path to a simple text file, With the `wordnet` extension type we need to provide a path to a directory with WordNet thesaurus in it. Thesaurus must contain a WordNet sense index. -**Syntax** +### Syntax ``` sql synonyms('extension_name', word) ``` -**Arguments** +### Arguments -- `extension_name` — Name of the extension in which search will be performed. [String](../../sql-reference/data-types/string.md#string). -- `word` — Word that will be searched in extension. [String](../../sql-reference/data-types/string.md#string). +- `extension_name` — Name of the extension in which search will be performed. [String](../../sql-reference/data-types/string.md#string). +- `word` — Word that will be searched in extension. [String](../../sql-reference/data-types/string.md#string). -**Examples** +### Examples Query: @@ -116,7 +156,7 @@ Result: └──────────────────────────────────────────┘ ``` -Configuration: +### Configuration ``` xml @@ -138,17 +178,17 @@ Detects the language of the UTF8-encoded input string. The function uses the [CL The `detectLanguage` function works best when providing over 200 characters in the input string. -**Syntax** +### Syntax ``` sql detectLanguage('text_to_be_analyzed') ``` -**Arguments** +### Arguments -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string). +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string). -**Returned value** +### Returned value - The 2-letter ISO code of the detected language @@ -157,7 +197,7 @@ Other possible results: - `un` = unknown, can not detect any language. - `other` = the detected language does not have 2 letter code. -**Examples** +### Examples Query: @@ -176,22 +216,22 @@ fr Similar to the `detectLanguage` function, but `detectLanguageMixed` returns a `Map` of 2-letter language codes that are mapped to the percentage of the certain language in the text. -**Syntax** +### Syntax ``` sql detectLanguageMixed('text_to_be_analyzed') ``` -**Arguments** +### Arguments -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string). +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string). -**Returned value** +### Returned value -- `Map(String, Float32)`: The keys are 2-letter ISO codes and the values are a perentage of text found for that language +- `Map(String, Float32)`: The keys are 2-letter ISO codes and the values are a percentage of text found for that language -**Examples** +### Examples Query: @@ -212,17 +252,17 @@ Result: Similar to the `detectLanguage` function, except the `detectLanguageUnknown` function works with non-UTF8-encoded strings. Prefer this version when your character set is UTF-16 or UTF-32. -**Syntax** +### Syntax ``` sql detectLanguageUnknown('text_to_be_analyzed') ``` -**Arguments** +### Arguments -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string). +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string). 
-**Returned value**
+### Returned value

- The 2-letter ISO code of the detected language

Other possible results:

- `un` = unknown, can not detect any language.
- `other` = the detected language does not have 2 letter code.

-**Examples**
+### Examples

Query:

@@ -252,21 +292,21 @@ Result:

The `detectCharset` function detects the character set of the non-UTF8-encoded input string.

-**Syntax**
+### Syntax

``` sql
detectCharset('text_to_be_analyzed')
```

-**Arguments**
+### Arguments

-- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string).
+- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string).

-**Returned value**
+### Returned value

- A `String` containing the code of the detected character set

-**Examples**
+### Examples

Query:

diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index 7146484361e..efe1a77c285 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -1,6 +1,6 @@
---
slug: /en/sql-reference/functions/other-functions
-sidebar_position: 67
+sidebar_position: 140
sidebar_label: Other
---

@@ -23,11 +23,11 @@ getMacro(name);

**Arguments**

-- `name` — Name to retrieve from the `macros` section. [String](../../sql-reference/data-types/string.md#string).
+- `name` — Name to retrieve from the `macros` section. [String](../../sql-reference/data-types/string.md#string).

**Returned value**

-- Value of the specified macro.
+- Value of the specified macro.

Type: [String](../../sql-reference/data-types/string.md).

@@ -82,7 +82,7 @@ This function is case-insensitive.

**Returned value**

-- String with the fully qualified domain name.
+- String with the fully qualified domain name.

Type: `String`.

@@ -112,17 +112,17 @@ basename( expr )

**Arguments**

-- `expr` — Expression resulting in a [String](../../sql-reference/data-types/string.md) type value. All the backslashes must be escaped in the resulting value.
+- `expr` — Expression resulting in a [String](../../sql-reference/data-types/string.md) type value. All the backslashes must be escaped in the resulting value.

**Returned Value**

A string that contains:

-- The trailing part of a string after the last slash or backslash.
+- The trailing part of a string after the last slash or backslash.

    If the input string contains a path ending with slash or backslash, for example, `/` or `c:\`, the function returns an empty string.

-- The original string if there are no slashes or backslashes.
+- The original string if there are no slashes or backslashes.

**Example**

@@ -196,11 +196,11 @@ byteSize(argument [, ...])

**Arguments**

-- `argument` — Value.
+- `argument` — Value.

**Returned value**

-- Estimation of byte size of the arguments in memory.
+- Estimation of byte size of the arguments in memory.

Type: [UInt64](../../sql-reference/data-types/int-uint.md).

@@ -306,7 +306,7 @@ You can use this function in table engine parameters in a CREATE TABLE query whe

## currentUser()

-Returns the login of current user. Login of user, that initiated query, will be returned in case distibuted query.
+Returns the login of the current user. In case of a distributed query, the login of the user who initiated the query is returned.

``` sql
SELECT currentUser();
```

Alias: `user()`, `USER()`.

**Returned values**

-- Login of current user.
-- Login of user that initiated query in case of disributed query. +- Login of current user. +- Login of user that initiated query in case of distributed query. Type: `String`. @@ -353,12 +353,12 @@ isConstant(x) **Arguments** -- `x` — Expression to check. +- `x` — Expression to check. **Returned values** -- `1` — `x` is constant. -- `0` — `x` is non-constant. +- `1` — `x` is constant. +- `0` — `x` is non-constant. Type: [UInt8](../../sql-reference/data-types/int-uint.md). @@ -424,13 +424,13 @@ Checks whether floating point value is finite. **Arguments** -- `x` — Value to be checked for infinity. Type: [Float\*](../../sql-reference/data-types/float.md). -- `y` — Fallback value. Type: [Float\*](../../sql-reference/data-types/float.md). +- `x` — Value to be checked for infinity. Type: [Float\*](../../sql-reference/data-types/float.md). +- `y` — Fallback value. Type: [Float\*](../../sql-reference/data-types/float.md). **Returned value** -- `x` if `x` is finite. -- `y` if `x` is not finite. +- `x` if `x` is finite. +- `y` if `x` is not finite. **Example** @@ -464,9 +464,9 @@ Allows building a unicode-art diagram. **Arguments** -- `x` — Size to display. -- `min, max` — Integer constants. The value must fit in `Int64`. -- `width` — Constant, positive integer, can be fractional. +- `x` — Size to display. +- `min, max` — Integer constants. The value must fit in `Int64`. +- `width` — Constant, positive integer, can be fractional. The band is drawn with accuracy to one eighth of a symbol. @@ -670,8 +670,8 @@ formatReadableTimeDelta(column[, maximum_unit]) **Arguments** -- `column` — A column with numeric time delta. -- `maximum_unit` — Optional. Maximum unit to show. Acceptable values seconds, minutes, hours, days, months, years. +- `column` — A column with numeric time delta. +- `maximum_unit` — Optional. Maximum unit to show. Acceptable values seconds, minutes, hours, days, months, years. Example: @@ -715,12 +715,12 @@ parseTimeDelta(timestr) **Arguments** -- `timestr` — A sequence of numbers followed by something resembling a time unit. +- `timestr` — A sequence of numbers followed by something resembling a time unit. **Returned value** -- A floating-point number with the number of seconds. +- A floating-point number with the number of seconds. **Example** @@ -792,7 +792,7 @@ neighbor(column, offset[, default_value]) The result of the function depends on the affected data blocks and the order of data in the block. -:::warning +:::tip It can reach the neighbor rows only inside the currently processed data block. ::: @@ -801,14 +801,14 @@ To prevent that you can make a subquery with [ORDER BY](../../sql-reference/stat **Arguments** -- `column` — A column name or scalar expression. -- `offset` — The number of rows forwards or backwards from the current row of `column`. [Int64](../../sql-reference/data-types/int-uint.md). -- `default_value` — Optional. The value to be returned if offset goes beyond the scope of the block. Type of data blocks affected. +- `column` — A column name or scalar expression. +- `offset` — The number of rows forwards or backwards from the current row of `column`. [Int64](../../sql-reference/data-types/int-uint.md). +- `default_value` — Optional. The value to be returned if offset goes beyond the scope of the block. Type of data blocks affected. **Returned values** -- Value for `column` in `offset` distance from current row if `offset` value is not outside block bounds. -- Default value for `column` if `offset` value is outside block bounds. 
If `default_value` is given, then it will be used. +- Value for `column` in `offset` distance from current row if `offset` value is not outside block bounds. +- Default value for `column` if `offset` value is outside block bounds. If `default_value` is given, then it will be used. Type: type of data blocks affected or default value type. @@ -902,7 +902,7 @@ Result: Calculates the difference between successive row values ​​in the data block. Returns 0 for the first row and the difference from the previous row for each subsequent row. -:::warning +:::tip It can reach the previous row only inside the currently processed data block. ::: @@ -986,7 +986,7 @@ Each event has a start time and an end time. The start time is included in the e The function calculates the total number of active (concurrent) events for each event start time. -:::warning +:::tip Events must be ordered by the start time in ascending order. If this requirement is violated the function raises an exception. Every data block is processed separately. If events from different data blocks overlap then they can not be processed correctly. ::: @@ -998,12 +998,12 @@ runningConcurrency(start, end) **Arguments** -- `start` — A column with the start time of events. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `end` — A column with the end time of events. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `start` — A column with the start time of events. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `end` — A column with the end time of events. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). **Returned values** -- The number of concurrent events at each event start time. +- The number of concurrent events at each event start time. Type: [UInt32](../../sql-reference/data-types/int-uint.md) @@ -1059,12 +1059,12 @@ getSizeOfEnumType(value) **Arguments:** -- `value` — Value of type `Enum`. +- `value` — Value of type `Enum`. **Returned values** -- The number of fields with `Enum` input values. -- An exception is thrown if the type is not `Enum`. +- The number of fields with `Enum` input values. +- An exception is thrown if the type is not `Enum`. **Example** @@ -1088,11 +1088,11 @@ blockSerializedSize(value[, value[, ...]]) **Arguments** -- `value` — Any value. +- `value` — Any value. **Returned values** -- The number of bytes that will be written to disk for block of values (without compression). +- The number of bytes that will be written to disk for block of values (without compression). **Example** @@ -1120,11 +1120,11 @@ toColumnTypeName(value) **Arguments:** -- `value` — Any type of value. +- `value` — Any type of value. **Returned values** -- A string with the name of the class that is used for representing the `value` data type in RAM. +- A string with the name of the class that is used for representing the `value` data type in RAM. **Example of the difference between`toTypeName ' and ' toColumnTypeName`** @@ -1160,11 +1160,11 @@ dumpColumnStructure(value) **Arguments:** -- `value` — Any type of value. +- `value` — Any type of value. 
**Returned values** -- A string describing the structure that is used for representing the `value` data type in RAM. +- A string describing the structure that is used for representing the `value` data type in RAM. **Example** @@ -1190,13 +1190,13 @@ defaultValueOfArgumentType(expression) **Arguments:** -- `expression` — Arbitrary type of value or an expression that results in a value of an arbitrary type. +- `expression` — Arbitrary type of value or an expression that results in a value of an arbitrary type. **Returned values** -- `0` for numbers. -- Empty string for strings. -- `ᴺᵁᴸᴸ` for [Nullable](../../sql-reference/data-types/nullable.md). +- `0` for numbers. +- Empty string for strings. +- `ᴺᵁᴸᴸ` for [Nullable](../../sql-reference/data-types/nullable.md). **Example** @@ -1232,13 +1232,13 @@ defaultValueOfTypeName(type) **Arguments:** -- `type` — A string representing a type name. +- `type` — A string representing a type name. **Returned values** -- `0` for numbers. -- Empty string for strings. -- `ᴺᵁᴸᴸ` for [Nullable](../../sql-reference/data-types/nullable.md). +- `0` for numbers. +- Empty string for strings. +- `ᴺᵁᴸᴸ` for [Nullable](../../sql-reference/data-types/nullable.md). **Example** @@ -1377,8 +1377,8 @@ SELECT replicate(x, arr); **Arguments:** -- `arr` — Original array. ClickHouse creates a new array of the same length as the original and fills it with the value `x`. -- `x` — The value that the resulting array will be filled with. +- `arr` — Original array. ClickHouse creates a new array of the same length as the original and fills it with the value `x`. +- `x` — The value that the resulting array will be filled with. **Returned value** @@ -1414,7 +1414,7 @@ filesystemAvailable() **Returned value** -- The amount of remaining space available in bytes. +- The amount of remaining space available in bytes. Type: [UInt64](../../sql-reference/data-types/int-uint.md). @@ -1446,7 +1446,7 @@ filesystemFree() **Returned value** -- Amount of free space in bytes. +- Amount of free space in bytes. Type: [UInt64](../../sql-reference/data-types/int-uint.md). @@ -1478,7 +1478,7 @@ filesystemCapacity() **Returned value** -- Capacity information of the filesystem in bytes. +- Capacity information of the filesystem in bytes. Type: [UInt64](../../sql-reference/data-types/int-uint.md). @@ -1510,8 +1510,8 @@ initializeAggregation (aggregate_function, arg1, arg2, ..., argN) **Arguments** -- `aggregate_function` — Name of the aggregation function to initialize. [String](../../sql-reference/data-types/string.md). -- `arg` — Arguments of aggregate function. +- `aggregate_function` — Name of the aggregation function to initialize. [String](../../sql-reference/data-types/string.md). +- `arg` — Arguments of aggregate function. **Returned value(s)** @@ -1568,7 +1568,7 @@ INSERT INTO metrics VALUES (0, initializeAggregation('sumState', toUInt64(42))) ``` **See Also** -- [arrayReduce](../../sql-reference/functions/array-functions.md#arrayreduce) +- [arrayReduce](../../sql-reference/functions/array-functions.md#arrayreduce) ## finalizeAggregation @@ -1582,11 +1582,11 @@ finalizeAggregation(state) **Arguments** -- `state` — State of aggregation. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction). +- `state` — State of aggregation. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction). **Returned value(s)** -- Value/values that was aggregated. +- Value(s) that were aggregated.
Type: Value of any types that was aggregated. @@ -1667,14 +1667,14 @@ Result: ``` **See Also** -- [arrayReduce](../../sql-reference/functions/array-functions.md#arrayreduce) -- [initializeAggregation](#initializeaggregation) +- [arrayReduce](../../sql-reference/functions/array-functions.md#arrayreduce) +- [initializeAggregation](#initializeaggregation) ## runningAccumulate Accumulates states of an aggregate function for each row of a data block. -:::warning +:::tip The state is reset for each new data block. ::: @@ -1686,12 +1686,12 @@ runningAccumulate(agg_state[, grouping]); **Arguments** -- `agg_state` — State of the aggregate function. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction). -- `grouping` — Grouping key. Optional. The state of the function is reset if the `grouping` value is changed. It can be any of the [supported data types](../../sql-reference/data-types/index.md) for which the equality operator is defined. +- `agg_state` — State of the aggregate function. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction). +- `grouping` — Grouping key. Optional. The state of the function is reset if the `grouping` value is changed. It can be any of the [supported data types](../../sql-reference/data-types/index.md) for which the equality operator is defined. **Returned value** -- Each resulting row contains a result of the aggregate function, accumulated for all the input rows from 0 to the current position. `runningAccumulate` resets states for each new data block or when the `grouping` value changes. +- Each resulting row contains a result of the aggregate function, accumulated for all the input rows from 0 to the current position. `runningAccumulate` resets states for each new data block or when the `grouping` value changes. Type depends on the aggregate function used. @@ -1792,9 +1792,9 @@ joinGet(join_storage_table_name, `value_column`, join_keys) **Arguments** -- `join_storage_table_name` — an [identifier](../../sql-reference/syntax.md#syntax-identifiers) indicates where search is performed. The identifier is searched in the default database (see parameter `default_database` in the config file). To override the default database, use the `USE db_name` or specify the database and the table through the separator `db_name.db_table`, see the example. -- `value_column` — name of the column of the table that contains required data. -- `join_keys` — list of keys. +- `join_storage_table_name` — an [identifier](../../sql-reference/syntax.md#syntax-identifiers) indicating where the search is performed. The identifier is searched in the default database (see parameter `default_database` in the config file). To override the default database, use the `USE db_name` or specify the database and the table through the separator `db_name.db_table`, see the example. +- `value_column` — name of the column of the table that contains required data. +- `join_keys` — list of keys. **Returned value** @@ -1939,13 +1939,13 @@ randomPrintableASCII(length) **Arguments** -- `length` — Resulting string length. Positive integer. +- `length` — Resulting string length. Positive integer. If you pass `length < 0`, behavior of the function is undefined. **Returned value** -- String with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) printable characters.
Type: [String](../../sql-reference/data-types/string.md) @@ -1975,11 +1975,11 @@ randomString(length) **Arguments** -- `length` — String length. Positive integer. +- `length` — String length. Positive integer. **Returned value** -- String filled with random bytes. +- String filled with random bytes. Type: [String](../../sql-reference/data-types/string.md). @@ -2007,8 +2007,8 @@ len: 30 **See Also** -- [generateRandom](../../sql-reference/table-functions/generate.md#generaterandom) -- [randomPrintableASCII](../../sql-reference/functions/other-functions.md#randomascii) +- [generateRandom](../../sql-reference/table-functions/generate.md#generaterandom) +- [randomPrintableASCII](../../sql-reference/functions/other-functions.md#randomascii) ## randomFixedString @@ -2023,11 +2023,11 @@ randomFixedString(length); **Arguments** -- `length` — String length in bytes. [UInt64](../../sql-reference/data-types/int-uint.md). +- `length` — String length in bytes. [UInt64](../../sql-reference/data-types/int-uint.md). **Returned value(s)** -- String filled with random bytes. +- String filled with random bytes. Type: [FixedString](../../sql-reference/data-types/fixedstring.md). @@ -2060,11 +2060,11 @@ randomStringUTF8(length); **Arguments** -- `length` — Required length of the resulting string in code points. [UInt64](../../sql-reference/data-types/int-uint.md). +- `length` — Required length of the resulting string in code points. [UInt64](../../sql-reference/data-types/int-uint.md). **Returned value(s)** -- UTF-8 random string. +- UTF-8 random string. Type: [String](../../sql-reference/data-types/string.md). @@ -2097,11 +2097,11 @@ getSetting('custom_setting'); **Parameter** -- `custom_setting` — The setting name. [String](../../sql-reference/data-types/string.md). +- `custom_setting` — The setting name. [String](../../sql-reference/data-types/string.md). **Returned value** -- The setting current value. +- The setting current value. **Example** @@ -2118,7 +2118,7 @@ SELECT getSetting('custom_a'); **See Also** -- [Custom Settings](../../operations/settings/index.md#custom_settings) +- [Custom Settings](../../operations/settings/index.md#custom_settings) ## isDecimalOverflow @@ -2132,13 +2132,13 @@ isDecimalOverflow(d, [p]) **Arguments** -- `d` — value. [Decimal](../../sql-reference/data-types/decimal.md). -- `p` — precision. Optional. If omitted, the initial precision of the first argument is used. Using of this paratemer could be helpful for data extraction to another DBMS or file. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). +- `d` — value. [Decimal](../../sql-reference/data-types/decimal.md). +- `p` — precision. Optional. If omitted, the initial precision of the first argument is used. Using this parameter can be helpful when extracting data to another DBMS or file. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). **Returned values** -- `1` — Decimal value has more digits then it's precision allow, -- `0` — Decimal value satisfies the specified precision. +- `1` — Decimal value has more digits than its precision allows, +- `0` — Decimal value satisfies the specified precision. **Example** @@ -2169,7 +2169,7 @@ countDigits(x) **Arguments** -- `x` — [Int](../../sql-reference/data-types/int-uint.md) or [Decimal](../../sql-reference/data-types/decimal.md) value. +- `x` — [Int](../../sql-reference/data-types/int-uint.md) or [Decimal](../../sql-reference/data-types/decimal.md) value. **Returned value** Number of digits.
Type: [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). -:::note +:::note For `Decimal` values takes into account their scales: calculates result over underlying integer type which is `(value * scale)`. For example: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. I.e. you may check decimal overflow for `Decimal64` with `countDecimal(x) > 18`. It's a slow variant of [isDecimalOverflow](#is-decimal-overflow). ::: @@ -2201,7 +2201,7 @@ Result: **Returned value** -- Variable name for the error code. +- Variable name for the error code. Type: [LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md). @@ -2230,11 +2230,11 @@ tcpPort() **Arguments** -- None. +- None. **Returned value** -- The TCP port number. +- The TCP port number. Type: [UInt16](../../sql-reference/data-types/int-uint.md). @@ -2256,11 +2256,11 @@ Result: **See Also** -- [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) +- [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) ## currentProfiles -Returns a list of the current [settings profiles](../../operations/access-rights.md#settings-profiles-management) for the current user. +Returns a list of the current [settings profiles](../../guides/sre/user-management/index.md#settings-profiles-management) for the current user. The command [SET PROFILE](../../sql-reference/statements/set.md#query-set) could be used to change the current setting profile. If the command `SET PROFILE` was not used the function returns the profiles specified at the current user's definition (see [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement)). @@ -2272,7 +2272,7 @@ currentProfiles() **Returned value** -- List of the current user settings profiles. +- List of the current user settings profiles. Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). @@ -2288,7 +2288,7 @@ enabledProfiles() **Returned value** -- List of the enabled settings profiles. +- List of the enabled settings profiles. Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). @@ -2304,7 +2304,7 @@ defaultProfiles() **Returned value** -- List of the default settings profiles. +- List of the default settings profiles. Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). @@ -2320,7 +2320,7 @@ currentRoles() **Returned value** -- List of the current roles for the current user. +- List of the current roles for the current user. Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). @@ -2336,13 +2336,13 @@ enabledRoles() **Returned value** -- List of the enabled roles for the current user. +- List of the enabled roles for the current user. Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). ## defaultRoles -Returns the names of the roles which are enabled by default for the current user when he logins. Initially these are all roles granted to the current user (see [GRANT](../../sql-reference/statements/grant/#grant-select)), but that can be changed with the [SET DEFAULT ROLE](../../sql-reference/statements/set-role.md#set-default-role-statement) statement. +Returns the names of the roles which are enabled by default for the current user when they log in.
Initially these are all roles granted to the current user (see [GRANT](../../sql-reference/statements/grant.md#grant-select)), but that can be changed with the [SET DEFAULT ROLE](../../sql-reference/statements/set-role.md#set-default-role-statement) statement. **Syntax** @@ -2352,7 +2352,7 @@ defaultRoles() **Returned value** -- List of the default roles for the current user. +- List of the default roles for the current user. Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). @@ -2368,22 +2368,22 @@ getServerPort(port_name) **Arguments** -- `port_name` — The name of the server port. [String](../../sql-reference/data-types/string.md#string). Possible values: +- `port_name` — The name of the server port. [String](../../sql-reference/data-types/string.md#string). Possible values: - - 'tcp_port' - - 'tcp_port_secure' - - 'http_port' - - 'https_port' - - 'interserver_http_port' - - 'interserver_https_port' - - 'mysql_port' - - 'postgresql_port' - - 'grpc_port' - - 'prometheus.port' + - 'tcp_port' + - 'tcp_port_secure' + - 'http_port' + - 'https_port' + - 'interserver_http_port' + - 'interserver_https_port' + - 'mysql_port' + - 'postgresql_port' + - 'grpc_port' + - 'prometheus.port' **Returned value** -- The number of the server port. +- The number of the server port. Type: [UInt16](../../sql-reference/data-types/int-uint.md). @@ -2417,7 +2417,7 @@ queryID() **Returned value** -- The ID of the current query. +- The ID of the current query. Type: [String](../../sql-reference/data-types/string.md) @@ -2453,7 +2453,7 @@ initialQueryID() **Returned value** -- The ID of the initial current query. +- The ID of the initial current query. Type: [String](../../sql-reference/data-types/string.md) @@ -2488,7 +2488,7 @@ shardNum() **Returned value** -- Shard index or constant `0`. +- Shard index or constant `0`. Type: [UInt32](../../sql-reference/data-types/int-uint.md). @@ -2499,7 +2499,7 @@ In the following example a configuration with two shards is used. The query is e Query: ``` sql -CREATE TABLE shard_num_example (dummy UInt8) +CREATE TABLE shard_num_example (dummy UInt8) ENGINE=Distributed(test_cluster_two_shards_localhost, system, one, dummy); SELECT dummy, shardNum(), shardCount() FROM shard_num_example; ``` @@ -2515,7 +2515,7 @@ Result: **See Also** -- [Distributed Table Engine](../../engines/table-engines/special/distributed.md) +- [Distributed Table Engine](../../engines/table-engines/special/distributed.md) ## shardCount @@ -2530,7 +2530,7 @@ shardCount() **Returned value** -- Total number of shards or `0`. +- Total number of shards or `0`. Type: [UInt32](../../sql-reference/data-types/int-uint.md). @@ -2550,11 +2550,11 @@ getOSKernelVersion() **Arguments** -- None. +- None. **Returned value** -- The current OS kernel version. +- The current OS kernel version. Type: [String](../../sql-reference/data-types/string.md). @@ -2586,11 +2586,11 @@ zookeeperSessionUptime() **Arguments** -- None. +- None. **Returned value** -- Uptime of the current ZooKeeper session in seconds. +- Uptime of the current ZooKeeper session in seconds. Type: [UInt32](../../sql-reference/data-types/int-uint.md). 
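+**Example**
+
+An illustrative query (the returned value depends on how long the server's current ZooKeeper session has been open, and requires a configured ZooKeeper):
+
+``` sql
+SELECT zookeeperSessionUptime();
+```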
diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index eb6866d28ea..e90d537fb74 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -1,58 +1,44 @@ --- slug: /en/sql-reference/functions/random-functions -sidebar_position: 51 -sidebar_label: Pseudo-Random Numbers +sidebar_position: 145 +sidebar_label: Random Numbers --- -# Functions for Generating Pseudo-Random Numbers +# Functions for Generating Random Numbers -All the functions accept zero arguments or one argument. If an argument is passed, it can be any type, and its value is not used for anything. The only purpose of this argument is to prevent common subexpression elimination, so that two different instances of the same function return different columns with different random numbers. +All functions in this section accept zero or one argument. The only use of the argument (if provided) is to prevent [common subexpression +elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) such that two different executions of the same random +function in a query return different random values. -:::note -Non-cryptographic generators of pseudo-random numbers are used. +Related content +- Blog: [Generating random data in ClickHouse](https://clickhouse.com/blog/generating-random-test-distribution-data-for-clickhouse) + +:::note +The random numbers are generated by non-cryptographic algorithms. ::: ## rand, rand32 -Returns a pseudo-random UInt32 number, evenly distributed among all UInt32-type numbers. +Returns a random UInt32 number, evenly distributed across the range of all possible UInt32 numbers. Uses a linear congruential generator. ## rand64 -Returns a pseudo-random UInt64 number, evenly distributed among all UInt64-type numbers. +Returns a random UInt64 number, evenly distributed across the range of all possible UInt64 numbers. Uses a linear congruential generator. ## randCanonical -The function generates pseudo random results with independent and identically distributed uniformly distributed values in [0, 1). -Non-deterministic. Return type is Float64. +Returns a Float64 value, evenly distributed in [0, 1). ## randConstant -Produces a constant column with a random value. - -**Syntax** - -``` sql -randConstant([x]) -``` - -**Arguments** - -- `x` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in any of the [supported data types](../../sql-reference/data-types/index.md#data_types). The resulting value is discarded, but the expression itself if used for bypassing [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter. - -**Returned value** - -- Pseudo-random number. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md). +Like `rand` but produces a constant column with a random value.
**Example** -Query: - ``` sql SELECT rand(), rand(1), rand(number), randConstant(), randConstant(1), randConstant(number) FROM numbers(3) @@ -60,7 +46,7 @@ FROM numbers(3) Result: -``` text +``` result ┌─────rand()─┬────rand(1)─┬─rand(number)─┬─randConstant()─┬─randConstant(1)─┬─randConstant(number)─┐ │ 3047369878 │ 4132449925 │ 4044508545 │ 2740811946 │ 4229401477 │ 1924032898 │ │ 2938880146 │ 1267722397 │ 4154983056 │ 2740811946 │ 4229401477 │ 1924032898 │ @@ -68,17 +54,11 @@ Result: └────────────┴────────────┴──────────────┴────────────────┴─────────────────┴──────────────────────┘ ``` -# Functions for Generating Random Numbers based on Distributions - -:::note -These functions are available starting from 22.10. -::: - - +# Functions for Generating Random Numbers based on a Distribution ## randUniform -Return random number based on [continuous uniform distribution](https://en.wikipedia.org/wiki/Continuous_uniform_distribution) in a specified range from `min` to `max`. +Returns a Float64 drawn uniformly from the interval between `min` and `max` ([continuous uniform distribution](https://en.wikipedia.org/wiki/Continuous_uniform_distribution)). **Syntax** @@ -93,21 +73,19 @@ randUniform(min, max) **Returned value** -- Pseudo-random number. +- Random number. Type: [Float64](/docs/en/sql-reference/data-types/float.md). **Example** -Query: - ``` sql SELECT randUniform(5.5, 10) FROM numbers(5) ``` Result: -``` text +``` result ┌─randUniform(5.5, 10)─┐ │ 8.094978491443102 │ │ 7.3181248914450885 │ @@ -117,40 +95,36 @@ Result: └──────────────────────┘ ``` - - ## randNormal -Return random number based on [normal distribution](https://en.wikipedia.org/wiki/Normal_distribution). +Returns a Float64 drawn from a [normal distribution](https://en.wikipedia.org/wiki/Normal_distribution). **Syntax** ``` sql -randNormal(meam, variance) +randNormal(mean, variance) ``` **Arguments** -- `meam` - `Float64` mean value of distribution, +- `mean` - `Float64` - mean value of distribution, - `variance` - `Float64` - [variance](https://en.wikipedia.org/wiki/Variance). **Returned value** -- Pseudo-random number. +- Random number. Type: [Float64](/docs/en/sql-reference/data-types/float.md). **Example** -Query: - ``` sql SELECT randNormal(10, 2) FROM numbers(5) ``` Result: -``` text +``` result ┌──randNormal(10, 2)─┐ │ 13.389228911709653 │ │ 8.622949707401295 │ @@ -160,40 +134,36 @@ Result: └────────────────────┘ ``` - - ## randLogNormal -Return random number based on [log-normal distribution](https://en.wikipedia.org/wiki/Log-normal_distribution). +Returns a Float64 drawn from a [log-normal distribution](https://en.wikipedia.org/wiki/Log-normal_distribution). **Syntax** ``` sql -randLogNormal(meam, variance) +randLogNormal(mean, variance) ``` **Arguments** -- `meam` - `Float64` mean value of distribution, +- `mean` - `Float64` - mean value of distribution, - `variance` - `Float64` - [variance](https://en.wikipedia.org/wiki/Variance). **Returned value** -- Pseudo-random number. +- Random number. Type: [Float64](/docs/en/sql-reference/data-types/float.md). **Example** -Query: - ``` sql SELECT randLogNormal(100, 5) FROM numbers(5) ``` Result: -``` text +``` result ┌─randLogNormal(100, 5)─┐ │ 1.295699673937363e48 │ │ 9.719869109186684e39 │ @@ -203,11 +173,9 @@ Result: └───────────────────────┘ ``` - - ## randBinomial -Return random number based on [binomial distribution](https://en.wikipedia.org/wiki/Binomial_distribution). 
+Returns a UInt64 drawn from a [binomial distribution](https://en.wikipedia.org/wiki/Binomial_distribution). **Syntax** @@ -217,26 +185,24 @@ randBinomial(experiments, probability) **Arguments** -- `experiments` - `UInt64` number of experiments, +- `experiments` - `UInt64` - number of experiments, - `probability` - `Float64` - probability of success in each experiment (values in `0...1` range only). **Returned value** -- Pseudo-random number. +- Random number. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** -Query: - ``` sql SELECT randBinomial(100, .75) FROM numbers(5) ``` Result: -``` text +``` result ┌─randBinomial(100, 0.75)─┐ │ 74 │ │ 78 │ @@ -246,11 +212,9 @@ Result: └─────────────────────────┘ ``` - - ## randNegativeBinomial -Return random number based on [negative binomial distribution](https://en.wikipedia.org/wiki/Negative_binomial_distribution). +Returns a UInt64 drawn from a [negative binomial distribution](https://en.wikipedia.org/wiki/Negative_binomial_distribution). **Syntax** @@ -260,26 +224,24 @@ randNegativeBinomial(experiments, probability) **Arguments** -- `experiments` - `UInt64` number of experiments, +- `experiments` - `UInt64` - number of experiments, - `probability` - `Float64` - probability of failure in each experiment (values in `0...1` range only). **Returned value** -- Pseudo-random number. +- Random number. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** -Query: - ``` sql SELECT randNegativeBinomial(100, .75) FROM numbers(5) ``` Result: -``` text +``` result ┌─randNegativeBinomial(100, 0.75)─┐ │ 33 │ │ 32 │ @@ -289,11 +251,9 @@ Result: └─────────────────────────────────┘ ``` - - ## randPoisson -Return random number based on [Poisson distribution](https://en.wikipedia.org/wiki/Poisson_distribution). +Returns a UInt64 drawn from a [Poisson distribution](https://en.wikipedia.org/wiki/Poisson_distribution). **Syntax** @@ -303,25 +263,23 @@ randPoisson(n) **Arguments** -- `n` - `UInt64` mean number of occurrences. +- `n` - `UInt64` - mean number of occurrences. **Returned value** -- Pseudo-random number. +- Random number. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** -Query: - ``` sql SELECT randPoisson(10) FROM numbers(5) ``` Result: -``` text +``` result ┌─randPoisson(10)─┐ │ 8 │ │ 8 │ @@ -331,11 +289,9 @@ Result: └─────────────────┘ ``` - - ## randBernoulli -Return random number based on [Bernoulli distribution](https://en.wikipedia.org/wiki/Bernoulli_distribution). +Returns a UInt64 drawn from a [Bernoulli distribution](https://en.wikipedia.org/wiki/Bernoulli_distribution). **Syntax** @@ -349,21 +305,19 @@ randBernoulli(probability) **Returned value** -- Pseudo-random number. +- Random number. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** -Query: - ``` sql SELECT randBernoulli(.75) FROM numbers(5) ``` Result: -``` text +``` result ┌─randBernoulli(0.75)─┐ │ 1 │ │ 1 │ @@ -373,11 +327,9 @@ Result: └─────────────────────┘ ``` - - ## randExponential -Return random number based on [exponential distribution](https://en.wikipedia.org/wiki/Exponential_distribution). +Returns a Float64 drawn from an [exponential distribution](https://en.wikipedia.org/wiki/Exponential_distribution). **Syntax** @@ -387,25 +339,23 @@ randExponential(lambda) **Arguments** -- `lambda` - `Float64` lambda value. +- `lambda` - `Float64` - lambda value. **Returned value** -- Pseudo-random number. +- Random number. Type: [Float64](/docs/en/sql-reference/data-types/float.md).
**Example** -Query: - ``` sql SELECT randExponential(1/10) FROM numbers(5) ``` Result: -``` text +``` result ┌─randExponential(divide(1, 10))─┐ │ 44.71628934340778 │ │ 4.211013337903262 │ @@ -415,11 +365,9 @@ Result: └────────────────────────────────┘ ``` - - ## randChiSquared -Return random number based on [Chi-square distribution](https://en.wikipedia.org/wiki/Chi-squared_distribution) - a distribution of a sum of the squares of k independent standard normal random variables. +Returns a Float64 drawn from a [Chi-square distribution](https://en.wikipedia.org/wiki/Chi-squared_distribution) - a distribution of a sum of the squares of k independent standard normal random variables. **Syntax** @@ -429,25 +377,23 @@ randChiSquared(degree_of_freedom) **Arguments** -- `degree_of_freedom` - `Float64` degree of freedom. +- `degree_of_freedom` - `Float64` - degree of freedom. **Returned value** -- Pseudo-random number. +- Random number. Type: [Float64](/docs/en/sql-reference/data-types/float.md). **Example** -Query: - ``` sql SELECT randChiSquared(10) FROM numbers(5) ``` Result: -``` text +``` result ┌─randChiSquared(10)─┐ │ 10.015463656521543 │ │ 9.621799919882768 │ @@ -457,11 +403,9 @@ Result: └────────────────────┘ ``` - - ## randStudentT -Return random number based on [Student's t-distribution](https://en.wikipedia.org/wiki/Student%27s_t-distribution). +Returns a Float64 drawn from a [Student's t-distribution](https://en.wikipedia.org/wiki/Student%27s_t-distribution). **Syntax** @@ -471,25 +415,23 @@ randStudentT(degree_of_freedom) **Arguments** -- `degree_of_freedom` - `Float64` degree of freedom. +- `degree_of_freedom` - `Float64` - degree of freedom. **Returned value** -- Pseudo-random number. +- Random number. Type: [Float64](/docs/en/sql-reference/data-types/float.md). **Example** -Query: - ``` sql SELECT randStudentT(10) FROM numbers(5) ``` Result: -``` text +``` result ┌─────randStudentT(10)─┐ │ 1.2217309938538725 │ │ 1.7941971681200541 │ @@ -499,11 +441,9 @@ Result: └──────────────────────┘ ``` - - ## randFisherF -Return random number based on [F-distribution](https://en.wikipedia.org/wiki/F-distribution). +Returns a Float64 drawn from an [F-distribution](https://en.wikipedia.org/wiki/F-distribution). **Syntax** @@ -513,26 +453,24 @@ randFisherF(d1, d2) **Arguments** -- `d1` - `Float64` d1 degree of freedom in `X = (S1 / d1) / (S2 / d2)`, -- `d2` - `Float64` d2 degree of freedom in `X = (S1 / d1) / (S2 / d2)`, +- `d1` - `Float64` - d1 degree of freedom in `X = (S1 / d1) / (S2 / d2)`, +- `d2` - `Float64` - d2 degree of freedom in `X = (S1 / d1) / (S2 / d2)`, **Returned value** -- Pseudo-random number. +- Random number. Type: [Float64](/docs/en/sql-reference/data-types/float.md). **Example** -Query: - ``` sql SELECT randFisherF(10, 3) FROM numbers(5) ``` Result: -``` text +``` result ┌──randFisherF(10, 3)─┐ │ 7.286287504216609 │ │ 0.26590779413050386 │ @@ -542,35 +480,61 @@ Result: └─────────────────────┘ ``` - - - -# Random Functions for Working with Strings +# Functions for Generating Random Strings ## randomString +Returns a random String of specified `length`. Not all characters may be printable. + +**Syntax** + +```sql +randomString(length) +``` + ## randomFixedString +Like `randomString` but returns a FixedString. + ## randomPrintableASCII +Returns a random String of specified `length`. All characters are printable. + +**Syntax** + +```sql +randomPrintableASCII(length) +``` + ## randomStringUTF8 +Returns a random String containing `length` many UTF8 codepoints.
Not all characters may be printable. + +**Syntax** + +```sql +randomStringUTF8(length) +``` + ## fuzzBits -**Syntax** -``` sql -fuzzBits([s], [prob]) -``` -Inverts bits of `s`, each with probability `prob`. +Inverts the bits of String or FixedString `s`, each with probability `prob`. + +**Syntax** + +``` sql +fuzzBits(s, prob) +``` **Arguments** - `s` - `String` or `FixedString` - `prob` - constant `Float32/64` **Returned value** -Fuzzed string with same as s type. + +Fuzzed string with same type as `s`. **Example** @@ -581,13 +545,10 @@ FROM numbers(3) Result: -``` text +``` result ┌─fuzzBits(materialize('abacaba'), 0.1)─┐ │ abaaaja │ │ a*cjab+ │ │ aeca2A │ └───────────────────────────────────────┘ ``` - -## Related content -- Blog: [Generating random data in ClickHouse](https://clickhouse.com/blog/generating-random-test-distribution-data-for-clickhouse) diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md index 01ee720cfd3..e9a0ed72466 100644 --- a/docs/en/sql-reference/functions/rounding-functions.md +++ b/docs/en/sql-reference/functions/rounding-functions.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/functions/rounding-functions -sidebar_position: 45 +sidebar_position: 155 sidebar_label: Rounding --- @@ -38,11 +38,11 @@ round(expression [, decimal_places]) **Arguments** -- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). -- `decimal-places` — An integer value. - - If `decimal-places > 0` then the function rounds the value to the right of the decimal point. - - If `decimal-places < 0` then the function rounds the value to the left of the decimal point. - - If `decimal-places = 0` then the function rounds the value to integer. In this case the argument can be omitted. +- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). +- `decimal-places` — An integer value. + - If `decimal-places > 0` then the function rounds the value to the right of the decimal point. + - If `decimal-places < 0` then the function rounds the value to the left of the decimal point. + - If `decimal-places = 0` then the function rounds the value to integer. In this case the argument can be omitted. **Returned value:** @@ -101,27 +101,27 @@ round(3.65, 1) = 3.6 **See Also** -- [roundBankers](#roundbankers) +- [roundBankers](#roundbankers) ## roundBankers Rounds a number to a specified decimal position. -- If the rounding number is halfway between two numbers, the function uses banker’s rounding. +- If the rounding number is halfway between two numbers, the function uses banker’s rounding. Banker's rounding is a method of rounding fractional numbers. When the rounding number is halfway between two numbers, it's rounded to the nearest even digit at the specified decimal position. For example: 3.5 rounds up to 4, 2.5 rounds down to 2. It's the default rounding method for floating point numbers defined in [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754#Roundings_to_nearest). The [round](#rounding_functions-round) function performs the same rounding for floating point numbers. The `roundBankers` function also rounds integers the same way, for example, `roundBankers(45, -1) = 40`. -- In other cases, the function rounds numbers to the nearest integer.
+- In other cases, the function rounds numbers to the nearest integer. Using banker’s rounding, you can reduce the effect that rounding numbers has on the results of summing or subtracting these numbers. For example, sum numbers 1.5, 2.5, 3.5, 4.5 with different rounding: -- No rounding: 1.5 + 2.5 + 3.5 + 4.5 = 12. -- Banker’s rounding: 2 + 2 + 4 + 4 = 12. -- Rounding to the nearest integer: 2 + 3 + 4 + 5 = 14. +- No rounding: 1.5 + 2.5 + 3.5 + 4.5 = 12. +- Banker’s rounding: 2 + 2 + 4 + 4 = 12. +- Rounding to the nearest integer: 2 + 3 + 4 + 5 = 14. **Syntax** @@ -131,11 +131,11 @@ roundBankers(expression [, decimal_places]) **Arguments** -- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). -- `decimal-places` — Decimal places. An integer number. - - `decimal-places > 0` — The function rounds the number to the given position right of the decimal point. Example: `roundBankers(3.55, 1) = 3.6`. - - `decimal-places < 0` — The function rounds the number to the given position left of the decimal point. Example: `roundBankers(24.55, -1) = 20`. - - `decimal-places = 0` — The function rounds the number to an integer. In this case the argument can be omitted. Example: `roundBankers(2.5) = 2`. +- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). +- `decimal-places` — Decimal places. An integer number. + - `decimal-places > 0` — The function rounds the number to the given position right of the decimal point. Example: `roundBankers(3.55, 1) = 3.6`. + - `decimal-places < 0` — The function rounds the number to the given position left of the decimal point. Example: `roundBankers(24.55, -1) = 20`. + - `decimal-places = 0` — The function rounds the number to an integer. In this case the argument can be omitted. Example: `roundBankers(2.5) = 2`. **Returned value** @@ -182,7 +182,7 @@ roundBankers(10.755, 2) = 10.76 **See Also** -- [round](#rounding_functions-round) +- [round](#rounding_functions-round) ## roundToExp2(num) @@ -194,7 +194,14 @@ Accepts a number. If the number is less than one, it returns 0. Otherwise, it ro ## roundAge(num) -Accepts a number. If the number is less than 18, it returns 0. Otherwise, it rounds the number down to a number from the set: 18, 25, 35, 45, 55. +Accepts a number. If the number is +- smaller than 1, it returns 0, +- between 1 and 17, it returns 17, +- between 18 and 24, it returns 18, +- between 25 and 34, it returns 25, +- between 35 and 44, it returns 35, +- between 45 and 54, it returns 45, +- 55 or larger, it returns 55.
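+The bucketing can be checked directly; the expected values in the comment below follow from the rules above (illustrative query, not part of the reference output):
+
+``` sql
+SELECT roundAge(0), roundAge(5), roundAge(20), roundAge(30), roundAge(60);
+-- expected per the rules above: 0, 17, 18, 25, 55
+```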
## roundDown(num, arr) diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md index 6015bb79b87..7336e53fc24 100644 --- a/docs/en/sql-reference/functions/splitting-merging-functions.md +++ b/docs/en/sql-reference/functions/splitting-merging-functions.md @@ -1,14 +1,14 @@ --- slug: /en/sql-reference/functions/splitting-merging-functions -sidebar_position: 47 -sidebar_label: Splitting and Merging Strings and Arrays +sidebar_position: 165 +sidebar_label: Splitting Strings --- -# Functions for Splitting and Merging Strings and Arrays +# Functions for Splitting Strings -## splitByChar(separator, s[, max_substrings]) +## splitByChar -Splits a string into substrings separated by a specified character. It uses a constant string `separator` which consists of exactly one character. +Splits a string into substrings separated by a specified character. Uses a constant string `separator` which consists of exactly one character. Returns an array of selected substrings. Empty substrings may be selected if the separator occurs at the beginning or end of the string, or if there are multiple consecutive separators. **Syntax** @@ -19,17 +19,17 @@ splitByChar(separator, s[, max_substrings])) **Arguments** -- `separator` — The separator which should contain exactly one character. [String](../../sql-reference/data-types/string.md). -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). -- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. +- `separator` — The separator which should contain exactly one character. [String](../../sql-reference/data-types/string.md). +- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** Returns an array of selected substrings. Empty substrings may be selected when: -- A separator occurs at the beginning or end of the string; -- There are multiple consecutive separators; -- The original string `s` is empty. +- A separator occurs at the beginning or end of the string; +- There are multiple consecutive separators; +- The original string `s` is empty. Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). @@ -39,13 +39,15 @@ Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-refere SELECT splitByChar(',', '1,2,3,abcde'); ``` +Result: + ``` text ┌─splitByChar(',', '1,2,3,abcde')─┐ │ ['1','2','3','abcde'] │ └─────────────────────────────────┘ ``` -## splitByString(separator, s[, max_substrings]) +## splitByString Splits a string into substrings separated by a string. It uses a constant string `separator` of multiple characters as the separator. If the string `separator` is empty, it will split the string `s` into an array of single characters. @@ -57,9 +59,9 @@ splitByString(separator, s[, max_substrings])) **Arguments** -- `separator` — The separator. [String](../../sql-reference/data-types/string.md). -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). -- `max_substrings` — An optional `Int64` defaulting to 0. 
When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. +- `separator` — The separator. [String](../../sql-reference/data-types/string.md). +- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** @@ -68,9 +70,9 @@ Returns an array of selected substrings. Empty substrings may be selected when: Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). -- A non-empty separator occurs at the beginning or end of the string; -- There are multiple consecutive non-empty separators; -- The original string `s` is empty while the separator is not empty. +- A non-empty separator occurs at the beginning or end of the string; +- There are multiple consecutive non-empty separators; +- The original string `s` is empty while the separator is not empty. **Example** @@ -78,6 +80,8 @@ Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-refere SELECT splitByString(', ', '1, 2 3, 4,5, abcde'); ``` +Result: + ``` text ┌─splitByString(', ', '1, 2 3, 4,5, abcde')─┐ │ ['1','2 3','4,5','abcde'] │ @@ -88,13 +92,15 @@ SELECT splitByString(', ', '1, 2 3, 4,5, abcde'); SELECT splitByString('', 'abcde'); ``` +Result: + ``` text ┌─splitByString('', 'abcde')─┐ │ ['a','b','c','d','e'] │ └────────────────────────────┘ ``` -## splitByRegexp(regexp, s[, max_substrings]) +## splitByRegexp Splits a string into substrings separated by a regular expression. It uses a regular expression string `regexp` as the separator. If the `regexp` is empty, it will split the string `s` into an array of single characters. If no match is found for this regular expression, the string `s` won't be split. @@ -106,25 +112,23 @@ splitByRegexp(regexp, s[, max_substrings])) **Arguments** -- `regexp` — Regular expression. Constant. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). -- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. +- `regexp` — Regular expression. Constant. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** Returns an array of selected substrings. Empty substrings may be selected when: -- A non-empty regular expression match occurs at the beginning or end of the string; -- There are multiple consecutive non-empty regular expression matches; -- The original string `s` is empty while the regular expression is not empty. +- A non-empty regular expression match occurs at the beginning or end of the string; +- There are multiple consecutive non-empty regular expression matches; +- The original string `s` is empty while the regular expression is not empty. 
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). **Example** -Query: - ``` sql SELECT splitByRegexp('\\d+', 'a12bc23de345f'); ``` @@ -137,8 +141,6 @@ Result: └────────────────────────────────────────┘ ``` -Query: - ``` sql SELECT splitByRegexp('', 'abcde'); ``` @@ -151,7 +153,7 @@ Result: └────────────────────────────┘ ``` -## splitByWhitespace(s[, max_substrings]) +## splitByWhitespace Splits a string into substrings separated by whitespace characters. Returns an array of selected substrings. @@ -164,8 +166,8 @@ splitByWhitespace(s[, max_substrings])) **Arguments** -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). -- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. +- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** @@ -180,13 +182,15 @@ Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-refere SELECT splitByWhitespace(' 1! a, b. '); ``` +Result: + ``` text ┌─splitByWhitespace(' 1! a, b. ')─┐ │ ['1!','a,','b.'] │ └─────────────────────────────────────┘ ``` -## splitByNonAlpha(s[, max_substrings]) +## splitByNonAlpha Splits a string into substrings separated by whitespace and punctuation characters. Returns an array of selected substrings. @@ -199,8 +203,8 @@ splitByNonAlpha(s[, max_substrings])) **Arguments** -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). -- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. +- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** @@ -221,23 +225,32 @@ SELECT splitByNonAlpha(' 1! a, b. '); └───────────────────────────────────┘ ``` -## arrayStringConcat(arr\[, separator\]) +## arrayStringConcat Concatenates string representations of values listed in the array with the separator. `separator` is an optional parameter: a constant string, set to an empty string by default. Returns the string. +**Syntax** + +```sql +arrayStringConcat(arr\[, separator\]) +``` + **Example** ``` sql SELECT arrayStringConcat(['12/05/2021', '12:50:00'], ' ') AS DateString; ``` + +Result: + ```text ┌─DateString──────────┐ │ 12/05/2021 12:50:00 │ └─────────────────────┘ ``` -## alphaTokens(s[, max_substrings]), splitByAlpha(s[, max_substrings]) +## alphaTokens Selects substrings of consecutive bytes from the ranges a-z and A-Z.Returns an array of substrings. @@ -245,13 +258,14 @@ Selects substrings of consecutive bytes from the ranges a-z and A-Z.Returns an a ``` sql alphaTokens(s[, max_substrings])) -splitByAlpha(s[, max_substrings]) ``` +Alias: `splitByAlpha` + **Arguments** -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). 
-- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. +- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** @@ -221,23 +225,32 @@ SELECT splitByNonAlpha(' 1! a, b. '); └───────────────────────────────────┘ ``` -## arrayStringConcat(arr\[, separator\]) +## arrayStringConcat Concatenates string representations of values listed in the array with the separator. `separator` is an optional parameter: a constant string, set to an empty string by default. Returns the string. +**Syntax** + +```sql +arrayStringConcat(arr[, separator]) +``` + **Example** ``` sql SELECT arrayStringConcat(['12/05/2021', '12:50:00'], ' ') AS DateString; ``` + +Result: + ```text ┌─DateString──────────┐ │ 12/05/2021 12:50:00 │ └─────────────────────┘ ``` -## alphaTokens(s[, max_substrings]), splitByAlpha(s[, max_substrings]) +## alphaTokens Selects substrings of consecutive bytes from the ranges a-z and A-Z.Returns an array of substrings. **Syntax** ``` sql alphaTokens(s[, max_substrings])) -splitByAlpha(s[, max_substrings]) ``` +Alias: `splitByAlpha` + **Arguments** -- `s` — The string to split. [String](../../sql-reference/data-types/string.md).
**Example** -Query: - ``` sql SELECT tokens('test1,;\\ test2,;\\ test3,;\\ test4') AS tokens; ``` diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 845be6e04c7..8662d08431c 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -1,50 +1,46 @@ --- slug: /en/sql-reference/functions/string-functions -sidebar_position: 40 +sidebar_position: 170 sidebar_label: Strings --- # Functions for Working with Strings -:::note -Functions for [searching](../../sql-reference/functions/string-search-functions.md) and [replacing](../../sql-reference/functions/string-replace-functions.md) in strings are described separately. -::: +Functions for [searching](string-search-functions.md) in strings and for [replacing](string-replace-functions.md) in strings are described separately. ## empty Checks whether the input string is empty. +A string is considered non-empty if it contains at least one byte, even if this byte is a space or the null byte. + +The function is also available for [arrays](array-functions.md#function-empty) and [UUIDs](uuid-functions.md#empty). + **Syntax** ``` sql empty(x) ``` -A string is considered non-empty if it contains at least one byte, even if this is a space or a null byte. - -The function also works for [arrays](array-functions.md#function-empty) or [UUID](uuid-functions.md#empty). - **Arguments** -- `x` — Input value. [String](../data-types/string.md). +- `x` — Input value. [String](../data-types/string.md). **Returned value** -- Returns `1` for an empty string or `0` for a non-empty string. +- Returns `1` for an empty string or `0` for a non-empty string. Type: [UInt8](../data-types/int-uint.md). **Example** -Query: - ```sql SELECT empty(''); ``` Result: -```text +```result ┌─empty('')─┐ │ 1 │ └───────────┘ @@ -54,37 +50,35 @@ Result: Checks whether the input string is non-empty. +A string is considered non-empty if it contains at least one byte, even if this byte is a space or the null byte. + +The function is also available for [arrays](array-functions.md#function-notempty) and [UUIDs](uuid-functions.md#notempty). + **Syntax** ``` sql notEmpty(x) ``` -A string is considered non-empty if it contains at least one byte, even if this is a space or a null byte. - -The function also works for [arrays](array-functions.md#function-notempty) or [UUID](uuid-functions.md#notempty). - **Arguments** -- `x` — Input value. [String](../data-types/string.md). +- `x` — Input value. [String](../data-types/string.md). **Returned value** -- Returns `1` for a non-empty string or `0` for an empty string string. +- Returns `1` for a non-empty string or `0` for an empty string string. Type: [UInt8](../data-types/int-uint.md). **Example** -Query: - ```sql SELECT notEmpty('text'); ``` Result: -```text +```result ┌─notEmpty('text')─┐ │ 1 │ └──────────────────┘ @@ -92,58 +86,51 @@ Result: ## length -Returns the length of a string in bytes (not in characters, and not in code points). -The result type is UInt64. +Returns the length of a string in bytes (not: in characters or Unicode code points). + The function also works for arrays. ## lengthUTF8 -Returns the length of a string in Unicode code points (not in characters), assuming that the string contains a set of bytes that make up UTF-8 encoded text. If this assumption is not met, it returns some result (it does not throw an exception). -The result type is UInt64. 
+Returns the length of a string in Unicode code points (not: in bytes or characters). It assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. -## char_length, CHAR_LENGTH - -Returns the length of a string in Unicode code points (not in characters), assuming that the string contains a set of bytes that make up UTF-8 encoded text. If this assumption is not met, it returns some result (it does not throw an exception). -The result type is UInt64. - -## character_length, CHARACTER_LENGTH - -Returns the length of a string in Unicode code points (not in characters), assuming that the string contains a set of bytes that make up UTF-8 encoded text. If this assumption is not met, it returns some result (it does not throw an exception). -The result type is UInt64. +Alias: +- `CHAR_LENGTH` +- `CHARACTER_LENGTH` ## leftPad -Pads the current string from the left with spaces or a specified string (multiple times, if needed) until the resulting string reaches the given length. Similarly to the MySQL `LPAD` function. +Pads a string from the left with spaces or with a specified string (multiple times, if needed) until the resulting string reaches the specified `length`. **Syntax** ``` sql -leftPad('string', 'length'[, 'pad_string']) +leftPad(string, length[, pad_string]) ``` +Alias: `LPAD` + **Arguments** -- `string` — Input string that needs to be padded. [String](../data-types/string.md). -- `length` — The length of the resulting string. [UInt or Int](../data-types/int-uint.md). If the value is less than the input string length, then the input string is shortened to `length` characters. -- `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces. +- `string` — Input string that should be padded. [String](../data-types/string.md). +- `length` — The length of the resulting string. [UInt or Int](../data-types/int-uint.md). If the value is smaller than the input string length, then the input string is shortened to `length` characters. +- `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces. **Returned value** -- The resulting string of the given length. +- A left-padded string of the given length. Type: [String](../data-types/string.md). **Example** -Query: - ``` sql SELECT leftPad('abc', 7, '*'), leftPad('def', 7); ``` Result: -``` text +```result ┌─leftPad('abc', 7, '*')─┬─leftPad('def', 7)─┐ │ ****abc │ def │ └────────────────────────┴───────────────────┘ @@ -151,37 +138,35 @@ Result: ## leftPadUTF8 -Pads the current string from the left with spaces or a specified string (multiple times, if needed) until the resulting string reaches the given length. Similarly to the MySQL `LPAD` function. While in the [leftPad](#leftpad) function the length is measured in bytes, here in the `leftPadUTF8` function it is measured in code points. +Pads the string from the left with spaces or a specified string (multiple times, if needed) until the resulting string reaches the given length. Unlike [leftPad](#leftpad) which measures the string length in bytes, the string length is measured in code points. **Syntax** ``` sql -leftPadUTF8('string','length'[, 'pad_string']) +leftPadUTF8(string, length[, pad_string]) ``` **Arguments** -- `string` — Input string that needs to be padded. [String](../data-types/string.md). 
-- `length` — The length of the resulting string. [UInt or Int](../data-types/int-uint.md). If the value is less than the input string length, then the input string is shortened to `length` characters. -- `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces. +- `string` — Input string that should be padded. [String](../data-types/string.md). +- `length` — The length of the resulting string. [UInt or Int](../data-types/int-uint.md). If the value is smaller than the input string length, then the input string is shortened to `length` characters. +- `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces. **Returned value** -- The resulting string of the given length. +- A left-padded string of the given length. Type: [String](../data-types/string.md). **Example** -Query: - ``` sql SELECT leftPadUTF8('абвг', 7, '*'), leftPadUTF8('дежз', 7); ``` Result: -``` text +```result ┌─leftPadUTF8('абвг', 7, '*')─┬─leftPadUTF8('дежз', 7)─┐ │ ***абвг │ дежз │ └─────────────────────────────┴────────────────────────┘ ``` @@ -189,37 +174,37 @@ Result: ## rightPad -Pads the current string from the right with spaces or a specified string (multiple times, if needed) until the resulting string reaches the given length. Similarly to the MySQL `RPAD` function. +Pads a string from the right with spaces or with a specified string (multiple times, if needed) until the resulting string reaches the specified `length`. **Syntax** ``` sql -rightPad('string', 'length'[, 'pad_string']) +rightPad(string, length[, pad_string]) ``` +Alias: `RPAD` + **Arguments** -- `string` — Input string that needs to be padded. [String](../data-types/string.md). -- `length` — The length of the resulting string. [UInt or Int](../data-types/int-uint.md). If the value is less than the input string length, then the input string is shortened to `length` characters. -- `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces. +- `string` — Input string that should be padded. [String](../data-types/string.md). +- `length` — The length of the resulting string. [UInt or Int](../data-types/int-uint.md). If the value is smaller than the input string length, then the input string is shortened to `length` characters. +- `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces. **Returned value** -- The resulting string of the given length. +- A right-padded string of the given length. Type: [String](../data-types/string.md). **Example** -Query: - ``` sql SELECT rightPad('abc', 7, '*'), rightPad('abc', 7); ``` Result: -``` text +```result ┌─rightPad('abc', 7, '*')─┬─rightPad('abc', 7)─┐ │ abc**** │ abc │ └─────────────────────────┴────────────────────┘ @@ -227,81 +212,89 @@ Result: ## rightPadUTF8 -Pads the current string from the right with spaces or a specified string (multiple times, if needed) until the resulting string reaches the given length. Similarly to the MySQL `RPAD` function. While in the [rightPad](#rightpad) function the length is measured in bytes, here in the `rightPadUTF8` function it is measured in code points. 
+Pads the string from the right with spaces or a specified string (multiple times, if needed) until the resulting string reaches the given length. Unlike [rightPad](#rightpad) which measures the string length in bytes, the string length is measured in code points. **Syntax** ``` sql -rightPadUTF8('string','length'[, 'pad_string']) +rightPadUTF8(string, length[, pad_string]) ``` **Arguments** -- `string` — Input string that needs to be padded. [String](../data-types/string.md). -- `length` — The length of the resulting string. [UInt or Int](../data-types/int-uint.md). If the value is less than the input string length, then the input string is shortened to `length` characters. -- `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces. +- `string` — Input string that should be padded. [String](../data-types/string.md). +- `length` — The length of the resulting string. [UInt or Int](../data-types/int-uint.md). If the value is smaller than the input string length, then the input string is shortened to `length` characters. +- `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces. **Returned value** -- The resulting string of the given length. +- A right-padded string of the given length. Type: [String](../data-types/string.md). **Example** -Query: - ``` sql SELECT rightPadUTF8('абвг', 7, '*'), rightPadUTF8('абвг', 7); ``` Result: -``` text +```result ┌─rightPadUTF8('абвг', 7, '*')─┬─rightPadUTF8('абвг', 7)─┐ │ абвг*** │ абвг │ └──────────────────────────────┴─────────────────────────┘ ``` -## lower, lcase +## lower -Converts ASCII Latin symbols in a string to lowercase. +Converts the ASCII Latin symbols in a string to lowercase. -## upper, ucase +Alias: `lcase` -Converts ASCII Latin symbols in a string to uppercase. +## upper + +Converts the ASCII Latin symbols in a string to uppercase. + +Alias: `ucase` ## lowerUTF8 -Converts a string to lowercase, assuming the string contains a set of bytes that make up a UTF-8 encoded text. -It does not detect the language. E.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). +Converts a string to lowercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. + +Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). + If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. -If the string contains a sequence of bytes that are not valid UTF-8, then the behavior is undefined. ## upperUTF8 -Converts a string to uppercase, assuming the string contains a set of bytes that make up a UTF-8 encoded text. -It does not detect the language. E.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). +Converts a string to uppercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. + +Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). + If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. 
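+ +**Example** + +A brief sketch of the conversion described above; the expected output is stated from the documented semantics rather than captured from a server: + +``` sql +SELECT lowerUTF8('MÜNCHEN'); +``` + +This is expected to return `münchen`. 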
-If the string contains a sequence of bytes that are not valid UTF-8, then the behavior is undefined. ## isValidUTF8 -Returns 1, if the set of bytes is valid UTF-8 encoded, otherwise 0. +Returns 1, if the set of bytes constitutes valid UTF-8-encoded text, otherwise 0. ## toValidUTF8 Replaces invalid UTF-8 characters by the `�` (U+FFFD) character. All running in a row invalid characters are collapsed into the one replacement character. +**Syntax** + ``` sql toValidUTF8(input_string) ``` **Arguments** -- `input_string` — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object. +- `input_string` — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object. -Returned value: Valid UTF-8 string. +**Returned value** + +- A valid UTF-8 string. **Example** @@ -309,7 +302,7 @@ Returned value: Valid UTF-8 string. SELECT toValidUTF8('\x61\xF0\x80\x80\x80b'); ``` -``` text +```result ┌─toValidUTF8('a����b')─┐ │ a�b │ └───────────────────────┘ ``` ## repeat -Repeats a string as many times as specified and concatenates the replicated values as a single string. - -Alias: `REPEAT`. +Concatenates a string with itself as many times as specified. **Syntax** @@ -327,14 +318,52 @@ Alias: `REPEAT`. repeat(s, n) ``` +Alias: `REPEAT` + **Arguments** -- `s` — The string to repeat. [String](../../sql-reference/data-types/string.md). -- `n` — The number of times to repeat the string. [UInt](../../sql-reference/data-types/int-uint.md). +- `s` — The string to repeat. [String](../../sql-reference/data-types/string.md). +- `n` — The number of times to repeat the string. [UInt* or Int*](../../sql-reference/data-types/int-uint.md). **Returned value** -The single string, which contains the string `s` repeated `n` times. If `n` \< 1, the function returns empty string. +A string containing string `s` repeated `n` times. If `n` <= 0, the function returns the empty string. + +Type: `String`. + +**Example** + +``` sql +SELECT repeat('abc', 10); +``` + +Result: + +```result +┌─repeat('abc', 10)──────────────┐ +│ abcabcabcabcabcabcabcabcabcabc │ +└────────────────────────────────┘ +``` + +## space + +Concatenates a space (` `) with itself as many times as specified. + +**Syntax** + +``` sql +space(n) +``` + +Alias: `SPACE` + +**Arguments** + +- `n` — The number of times to repeat the space. [UInt* or Int*](../../sql-reference/data-types/int-uint.md). + +**Returned value** + +A string containing string ` ` repeated `n` times. If `n` <= 0, the function returns the empty string. Type: `String`. @@ -343,44 +372,54 @@ Type: `String`. Query: ``` sql -SELECT repeat('abc', 10); +SELECT space(3); ``` Result: ``` text -┌─repeat('abc', 10)──────────────┐ -│ abcabcabcabcabcabcabcabcabcabc │ -└────────────────────────────────┘ +┌─space(3)─────┐ +│ │ +└──────────────┘ ``` ## reverse -Reverses the string (as a sequence of bytes). +Reverses the sequence of bytes in a string. ## reverseUTF8 -Reverses a sequence of Unicode code points, assuming that the string contains a set of bytes representing a UTF-8 text. Otherwise, it does something else (it does not throw an exception). +Reverses a sequence of Unicode code points in a string. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. -## format(pattern, s0, s1, …) +## format -Formatting constant pattern with the string listed in the arguments. 
`pattern` is a simplified Python format pattern. Format string contains “replacement fields” surrounded by curly braces `{}`. Anything that is not contained in braces is considered literal text, which is copied unchanged to the output. If you need to include a brace character in the literal text, it can be escaped by doubling: `{{ '{{' }}` and `{{ '}}' }}`. Field names can be numbers (starting from zero) or empty (then they are treated as consequence numbers). +Formats the `pattern` string with the strings listed in the arguments, similar to formatting in Python. The pattern string can contain replacement fields surrounded by curly braces `{}`. Anything not contained in braces is considered literal text and copied verbatim into the output. A literal brace character can be escaped by doubling it: `{{ '{{' }}` and `{{ '}}' }}`. Field names can be numbers (starting from zero) or empty (then they are implicitly given monotonically increasing numbers). + +**Syntax** + +```sql +format(pattern, s0, s1, …) +``` + +**Example** ``` sql SELECT format('{1} {0} {1}', 'World', 'Hello') ``` -``` text +```result ┌─format('{1} {0} {1}', 'World', 'Hello')─┐ │ Hello World Hello │ └─────────────────────────────────────────┘ ``` +With implicit numbers: + ``` sql SELECT format('{} {}', 'Hello', 'World') ``` -``` text +```result ┌─format('{} {}', 'Hello', 'World')─┐ │ Hello World │ └───────────────────────────────────┘ @@ -388,7 +427,7 @@ SELECT format('{} {}', 'Hello', 'World') ## concat -Concatenates the strings listed in the arguments, without a separator. +Concatenates the strings listed in the arguments without a separator. **Syntax** @@ -402,21 +441,19 @@ concat(s1, s2, ...) ``` **Arguments** Values of type String or FixedString. **Returned values** -Returns the String that results from concatenating the arguments. +The String created by concatenating the arguments. -If any of argument values is `NULL`, `concat` returns `NULL`. +If any of the arguments is `NULL`, the function returns `NULL`. **Example** -Query: - ``` sql SELECT concat('Hello, ', 'World!'); ``` Result: -``` text +```result ┌─concat('Hello, ', 'World!')─┐ │ Hello, World! │ └─────────────────────────────┘ @@ -424,9 +461,9 @@ Result: ## concatAssumeInjective -Same as [concat](#concat), the difference is that you need to ensure that `concat(s1, s2, ...) → sn` is injective, it will be used for optimization of GROUP BY. +Like [concat](#concat) but assumes that `concat(s1, s2, ...) → sn` is injective. Can be used for optimization of GROUP BY. -The function is named “injective” if it always returns different result for different values of arguments. In other words: different arguments never yield identical result. +A function is called injective if it returns different results for different arguments. In other words: different arguments never produce an identical result. **Syntax** @@ -440,9 +477,9 @@ concatAssumeInjective(s1, s2, ...) ``` **Arguments** Values of type String or FixedString. **Returned values** -Returns the String that results from concatenating the arguments. +The String created by concatenating the arguments. -If any of argument values is `NULL`, `concatAssumeInjective` returns `NULL`. +If any of the argument values is `NULL`, the function returns `NULL`. 
**Example** @@ -454,7 +491,7 @@ INSERT INTO key_val VALUES ('Hello, ','World',1), ('Hello, ','World',2), ('Hello SELECT * from key_val; ``` -``` text +```result ┌─key1────┬─key2─────┬─value─┐ │ Hello, │ World │ 1 │ │ Hello, │ World │ 2 │ @@ -463,15 +500,13 @@ SELECT * from key_val; └─────────┴──────────┴───────┘ ``` -Query: - ``` sql SELECT concat(key1, key2), sum(value) FROM key_val GROUP BY concatAssumeInjective(key1, key2); ``` Result: -``` text +```result ┌─concat(key1, key2)─┬─sum(value)─┐ │ Hello, World! │ 3 │ │ Hello, World! │ 2 │ @@ -479,25 +514,88 @@ Result: └────────────────────┴────────────┘ ``` -## substring(s, offset, length), mid(s, offset, length), substr(s, offset, length) +## concatWithSeparator -Returns a substring starting with the byte from the ‘offset’ index that is ‘length’ bytes long. Character indexing starts from one (as in standard SQL). +Concatenates the given strings with a given separator. -## substringUTF8(s, offset, length) +**Syntax** -The same as ‘substring’, but for Unicode code points. Works under the assumption that the string contains a set of bytes representing a UTF-8 encoded text. If this assumption is not met, it returns some result (it does not throw an exception). +``` sql +concatWithSeparator(sep, expr1, expr2, expr3...) +``` -## appendTrailingCharIfAbsent(s, c) +**Arguments** -If the ‘s’ string is non-empty and does not contain the ‘c’ character at the end, it appends the ‘c’ character to the end. +- `sep` — Separator. Const [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `exprN` — Expression to be concatenated. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -## convertCharset(s, from, to) +**Returned values** -Returns the string ‘s’ that was converted from the encoding in ‘from’ to the encoding in ‘to’. +The String created by concatenating the arguments. -## base58Encode(plaintext) +If any of the argument values is `NULL`, the function returns `NULL`. -Accepts a String and encodes it using [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) encoding scheme using "Bitcoin" alphabet. +**Example** + +``` sql +SELECT concatWithSeparator('a', '1', '2', '3', '4') +``` + +Result: + +```result +┌─concatWithSeparator('a', '1', '2', '3', '4')─┐ +│ 1a2a3a4 │ +└──────────────────────────────────────────────┘ +``` + +## concatWithSeparatorAssumeInjective + +Like `concatWithSeparator` but assumes that `concatWithSeparator(sep, expr1, expr2, expr3...) → result` is injective. Can be used for optimization of GROUP BY. + +A function is called injective if it returns different results for different arguments. In other words: different arguments never produce an identical result. + +## substring + +Returns a substring of `length` bytes, starting at the byte at index `offset`. Character indexing starts from 1. + +**Syntax** + +```sql +substring(s, offset, length) +``` + +Alias: +- `substr` +- `mid` + +## substringUTF8 + +Like `substring` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. + +## appendTrailingCharIfAbsent + +Appends character `c` to string `s` if `s` is non-empty and does not end with character `c`. + +**Syntax** + +```sql +appendTrailingCharIfAbsent(s, c) +``` + +## convertCharset + +Returns string `s` converted from encoding `from` to encoding `to`. 
+ +**Syntax** + +```sql +convertCharset(s, from, to) +``` + +## base58Encode + +Encodes a String using [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) in the "Bitcoin" alphabet. **Syntax** @@ -511,117 +609,107 @@ base58Encode(plaintext) **Returned value** -- A string containing encoded value of 1st argument. +- A string containing the encoded value of the argument. Type: [String](../../sql-reference/data-types/string.md). **Example** -Query: - ``` sql SELECT base58Encode('Encoded'); ``` Result: -```text + +```result ┌─base58Encode('Encoded')─┐ │ 3dc8KtHrwM │ └─────────────────────────┘ ``` -## base58Decode(encoded_text) +## base58Decode Accepts a String and decodes it using [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) encoding scheme using "Bitcoin" alphabet. **Syntax** ```sql -base58Decode(encoded_text) +base58Decode(encoded) ``` **Arguments** -- `encoded_text` — [String](../../sql-reference/data-types/string.md) column or constant. If the string is not a valid base58-encoded value, an exception is thrown. +- `encoded` — [String](../../sql-reference/data-types/string.md) column or constant. If the string is not a valid Base58-encoded value, an exception is thrown. **Returned value** -- A string containing decoded value of 1st argument. +- A string containing the decoded value of the argument. Type: [String](../../sql-reference/data-types/string.md). **Example** -Query: - ``` sql SELECT base58Decode('3dc8KtHrwM'); ``` Result: -```text + +```result ┌─base58Decode('3dc8KtHrwM')─┐ │ Encoded │ └────────────────────────────┘ ``` -## tryBase58Decode(s) +## tryBase58Decode -Similar to base58Decode, but returns an empty string in case of error. +Like `base58Decode` but returns an empty string in case of error. -## base64Encode(s) +## base64Encode -Encodes ‘s’ FixedString or String into base64. +Encodes a String or FixedString as base64. Alias: `TO_BASE64`. -## base64Decode(s) +## base64Decode -Decode base64-encoded FixedString or String ‘s’ into original string. In case of failure raises an exception. +Decodes a base64-encoded String or FixedString. Throws an exception in case of error. Alias: `FROM_BASE64`. -## tryBase64Decode(s) +## tryBase64Decode -Similar to base64Decode, but returns an empty string in case of error. +Like `base64Decode` but returns an empty string in case of error. -## endsWith(s, suffix) +## endsWith -Returns whether to end with the specified suffix. Returns 1 if the string ends with the specified suffix, otherwise it returns 0. +Returns whether string `str` ends with `suffix`. -## startsWith(str, prefix) +**Syntax** -Returns 1 whether string starts with the specified prefix, otherwise it returns 0. +```sql +endsWith(str, suffix) +``` + +## startsWith + +Returns whether string `str` starts with `prefix`. + +**Syntax** + +```sql +startsWith(str, prefix) +``` + +**Example** ``` sql SELECT startsWith('Spider-Man', 'Spi'); ``` -**Returned values** - -- 1, if the string starts with the specified prefix. -- 0, if the string does not start with the specified prefix. - -**Example** - -Query: - -``` sql -SELECT startsWith('Hello, world!', 'He'); -``` - -Result: - -``` text -┌─startsWith('Hello, world!', 'He')─┐ -│ 1 │ -└───────────────────────────────────┘ -``` - ## trim -Removes all specified characters from the start or end of a string. -By default removes all consecutive occurrences of common whitespace (ASCII character 32) from both ends of a string. +Removes the specified characters from the start or end of a string. 
If not specified otherwise, the function removes whitespace (ASCII character 32). **Syntax** @@ -631,26 +719,24 @@ trim([[LEADING|TRAILING|BOTH] trim_character FROM] input_string) **Arguments** -- `trim_character` — Specified characters for trim. [String](../../sql-reference/data-types/string.md). -- `input_string` — String for trim. [String](../../sql-reference/data-types/string.md). +- `trim_character` — Specified characters for trim. [String](../../sql-reference/data-types/string.md). +- `input_string` — String for trim. [String](../../sql-reference/data-types/string.md). **Returned value** -A string without leading and (or) trailing specified characters. +A string without leading and/or trailing specified characters. Type: `String`. **Example** -Query: - ``` sql SELECT trim(BOTH ' ()' FROM '( Hello, world! )'); ``` Result: -``` text +```result ┌─trim(BOTH ' ()' FROM '( Hello, world! )')─┐ │ Hello, world! │ └───────────────────────────────────────────────┘ @@ -658,7 +744,7 @@ Result: ## trimLeft -Removes all consecutive occurrences of common whitespace (ASCII character 32) from the beginning of a string. It does not remove other kinds of whitespace characters (tab, no-break space, etc.). +Removes consecutive occurrences of whitespace (ASCII character 32) from the start of a string. **Syntax** @@ -670,7 +756,7 @@ Alias: `ltrim(input_string)`. **Arguments** -- `input_string` — string to trim. [String](../../sql-reference/data-types/string.md). +- `input_string` — String to trim. [String](../../sql-reference/data-types/string.md). **Returned value** @@ -680,15 +766,13 @@ Type: `String`. **Example** -Query: - ``` sql SELECT trimLeft(' Hello, world! '); ``` Result: -``` text +```result ┌─trimLeft(' Hello, world! ')─┐ │ Hello, world! │ └─────────────────────────────────────┘ @@ -696,7 +780,7 @@ Result: ## trimRight -Removes all consecutive occurrences of common whitespace (ASCII character 32) from the end of a string. It does not remove other kinds of whitespace characters (tab, no-break space, etc.). +Removes consecutive occurrences of whitespace (ASCII character 32) from the end of a string. **Syntax** @@ -708,7 +792,7 @@ Alias: `rtrim(input_string)`. **Arguments** -- `input_string` — string to trim. [String](../../sql-reference/data-types/string.md). +- `input_string` — String to trim. [String](../../sql-reference/data-types/string.md). **Returned value** @@ -718,15 +802,13 @@ Type: `String`. **Example** -Query: - ``` sql SELECT trimRight(' Hello, world! '); ``` Result: -``` text +```result ┌─trimRight(' Hello, world! ')─┐ │ Hello, world! │ └──────────────────────────────────────┘ @@ -734,7 +816,7 @@ Result: ## trimBoth -Removes all consecutive occurrences of common whitespace (ASCII character 32) from both ends of a string. It does not remove other kinds of whitespace characters (tab, no-break space, etc.). +Removes consecutive occurrences of whitespace (ASCII character 32) from both ends of a string. **Syntax** @@ -746,7 +828,7 @@ Alias: `trim(input_string)`. **Arguments** -- `input_string` — string to trim. [String](../../sql-reference/data-types/string.md). +- `input_string` — String to trim. [String](../../sql-reference/data-types/string.md). **Returned value** @@ -756,33 +838,31 @@ Type: `String`. **Example** -Query: - ``` sql SELECT trimBoth(' Hello, world! '); ``` Result: -``` text +```result ┌─trimBoth(' Hello, world! ')─┐ │ Hello, world! 
│ └─────────────────────────────────────┘ ``` -## CRC32(s) +## CRC32 -Returns the CRC32 checksum of a string, using CRC-32-IEEE 802.3 polynomial and initial value `0xffffffff` (zlib implementation). +Returns the CRC32 checksum of a string using CRC-32-IEEE 802.3 polynomial and initial value `0xffffffff` (zlib implementation). The result type is UInt32. -## CRC32IEEE(s) +## CRC32IEEE Returns the CRC32 checksum of a string, using CRC-32-IEEE 802.3 polynomial. The result type is UInt32. -## CRC64(s) +## CRC64 Returns the CRC64 checksum of a string, using CRC-64-ECMA polynomial. @@ -800,25 +880,23 @@ normalizeQuery(x) **Arguments** -- `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md). +- `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md). **Returned value** -- Sequence of characters with placeholders. +- Sequence of characters with placeholders. Type: [String](../../sql-reference/data-types/string.md). **Example** -Query: - ``` sql SELECT normalizeQuery('[1, 2, 3, x]') AS query; ``` Result: -``` text +```result ┌─query────┐ │ [?.., x] │ └──────────┘ @@ -826,7 +904,7 @@ Result: ## normalizedQueryHash -Returns identical 64bit hash values without the values of literals for similar queries. It helps to analyze query log. +Returns identical 64bit hash values without the values of literals for similar queries. Can be helpful to analyze the query log. **Syntax** ``` sql normalizedQueryHash(x) ``` **Arguments** -- `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md). +- `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md). **Returned value** -- Hash value. +- Hash value. Type: [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges). **Example** -Query: - ``` sql SELECT normalizedQueryHash('SELECT 1 AS `xyz`') != normalizedQueryHash('SELECT 1 AS `abc`') AS res; ``` Result: -``` text +```result ┌─res─┐ │ 1 │ └─────┘ @@ -862,7 +938,7 @@ Result: ## normalizeUTF8NFC -Converts a string to [NFC normalized form](https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms), assuming the string contains a set of bytes that make up a UTF-8 encoded text. +Converts a string to [NFC normalized form](https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms), assuming the string is valid UTF8-encoded text. **Syntax** @@ -872,25 +948,23 @@ normalizeUTF8NFC(words) **Arguments** -- `words` — Input string that contains UTF-8 encoded text. [String](../../sql-reference/data-types/string.md). +- `words` — UTF8-encoded input string. [String](../../sql-reference/data-types/string.md). **Returned value** -- String transformed to NFC normalization form. +- String transformed to NFC normalization form. Type: [String](../../sql-reference/data-types/string.md). **Example** -Query: - ``` sql SELECT length('â'), normalizeUTF8NFC('â') AS nfc, length(nfc) AS nfc_len; ``` Result: -``` text +```result ┌─length('â')─┬─nfc─┬─nfc_len─┐ │ 2 │ â │ 2 │ └─────────────┴─────┴─────────┘ @@ -898,7 +972,7 @@ Result: ## normalizeUTF8NFD -Converts a string to [NFD normalized form](https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms), assuming the string contains a set of bytes that make up a UTF-8 encoded text. +Converts a string to [NFD normalized form](https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms), assuming the string is valid UTF8-encoded text. **Syntax** @@ -908,25 +982,23 @@ normalizeUTF8NFD(words) **Arguments** -- `words` — Input string that contains UTF-8 encoded text. 
[String](../../sql-reference/data-types/string.md). +- `words` — UTF8-encoded input string. [String](../../sql-reference/data-types/string.md). **Returned value** -- String transformed to NFD normalization form. +- String transformed to NFD normalization form. Type: [String](../../sql-reference/data-types/string.md). **Example** -Query: - ``` sql SELECT length('â'), normalizeUTF8NFD('â') AS nfd, length(nfd) AS nfd_len; ``` Result: -``` text +```result ┌─length('â')─┬─nfd─┬─nfd_len─┐ │ 2 │ â │ 3 │ └─────────────┴─────┴─────────┘ @@ -934,7 +1006,7 @@ Result: ## normalizeUTF8NFKC -Converts a string to [NFKC normalized form](https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms), assuming the string contains a set of bytes that make up a UTF-8 encoded text. +Converts a string to [NFKC normalized form](https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms), assuming the string is valid UTF8-encoded text. **Syntax** @@ -944,25 +1016,23 @@ normalizeUTF8NFKC(words) **Arguments** -- `words` — Input string that contains UTF-8 encoded text. [String](../../sql-reference/data-types/string.md). +- `words` — UTF8-encoded input string. [String](../../sql-reference/data-types/string.md). **Returned value** -- String transformed to NFKC normalization form. +- String transformed to NFKC normalization form. Type: [String](../../sql-reference/data-types/string.md). **Example** -Query: - ``` sql SELECT length('â'), normalizeUTF8NFKC('â') AS nfkc, length(nfkc) AS nfkc_len; ``` Result: -``` text +```result ┌─length('â')─┬─nfkc─┬─nfkc_len─┐ │ 2 │ â │ 2 │ └─────────────┴──────┴──────────┘ @@ -970,7 +1040,7 @@ Result: ## normalizeUTF8NFKD -Converts a string to [NFKD normalized form](https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms), assuming the string contains a set of bytes that make up a UTF-8 encoded text. +Converts a string to [NFKD normalized form](https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms), assuming the string is valid UTF8-encoded text. **Syntax** @@ -980,25 +1050,23 @@ normalizeUTF8NFKD(words) **Arguments** -- `words` — Input string that contains UTF-8 encoded text. [String](../../sql-reference/data-types/string.md). +- `words` — UTF8-encoded input string. [String](../../sql-reference/data-types/string.md). **Returned value** -- String transformed to NFKD normalization form. +- String transformed to NFKD normalization form. Type: [String](../../sql-reference/data-types/string.md). **Example** -Query: - ``` sql SELECT length('â'), normalizeUTF8NFKD('â') AS nfkd, length(nfkd) AS nfkd_len; ``` Result: -``` text +```result ┌─length('â')─┬─nfkd─┬─nfkd_len─┐ │ 2 │ â │ 3 │ └─────────────┴──────┴──────────┘ @@ -1006,9 +1074,10 @@ Result: ## encodeXMLComponent -Escapes characters to place string into XML text node or attribute. +Escapes characters with special meaning in XML such that they can afterwards be placed into an XML text node or attribute. -The following five XML predefined entities will be replaced: `<`, `&`, `>`, `"`, `'`. +The following characters are replaced: `<`, `&`, `>`, `"`, `'`. +Also see the [list of XML and HTML character entity references](https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references). **Syntax** @@ -1018,18 +1087,16 @@ encodeXMLComponent(x) **Arguments** -- `x` — The sequence of characters. [String](../../sql-reference/data-types/string.md). +- `x` — An input string. [String](../../sql-reference/data-types/string.md). **Returned value** -- The sequence of characters with escape characters. 
+- The escaped string. Type: [String](../../sql-reference/data-types/string.md). **Example** -Query: - ``` sql SELECT encodeXMLComponent('Hello, "world"!'); SELECT encodeXMLComponent('<123>'); @@ -1039,7 +1106,7 @@ SELECT encodeXMLComponent('\'foo\''); Result: -``` text +```result Hello, &quot;world&quot;! &lt;123&gt; &amp;clickhouse @@ -1048,7 +1115,8 @@ Hello, &quot;world&quot;! ## decodeXMLComponent -Replaces XML predefined entities with characters. Predefined entities are `&quot;` `&amp;` `&apos;` `&gt;` `&lt;` +Un-escapes substrings with special meaning in XML. These substrings are: `&quot;` `&amp;` `&apos;` `&gt;` `&lt;` + This function also replaces numeric character references with Unicode characters. Both decimal (like `&#10003;`) and hexadecimal (`&#x2713;`) forms are supported. **Syntax** @@ -1059,18 +1127,16 @@ decodeXMLComponent(x) **Arguments** -- `x` — A sequence of characters. [String](../../sql-reference/data-types/string.md). +- `x` — An input string. [String](../../sql-reference/data-types/string.md). **Returned value** -- The sequence of characters after replacement. +- The un-escaped string. Type: [String](../../sql-reference/data-types/string.md). **Example** -Query: - ``` sql SELECT decodeXMLComponent('&apos;foo&apos;'); SELECT decodeXMLComponent('&lt; &#x3A3; &gt;'); @@ -1078,25 +1144,20 @@ SELECT decodeXMLComponent('&lt; &#x3A3; &gt;'); Result: -``` text +```result 'foo' < Σ > ``` -**See Also** - -- [List of XML and HTML character entity references](https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references) - - - ## extractTextFromHTML -A function to extract text from HTML or XHTML. -It does not necessarily 100% conform to any of the HTML, XML or XHTML standards, but the implementation is reasonably accurate and it is fast. The rules are the following: +This function extracts plain text from HTML or XHTML. -1. Comments are skipped. Example: `<!-- test -->`. Comment must end with `-->`. Nested comments are not possible. +It does not conform 100% to the HTML, XML or XHTML specification but the implementation is reasonably accurate and fast. The rules are the following: + +1. Comments are skipped. Example: `<!-- test -->`. Comment must end with `-->`. Nested comments are disallowed. Note: constructions like `<!-->` and `<!--->` are not valid comments in HTML but they are skipped by other rules. -2. CDATA is pasted verbatim. Note: CDATA is XML/XHTML specific. But it is processed for "best-effort" approach. +2. CDATA is pasted verbatim. Note: CDATA is XML/XHTML-specific and processed on a "best-effort" basis. 3. `script` and `style` elements are removed with all their content. Note: it is assumed that closing tag cannot appear inside content. For example, in JS string literal has to be escaped like `"<\/script>"`. Note: comments and CDATA are possible inside `script` or `style` - then closing tags are not searched inside CDATA. Example: `<script><![CDATA[</script>]]></script>`. But they are still searched inside comments. Sometimes it becomes complicated: `<script>var x = "<!--"; </script> var y = "-->"; alert(x + y);</script>` Note: `script` and `style` can be the names of XML namespaces - then they are not treated like usual `script` or `style` elements. Example: `<script:a>Hello</script:a>`. @@ -1121,11 +1182,11 @@ extractTextFromHTML(x) **Arguments** -- `x` — input text. [String](../../sql-reference/data-types/string.md). +- `x` — input text. [String](../../sql-reference/data-types/string.md). **Returned value** -- Extracted text. +- Extracted text. Type: [String](../../sql-reference/data-types/string.md). @@ -1135,8 +1196,6 @@ The first example contains several tags and a comment and also shows whitespace The second example shows `CDATA` and `script` tag processing. 
In the third example text is extracted from the full HTML response received by the [url](../../sql-reference/table-functions/url.md) function. -Query: - ``` sql SELECT extractTextFromHTML(' <p> A text <i>with</i><b>tags</b>. <!-- comments --> </p> '); SELECT extractTextFromHTML('<![CDATA[The content within <b>CDATA</b>]]> <script>alert("Script");</script>');
@@ -1145,54 +1204,52 @@ SELECT extractTextFromHTML(html) FROM url('http://www.donothingfor2minutes.com/' Result: -``` text +```result A text with tags . The content within CDATA Do Nothing for 2 Minutes 2:00 &nbsp; ``` -## ascii(s) {#ascii} +## ascii {#ascii} -Returns the ASCII code point of the first character of str. The result type is Int32. +Returns the ASCII code point (as Int32) of the first character of string `s`. -If s is empty, the result is 0. If the first character is not an ASCII character or not part of the Latin-1 Supplement range of UTF-16, the result is undefined. +If `s` is empty, the result is 0. If the first character is not an ASCII character or not part of the Latin-1 supplement range of UTF-16, the result is undefined. +**Syntax** +```sql +ascii(s) +``` -## concatWithSeparator +## soundex -Returns the concatenation strings separated by string separator. If any of the argument values is `NULL`, the function returns `NULL`. +Returns the [Soundex code](https://en.wikipedia.org/wiki/Soundex) of a string. **Syntax** ``` sql -concatWithSeparator(sep, expr1, expr2, expr3...) +soundex(val) ``` **Arguments** -- sep — separator. Const [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -- exprN — expression to be concatenated. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). **Returned values** -- The concatenated String. +- `val` — Input value. [String](../data-types/string.md). + +**Returned value** + +- The Soundex code of the input value. [String](../data-types/string.md). **Example** -Query: - ``` sql -SELECT concatWithSeparator('a', '1', '2', '3', '4') +SELECT soundex('aksel'); ``` Result: -``` text -┌─concatWithSeparator('a', '1', '2', '3', '4')─┐ -│ 1a2a3a4 │ -└───────────────────────────────────┘ +```result +┌─soundex('aksel')─┐ +│ A240 │ └──────────────────┘ ``` - -## concatWithSeparatorAssumeInjective -Same as concatWithSeparator, the difference is that you need to ensure that concatWithSeparator(sep, expr1, expr2, expr3...) → result is injective, it will be used for optimization of GROUP BY. - -The function is named “injective” if it always returns different result for different values of arguments. In other words: different arguments never yield identical result. diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 50e15f70f5d..56c527d734e 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -1,34 +1,54 @@ --- slug: /en/sql-reference/functions/string-replace-functions -sidebar_position: 42 +sidebar_position: 150 sidebar_label: Replacing in Strings --- -# Functions for Searching and Replacing in Strings +# Functions for Replacing in Strings -:::note -Functions for [searching](../../sql-reference/functions/string-search-functions.md) and [other manipulations with strings](../../sql-reference/functions/string-functions.md) are described separately. -::: +[General string functions](string-functions.md) and [functions for searching in strings](string-search-functions.md) are described separately. -## replaceOne(haystack, pattern, replacement) +## replaceOne -Replaces the first occurrence of the substring ‘pattern’ (if it exists) in ‘haystack’ by the ‘replacement’ string. -‘pattern’ and ‘replacement’ must be constants. 
+Replaces the first occurrence of the substring `pattern` in `haystack` by the `replacement` string. -## replaceAll(haystack, pattern, replacement), replace(haystack, pattern, replacement) **Syntax** -Replaces all occurrences of the substring ‘pattern’ in ‘haystack’ by the ‘replacement’ string. +```sql +replaceOne(haystack, pattern, replacement) +``` -## replaceRegexpOne(haystack, pattern, replacement) +## replaceAll -Replaces the first occurrence of the substring matching the regular expression ‘pattern’ in ‘haystack‘ by the ‘replacement‘ string. -‘pattern‘ must be a constant [re2 regular expression](https://github.com/google/re2/wiki/Syntax). -‘replacement’ must be a plain constant string or a constant string containing substitutions `\0-\9`. +Replaces all occurrences of the substring `pattern` in `haystack` by the `replacement` string. + +**Syntax** + +```sql +replaceAll(haystack, pattern, replacement) +``` + +Alias: `replace`. + +## replaceRegexpOne + +Replaces the first occurrence of the substring matching the regular expression `pattern` (in [re2 syntax](https://github.com/google/re2/wiki/Syntax)) in `haystack` by the `replacement` string. + +`replacement` can contain substitutions `\0-\9`. Substitutions `\1-\9` correspond to the 1st to 9th capturing group (submatch), substitution `\0` corresponds to the entire match. -To use a verbatim `\` character in the ‘pattern‘ or ‘replacement‘ string, escape it using `\`. -Also keep in mind that string literals require an extra escaping. -Example 1. Converting ISO dates to American format: +To use a verbatim `\` character in the `pattern` or `replacement` strings, escape it using `\`. +Also keep in mind that string literals require extra escaping. + +**Syntax** + +```sql +replaceRegexpOne(haystack, pattern, replacement) +``` + +**Example** + +Converting ISO dates to American format: ``` sql SELECT DISTINCT EventDate, @@ -39,6 +59,8 @@ LIMIT 7 FORMAT TabSeparated ``` +Result: + ``` text 2014-03-17 03/17/2014 2014-03-18 03/18/2014 @@ -49,81 +71,91 @@ FORMAT TabSeparated 2014-03-23 03/23/2014 ``` -Example 2. Copying a string ten times: +Copying a string ten times: ``` sql SELECT replaceRegexpOne('Hello, World!', '.*', '\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0') AS res ``` +Result: + ``` text ┌─res────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ Hello, World!Hello, World!Hello, World!Hello, World!Hello, World!Hello, World!Hello, World!Hello, World!Hello, World!Hello, World! │ └────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` -## replaceRegexpAll(haystack, pattern, replacement) +## replaceRegexpAll -Like ‘replaceRegexpOne‘, but replaces all occurrences of the pattern. Example: +Like `replaceRegexpOne` but replaces all occurrences of the pattern. + +Alias: `REGEXP_REPLACE`. + +**Example** ``` sql SELECT replaceRegexpAll('Hello, World!', '.', '\\0\\0') AS res ``` +Result: + ``` text ┌─res────────────────────────┐ │ HHeelllloo,, WWoorrlldd!! │ └────────────────────────────┘ ``` -As an exception, if a regular expression worked on an empty substring, the replacement is not made more than once. -Example: +As an exception, if a regular expression worked on an empty substring, the replacement is not made more than once, e.g.: ``` sql SELECT replaceRegexpAll('Hello, World!', '^', 'here: ') AS res ``` +Result: + ``` text ┌─res─────────────────┐ │ here: Hello, World! 
│ └─────────────────────┘ ``` -## regexpQuoteMeta(s) +## regexpQuoteMeta + +Adds a backslash before the following characters, which have special meaning in regular expressions: `\0`, `\\`, `|`, `(`, `)`, `^`, `$`, `.`, `[`, `]`, `?`, `*`, `+`, `{`, `:`, `-`. -The function adds a backslash before some predefined characters in the string. -Predefined characters: `\0`, `\\`, `|`, `(`, `)`, `^`, `$`, `.`, `[`, `]`, `?`, `*`, `+`, `{`, `:`, `-`. This implementation slightly differs from re2::RE2::QuoteMeta. It escapes zero byte as `\0` instead of `\x00` and it escapes only required characters. -For more information, see the link: [RE2](https://github.com/google/re2/blob/master/re2/re2.cc#L473) +For more information, see [RE2](https://github.com/google/re2/blob/master/re2/re2.cc#L473) +**Syntax** +```sql +regexpQuoteMeta(s) +``` -## translate(s, from, to) +## translate -The function replaces characters in the string ‘s’ in accordance with one-to-one character mapping defined by ‘from’ and ‘to’ strings. ‘from’ and ‘to’ must be constant ASCII strings of the same size. Non-ASCII characters in the original string are not modified. +Replaces characters in the string `s` using a one-to-one character mapping defined by `from` and `to` strings. `from` and `to` must be constant ASCII strings of the same size. Non-ASCII characters in the original string are not modified. -Example: +**Syntax** + +```sql +translate(s, from, to) +``` + +**Example** ``` sql SELECT translate('Hello, World!', 'delor', 'DELOR') AS res ``` +Result: + ``` text ┌─res───────────┐ │ HELLO, WORLD! │ └───────────────┘ ``` -## translateUTF8(string, from, to) +## translateUTF8 -Similar to previous function, but works with UTF-8 arguments. ‘from’ and ‘to’ must be valid constant UTF-8 strings of the same size. - -Example: - -``` sql -SELECT translateUTF8('Hélló, Wórld¡', 'óé¡', 'oe!') AS res -``` - -``` text -┌─res───────────┐ -│ Hello, World! │ -└───────────────┘ -``` +Like [translate](#translate) but assumes `s`, `from` and `to` are UTF-8 encoded strings. diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 2f660d820d1..3d8f89f7295 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -1,24 +1,26 @@ --- slug: /en/sql-reference/functions/string-search-functions -sidebar_position: 41 +sidebar_position: 160 sidebar_label: Searching in Strings --- # Functions for Searching in Strings -The search is case-sensitive by default in all these functions. There are separate variants for case insensitive search. +All functions in this section search case-sensitively by default. Case-insensitive search is usually provided by separate function variants. +Note that case-insensitive search follows the lowercase-uppercase rules of the English language. E.g. the uppercase of `i` in the English language is +`I`, whereas in the Turkish language it is `İ`; results for languages other than English may be unexpected. -:::note -Functions for [replacing](../../sql-reference/functions/string-replace-functions.md) and [other manipulations with strings](../../sql-reference/functions/string-functions.md) are described separately. -::: +Functions in this section also assume that the searched string and the search string are single-byte encoded text. If this assumption is +violated, no exception is thrown and results are undefined. Search with UTF-8 encoded strings is usually provided by separate function +variants. 
Likewise, if a UTF-8 function variant is used and the input strings are not UTF-8 encoded text, no exception is thrown and the +results are undefined. Note that no automatic Unicode normalization is performed; you can use the +[normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that. -## position(haystack, needle), locate(haystack, needle) +[General string functions](string-functions.md) and [functions for replacing in strings](string-replace-functions.md) are described separately. -Searches for the substring `needle` in the string `haystack`. +## position -Returns the position (in bytes) of the found substring in the string, starting from 1. - -For a case-insensitive search, use the function [positionCaseInsensitive](#positioncaseinsensitive). +Returns the position (in bytes, starting at 1) of a substring `needle` in a string `haystack`. **Syntax** @@ -26,35 +28,33 @@ position(haystack, needle[, start_pos]) ``` -``` sql -position(needle IN haystack) -``` - -Alias: `locate(haystack, needle[, start_pos])`. - -:::note -Syntax of `position(needle IN haystack)` provides SQL-compatibility, the function works the same way as to `position(haystack, needle)`. -::: +Alias: +- `position(needle IN haystack)` +- `locate(haystack, needle[, start_pos])` **Arguments** -- `haystack` — String, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `start_pos` – Position of the first character in the string to start search. [UInt](../../sql-reference/data-types/int-uint.md). Optional. +- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `start_pos` — Position (1-based) in `haystack` at which the search starts. [UInt](../../sql-reference/data-types/int-uint.md). Optional. **Returned values** -- Starting position in bytes (counting from 1), if substring was found. -- 0, if the substring was not found. +- Starting position in bytes, counting from 1, if the substring was found. +- 0, if the substring was not found. + +If substring `needle` is empty, these rules apply: +- if no `start_pos` was specified: return `1` +- if `start_pos = 0`: return `1` +- if `start_pos >= 1` and `start_pos <= length(haystack) + 1`: return `start_pos` +- otherwise: return `0` + +The same rules also apply to functions `positionCaseInsensitive`, `positionUTF8` and `positionCaseInsensitiveUTF8`. Type: `Integer`. **Examples** -The phrase “Hello, world!” contains a set of bytes representing a single-byte encoded text. The function returns some expected result: - -Query: - ``` sql SELECT position('Hello, world!', '!'); ``` @@ -67,6 +67,8 @@ Result: └────────────────────────────────┘ ``` +Example with `start_pos` argument: + ``` sql SELECT position('Hello, world!', 'o', 1), position('Hello, world!', 'o', 7) @@ -79,29 +81,21 @@ SELECT └───────────────────────────────────┴───────────────────────────────────┘ ``` -The same phrase in Russian contains characters which can’t be represented using a single byte. 
The function returns some unexpected result (use [positionUTF8](#positionutf8) function for multi-byte encoded text): +Example for `needle IN haystack` syntax: -Query: - -``` sql -SELECT position('Привет, мир!', '!'); +```sql +SELECT 6 = position('/' IN s) FROM (SELECT 'Hello/World' AS s); ``` Result: -``` text -┌─position('Привет, мир!', '!')─┐ -│ 21 │ -└───────────────────────────────┘ +```text +┌─equals(6, position(s, '/'))─┐ +│ 1 │ +└─────────────────────────────┘ ``` -If argument `needle` is empty the following rules apply: -- if no `start_pos` was specified: return `1` -- if `start_pos = 0`: return `1` -- if `start_pos >= 1` and `start_pos <= length(haystack) + 1`: return `start_pos` -- otherwise: return `0` - -The same rules also apply to functions `positionCaseInsensitive`, `positionUTF8` and `positionCaseInsensitiveUTF8` +Examples with empty `needle` substring: ``` sql SELECT @@ -120,223 +114,59 @@ SELECT └─────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┘ ``` - -**Examples for POSITION(needle IN haystack) syntax** - -Query: - -```sql -SELECT 3 = position('c' IN 'abc'); -``` - -Result: - -```text -┌─equals(3, position('abc', 'c'))─┐ -│ 1 │ -└─────────────────────────────────┘ -``` - -Query: - -```sql -SELECT 6 = position('/' IN s) FROM (SELECT 'Hello/World' AS s); -``` - -Result: - -```text -┌─equals(6, position(s, '/'))─┐ -│ 1 │ -└─────────────────────────────┘ -``` - ## positionCaseInsensitive -The same as [position](#position) returns the position (in bytes) of the found substring in the string, starting from 1. Use the function for a case-insensitive search. - -Works under the assumption that the string contains a set of bytes representing a single-byte encoded text. If this assumption is not met and a character can’t be represented using a single byte, the function does not throw an exception and returns some unexpected result. If character can be represented using two bytes, it will use two bytes and so on. - -**Syntax** - -``` sql -positionCaseInsensitive(haystack, needle[, start_pos]) -``` - -**Arguments** - -- `haystack` — String, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `start_pos` — Optional parameter, position of the first character in the string to start search. [UInt](../../sql-reference/data-types/int-uint.md). - -**Returned values** - -- Starting position in bytes (counting from 1), if substring was found. -- 0, if the substring was not found. - -Type: `Integer`. - -**Example** - -Query: - -``` sql -SELECT positionCaseInsensitive('Hello, world!', 'hello'); -``` - -Result: - -``` text -┌─positionCaseInsensitive('Hello, world!', 'hello')─┐ -│ 1 │ -└───────────────────────────────────────────────────┘ -``` +Like [position](#position) but searches case-insensitively. ## positionUTF8 -Returns the position (in Unicode points) of the found substring in the string, starting from 1. - -Works under the assumption that the string contains a set of bytes representing a UTF-8 encoded text. If this assumption is not met, the function does not throw an exception and returns some unexpected result. If character can be represented using two Unicode points, it will use two and so on. - -For a case-insensitive search, use the function [positionCaseInsensitiveUTF8](#positioncaseinsensitiveutf8). 
- -**Syntax** - -``` sql -positionUTF8(haystack, needle[, start_pos]) -``` - -**Arguments** - -- `haystack` — String, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `start_pos` — Optional parameter, position of the first character in the string to start search. [UInt](../../sql-reference/data-types/int-uint.md) - -**Returned values** - -- Starting position in Unicode points (counting from 1), if substring was found. -- 0, if the substring was not found. - -Type: `Integer`. +Like [position](#position) but assumes `haystack` and `needle` are UTF-8 encoded strings. **Examples** -The phrase “Hello, world!” in Russian contains a set of Unicode points representing a single-point encoded text. The function returns some expected result: -Query: +Function `positionUTF8` correctly counts character `ö` (represented by two bytes) as a single Unicode codepoint: ``` sql -SELECT positionUTF8('Привет, мир!', '!'); +SELECT positionUTF8('Motörhead', 'r'); ``` Result: ``` text -┌─positionUTF8('Привет, мир!', '!')─┐ -│ 12 │ -└───────────────────────────────────┘ -``` -The phrase “Salut, étudiante!”, where character `é` can be represented using a one point (`U+00E9`) or two points (`U+0065U+0301`) the function can be returned some unexpected result: -Query for the letter `é`, which is represented one Unicode point `U+00E9`: -``` sql -SELECT positionUTF8('Salut, étudiante!', '!'); -``` -Result: -``` text -┌─positionUTF8('Salut, étudiante!', '!')─┐ -│ 17 │ -└────────────────────────────────────────┘ -``` -Query for the letter `é`, which is represented two Unicode points `U+0065U+0301`: -``` sql -SELECT positionUTF8('Salut, étudiante!', '!'); -``` -Result: -``` text -┌─positionUTF8('Salut, étudiante!', '!')─┐ -│ 18 │ -└────────────────────────────────────────┘ +┌─positionUTF8('Motörhead', 'r')─┐ +│ 5 │ +└────────────────────────────────┘ ``` ## positionCaseInsensitiveUTF8 -The same as [positionUTF8](#positionutf8), but is case-insensitive. Returns the position (in Unicode points) of the found substring in the string, starting from 1. - -Works under the assumption that the string contains a set of bytes representing a UTF-8 encoded text. If this assumption is not met, the function does not throw an exception and returns some unexpected result. If character can be represented using two Unicode points, it will use two and so on. - -**Syntax** - -``` sql -positionCaseInsensitiveUTF8(haystack, needle[, start_pos]) -``` - -**Arguments** - -- `haystack` — String, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `start_pos` — Optional parameter, position of the first character in the string to start search. [UInt](../../sql-reference/data-types/int-uint.md) - -**Returned value** - -- Starting position in Unicode points (counting from 1), if substring was found. -- 0, if the substring was not found. - -Type: `Integer`. - -**Example** - -Query: - -``` sql -SELECT positionCaseInsensitiveUTF8('Привет, мир!', 'Мир'); -``` - -Result: - -``` text -┌─positionCaseInsensitiveUTF8('Привет, мир!', 'Мир')─┐ -│ 9 │ -└────────────────────────────────────────────────────┘ -``` +Like [positionUTF8](#positionutf8) but searches case-insensitively. 
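+ +**Example** + +A brief sketch of the combined case-insensitive, code-point-based semantics; the expected value follows from the rules described above rather than from captured server output: + +``` sql +SELECT positionCaseInsensitiveUTF8('Motörhead', 'ÖRH'); +``` + +This is expected to return `4`, because `örh` starts at the fourth Unicode code point of the lowercased haystack. 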
## multiSearchAllPositions

-The same as [position](../../sql-reference/functions/string-search-functions.md#position) but returns `Array` of positions (in bytes) of the found corresponding substrings in the string. Positions are indexed starting from 1.
+Like [position](#position) but returns an array of positions (in bytes, starting at 1) for multiple `needle` substrings in a `haystack` string.

-The search is performed on sequences of bytes without respect to string encoding and collation.
-
-- For case-insensitive ASCII search, use the function `multiSearchAllPositionsCaseInsensitive`.
-- For search in UTF-8, use the function [multiSearchAllPositionsUTF8](#multiSearchAllPositionsUTF8).
-- For case-insensitive UTF-8 search, use the function multiSearchAllPositionsCaseInsensitiveUTF8.
+:::note
+All `multiSearch*()` functions only support up to 2<sup>8</sup> needles.
+:::

**Syntax**

``` sql
-multiSearchAllPositions(haystack, [needle1, needle2, ..., needlen])
+multiSearchAllPositions(haystack, [needle1, needle2, ..., needleN])
```

**Arguments**

-- `haystack` — String, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
+- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
+- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md).

**Returned values**

-- Array of starting positions in bytes (counting from 1), if the corresponding substring was found and 0 if not found.
+- Array of the starting positions in bytes, counting from 1 (if the substring was found), or 0 (if the substring was not found).

**Example**

-Query:
-
``` sql
SELECT multiSearchAllPositions('Hello, World!', ['hello', '!', 'world']);
```
@@ -351,103 +181,172 @@ Result:

## multiSearchAllPositionsUTF8

-See `multiSearchAllPositions`.
+Like [multiSearchAllPositions](#multiSearchAllPositions) but assumes `haystack` and the `needle`-s are UTF-8 encoded strings.

-## multiSearchFirstPosition(haystack, \[needle1, needle2, …, needlen\])
+## multiSearchFirstPosition

-The same as `position` but returns the leftmost offset of the string `haystack` that is matched to some of the needles.
+Like `position` but returns the leftmost offset in a `haystack` string which matches any of multiple `needle` strings.

-For a case-insensitive search or/and in UTF-8 format use functions `multiSearchFirstPositionCaseInsensitive, multiSearchFirstPositionUTF8, multiSearchFirstPositionCaseInsensitiveUTF8`.
+Functions `multiSearchFirstPositionCaseInsensitive`, `multiSearchFirstPositionUTF8` and `multiSearchFirstPositionCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.

-## multiSearchFirstIndex(haystack, \[needle1, needle2, …, needlen\])
+**Syntax**
+
+```sql
+multiSearchFirstPosition(haystack, [needle1, needle2, …, needleN])
+```
+
+## multiSearchFirstIndex

Returns the index `i` (starting from 1) of the leftmost found needle<sub>i</sub> in the string `haystack` and 0 otherwise.

-For a case-insensitive search or/and in UTF-8 format use functions `multiSearchFirstIndexCaseInsensitive, multiSearchFirstIndexUTF8, multiSearchFirstIndexCaseInsensitiveUTF8`.
+Functions `multiSearchFirstIndexCaseInsensitive`, `multiSearchFirstIndexUTF8` and `multiSearchFirstIndexCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.
-## multiSearchAny(haystack, \[needle1, needle2, …, needlen\])
+**Syntax**
+
+```sql
+multiSearchFirstIndex(haystack, [needle1, needle2, …, needleN])
+```
+
+## multiSearchAny

Returns 1, if at least one string needle<sub>i</sub> matches the string `haystack` and 0 otherwise.

-For a case-insensitive search or/and in UTF-8 format use functions `multiSearchAnyCaseInsensitive, multiSearchAnyUTF8, multiSearchAnyCaseInsensitiveUTF8`.
+Functions `multiSearchAnyCaseInsensitive`, `multiSearchAnyUTF8` and `multiSearchAnyCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.

-:::note
-In all `multiSearch*` functions the number of needles should be less than 28 because of implementation specification.
-:::
+**Syntax**

-## match(haystack, pattern)
+```sql
+multiSearchAny(haystack, [needle1, needle2, …, needleN])
+```

-Checks whether string `haystack` matches the regular expression `pattern`. The pattern is an [re2 regular expression](https://github.com/google/re2/wiki/Syntax) which has a more limited syntax than Perl regular expressions.
+## match

-Returns 1 in case of a match, and 0 otherwise.
+Returns whether string `haystack` matches the regular expression `pattern` in [re2 regular expression syntax](https://github.com/google/re2/wiki/Syntax).

-Matching is based on UTF-8, e.g. `.` matches the Unicode code point `¥` which is represented in UTF-8 using two bytes. The regular expression must not contain null bytes.
-If the haystack or the pattern are not valid UTF-8, then the behavior is undefined.
-No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
+Matching is based on UTF-8, e.g. `.` matches the Unicode code point `¥` which is represented in UTF-8 using two bytes. The regular
+expression must not contain null bytes. If the haystack or the pattern are not valid UTF-8, then the behavior is undefined.

Unlike re2's default behavior, `.` matches line breaks. To disable this, prepend the pattern with `(?-s)`.

-For patterns to search for substrings in a string, it is better to use functions [like](#like) or [position](#position) since they work much faster.
+If you only want to search substrings in a string, you can use functions [like](#like) or [position](#position) instead - they work much faster than this function.

-## multiMatchAny(haystack, \[pattern1, pattern2, …, patternn\])
+**Syntax**

-The same as `match`, but returns 0 if none of the regular expressions are matched and 1 if any of the patterns matches. For patterns to search substrings in a string, it is better to use `multiSearchAny` since it works much faster.
+```sql
+match(haystack, pattern)
+```
+
+Alias: `haystack REGEXP pattern` (operator)
+
+## multiMatchAny
+
+Like `match` but returns 1 if at least one of the patterns matches and 0 otherwise.

:::note
-Functions `multiMatchAny`, `multiMatchAnyIndex`, `multiMatchAllIndices` and their fuzzy equivalents (`multiFuzzyMatchAny`,
-`multiFuzzyMatchAnyIndex`, `multiFuzzyMatchAllIndices`) use the (Vectorscan)[https://github.com/VectorCamp/vectorscan] library. As such,
-they are only enabled if ClickHouse is compiled with support for vectorscan.
+Functions in the `multi[Fuzzy]Match*()` family use the [Vectorscan](https://github.com/VectorCamp/vectorscan) library. As such, they are only enabled if ClickHouse is compiled with support for vectorscan.
+
+To turn off all functions that use hyperscan, use setting `SET allow_hyperscan = 0;`.
Due to restrictions of vectorscan, the length of the `haystack` string must be less than 2<sup>32</sup> bytes.

Hyperscan is generally vulnerable to regular expression denial of service (ReDoS) attacks (e.g. see
[here](https://www.usenix.org/conference/usenixsecurity22/presentation/turonova), [here](https://doi.org/10.1007/s10664-021-10033-1) and
-(here)[ https://doi.org/10.1145/3236024.3236027]. Users are adviced to check the provided patterns carefully.
+[here](https://doi.org/10.1145/3236024.3236027)). Users are advised to check the provided patterns carefully.
:::

-## multiMatchAnyIndex(haystack, \[pattern1, pattern2, …, patternn\])
+If you only want to search multiple substrings in a string, you can use function [multiSearchAny](#multisearchany) instead - it works much faster than this function.

-The same as `multiMatchAny`, but returns any index that matches the haystack.
+**Syntax**

-## multiMatchAllIndices(haystack, \[pattern1, pattern2, …, patternn\])
+```sql
+multiMatchAny(haystack, [pattern1, pattern2, …, patternN])
+```

-The same as `multiMatchAny`, but returns the array of all indices that match the haystack in any order.
+## multiMatchAnyIndex

-## multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, …, patternn\])
+Like `multiMatchAny` but returns any index that matches the haystack.

-The same as `multiMatchAny`, but returns 1 if any pattern matches the haystack within a constant [edit distance](https://en.wikipedia.org/wiki/Edit_distance). This function relies on the experimental feature of [hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching) library, and can be slow for some corner cases. The performance depends on the edit distance value and patterns used, but it's always more expensive compared to a non-fuzzy variants.
+**Syntax**

-## multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, …, patternn\])
+```sql
+multiMatchAnyIndex(haystack, [pattern1, pattern2, …, patternN])
+```

-The same as `multiFuzzyMatchAny`, but returns any index that matches the haystack within a constant edit distance.
+## multiMatchAllIndices

-## multiFuzzyMatchAllIndices(haystack, distance, \[pattern1, pattern2, …, patternn\])
+Like `multiMatchAny` but returns the array of all indices that match the haystack in any order.

-The same as `multiFuzzyMatchAny`, but returns the array of all indices in any order that match the haystack within a constant edit distance.
+**Syntax**
+
+```sql
+multiMatchAllIndices(haystack, [pattern1, pattern2, …, patternN])
+```
+
+## multiFuzzyMatchAny
+
+Like `multiMatchAny` but returns 1 if any pattern matches the haystack within a constant [edit distance](https://en.wikipedia.org/wiki/Edit_distance). This function relies on the experimental feature of the [hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching) library, and can be slow for some corner cases. The performance depends on the edit distance value and the patterns used, but it's always more expensive compared to the non-fuzzy variants.

:::note
-`multiFuzzyMatch*` functions do not support UTF-8 regular expressions, and such expressions are treated as bytes because of hyperscan restriction.
+The `multiFuzzyMatch*()` function family does not support UTF-8 regular expressions (it treats them as a sequence of bytes) due to restrictions of hyperscan.
:::

-:::note
-To turn off all functions that use hyperscan, use setting `SET allow_hyperscan = 0;`.
-:::
+**Syntax**

-## extract(haystack, pattern)
+```sql
+multiFuzzyMatchAny(haystack, distance, [pattern1, pattern2, …, patternN])
+```

-Extracts a fragment of a string using a regular expression. If ‘haystack’ does not match the ‘pattern’ regex, an empty string is returned. If the regex does not contain subpatterns, it takes the fragment that matches the entire regex. Otherwise, it takes the fragment that matches the first subpattern.
+## multiFuzzyMatchAnyIndex

-## extractAll(haystack, pattern)
+Like `multiFuzzyMatchAny` but returns any index that matches the haystack within a constant edit distance.

-Extracts all the fragments of a string using a regular expression. If ‘haystack’ does not match the ‘pattern’ regex, an empty string is returned. Returns an array of strings consisting of all matches to the regex. In general, the behavior is the same as the ‘extract’ function (it takes the first subpattern, or the entire expression if there isn’t a subpattern).
+**Syntax**
+
+```sql
+multiFuzzyMatchAnyIndex(haystack, distance, [pattern1, pattern2, …, patternN])
+```
+
+## multiFuzzyMatchAllIndices
+
+Like `multiFuzzyMatchAny` but returns the array of all indices in any order that match the haystack within a constant edit distance.
+
+**Syntax**
+
+```sql
+multiFuzzyMatchAllIndices(haystack, distance, [pattern1, pattern2, …, patternN])
+```
+
+## extract
+
+Extracts a fragment of a string using a regular expression. If `haystack` does not match the `pattern` regex, an empty string is returned.
+
+For regex without subpatterns, the function uses the fragment that matches the entire regex. Otherwise, it uses the fragment that matches the first subpattern.
+
+**Syntax**
+
+```sql
+extract(haystack, pattern)
+```
+
+## extractAll
+
+Extracts all fragments of a string using a regular expression. If `haystack` does not match the `pattern` regex, an empty array is returned.
+
+Returns an array of strings consisting of all matches of the regex.
+
+The behavior with respect to subpatterns is the same as in function `extract`.
+
+**Syntax**
+
+```sql
+extractAll(haystack, pattern)
+```

## extractAllGroupsHorizontal

Matches all groups of the `haystack` string using the `pattern` regular expression. Returns an array of arrays, where the first array includes all fragments matching the first group, the second array - matching the second group, etc.

-:::note
-`extractAllGroupsHorizontal` function is slower than [extractAllGroupsVertical](#extractallgroups-vertical).
-:::
+This function is slower than [extractAllGroupsVertical](#extractallgroups-vertical).

**Syntax**

@@ -457,19 +356,17 @@ extractAllGroupsHorizontal(haystack, pattern)

**Arguments**

-- `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md).
-- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md).
+- `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md).
+- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md).

**Returned value**

-- Type: [Array](../../sql-reference/data-types/array.md).
+- Type: [Array](../../sql-reference/data-types/array.md).
If `haystack` does not match the `pattern` regex, an array of empty arrays is returned. **Example** -Query: - ``` sql SELECT extractAllGroupsHorizontal('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[^"]+"|\\w+)'); ``` @@ -482,10 +379,6 @@ Result: └──────────────────────────────────────────────────────────────────────────────────────────┘ ``` -**See Also** - -- [extractAllGroupsVertical](#extractallgroups-vertical) - ## extractAllGroupsVertical Matches all groups of the `haystack` string using the `pattern` regular expression. Returns an array of arrays, where each array includes matching fragments from every group. Fragments are grouped in order of appearance in the `haystack`. @@ -498,19 +391,17 @@ extractAllGroupsVertical(haystack, pattern) **Arguments** -- `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md). -- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md). +- `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md). +- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md). **Returned value** -- Type: [Array](../../sql-reference/data-types/array.md). +- Type: [Array](../../sql-reference/data-types/array.md). If `haystack` does not match the `pattern` regex, an empty array is returned. **Example** -Query: - ``` sql SELECT extractAllGroupsVertical('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[^"]+"|\\w+)'); ``` @@ -523,119 +414,88 @@ Result: └────────────────────────────────────────────────────────────────────────────────────────┘ ``` -**See Also** +## like -- [extractAllGroupsHorizontal](#extractallgroups-horizontal) +Returns whether string `haystack` matches the LIKE expression `pattern`. -## like(haystack, pattern), haystack LIKE pattern operator +A LIKE expression can contain normal characters and the following metasymbols: -Checks whether a string matches a LIKE expression. -A LIKE expression contains a mix of normal characters and the following metasymbols: - -- `%` indicates an arbitrary number of arbitrary characters (including zero characters). - -- `_` indicates a single arbitrary character. - -- `\` is for escaping literals `%`, `_` and `\`. +- `%` indicates an arbitrary number of arbitrary characters (including zero characters). +- `_` indicates a single arbitrary character. +- `\` is for escaping literals `%`, `_` and `\`. Matching is based on UTF-8, e.g. `_` matches the Unicode code point `¥` which is represented in UTF-8 using two bytes. -If the haystack or the pattern are not valid UTF-8, then the behavior is undefined. + +If the haystack or the LIKE expression are not valid UTF-8, the behavior is undefined. + No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that. -To match against literals `%`, `_` and `/` (which are LIKE metacharacters), prepend them with a backslash, i.e. `\%`, `\_` and `\\`. -The backslash loses its special meaning, i.e. is interpreted literally, if it prepends a character different than `%`, `_` or `\`. 
+To match against literal `%`, `_` and `\` (which are LIKE metacharacters), prepend them with a backslash: `\%`, `\_` and `\\`.
+The backslash loses its special meaning (i.e. is interpreted literally) if it prepends a character different than `%`, `_` or `\`.
Note that ClickHouse requires backslashes in strings [to be quoted as well](../syntax.md#string), so you would actually need to write `\\%`, `\\_` and `\\\\`.

-For patterns of the form `%needle%`, the function is as fast as the `position` function.
-Other LIKE expressions are internally converted to a regular expression and executed with a performance similar to function `match`.
-
-## notLike(haystack, pattern), haystack NOT LIKE pattern operator
-
-The same thing as ‘like’, but negative.
-
-## ilike
-
-Case insensitive variant of [like](https://clickhouse.com/docs/en/sql-reference/functions/string-search-functions/#function-like) function. You can use `ILIKE` operator instead of the `ilike` function.
-
-The function ignores the language, e.g. for Turkish (i/İ), the result might be incorrect.
+For LIKE expressions of the form `%needle%`, the function is as fast as the `position` function.
+All other LIKE expressions are internally converted to a regular expression and executed with a performance similar to function `match`.

**Syntax**

-``` sql
-ilike(haystack, pattern)
+```sql
+like(haystack, pattern)
```

-**Arguments**
+Alias: `haystack LIKE pattern` (operator)

-- `haystack` — Input string. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `pattern` — If `pattern` does not contain percent signs or underscores, then the `pattern` only represents the string itself. An underscore (`_`) in `pattern` stands for (matches) any single character. A percent sign (`%`) matches any sequence of zero or more characters.
+## notLike

-Some `pattern` examples:
+Like `like` but negates the result.

-``` text
-'abc' ILIKE 'abc'    true
-'abc' ILIKE 'a%'     true
-'abc' ILIKE '_b_'    true
-'abc' ILIKE 'c'      false
+Alias: `haystack NOT LIKE pattern` (operator)
+
+## ilike
+
+Like `like` but searches case-insensitively.
+
+Alias: `haystack ILIKE pattern` (operator)
+
+## notILike
+
+Like `ilike` but negates the result.
+
+Alias: `haystack NOT ILIKE pattern` (operator)
+
+## ngramDistance
+
+Calculates the 4-gram distance between a `haystack` string and a `needle` string. For that, it counts the symmetric difference between two multisets of 4-grams and normalizes it by the sum of their cardinalities. Returns a Float32 between 0 and 1. The smaller the result, the more similar the strings are to each other. Throws an exception if constant `needle` or `haystack` arguments are more than 32Kb in size. If any of the non-constant `haystack` or `needle` arguments is more than 32Kb in size, the distance is always 1.
+
+Functions `ngramDistanceCaseInsensitive`, `ngramDistanceUTF8` and `ngramDistanceCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.
+
+**Syntax**
+
+```sql
+ngramDistance(haystack, needle)
```

-**Returned values**
+## ngramSearch

-- True, if the string matches `pattern`.
-- False, if the string does not match `pattern`.
+Like `ngramDistance` but calculates the non-symmetric difference between a `needle` string and a `haystack` string, i.e. the number of n-grams from the needle minus the common number of n-grams, normalized by the number of `needle` n-grams. Returns a Float32 between 0 and 1. The bigger the result, the more likely `needle` is in the `haystack`. This function is useful for fuzzy string search.
Also see function `soundex`.

-**Example**
-
-Input table:
-
-``` text
-┌─id─┬─name─────┬─days─┐
-│  1 │ January  │   31 │
-│  2 │ February │   29 │
-│  3 │ March    │   31 │
-│  4 │ April    │   30 │
-└────┴──────────┴──────┘
-```
-
-Query:
-
-``` sql
-SELECT * FROM Months WHERE ilike(name, '%j%');
-```
-
-Result:
-
-``` text
-┌─id─┬─name────┬─days─┐
-│  1 │ January │   31 │
-└────┴─────────┴──────┘
-```
-
-**See Also**
-
-
-
-## ngramDistance(haystack, needle)
-
-Calculates the 4-gram distance between `haystack` and `needle`: counts the symmetric difference between two multisets of 4-grams and normalizes it by the sum of their cardinalities. Returns float number from 0 to 1 – the closer to zero, the more strings are similar to each other. If the constant `needle` or `haystack` is more than 32Kb, throws an exception. If some of the non-constant `haystack` or `needle` strings are more than 32Kb, the distance is always one.
-
-For case-insensitive search or/and in UTF-8 format use functions `ngramDistanceCaseInsensitive, ngramDistanceUTF8, ngramDistanceCaseInsensitiveUTF8`.
-
-## ngramSearch(haystack, needle)
-
-Same as `ngramDistance` but calculates the non-symmetric difference between `needle` and `haystack` – the number of n-grams from needle minus the common number of n-grams normalized by the number of `needle` n-grams. The closer to one, the more likely `needle` is in the `haystack`. Can be useful for fuzzy string search.
-
-For case-insensitive search or/and in UTF-8 format use functions `ngramSearchCaseInsensitive, ngramSearchUTF8, ngramSearchCaseInsensitiveUTF8`.
+Functions `ngramSearchCaseInsensitive`, `ngramSearchUTF8` and `ngramSearchCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.

:::note
-For UTF-8 case we use 3-gram distance. All these are not perfectly fair n-gram distances. We use 2-byte hashes to hash n-grams and then calculate the (non-)symmetric difference between these hash tables – collisions may occur. With UTF-8 case-insensitive format we do not use fair `tolower` function – we zero the 5-th bit (starting from zero) of each codepoint byte and first bit of zeroth byte if bytes more than one – this works for Latin and mostly for all Cyrillic letters.
+The UTF-8 variants use the 3-gram distance. These are not perfectly fair n-gram distances. We use 2-byte hashes to hash the n-grams and then calculate the (non-)symmetric difference between these hash tables – collisions may occur. With the UTF-8 case-insensitive format we do not use a fair `tolower` function – we zero the 5th bit (starting from zero) of each codepoint byte and the first bit of the zeroth byte if there is more than one byte – this works for Latin and mostly for all Cyrillic letters.
:::

+**Syntax**
+
+```sql
+ngramSearch(haystack, needle)
+```
+
## countSubstrings

-Returns the number of substring occurrences.
+Returns how often substring `needle` occurs in string `haystack`.

-For a case-insensitive search, use [countSubstringsCaseInsensitive](../../sql-reference/functions/string-search-functions.md#countSubstringsCaseInsensitive) or [countSubstringsCaseInsensitiveUTF8](../../sql-reference/functions/string-search-functions.md#countSubstringsCaseInsensitiveUTF8) functions.
+Functions `countSubstringsCaseInsensitive` and `countSubstringsCaseInsensitiveUTF8` provide case-insensitive and case-insensitive + UTF-8 variants of this function.

**Syntax**

@@ -645,34 +505,18 @@ countSubstrings(haystack, needle[, start_pos])

**Arguments**

-- `haystack` — The string to search in.
[String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `start_pos` – Position of the first character in the string to start search. Optional. [UInt](../../sql-reference/data-types/int-uint.md). +- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../../sql-reference/data-types/int-uint.md). Optional. **Returned values** -- Number of occurrences. +- The number of occurrences. Type: [UInt64](../../sql-reference/data-types/int-uint.md). **Examples** -Query: - -``` sql -SELECT countSubstrings('foobar.com', '.'); -``` - -Result: - -``` text -┌─countSubstrings('foobar.com', '.')─┐ -│ 1 │ -└────────────────────────────────────┘ -``` - -Query: - ``` sql SELECT countSubstrings('aaaa', 'aa'); ``` @@ -685,7 +529,7 @@ Result: └───────────────────────────────┘ ``` -Query: +Example with `start_pos` argument: ```sql SELECT countSubstrings('abc___abc', 'abc', 4); @@ -699,125 +543,7 @@ Result: └────────────────────────────────────────┘ ``` -## countSubstringsCaseInsensitive - -Returns the number of substring occurrences case-insensitive. - -**Syntax** - -``` sql -countSubstringsCaseInsensitive(haystack, needle[, start_pos]) -``` - -**Arguments** - -- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `start_pos` — Position of the first character in the string to start search. Optional. [UInt](../../sql-reference/data-types/int-uint.md). - -**Returned values** - -- Number of occurrences. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). - -**Examples** - -Query: - -``` sql -SELECT countSubstringsCaseInsensitive('aba', 'B'); -``` - -Result: - -``` text -┌─countSubstringsCaseInsensitive('aba', 'B')─┐ -│ 1 │ -└────────────────────────────────────────────┘ -``` - -Query: - -``` sql -SELECT countSubstringsCaseInsensitive('foobar.com', 'CoM'); -``` - -Result: - -``` text -┌─countSubstringsCaseInsensitive('foobar.com', 'CoM')─┐ -│ 1 │ -└─────────────────────────────────────────────────────┘ -``` - -Query: - -``` sql -SELECT countSubstringsCaseInsensitive('abC___abC', 'aBc', 2); -``` - -Result: - -``` text -┌─countSubstringsCaseInsensitive('abC___abC', 'aBc', 2)─┐ -│ 1 │ -└───────────────────────────────────────────────────────┘ -``` - -## countSubstringsCaseInsensitiveUTF8 - -Returns the number of substring occurrences in `UTF-8` case-insensitive. - -**Syntax** - -``` sql -SELECT countSubstringsCaseInsensitiveUTF8(haystack, needle[, start_pos]) -``` - -**Arguments** - -- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `start_pos` — Position of the first character in the string to start search. Optional. [UInt](../../sql-reference/data-types/int-uint.md). - -**Returned values** - -- Number of occurrences. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). 
-
-**Examples**
-
-Query:
-
-``` sql
-SELECT countSubstringsCaseInsensitiveUTF8('абв', 'A');
-```
-
-Result:
-
-``` text
-┌─countSubstringsCaseInsensitiveUTF8('абв', 'A')─┐
-│                                              1 │
-└────────────────────────────────────────────────┘
-```
-
-Query:
-
-```sql
-SELECT countSubstringsCaseInsensitiveUTF8('аБв__АбВ__абв', 'Абв');
-```
-
-Result:
-
-``` text
-┌─countSubstringsCaseInsensitiveUTF8('аБв__АбВ__абв', 'Абв')─┐
-│                                                           3 │
-└────────────────────────────────────────────────────────────┘
-```
-
-## countMatches(haystack, pattern)
+## countMatches

Returns the number of regular expression matches for a `pattern` in a `haystack`.

@@ -829,19 +555,17 @@ countMatches(haystack, pattern)

**Arguments**

-- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `pattern` — The regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../../sql-reference/data-types/string.md).
+- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal).
+- `pattern` — The regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../../sql-reference/data-types/string.md).

**Returned value**

-- The number of matches.
+- The number of matches.

Type: [UInt64](../../sql-reference/data-types/int-uint.md).

**Examples**

-Query:
-
``` sql
SELECT countMatches('foobar.com', 'o+');
```

Result:

``` text
┌─countMatches('foobar.com', 'o+')─┐
│                                2 │
└──────────────────────────────────┘
```

-Query:
-
``` sql
SELECT countMatches('aaaa', 'aa');
```

Result:

``` text
┌─countMatches('aaaa', 'aa')─┐
│                          2 │
└────────────────────────────┘
```

-## regexpExtract(haystack, pattern[, index])
+## regexpExtract

Extracts the first string in haystack that matches the regexp pattern and corresponds to the regex group index.

@@ -882,9 +604,9 @@ Alias: `REGEXP_EXTRACT(haystack, pattern[, index])`.

**Arguments**

-- `haystack` — String, in which regexp pattern will to be matched. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `pattern` — String, regexp expression, must be constant. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `index` – An integer number greater or equal 0 with default 1. It represents which regex group to extract. [UInt or Int](../../sql-reference/data-types/int-uint.md). Optional.
+- `haystack` — String in which the regexp pattern is matched. [String](../../sql-reference/syntax.md#syntax-string-literal).
+- `pattern` — String, regexp expression, must be constant. [String](../../sql-reference/syntax.md#syntax-string-literal).
+- `index` – An integer number greater than or equal to 0, with default 1. It represents which regex group to extract. [UInt or Int](../../sql-reference/data-types/int-uint.md). Optional.
**Returned values**

@@ -899,8 +621,12 @@ SELECT
    regexpExtract('100-200', '(\\d+)-(\\d+)', 1),
    regexpExtract('100-200', '(\\d+)-(\\d+)', 2),
    regexpExtract('100-200', '(\\d+)-(\\d+)', 0),
-    regexpExtract('100-200', '(\\d+)-(\\d+)')
+    regexpExtract('100-200', '(\\d+)-(\\d+)');
+```
+
+Result:
+
+``` text
┌─regexpExtract('100-200', '(\\d+)-(\\d+)', 1)─┬─regexpExtract('100-200', '(\\d+)-(\\d+)', 2)─┬─regexpExtract('100-200', '(\\d+)-(\\d+)', 0)─┬─regexpExtract('100-200', '(\\d+)-(\\d+)')─┐
│ 100                                          │ 200                                          │ 100-200                                      │ 100                                       │
└──────────────────────────────────────────────┴──────────────────────────────────────────────┴──────────────────────────────────────────────┴───────────────────────────────────────────┘
diff --git a/docs/en/sql-reference/functions/time-window-functions.md b/docs/en/sql-reference/functions/time-window-functions.md
index c0eed01cccd..d8f23c92e61 100644
--- a/docs/en/sql-reference/functions/time-window-functions.md
+++ b/docs/en/sql-reference/functions/time-window-functions.md
@@ -1,6 +1,6 @@
---
slug: /en/sql-reference/functions/time-window-functions
-sidebar_position: 68
+sidebar_position: 175
sidebar_label: Time Window
---

@@ -118,4 +118,4 @@ hopEnd(time_attr, hop_interval, window_interval [, timezone]);

## Related content

-- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse)
\ No newline at end of file
+- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse)
diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md
index a31ec3c41d2..1739920c9f0 100644
--- a/docs/en/sql-reference/functions/tuple-functions.md
+++ b/docs/en/sql-reference/functions/tuple-functions.md
@@ -1,8 +1,7 @@
---
slug: /en/sql-reference/functions/tuple-functions
-sidebar_position: 66
+sidebar_position: 180
sidebar_label: Tuples
-title: "Functions for Working with Tuples"
---

## tuple

@@ -22,15 +21,15 @@ tuple(x, y, …)

## tupleElement

A function that allows getting a column from a tuple.
-‘N’ is the column index, starting from 1. ‘N’ must be a constant. ‘N’ must be a strict postive integer no greater than the size of the tuple.
-There is no cost to execute the function.
-The function implements the operator `x.N`.
+If the second argument is a number `n`, it is the column index, starting from 1. If the second argument is a string `s`, it represents the name of the element. A third optional argument can be provided: if the index is out of bounds or no element with the given name exists, the default value is returned instead of throwing an exception. The second and third arguments, if provided, must be constants. There is no cost to execute the function.
+
+The function implements the operators `x.n` and `x.s`.

**Syntax**

``` sql
-tupleElement(tuple, n)
+tupleElement(tuple, n/s [, default_value])
```

## untuple

@@ -47,11 +46,11 @@ You can use the `EXCEPT` expression to skip columns as a result of the query.

**Arguments**

-- `x` — A `tuple` function, column, or tuple of elements. [Tuple](../../sql-reference/data-types/tuple.md).
+- `x` — A `tuple` function, column, or tuple of elements. [Tuple](../../sql-reference/data-types/tuple.md).

**Returned value**

-- None.
+- None.
**Examples** @@ -111,7 +110,7 @@ Result: **See Also** -- [Tuple](../../sql-reference/data-types/tuple.md) +- [Tuple](../../sql-reference/data-types/tuple.md) ## tupleHammingDistance @@ -125,16 +124,16 @@ tupleHammingDistance(tuple1, tuple2) **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). Tuples should have the same type of the elements. **Returned value** -- The Hamming distance. +- The Hamming distance. -Type: The result type is calculed the same way it is for [Arithmetic functions](../../sql-reference/functions/arithmetic-functions.md), based on the number of elements in the input tuples. +Type: The result type is calculated the same way it is for [Arithmetic functions](../../sql-reference/functions/arithmetic-functions.md), based on the number of elements in the input tuples. ``` sql SELECT @@ -195,11 +194,11 @@ tupleToNameValuePairs(tuple) **Arguments** -- `tuple` — Named tuple. [Tuple](../../sql-reference/data-types/tuple.md) with any types of values. +- `tuple` — Named tuple. [Tuple](../../sql-reference/data-types/tuple.md) with any types of values. **Returned value** -- An array with (name, value) pairs. +- An array with (name, value) pairs. Type: [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), ...)). @@ -208,7 +207,7 @@ Type: [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-referen Query: ``` sql -CREATE TABLE tupletest (`col` Tuple(user_ID UInt64, session_ID UInt64) ENGINE = Memory; +CREATE TABLE tupletest (col Tuple(user_ID UInt64, session_ID UInt64)) ENGINE = Memory; INSERT INTO tupletest VALUES (tuple( 100, 2502)), (tuple(1,100)); @@ -224,14 +223,14 @@ Result: └───────────────────────────────────────┘ ``` -It is possible to transform colums to rows using this function: +It is possible to transform columns to rows using this function: ``` sql -CREATE TABLE tupletest (`col` Tuple(CPU Float64, Memory Float64, Disk Float64)) ENGINE = Memory; +CREATE TABLE tupletest (col Tuple(CPU Float64, Memory Float64, Disk Float64)) ENGINE = Memory; INSERT INTO tupletest VALUES(tuple(3.3, 5.5, 6.6)); -SELECT arrayJoin(tupleToNameValuePairs(col))FROM tupletest; +SELECT arrayJoin(tupleToNameValuePairs(col)) FROM tupletest; ``` Result: @@ -272,12 +271,12 @@ Alias: `vectorSum`. **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). **Returned value** -- Tuple with the sum. +- Tuple with the sum. Type: [Tuple](../../sql-reference/data-types/tuple.md). @@ -311,12 +310,12 @@ Alias: `vectorDifference`. **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). **Returned value** -- Tuple with the result of subtraction. +- Tuple with the result of subtraction. 
Type: [Tuple](../../sql-reference/data-types/tuple.md). @@ -348,12 +347,12 @@ tupleMultiply(tuple1, tuple2) **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). **Returned value** -- Tuple with the multiplication. +- Tuple with the multiplication. Type: [Tuple](../../sql-reference/data-types/tuple.md). @@ -385,12 +384,12 @@ tupleDivide(tuple1, tuple2) **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). **Returned value** -- Tuple with the result of division. +- Tuple with the result of division. Type: [Tuple](../../sql-reference/data-types/tuple.md). @@ -422,11 +421,11 @@ tupleNegate(tuple) **Arguments** -- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). **Returned value** -- Tuple with the result of negation. +- Tuple with the result of negation. Type: [Tuple](../../sql-reference/data-types/tuple.md). @@ -458,12 +457,12 @@ tupleMultiplyByNumber(tuple, number) **Arguments** -- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). -- `number` — Multiplier. [Int/UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). +- `number` — Multiplier. [Int/UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). **Returned value** -- Tuple with multiplied values. +- Tuple with multiplied values. Type: [Tuple](../../sql-reference/data-types/tuple.md). @@ -495,12 +494,12 @@ tupleDivideByNumber(tuple, number) **Arguments** -- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). -- `number` — Divider. [Int/UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). +- `number` — Divider. [Int/UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). **Returned value** -- Tuple with divided values. +- Tuple with divided values. Type: [Tuple](../../sql-reference/data-types/tuple.md). @@ -534,12 +533,12 @@ Alias: `scalarProduct`. **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). **Returned value** -- Scalar product. +- Scalar product. Type: [Int/UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). 
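+
+**Example**
+
+A short illustration: the scalar product of `(1, 2, 3)` and `(4, 5, 6)` is `1*4 + 2*5 + 3*6 = 32`:
+
+``` sql
+SELECT dotProduct((1, 2, 3), (4, 5, 6));
+```
+
+Result:
+
+``` text
+┌─dotProduct((1, 2, 3), (4, 5, 6))─┐
+│                               32 │
+└──────────────────────────────────┘
+```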
diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md
index 4a6780df292..8283de95994 100644
--- a/docs/en/sql-reference/functions/tuple-map-functions.md
+++ b/docs/en/sql-reference/functions/tuple-map-functions.md
@@ -1,8 +1,7 @@
---
slug: /en/sql-reference/functions/tuple-map-functions
-sidebar_position: 46
+sidebar_position: 120
sidebar_label: Maps
-title: "Functions for Maps"
---

## map

@@ -17,12 +16,12 @@ map(key1, value1[, key2, value2, ...])

**Arguments**

-- `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UUID](../../sql-reference/data-types/uuid.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), [Date32](../../sql-reference/data-types/date32.md), [Enum](../../sql-reference/data-types/enum.md).
-- `value` — The value part of the pair. Arbitrary type, including [Map](../../sql-reference/data-types/map.md) and [Array](../../sql-reference/data-types/array.md).
+- `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UUID](../../sql-reference/data-types/uuid.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), [Date32](../../sql-reference/data-types/date32.md), [Enum](../../sql-reference/data-types/enum.md).
+- `value` — The value part of the pair. Arbitrary type, including [Map](../../sql-reference/data-types/map.md) and [Array](../../sql-reference/data-types/array.md).

**Returned value**

-- Data structure as `key:value` pairs.
+- Data structure as `key:value` pairs.

Type: [Map(key, value)](../../sql-reference/data-types/map.md).

@@ -64,7 +63,153 @@ Result:

**See Also**

-- [Map(key, value)](../../sql-reference/data-types/map.md) data type
+- [Map(key, value)](../../sql-reference/data-types/map.md) data type
+
+## mapFromArrays
+
+Merges an [Array](../../sql-reference/data-types/array.md) of keys and an [Array](../../sql-reference/data-types/array.md) of values into a [Map(key, value)](../../sql-reference/data-types/map.md). Note that the second argument can also be a [Map](../../sql-reference/data-types/map.md); in that case it is cast to an Array during execution.
+
+
+The function is a more convenient alternative to `CAST((key_array, value_array_or_map), 'Map(key_type, value_type)')`. For example, instead of writing `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')`, you can write `mapFromArrays(['aa', 'bb'], [4, 5])`.
+
+
+**Syntax**
+
+```sql
+mapFromArrays(keys, values)
+```
+
+Alias: `MAP_FROM_ARRAYS(keys, values)`
+
+**Arguments**
+
+- `keys` — Given key array to create a map from.
The nested type of array must be: [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UUID](../../sql-reference/data-types/uuid.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), [Date32](../../sql-reference/data-types/date32.md), [Enum](../../sql-reference/data-types/enum.md)
+- `values` - Given value array or map to create a map from.
+
+**Returned value**
+
+- A map whose keys and values are constructed from the key array and value array/map.
+
+**Example**
+
+Query:
+
+```sql
+SELECT mapFromArrays(['a', 'b', 'c'], [1, 2, 3]);
+```
+
+Result:
+
+```text
+┌─mapFromArrays(['a', 'b', 'c'], [1, 2, 3])─┐
+│ {'a':1,'b':2,'c':3}                       │
+└───────────────────────────────────────────┘
+```
+
+```sql
+SELECT mapFromArrays([1, 2, 3], map('a', 1, 'b', 2, 'c', 3));
+```
+
+Result:
+
+```text
+┌─mapFromArrays([1, 2, 3], map('a', 1, 'b', 2, 'c', 3))─┐
+│ {1:('a',1),2:('b',2),3:('c',3)}                       │
+└───────────────────────────────────────────────────────┘
+```
+
+## extractKeyValuePairs
+
+Extracts key-value pairs, i.e. a [Map(String, String)](../../sql-reference/data-types/map.md), from a string. Parsing is robust towards noise (e.g. log files).
+
+A key-value pair consists of a key, followed by a `key_value_delimiter` and a value. Key-value pairs must be separated by `pair_delimiters`. Quoted keys and values are also supported.
+
+**Syntax**
+
+``` sql
+extractKeyValuePairs(data[, key_value_delimiter[, pair_delimiters[, quoting_character]]])
+```
+
+Alias:
+- `str_to_map`
+- `mapFromString`
+
+**Arguments**
+
+- `data` - String to extract key-value pairs from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
+- `key_value_delimiter` - Character to be used as delimiter between the key and the value. Defaults to `:`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
+- `pair_delimiters` - Set of characters to be used as delimiters between pairs. Defaults to ` `, `,` and `;`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
+- `quoting_character` - Character to be used as quoting character. Defaults to `"`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
+
+**Returned values**
+
+- A [Map(String, String)](../../sql-reference/data-types/map.md) of key-value pairs.
+
+**Examples**
+
+Simple case:
+
+``` sql
+SELECT extractKeyValuePairs('name:neymar, age:31 team:psg,nationality:brazil') as kv
+```
+
+Result:
+
+``` text
+┌─kv───────────────────────────────────────────────────────────────┐
+│ {'name':'neymar','age':'31','team':'psg','nationality':'brazil'} │
+└───────────────────────────────────────────────────────────────────┘
+```
+
+Single quote as quoting character:
+
+``` sql
+SELECT extractKeyValuePairs('name:\'neymar\';\'age\':31;team:psg;nationality:brazil,last_key:last_value', ':', ';,', '\'') as kv
+```
+
+Result:
+
+``` text
+┌─kv────────────────────────────────────────────────────────────────────────────────────────┐
+│ {'name':'neymar','age':'31','team':'psg','nationality':'brazil','last_key':'last_value'}  │
+└────────────────────────────────────────────────────────────────────────────────────────────┘
+```
+
+Escape sequences without escape sequence support:
+
+``` sql
+SELECT extractKeyValuePairs('age:a\\x0A\\n\\0') AS kv
+```
+
+Result:
+
+``` text
+┌─kv─────────────────────┐
+│ {'age':'a\\x0A\\n\\0'} │
+└────────────────────────┘
+```
+
+## extractKeyValuePairsWithEscaping
+
+Same as `extractKeyValuePairs` but with escaping support.
+
+Supported escape sequences: `\x`, `\N`, `\a`, `\b`, `\e`, `\f`, `\n`, `\r`, `\t`, `\v` and `\0`.
+Non-standard escape sequences are returned as is (including the backslash) unless they are one of the following:
+`\\`, `'`, `"`, `backtick`, `/`, `=` or ASCII control characters (c <= 31).
+
+This function will satisfy the use case where pre-escaping and post-escaping are not suitable. For instance, consider the following
+input string: `a: "aaaa\"bbb"`. The expected output is: `a: aaaa\"bbb`.
+- Pre-escaping: Pre-escaping it will output: `a: "aaaa"bbb"` and `extractKeyValuePairs` will then output: `a: aaaa`
+- Post-escaping: `extractKeyValuePairs` will output `a: aaaa\` and post-escaping will keep it as is.
+
+Leading escape sequences will be skipped in keys and will be considered invalid for values.
+
+**Examples**
+
+Escape sequences with escape sequence support turned on:
+
+``` sql
+SELECT extractKeyValuePairsWithEscaping('age:a\\x0A\\n\\0') AS kv
+```
+
+Result:
+
+``` text
+┌─kv────────────────┐
+│ {'age':'a\n\n\0'} │
+└───────────────────┘
+```

## mapAdd

@@ -82,7 +227,7 @@ Arguments are [maps](../../sql-reference/data-types/map.md) or [tuples](../../sq

**Returned value**

-- Depending on the arguments returns one [map](../../sql-reference/data-types/map.md) or [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values.
+- Depending on the arguments returns one [map](../../sql-reference/data-types/map.md) or [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values.

**Example**

@@ -130,7 +275,7 @@ Arguments are [maps](../../sql-reference/data-types/map.md) or [tuples](../../sq

**Returned value**

-- Depending on the arguments returns one [map](../../sql-reference/data-types/map.md) or [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values.
+- Depending on the arguments returns one [map](../../sql-reference/data-types/map.md) or [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values. **Example** @@ -183,17 +328,17 @@ Arguments are [maps](../../sql-reference/data-types/map.md) or two [arrays](../. Mapped arrays: -- `keys` — Array of keys. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)). -- `values` — Array of values. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)). -- `max` — Maximum key value. Optional. [Int8, Int16, Int32, Int64, Int128, Int256](../../sql-reference/data-types/int-uint.md#int-ranges). +- `keys` — Array of keys. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)). +- `values` — Array of values. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)). +- `max` — Maximum key value. Optional. [Int8, Int16, Int32, Int64, Int128, Int256](../../sql-reference/data-types/int-uint.md#int-ranges). or -- `map` — Map with integer keys. [Map](../../sql-reference/data-types/map.md). +- `map` — Map with integer keys. [Map](../../sql-reference/data-types/map.md). **Returned value** -- Depending on the arguments returns a [map](../../sql-reference/data-types/map.md) or a [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array): keys in sorted order, and values the corresponding keys. +- Depending on the arguments returns a [map](../../sql-reference/data-types/map.md) or a [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array): keys in sorted order, and values the corresponding keys. **Example** @@ -235,14 +380,14 @@ Determines whether the `map` contains the `key` parameter. mapContains(map, key) ``` -**Parameters** +**Arguments** -- `map` — Map. [Map](../../sql-reference/data-types/map.md). -- `key` — Key. Type matches the type of keys of `map` parameter. +- `map` — Map. [Map](../../sql-reference/data-types/map.md). +- `key` — Key. Type matches the type of keys of `map` parameter. **Returned value** -- `1` if `map` contains `key`, `0` if not. +- `1` if `map` contains `key`, `0` if not. Type: [UInt8](../../sql-reference/data-types/int-uint.md). @@ -280,13 +425,13 @@ Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operat mapKeys(map) ``` -**Parameters** +**Arguments** -- `map` — Map. [Map](../../sql-reference/data-types/map.md). +- `map` — Map. [Map](../../sql-reference/data-types/map.md). **Returned value** -- Array containing all keys from the `map`. +- Array containing all keys from the `map`. Type: [Array](../../sql-reference/data-types/array.md). @@ -323,13 +468,13 @@ Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operat mapValues(map) ``` -**Parameters** +**Arguments** -- `map` — Map. [Map](../../sql-reference/data-types/map.md). +- `map` — Map. [Map](../../sql-reference/data-types/map.md). **Returned value** -- Array containing all the values from `map`. +- Array containing all the values from `map`. Type: [Array](../../sql-reference/data-types/array.md). 
@@ -352,25 +497,24 @@ Result:
│ ['eleven','11']  │
│ ['twelve','6.0'] │
└──────────────────┘
-```
-
-## mapContainsKeyLike
-
+```
+
+## mapContainsKeyLike
+
**Syntax**

```sql
mapContainsKeyLike(map, pattern)
-```
-
-**Parameters**
-
-- `map` — Map. [Map](../../sql-reference/data-types/map.md).
-- `pattern`  - String pattern to match.
-
+```
+
+**Arguments**
+
+- `map` — Map. [Map](../../sql-reference/data-types/map.md).
+- `pattern` - String pattern to match.
+
**Returned value**

-- `1` if `map` contains `key` like specified pattern, `0` if not.
-
+- `1` if `map` contains a key like the specified pattern, `0` if not.
+
**Example**

Query:

@@ -381,34 +525,34 @@ CREATE TABLE test (a Map(String,String)) ENGINE = Memory;

INSERT INTO test VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'});

SELECT mapContainsKeyLike(a, 'a%') FROM test;
-```
-
-Result:
-
+```
+
+Result:
+
```text
┌─mapContainsKeyLike(a, 'a%')─┐
│                           1 │
│                           0 │
-└─────────────────────────────┘
-```
-
-## mapExtractKeyLike
-
+└─────────────────────────────┘
+```
+
+## mapExtractKeyLike
+
**Syntax**

```sql
mapExtractKeyLike(map, pattern)
-```
-
-**Parameters**
-
-- `map` — Map. [Map](../../sql-reference/data-types/map.md).
-- `pattern`  - String pattern to match.
-
+```
+
+**Arguments**
+
+- `map` — Map. [Map](../../sql-reference/data-types/map.md).
+- `pattern` - String pattern to match.
+
**Returned value**

-- A map contained elements the key of which matchs the specified pattern. If there are no elements matched the pattern, it will return an empty map.
-
+- A map containing the elements whose keys match the specified pattern. If no elements match the pattern, an empty map is returned.
+
**Example**

Query:

@@ -419,34 +563,34 @@ CREATE TABLE test (a Map(String,String)) ENGINE = Memory;

INSERT INTO test VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'});

SELECT mapExtractKeyLike(a, 'a%') FROM test;
-```
-
-Result:
-
+```
+
+Result:
+
```text
┌─mapExtractKeyLike(a, 'a%')─┐
│ {'abc':'abc'}              │
│ {}                         │
└────────────────────────────┘
-```
-
-## mapApply
-
+```
+
+## mapApply
+
**Syntax**

```sql
mapApply(func, map)
-```
-
-**Parameters**
-
-- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
-- `map` — [Map](../../sql-reference/data-types/map.md).
+```
+
+**Arguments**
+
+- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
+- `map` — [Map](../../sql-reference/data-types/map.md).

**Returned value**

- Returns a map obtained from the original map by application of `func(map1[i], …, mapN[i])` for each element.

**Example**

Query:

@@ -458,36 +602,36 @@ FROM
    SELECT map('key1', number, 'key2', number * 2) AS _map
    FROM numbers(3)
)
-```
-
-Result:
-
+```
+
+Result:
+
```text
┌─r─────────────────────┐
│ {'key1':0,'key2':0}   │
│ {'key1':10,'key2':20} │
│ {'key1':20,'key2':40} │
└───────────────────────┘
-```
+```
+
+## mapFilter

-## mapFilter
-
**Syntax**

```sql
mapFilter(func, map)
-```
-
-**Parameters**
+```

-- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
-- `map` — [Map](../../sql-reference/data-types/map.md).
+**Arguments**
+
+- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
+- `map` — [Map](../../sql-reference/data-types/map.md).
**Returned value** - Returns a map containing only the elements in `map` for which `func(map1[i], …, mapN[i])` returns something other than 0. - - + + **Example** Query: @@ -499,48 +643,195 @@ FROM ( SELECT map('key1', number, 'key2', number * 2) AS _map FROM numbers(3) ) -``` - -Result: - +``` + +Result: + ```text ┌─r───────────────────┐ │ {'key1':0,'key2':0} │ │ {'key2':2} │ │ {'key1':2,'key2':4} │ └─────────────────────┘ -``` +``` -## mapUpdate - +## mapUpdate + **Syntax** ```sql mapUpdate(map1, map2) -``` - -**Parameters** +``` -- `map1` [Map](../../sql-reference/data-types/map.md). -- `map2` [Map](../../sql-reference/data-types/map.md). +**Arguments** + +- `map1` — [Map](../../sql-reference/data-types/map.md). +- `map2` — [Map](../../sql-reference/data-types/map.md). **Returned value** - Returns `map1` with values updated by the values for the corresponding keys in `map2`. - + **Example** Query: ```sql SELECT mapUpdate(map('key1', 0, 'key3', 0), map('key1', 10, 'key2', 10)) AS map; -``` - -Result: - +``` + +Result: + ```text ┌─map────────────────────────────┐ │ {'key3':0,'key1':10,'key2':10} │ └────────────────────────────────┘ -``` +``` + +## mapConcat + +**Syntax** + +```sql +mapConcat(maps) +``` + +**Arguments** + +- `maps` – Arbitrary number of arguments of [Map](../../sql-reference/data-types/map.md) type. + +**Returned value** + +- Returns a map built by concatenating the maps passed as arguments. If the same key occurs in two or more maps, all of its entries are added to the result map, but only the first one is accessible via the `[]` operator. + +**Examples** + +Query: + +```sql +SELECT mapConcat(map('key1', 1, 'key3', 3), map('key2', 2)) AS map; +``` + +Result: + +```text +┌─map──────────────────────────┐ +│ {'key1':1,'key3':3,'key2':2} │ +└──────────────────────────────┘ +``` + +Query: + +```sql +SELECT mapConcat(map('key1', 1, 'key2', 2), map('key1', 3)) AS map, map['key1']; +``` + +Result: + +```text +┌─map──────────────────────────┬─elem─┐ +│ {'key1':1,'key2':2,'key1':3} │ 1 │ +└──────────────────────────────┴──────┘ +``` + +## mapExists(\[func,\], map) + +Returns 1 if there is at least one key-value pair in `map` for which `func(key, value)` returns something other than 0. Otherwise, it returns 0. + +Note that `mapExists` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. + +**Example** + +Query: + +```sql +SELECT mapExists((k, v) -> (v = 1), map('k1', 1, 'k2', 2)) AS res +``` + +Result: + +```text +┌─res─┐ +│ 1 │ +└─────┘ +``` + +## mapAll(\[func,\] map) + +Returns 1 if `func(key, value)` returns something other than 0 for all key-value pairs in `map`. Otherwise, it returns 0. + +Note that `mapAll` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. + +**Example** + +Query: + +```sql +SELECT mapAll((k, v) -> (v = 1), map('k1', 1, 'k2', 2)) AS res +``` + +Result: + +```text +┌─res─┐ +│ 0 │ +└─────┘ +``` + +## mapSort(\[func,\], map) + +Sorts the elements of the `map` in ascending order. If the `func` function is specified, the sorting order is determined by the result of the `func` function applied to the keys and values of the map.
+ +**Examples** + +``` sql +SELECT mapSort(map('key2', 2, 'key3', 1, 'key1', 3)) AS map; +``` + +``` text +┌─map──────────────────────────┐ +│ {'key1':3,'key2':2,'key3':1} │ +└──────────────────────────────┘ +``` + +``` sql +SELECT mapSort((k, v) -> v, map('key2', 2, 'key3', 1, 'key1', 3)) AS map; +``` + +``` text +┌─map──────────────────────────┐ +│ {'key3':1,'key2':2,'key1':3} │ +└──────────────────────────────┘ +``` + +For more details see the [reference](../../sql-reference/functions/array-functions.md#array_functions-sort) for the `arraySort` function. + +## mapReverseSort(\[func,\], map) + +Sorts the elements of the `map` in descending order. If the `func` function is specified, the sorting order is determined by the result of the `func` function applied to the keys and values of the map. + + +**Examples** + +``` sql +SELECT mapReverseSort(map('key2', 2, 'key3', 1, 'key1', 3)) AS map; +``` + +``` text +┌─map──────────────────────────┐ +│ {'key3':1,'key2':2,'key1':3} │ +└──────────────────────────────┘ +``` + +``` sql +SELECT mapReverseSort((k, v) -> v, map('key2', 2, 'key3', 1, 'key1', 3)) AS map; +``` + +``` text +┌─map──────────────────────────┐ +│ {'key1':3,'key2':2,'key3':1} │ +└──────────────────────────────┘ +``` + +For more details see the [reference](../../sql-reference/functions/array-functions.md#array_functions-reverse-sort) for the `arrayReverseSort` function. diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 5d96113fe50..c7c66cc771f 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/functions/type-conversion-functions -sidebar_position: 38 +sidebar_position: 185 sidebar_label: Type Conversion --- @@ -8,25 +8,61 @@ sidebar_label: Type Conversion ## Common Issues with Data Conversion +ClickHouse generally uses the [same behavior as C++ programs](https://en.cppreference.com/w/cpp/language/implicit_conversion). + +`to` functions and [cast](#castx-t) behave differently in some cases, for example in the case of [LowCardinality](../data-types/lowcardinality.md): [cast](#castx-t) removes the [LowCardinality](../data-types/lowcardinality.md) trait, while `to` functions don't. The same applies to [Nullable](../data-types/nullable.md); this behaviour is not compatible with the SQL standard, and it can be changed using the [cast_keep_nullable](../../operations/settings/settings.md/#cast_keep_nullable) setting. + +:::note Be aware of potential data loss if values of a datatype are converted to a smaller datatype (for example from `Int64` to `Int32`) or between incompatible datatypes (for example from `String` to `Int`). Make sure to check carefully if the result is as expected. ::: -ClickHouse generally uses the [same behavior as C++ programs](https://en.cppreference.com/w/cpp/language/implicit_conversion).
+Example: + +```sql +SELECT + toTypeName(toLowCardinality('') AS val) AS source_type, + toTypeName(toString(val)) AS to_type_result_type, + toTypeName(CAST(val, 'String')) AS cast_result_type + +┌─source_type────────────┬─to_type_result_type────┬─cast_result_type─┐ +│ LowCardinality(String) │ LowCardinality(String) │ String │ +└────────────────────────┴────────────────────────┴──────────────────┘ + +SELECT + toTypeName(toNullable('') AS val) AS source_type, + toTypeName(toString(val)) AS to_type_result_type, + toTypeName(CAST(val, 'String')) AS cast_result_type + +┌─source_type──────┬─to_type_result_type─┬─cast_result_type─┐ +│ Nullable(String) │ Nullable(String) │ String │ +└──────────────────┴─────────────────────┴──────────────────┘ + +SELECT + toTypeName(toNullable('') AS val) AS source_type, + toTypeName(toString(val)) AS to_type_result_type, + toTypeName(CAST(val, 'String')) AS cast_result_type +SETTINGS cast_keep_nullable = 1 + +┌─source_type──────┬─to_type_result_type─┬─cast_result_type─┐ +│ Nullable(String) │ Nullable(String) │ Nullable(String) │ +└──────────────────┴─────────────────────┴──────────────────┘ +``` ## toInt(8\|16\|32\|64\|128\|256) Converts an input value to a value of the [Int](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes: -- `toInt8(expr)` — Converts to a value of data type `Int8`. -- `toInt16(expr)` — Converts to a value of data type `Int16`. -- `toInt32(expr)` — Converts to a value of data type `Int32`. -- `toInt64(expr)` — Converts to a value of data type `Int64`. -- `toInt128(expr)` — Converts to a value of data type `Int128`. -- `toInt256(expr)` — Converts to a value of data type `Int256`. +- `toInt8(expr)` — Converts to a value of data type `Int8`. +- `toInt16(expr)` — Converts to a value of data type `Int16`. +- `toInt32(expr)` — Converts to a value of data type `Int32`. +- `toInt64(expr)` — Converts to a value of data type `Int64`. +- `toInt128(expr)` — Converts to a value of data type `Int128`. +- `toInt256(expr)` — Converts to a value of data type `Int256`. **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. **Returned value** @@ -117,19 +153,19 @@ Result: Converts an input value to the [UInt](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes: -- `toUInt8(expr)` — Converts to a value of data type `UInt8`. -- `toUInt16(expr)` — Converts to a value of data type `UInt16`. -- `toUInt32(expr)` — Converts to a value of data type `UInt32`. -- `toUInt64(expr)` — Converts to a value of data type `UInt64`. -- `toUInt256(expr)` — Converts to a value of data type `UInt256`. +- `toUInt8(expr)` — Converts to a value of data type `UInt8`. +- `toUInt16(expr)` — Converts to a value of data type `UInt16`. +- `toUInt32(expr)` — Converts to a value of data type `UInt32`. +- `toUInt64(expr)` — Converts to a value of data type `UInt64`. +- `toUInt256(expr)` — Converts to a value of data type `UInt256`.
**Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. **Returned value** -- Integer value in the `UInt8`, `UInt16`, `UInt32`, `UInt64` or `UInt256` data type. +- Integer value in the `UInt8`, `UInt16`, `UInt32`, `UInt64` or `UInt256` data type. Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers. @@ -304,11 +340,11 @@ toDate32(expr) **Arguments** -- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [UInt32](/docs/en/sql-reference/data-types/int-uint.md) or [Date](/docs/en/sql-reference/data-types/date.md). +- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [UInt32](/docs/en/sql-reference/data-types/int-uint.md) or [Date](/docs/en/sql-reference/data-types/date.md). **Returned value** -- A calendar date. Type [Date32](/docs/en/sql-reference/data-types/date32.md). +- A calendar date. Type [Date32](/docs/en/sql-reference/data-types/date32.md). **Example** @@ -422,13 +458,13 @@ toDateTime64(expr, scale, [timezone]) **Arguments** -- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md). -- `scale` - Tick size (precision): 10-precision seconds. Valid range: [ 0 : 9 ]. -- `timezone` - Time zone of the specified datetime64 object. +- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md). +- `scale` — Tick size (precision): 10^(-precision) seconds. Valid range: [ 0 : 9 ]. +- `timezone` — Time zone of the specified datetime64 object. **Returned value** -- A calendar date and time of day, with sub-second precision. +- A calendar date and time of day, with sub-second precision. Type: [DateTime64](/docs/en/sql-reference/data-types/datetime64.md). @@ -488,33 +524,33 @@ SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Asia/Istanbul') AS value, toTypeN Converts `value` to the [Decimal](/docs/en/sql-reference/data-types/decimal.md) data type with precision of `S`. The `value` can be a number or a string. The `S` (scale) parameter specifies the number of decimal places. -- `toDecimal32(value, S)` -- `toDecimal64(value, S)` -- `toDecimal128(value, S)` -- `toDecimal256(value, S)` +- `toDecimal32(value, S)` +- `toDecimal64(value, S)` +- `toDecimal128(value, S)` +- `toDecimal256(value, S)` ## toDecimal(32\|64\|128\|256)OrNull Converts an input string to a [Nullable(Decimal(P,S))](/docs/en/sql-reference/data-types/decimal.md) data type value. This family of functions includes: -- `toDecimal32OrNull(expr, S)` — Results in `Nullable(Decimal32(S))` data type. -- `toDecimal64OrNull(expr, S)` — Results in `Nullable(Decimal64(S))` data type. -- `toDecimal128OrNull(expr, S)` — Results in `Nullable(Decimal128(S))` data type.
-- `toDecimal256OrNull(expr, S)` — Results in `Nullable(Decimal256(S))` data type. +- `toDecimal32OrNull(expr, S)` — Results in `Nullable(Decimal32(S))` data type. +- `toDecimal64OrNull(expr, S)` — Results in `Nullable(Decimal64(S))` data type. +- `toDecimal128OrNull(expr, S)` — Results in `Nullable(Decimal128(S))` data type. +- `toDecimal256OrNull(expr, S)` — Results in `Nullable(Decimal256(S))` data type. These functions should be used instead of `toDecimal*()` functions, if you prefer to get a `NULL` value instead of an exception in the event of an input value parsing error. **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. -- `S` — Scale, the number of decimal places in the resulting value. +- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. +- `S` — Scale, the number of decimal places in the resulting value. **Returned value** A value in the `Nullable(Decimal(P,S))` data type. The value contains: -- Number with `S` decimal places, if ClickHouse interprets the input string as a number. -- `NULL`, if ClickHouse can’t interpret the input string as a number or if the input number contains more than `S` decimal places. +- Number with `S` decimal places, if ClickHouse interprets the input string as a number. +- `NULL`, if ClickHouse can’t interpret the input string as a number or if the input number contains more than `S` decimal places. **Examples** @@ -551,24 +587,24 @@ Result: Converts an input string to a [Decimal(P,S)](/docs/en/sql-reference/data-types/decimal.md) data type value. This family of functions includes: -- `toDecimal32OrDefault(expr, S)` — Results in `Decimal32(S)` data type. -- `toDecimal64OrDefault(expr, S)` — Results in `Decimal64(S)` data type. -- `toDecimal128OrDefault(expr, S)` — Results in `Decimal128(S)` data type. -- `toDecimal256OrDefault(expr, S)` — Results in `Decimal256(S)` data type. +- `toDecimal32OrDefault(expr, S)` — Results in `Decimal32(S)` data type. +- `toDecimal64OrDefault(expr, S)` — Results in `Decimal64(S)` data type. +- `toDecimal128OrDefault(expr, S)` — Results in `Decimal128(S)` data type. +- `toDecimal256OrDefault(expr, S)` — Results in `Decimal256(S)` data type. These functions should be used instead of `toDecimal*()` functions, if you prefer to get a default value instead of an exception in the event of an input value parsing error. **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. -- `S` — Scale, the number of decimal places in the resulting value. +- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. +- `S` — Scale, the number of decimal places in the resulting value. **Returned value** A value in the `Decimal(P,S)` data type. 
The value contains: -- Number with `S` decimal places, if ClickHouse interprets the input string as a number. -- Default `Decimal(P,S)` data type value, if ClickHouse can’t interpret the input string as a number or if the input number contains more than `S` decimal places. +- Number with `S` decimal places, if ClickHouse interprets the input string as a number. +- Default `Decimal(P,S)` data type value, if ClickHouse can’t interpret the input string as a number or if the input number contains more than `S` decimal places. **Examples** @@ -604,24 +640,24 @@ Result: Converts an input value to the [Decimal(P,S)](/docs/en/sql-reference/data-types/decimal.md) data type. This family of functions includes: -- `toDecimal32OrZero( expr, S)` — Results in `Decimal32(S)` data type. -- `toDecimal64OrZero( expr, S)` — Results in `Decimal64(S)` data type. -- `toDecimal128OrZero( expr, S)` — Results in `Decimal128(S)` data type. -- `toDecimal256OrZero( expr, S)` — Results in `Decimal256(S)` data type. +- `toDecimal32OrZero( expr, S)` — Results in `Decimal32(S)` data type. +- `toDecimal64OrZero( expr, S)` — Results in `Decimal64(S)` data type. +- `toDecimal128OrZero( expr, S)` — Results in `Decimal128(S)` data type. +- `toDecimal256OrZero( expr, S)` — Results in `Decimal256(S)` data type. These functions should be used instead of `toDecimal*()` functions, if you prefer to get a `0` value instead of an exception in the event of an input value parsing error. **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. -- `S` — Scale, the number of decimal places in the resulting value. +- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. +- `S` — Scale, the number of decimal places in the resulting value. **Returned value** A value in the `Decimal(P,S)` data type. The value contains: -- Number with `S` decimal places, if ClickHouse interprets the input string as a number. -- 0 with `S` decimal places, if ClickHouse can’t interpret the input string as a number or if the input number contains more than `S` decimal places. +- Number with `S` decimal places, if ClickHouse interprets the input string as a number. +- 0 with `S` decimal places, if ClickHouse can’t interpret the input string as a number or if the input number contains more than `S` decimal places. **Example** @@ -737,6 +773,44 @@ Result: └────────────┴───────┘ ``` +## toDecimalString + +Converts a numeric value to a String with the number of fractional digits in the output specified by the user. + +**Syntax** + +``` sql +toDecimalString(number, scale) +``` + +**Parameters** + +- `number` — Value to be represented as a String. [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md), [Decimal](/docs/en/sql-reference/data-types/decimal.md). +- `scale` — Number of fractional digits. [UInt8](/docs/en/sql-reference/data-types/int-uint.md).
+ * Maximum scale for [Decimal](/docs/en/sql-reference/data-types/decimal.md) and [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md) types is 77 (it is the maximum possible number of significant digits for Decimal); + * Maximum scale for [Float](/docs/en/sql-reference/data-types/float.md) is 60. + +**Returned value** + +- Input value represented as a [String](/docs/en/sql-reference/data-types/string.md) with the given number of fractional digits (scale). + The number is rounded up or down according to common arithmetic rules if the requested scale is smaller than the scale of the original number. + +**Example** + +Query: + +``` sql +SELECT toDecimalString(CAST('64.32', 'Float64'), 5); +``` + +Result: + +```response +┌─toDecimalString(CAST('64.32', 'Float64'), 5)─┐ +│ 64.32000 │ +└──────────────────────────────────────────────┘ +``` + ## reinterpretAsUInt(8\|16\|32\|64) ## reinterpretAsInt(8\|16\|32\|64) @@ -773,11 +847,11 @@ reinterpretAsUUID(fixed_string) **Arguments** -- `fixed_string` — Big-endian byte string. [FixedString](/docs/en/sql-reference/data-types/fixedstring.md/#fixedstring). +- `fixed_string` — Big-endian byte string. [FixedString](/docs/en/sql-reference/data-types/fixedstring.md/#fixedstring). **Returned value** -- The UUID type value. [UUID](/docs/en/sql-reference/data-types/uuid.md/#uuid-data-type). +- The UUID type value. [UUID](/docs/en/sql-reference/data-types/uuid.md/#uuid-data-type). **Examples** @@ -829,12 +903,12 @@ reinterpret(x, type) **Arguments** -- `x` — Any type. -- `type` — Destination type. [String](/docs/en/sql-reference/data-types/string.md). +- `x` — Any type. +- `type` — Destination type. [String](/docs/en/sql-reference/data-types/string.md). **Returned value** -- Destination type value. +- Destination type value. **Examples** @@ -868,13 +942,13 @@ x::t **Arguments** -- `x` — A value to convert. May be of any type. -- `T` — The name of the target data type. [String](/docs/en/sql-reference/data-types/string.md). -- `t` — The target data type. +- `x` — A value to convert. May be of any type. +- `T` — The name of the target data type. [String](/docs/en/sql-reference/data-types/string.md). +- `t` — The target data type. **Returned value** -- Converted value. +- Converted value. :::note If the input value does not fit the bounds of the target type, the result overflows. For example, `CAST(-1, 'UInt8')` returns `255`. ::: @@ -956,7 +1030,7 @@ Result: **See also** -- [cast_keep_nullable](/docs/en/operations/settings/settings.md/#cast_keep_nullable) setting +- [cast_keep_nullable](../../operations/settings/settings.md/#cast_keep_nullable) setting ## accurateCast(x, T) @@ -1004,12 +1078,12 @@ accurateCastOrNull(x, T) **Parameters** -- `x` — Input value. -- `T` — The name of the returned data type. +- `x` — Input value. +- `T` — The name of the returned data type. **Returned value** -- The value, converted to the specified data type `T`. +- The value, converted to the specified data type `T`. **Example** @@ -1057,13 +1131,13 @@ accurateCastOrDefault(x, T) **Parameters** -- `x` — Input value. -- `T` — The name of the returned data type. -- `default_value` — Default value of returned data type. +- `x` — Input value. +- `T` — The name of the returned data type. +- `default_value` — Default value of returned data type. **Returned value** -- The value converted to the specified data type `T`. +- The value converted to the specified data type `T`. **Example** @@ -1120,11 +1194,11 @@ toIntervalYear(number) **Arguments** -- `number` — Duration of interval. Positive integer number.
+- `number` — Duration of interval. Positive integer number. **Returned values** -- The value in `Interval` data type. +- The value in `Interval` data type. **Example** @@ -1148,6 +1222,102 @@ Result: └───────────────────────────┴──────────────────────────────┘ ``` +## parseDateTime {#type_conversion_functions-parseDateTime} + +Converts a [String](/docs/en/sql-reference/data-types/string.md) to [DateTime](/docs/en/sql-reference/data-types/datetime.md) according to a [MySQL format string](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format). + +This function is the opposite operation of function [formatDateTime](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime). + +**Syntax** + +``` sql +parseDateTime(str, format[, timezone]) +``` + +**Arguments** + +- `str` — the String to be parsed +- `format` — the format string +- `timezone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). Optional. + +**Returned value(s)** + +Returns DateTime values parsed from input string according to a MySQL style format string. + +**Supported format specifiers** + +All format specifiers listed in [formatDateTime](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime) except: +- %Q: Quarter (1-4) + +**Example** + +``` sql +SELECT parseDateTime('2021-01-04+23:00:00', '%Y-%m-%d+%H:%i:%s') + +┌─parseDateTime('2021-01-04+23:00:00', '%Y-%m-%d+%H:%i:%s')─┐ +│ 2021-01-04 23:00:00 │ +└───────────────────────────────────────────────────────────┘ +``` + +Alias: `TO_TIMESTAMP`. + +## parseDateTimeOrZero + +Same as for [parseDateTime](#type_conversion_functions-parseDateTime) except that it returns zero date when it encounters a date format that cannot be processed. + +## parseDateTimeOrNull + +Same as for [parseDateTime](#type_conversion_functions-parseDateTime) except that it returns `NULL` when it encounters a date format that cannot be processed. + +Alias: `str_to_date`. + +## parseDateTimeInJodaSyntax {#type_conversion_functions-parseDateTimeInJodaSyntax} + +Similar to [parseDateTime](#parsedatetime), except that the format string is in [Joda](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL syntax. + +This function is the opposite operation of function [formatDateTimeInJodaSyntax](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTimeInJodaSyntax). + +**Syntax** + +``` sql +parseDateTimeInJodaSyntax(str, format[, timezone]) +``` + +**Arguments** + +- `str` — the String to be parsed +- `format` — the format string +- `timezone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). Optional. + +**Returned value(s)** + +Returns DateTime values parsed from input string according to a Joda style format. 
+ +**Supported format specifiers** + +All format specifiers listed in [formatDateTimeInJoda](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime) are supported, except: +- S: fraction of second +- z: time zone +- Z: time zone offset/id + +**Example** + +``` sql +SELECT parseDateTimeInJodaSyntax('2023-02-24 14:53:31', 'yyyy-MM-dd HH:mm:ss', 'Europe/Minsk') + +┌─parseDateTimeInJodaSyntax('2023-02-24 14:53:31', 'yyyy-MM-dd HH:mm:ss', 'Europe/Minsk')─┐ +│ 2023-02-24 14:53:31 │ +└─────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +## parseDateTimeInJodaSyntaxOrZero + +Same as for [parseDateTimeInJodaSyntax](#type_conversion_functions-parseDateTimeInJodaSyntax) except that it returns zero date when it encounters a date format that cannot be processed. + +## parseDateTimeInJodaSyntaxOrNull + +Same as for [parseDateTimeInJodaSyntax](#type_conversion_functions-parseDateTimeInJodaSyntax) except that it returns `NULL` when it encounters a date format that cannot be processed. + ## parseDateTimeBestEffort ## parseDateTime32BestEffort @@ -1163,22 +1333,22 @@ parseDateTimeBestEffort(time_string [, time_zone]) **Arguments** -- `time_string` — String containing a date and time to convert. [String](/docs/en/sql-reference/data-types/string.md). -- `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](/docs/en/sql-reference/data-types/string.md). +- `time_string` — String containing a date and time to convert. [String](/docs/en/sql-reference/data-types/string.md). +- `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](/docs/en/sql-reference/data-types/string.md). **Supported non-standard formats** -- A string containing 9..10 digit [unix timestamp](https://en.wikipedia.org/wiki/Unix_time). -- A string with a date and a time component: `YYYYMMDDhhmmss`, `DD/MM/YYYY hh:mm:ss`, `DD-MM-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, etc. -- A string with a date, but no time component: `YYYY`, `YYYYMM`, `YYYY*MM`, `DD/MM/YYYY`, `DD-MM-YY` etc. -- A string with a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case `YYYY-MM` are substituted as `2000-01`. -- A string that includes the date and time along with time zone offset information: `YYYY-MM-DD hh:mm:ss ±h:mm`, etc. For example, `2020-12-12 17:36:00 -5:00`. +- A string containing 9..10 digit [unix timestamp](https://en.wikipedia.org/wiki/Unix_time). +- A string with a date and a time component: `YYYYMMDDhhmmss`, `DD/MM/YYYY hh:mm:ss`, `DD-MM-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, etc. +- A string with a date, but no time component: `YYYY`, `YYYYMM`, `YYYY*MM`, `DD/MM/YYYY`, `DD-MM-YY` etc. +- A string with a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case `YYYY-MM` is substituted with `2000-01`. +- A string that includes the date and time along with time zone offset information: `YYYY-MM-DD hh:mm:ss ±h:mm`, etc. For example, `2020-12-12 17:36:00 -5:00`. For all of the formats with separator the function parses month names expressed by their full name or by the first three letters of a month name. Examples: `24/DEC/18`, `24-Dec-18`, `01-September-2018`. **Returned value** -- `time_string` converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type. +- `time_string` converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type.
**Examples** @@ -1258,10 +1428,10 @@ Result: **See Also** -- [RFC 1123](https://tools.ietf.org/html/rfc1123) -- [toDate](#todate) -- [toDateTime](#todatetime) -- [ISO 8601 announcement by @xkcd](https://xkcd.com/1179/) +- [RFC 1123](https://tools.ietf.org/html/rfc1123) +- [toDate](#todate) +- [toDateTime](#todatetime) +- [ISO 8601 announcement by @xkcd](https://xkcd.com/1179/) ## parseDateTimeBestEffortUS @@ -1297,13 +1467,13 @@ parseDateTime64BestEffort(time_string [, precision [, time_zone]]) **Parameters** -- `time_string` — String containing a date or date with time to convert. [String](/docs/en/sql-reference/data-types/string.md). -- `precision` — Required precision. `3` — for milliseconds, `6` — for microseconds. Default — `3`. Optional. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). +- `time_string` — String containing a date or date with time to convert. [String](/docs/en/sql-reference/data-types/string.md). +- `precision` — Required precision. `3` — for milliseconds, `6` — for microseconds. Default — `3`. Optional. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). **Returned value** -- `time_string` converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type. +- `time_string` converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type. **Examples** @@ -1351,7 +1521,6 @@ Same as for [parseDateTime64BestEffort](#parsedatetime64besteffort), except that Same as for [parseDateTime64BestEffort](#parsedatetime64besteffort), except that this function prefers US date format (`MM/DD/YYYY` etc.) in case of ambiguity and returns zero date or zero date time when it encounters a date format that cannot be processed. - ## toLowCardinality Converts input parameter to the [LowCardinality](/docs/en/sql-reference/data-types/lowcardinality.md) version of same data type. @@ -1366,11 +1535,11 @@ toLowCardinality(expr) **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) resulting in one of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types). +- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) resulting in one of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types). **Returned values** -- Result of `expr`. +- Result of `expr`. Type: `LowCardinality(expr_result_type)` @@ -1412,11 +1581,11 @@ toUnixTimestamp64Nano(value) **Arguments** -- `value` — DateTime64 value with any precision. +- `value` — DateTime64 value with any precision. **Returned value** -- `value` converted to the `Int64` data type. +- `value` converted to the `Int64` data type. **Examples** @@ -1468,12 +1637,12 @@ fromUnixTimestamp64Nano(value [, timezone]) **Arguments** -- `value` — `Int64` value with any precision. -- `timezone` — `String` (optional) timezone name of the result. +- `value` — `Int64` value with any precision. +- `timezone` — `String` (optional) timezone name of the result. 
**Returned value** -- `value` converted to the `DateTime64` data type. +- `value` converted to the `DateTime64` data type. **Example** @@ -1504,12 +1673,12 @@ formatRow(format, x, y, ...) **Arguments** -- `format` — Text format. For example, [CSV](/docs/en/interfaces/formats.md/#csv), [TSV](/docs/en/interfaces/formats.md/#tabseparated). -- `x`,`y`, ... — Expressions. +- `format` — Text format. For example, [CSV](/docs/en/interfaces/formats.md/#csv), [TSV](/docs/en/interfaces/formats.md/#tabseparated). +- `x`,`y`, ... — Expressions. **Returned value** -- A formatted string. (for text formats it's usually terminated with the new line character). +- A formatted string. (for text formats it's usually terminated with the new line character). **Example** @@ -1575,12 +1744,12 @@ formatRowNoNewline(format, x, y, ...) **Arguments** -- `format` — Text format. For example, [CSV](/docs/en/interfaces/formats.md/#csv), [TSV](/docs/en/interfaces/formats.md/#tabseparated). -- `x`,`y`, ... — Expressions. +- `format` — Text format. For example, [CSV](/docs/en/interfaces/formats.md/#csv), [TSV](/docs/en/interfaces/formats.md/#tabseparated). +- `x`,`y`, ... — Expressions. **Returned value** -- A formatted string. +- A formatted string. **Example** @@ -1613,12 +1782,12 @@ snowflakeToDateTime(value [, time_zone]) **Parameters** -- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). -- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). +- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). +- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). **Returned value** -- Input value converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type. +- Input value converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type. **Example** @@ -1649,12 +1818,12 @@ snowflakeToDateTime64(value [, time_zone]) **Parameters** -- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). -- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). +- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). +- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). **Returned value** -- Input value converted to the [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) data type. +- Input value converted to the [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) data type. **Example** @@ -1685,11 +1854,11 @@ dateTimeToSnowflake(value) **Parameters** -- `value` — Date and time. [DateTime](/docs/en/sql-reference/data-types/datetime.md). +- `value` — Date and time. [DateTime](/docs/en/sql-reference/data-types/datetime.md). 
**Returned value** -- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time. +- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time. **Example** @@ -1719,11 +1888,11 @@ dateTime64ToSnowflake(value) **Parameters** -- `value` — Date and time. [DateTime64](/docs/en/sql-reference/data-types/datetime64.md). +- `value` — Date and time. [DateTime64](/docs/en/sql-reference/data-types/datetime64.md). **Returned value** -- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time. +- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time. **Example** diff --git a/docs/en/sql-reference/functions/udf.md b/docs/en/sql-reference/functions/udf.md new file mode 100644 index 00000000000..a58c1364780 --- /dev/null +++ b/docs/en/sql-reference/functions/udf.md @@ -0,0 +1,249 @@ +--- +slug: /en/sql-reference/functions/udf +sidebar_position: 15 +sidebar_label: UDF +--- + +# UDFs User Defined Functions + + +## Executable User Defined Functions +ClickHouse can call any external executable program or script to process data. + +The configuration of executable user defined functions can be located in one or more XML files. The path to the configuration is specified in the [user_defined_executable_functions_config](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_defined_executable_functions_config) parameter. + +A function configuration contains the following settings: + +- `name` - a function name. +- `command` - script name to execute or command if `execute_direct` is false. +- `argument` - argument description with the `type`, and optional `name` of an argument. Each argument is described in a separate setting. Specifying name is necessary if argument names are part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Default argument name value is `c` + argument_number. +- `format` - a [format](../../interfaces/formats.md) in which arguments are passed to the command. +- `return_type` - the type of a returned value. +- `return_name` - name of the returned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`. +- `type` - an executable type. If `type` is set to `executable` then a single command is started. If it is set to `executable_pool` then a pool of commands is created. +- `max_command_execution_time` - maximum execution time in seconds for processing a block of data. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`. +- `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. Optional. Default value is `10`. +- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter. +- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
+- `pool_size` - the size of a command pool. Optional. Default value is `16`. +- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`. +- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder specified by [user_scripts_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `1`. Optional parameter. +- `lifetime` - the reload interval of a function in seconds. If it is set to `0` then the function is not reloaded. Default value is `0`. Optional parameter. + +The command must read arguments from `STDIN` and must output the result to `STDOUT`. The command must process arguments iteratively. That is, after processing a chunk of arguments, it must wait for the next chunk. + +**Example** + +Creating `test_function` using XML configuration. +File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings). +```xml +<functions> + <function> + <type>executable</type> + <name>test_function_python</name> + <return_type>String</return_type> + <argument> + <type>UInt64</type> + <name>value</name> + </argument> + <format>TabSeparated</format> + <command>test_function.py</command> + </function> +</functions> +``` + +Script file inside `user_scripts` folder `test_function.py` (`/var/lib/clickhouse/user_scripts/test_function.py` with default path settings). + +```python +#!/usr/bin/python3 + +import sys + +if __name__ == '__main__': + for line in sys.stdin: + print("Value " + line, end='') + sys.stdout.flush() +``` + +Query: + +``` sql +SELECT test_function_python(toUInt64(2)); +``` + +Result: + +``` text +┌─test_function_python(2)─┐ +│ Value 2 │ +└─────────────────────────┘ +``` + +Creating `test_function_sum`, manually setting `execute_direct` to `0`, using XML configuration. +File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings). +```xml +<functions> + <function> + <type>executable</type> + <name>test_function_sum</name> + <return_type>UInt64</return_type> + <argument> + <type>UInt64</type> + <name>lhs</name> + </argument> + <argument> + <type>UInt64</type> + <name>rhs</name> + </argument> + <format>TabSeparated</format> + <command>cd /; clickhouse-local --input-format TabSeparated --output-format TabSeparated --structure 'x UInt64, y UInt64' --query "SELECT x + y FROM table"</command> + <execute_direct>0</execute_direct> + </function> +</functions> +``` + +Query: + +``` sql +SELECT test_function_sum(2, 2); +``` + +Result: + +``` text +┌─test_function_sum(2, 2)─┐ +│ 4 │ +└─────────────────────────┘ +``` + +Creating `test_function_sum_json` with named arguments and format [JSONEachRow](../../interfaces/formats.md#jsoneachrow) using XML configuration. +File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings). +```xml +<functions> + <function> + <type>executable</type> + <name>test_function_sum_json</name> + <return_type>UInt64</return_type> + <return_name>result_name</return_name> + <argument> + <type>UInt64</type> + <name>argument_1</name> + </argument> + <argument> + <type>UInt64</type> + <name>argument_2</name> + </argument> + <format>JSONEachRow</format> + <command>test_function_sum_json.py</command> + </function> +</functions> +``` + +Script file inside `user_scripts` folder `test_function_sum_json.py` (`/var/lib/clickhouse/user_scripts/test_function_sum_json.py` with default path settings).
+```python +#!/usr/bin/python3 + +import sys +import json + +if __name__ == '__main__': + for line in sys.stdin: + value = json.loads(line) + first_arg = int(value['argument_1']) + second_arg = int(value['argument_2']) + result = {'result_name': first_arg + second_arg} + print(json.dumps(result), end='\n') + sys.stdout.flush() +``` + +Query: + +``` sql +SELECT test_function_sum_json(2, 2); +``` + +Result: + +``` text +┌─test_function_sum_json(2, 2)─┐ +│ 4 │ +└──────────────────────────────┘ +``` + +Executable user defined functions can take constant parameters configured in the `command` setting (works only for user defined functions with `executable` type). +File `test_function_parameter_python.xml` (`/etc/clickhouse-server/test_function_parameter_python.xml` with default path settings). +```xml +<functions> + <function> + <type>executable</type> + <name>test_function_parameter_python</name> + <return_type>String</return_type> + <argument> + <type>UInt64</type> + </argument> + <format>TabSeparated</format> + <command>test_function_parameter_python.py {test_parameter:UInt64}</command> + </function> +</functions> +``` + +Script file inside `user_scripts` folder `test_function_parameter_python.py` (`/var/lib/clickhouse/user_scripts/test_function_parameter_python.py` with default path settings). + +```python +#!/usr/bin/python3 + +import sys + +if __name__ == "__main__": + for line in sys.stdin: + print("Parameter " + str(sys.argv[1]) + " value " + str(line), end="") + sys.stdout.flush() +``` + +Query: + +``` sql +SELECT test_function_parameter_python(1)(2); +``` + +Result: + +``` text +┌─test_function_parameter_python(1)(2)─┐ +│ Parameter 1 value 2 │ +└──────────────────────────────────────┘ +``` + +## Error Handling + +Some functions might throw an exception if the data is invalid. In this case, the query is canceled and an error text is returned to the client. For distributed processing, when an exception occurs on one of the servers, the other servers also attempt to abort the query. + +## Evaluation of Argument Expressions + +In almost all programming languages, one of the arguments might not be evaluated for certain operators. This is usually the operators `&&`, `||`, and `?:`. +But in ClickHouse, arguments of functions (operators) are always evaluated. This is because entire parts of columns are evaluated at once, instead of calculating each row separately. + +## Performing Functions for Distributed Query Processing + +For distributed query processing, as many stages of query processing as possible are performed on remote servers, and the rest of the stages (merging intermediate results and everything after that) are performed on the requestor server. + +This means that functions can be performed on different servers. +For example, in the query `SELECT f(sum(g(x))) FROM distributed_table GROUP BY h(y),` + +- if a `distributed_table` has at least two shards, the functions ‘g’ and ‘h’ are performed on remote servers, and the function ‘f’ is performed on the requestor server. +- if a `distributed_table` has only one shard, all the ‘f’, ‘g’, and ‘h’ functions are performed on this shard’s server. + +The result of a function usually does not depend on which server it is performed on. However, sometimes this is important. +For example, functions that work with dictionaries use the dictionary that exists on the server they are running on. +Another example is the `hostName` function, which returns the name of the server it is running on in order to make `GROUP BY` by servers in a `SELECT` query.
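+ +For illustration, a minimal sketch of that `hostName` pattern (the table name `distributed_table` is hypothetical; it assumes a [Distributed](../../engines/table-engines/special/distributed.md) table is already set up): + +``` sql +-- hostName() is evaluated locally on each shard, so grouping by it +-- yields one row per server that processed part of the query. +SELECT hostName() AS server, count() AS rows +FROM distributed_table +GROUP BY server; +```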
+ +If a function in a query is performed on the requestor server, but you need to perform it on remote servers, you can wrap it in an ‘any’ aggregate function or add it to a key in `GROUP BY`. + +## SQL User Defined Functions + +Custom functions from lambda expressions can be created using the [CREATE FUNCTION](../statements/create/function.md) statement. To delete these functions use the [DROP FUNCTION](../statements/drop.md#drop-function) statement. + +## Related Content + +### [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs) diff --git a/docs/en/sql-reference/functions/ulid-functions.md b/docs/en/sql-reference/functions/ulid-functions.md index 94167945f76..eb69b1779ae 100644 --- a/docs/en/sql-reference/functions/ulid-functions.md +++ b/docs/en/sql-reference/functions/ulid-functions.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/functions/ulid-functions -sidebar_position: 54 +sidebar_position: 190 sidebar_label: ULID --- @@ -18,7 +18,7 @@ generateULID([x]) **Arguments** -- `x` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in any of the [supported data types](../../sql-reference/data-types/index.md#data_types). The resulting value is discarded, but the expression itself if used for bypassing [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter. +- `x` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in any of the [supported data types](../../sql-reference/data-types/index.md#data_types). The resulting value is discarded, but the expression itself if used for bypassing [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter. **Returned value** @@ -48,6 +48,39 @@ SELECT generateULID(1), generateULID(2) └────────────────────────────┴────────────────────────────┘ ``` +## ULIDStringToDateTime + +This function extracts the timestamp from a ULID. + +**Syntax** + +``` sql +ULIDStringToDateTime(ulid[, timezone]) +``` + +**Arguments** + +- `ulid` — Input ULID. [String](/docs/en/sql-reference/data-types/string.md) or [FixedString(26)](/docs/en/sql-reference/data-types/fixedstring.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- Timestamp with milliseconds precision. + +Type: [DateTime64(3)](/docs/en/sql-reference/data-types/datetime64.md). 
+ +**Usage example** + +``` sql +SELECT ULIDStringToDateTime('01GNB2S2FGN2P93QPXDNB4EN2R') +``` + +``` text +┌─ULIDStringToDateTime('01GNB2S2FGN2P93QPXDNB4EN2R')─┐ +│ 2022-12-28 00:40:37.616 │ +└────────────────────────────────────────────────────┘ +``` + ## See Also -- [UUID](../../sql-reference/functions/uuid-functions.md) +- [UUID](../../sql-reference/functions/uuid-functions.md) diff --git a/docs/en/sql-reference/functions/uniqtheta-functions.md b/docs/en/sql-reference/functions/uniqtheta-functions.md index b2d3712abfc..abe58e48715 100644 --- a/docs/en/sql-reference/functions/uniqtheta-functions.md +++ b/docs/en/sql-reference/functions/uniqtheta-functions.md @@ -1,5 +1,7 @@ --- slug: /en/sql-reference/functions/uniqtheta-functions +sidebar_position: 210 +sidebar_label: uniqTheta --- # uniqTheta Functions @@ -21,7 +23,7 @@ uniqThetaUnion(uniqThetaSketch,uniqThetaSketch) **Arguments** -- `uniqThetaSketch` – uniqThetaSketch object. +- `uniqThetaSketch` – uniqThetaSketch object. **Example** @@ -47,7 +49,7 @@ uniqThetaIntersect(uniqThetaSketch,uniqThetaSketch) **Arguments** -- `uniqThetaSketch` – uniqThetaSketch object. +- `uniqThetaSketch` – uniqThetaSketch object. **Example** @@ -73,7 +75,7 @@ uniqThetaNot(uniqThetaSketch,uniqThetaSketch) **Arguments** -- `uniqThetaSketch` – uniqThetaSketch object. +- `uniqThetaSketch` – uniqThetaSketch object. **Example** @@ -91,4 +93,4 @@ from **See Also** -- [uniqThetaSketch](../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch) +- [uniqThetaSketch](../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch) diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index b515f6ad518..f6871c86c4f 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/functions/url-functions -sidebar_position: 54 +sidebar_position: 200 sidebar_label: URLs --- @@ -28,7 +28,7 @@ domain(url) **Arguments** -- `url` — URL. Type: [String](../../sql-reference/data-types/string.md). +- `url` — URL. Type: [String](../../sql-reference/data-types/string.md). The URL can be specified with or without a scheme. Examples: @@ -48,8 +48,8 @@ clickhouse.com **Returned values** -- Host name. If ClickHouse can parse the input string as a URL. -- Empty string. If ClickHouse can’t parse the input string as a URL. +- Host name. If ClickHouse can parse the input string as a URL. +- Empty string. If ClickHouse can’t parse the input string as a URL. Type: `String`. @@ -79,7 +79,7 @@ topLevelDomain(url) **Arguments** -- `url` — URL. Type: [String](../../sql-reference/data-types/string.md). +- `url` — URL. Type: [String](../../sql-reference/data-types/string.md). The URL can be specified with or without a scheme. Examples: @@ -91,8 +91,8 @@ https://clickhouse.com/time/ **Returned values** -- Domain name. If ClickHouse can parse the input string as a URL. -- Empty string. If ClickHouse cannot parse the input string as a URL. +- Domain name. If ClickHouse can parse the input string as a URL. +- Empty string. If ClickHouse cannot parse the input string as a URL. Type: `String`. @@ -118,9 +118,9 @@ Returns the part of the domain that includes top-level subdomains up to the “f For example: -- `cutToFirstSignificantSubdomain('https://news.clickhouse.com.tr/') = 'clickhouse.com.tr'`. -- `cutToFirstSignificantSubdomain('www.tr') = 'tr'`. 
-- `cutToFirstSignificantSubdomain('tr') = ''`. +- `cutToFirstSignificantSubdomain('https://news.clickhouse.com.tr/') = 'clickhouse.com.tr'`. +- `cutToFirstSignificantSubdomain('www.tr') = 'tr'`. +- `cutToFirstSignificantSubdomain('tr') = ''`. ### cutToFirstSignificantSubdomainWithWWW @@ -128,9 +128,9 @@ Returns the part of the domain that includes top-level subdomains up to the “f For example: -- `cutToFirstSignificantSubdomain('https://news.clickhouse.com.tr/') = 'clickhouse.com.tr'`. -- `cutToFirstSignificantSubdomain('www.tr') = 'www.tr'`. -- `cutToFirstSignificantSubdomain('tr') = ''`. +- `cutToFirstSignificantSubdomain('https://news.clickhouse.com.tr/') = 'clickhouse.com.tr'`. +- `cutToFirstSignificantSubdomain('www.tr') = 'www.tr'`. +- `cutToFirstSignificantSubdomain('tr') = ''`. ### cutToFirstSignificantSubdomainCustom @@ -157,12 +157,12 @@ cutToFirstSignificantSubdomain(URL, TLD) **Parameters** -- `URL` — URL. [String](../../sql-reference/data-types/string.md). -- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). **Returned value** -- Part of the domain that includes top-level subdomains up to the first significant subdomain. +- Part of the domain that includes top-level subdomains up to the first significant subdomain. Type: [String](../../sql-reference/data-types/string.md). @@ -184,7 +184,7 @@ Result: **See Also** -- [firstSignificantSubdomain](#firstsignificantsubdomain). +- [firstSignificantSubdomain](#firstsignificantsubdomain). ### cutToFirstSignificantSubdomainCustomWithWWW @@ -211,12 +211,12 @@ cutToFirstSignificantSubdomainCustomWithWWW(URL, TLD) **Parameters** -- `URL` — URL. [String](../../sql-reference/data-types/string.md). -- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). **Returned value** -- Part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. +- Part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. Type: [String](../../sql-reference/data-types/string.md). @@ -238,7 +238,7 @@ Result: **See Also** -- [firstSignificantSubdomain](#firstsignificantsubdomain). +- [firstSignificantSubdomain](#firstsignificantsubdomain). ### firstSignificantSubdomainCustom @@ -265,12 +265,12 @@ firstSignificantSubdomainCustom(URL, TLD) **Parameters** -- `URL` — URL. [String](../../sql-reference/data-types/string.md). -- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). **Returned value** -- First significant subdomain. +- First significant subdomain. Type: [String](../../sql-reference/data-types/string.md). @@ -292,7 +292,7 @@ Result: **See Also** -- [firstSignificantSubdomain](#firstsignificantsubdomain). +- [firstSignificantSubdomain](#firstsignificantsubdomain). ### port(URL\[, default_port = 0\]) @@ -418,11 +418,11 @@ netloc(URL) **Arguments** -- `url` — URL. [String](../../sql-reference/data-types/string.md). +- `url` — URL. [String](../../sql-reference/data-types/string.md). 
**Returned value** -- `username:password@host:port`. +- `username:password@host:port`. Type: `String`. @@ -474,12 +474,12 @@ cutURLParameter(URL, name) **Arguments** -- `url` — URL. [String](../../sql-reference/data-types/string.md). -- `name` — name of URL parameter. [String](../../sql-reference/data-types/string.md) or [Array](../../sql-reference/data-types/array.md) of Strings. +- `url` — URL. [String](../../sql-reference/data-types/string.md). +- `name` — name of URL parameter. [String](../../sql-reference/data-types/string.md) or [Array](../../sql-reference/data-types/array.md) of Strings. **Returned value** -- URL with `name` URL parameter removed. +- URL with `name` URL parameter removed. Type: `String`. diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index 474e3248d1f..c338add3a57 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -1,12 +1,10 @@ --- slug: /en/sql-reference/functions/uuid-functions -sidebar_position: 53 -sidebar_label: UUID +sidebar_position: 205 +sidebar_label: UUIDs --- -# Functions for Working with UUID - -The functions for working with UUID are listed below. +# Functions for Working with UUIDs ## generateUUIDv4 @@ -20,7 +18,7 @@ generateUUIDv4([x]) **Arguments** -- `x` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in any of the [supported data types](../../sql-reference/data-types/index.md#data_types). The resulting value is discarded, but the expression itself if used for bypassing [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter. +- `x` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in any of the [supported data types](../../sql-reference/data-types/index.md#data_types). The resulting value is discarded, but the expression itself if used for bypassing [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter. **Returned value** @@ -69,11 +67,11 @@ The function also works for [arrays](array-functions.md#function-empty) or [stri **Arguments** -- `x` — Input UUID. [UUID](../data-types/uuid.md). +- `x` — Input UUID. [UUID](../data-types/uuid.md). **Returned value** -- Returns `1` for an empty UUID or `0` for a non-empty UUID. +- Returns `1` for an empty UUID or `0` for a non-empty UUID. Type: [UInt8](../data-types/int-uint.md). @@ -111,11 +109,11 @@ The function also works for [arrays](array-functions.md#function-notempty) or [s **Arguments** -- `x` — Input UUID. [UUID](../data-types/uuid.md). +- `x` — Input UUID. [UUID](../data-types/uuid.md). **Returned value** -- Returns `1` for a non-empty UUID or `0` for an empty UUID. +- Returns `1` for a non-empty UUID or `0` for an empty UUID. Type: [UInt8](../data-types/int-uint.md). @@ -165,8 +163,8 @@ SELECT toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0') AS uuid **Arguments** -- `string` — String of 36 characters or FixedString(36). [String](../../sql-reference/syntax.md#string). -- `default` — UUID to be used as the default if the first argument cannot be converted to a UUID type. [UUID](/docs/en/sql-reference/data-types/uuid.md). +- `string` — String of 36 characters or FixedString(36). [String](../../sql-reference/syntax.md#string). 
+- `default` — UUID to be used as the default if the first argument cannot be converted to a UUID type. [UUID](/docs/en/sql-reference/data-types/uuid.md). **Returned value** @@ -265,8 +263,8 @@ UUIDStringToNum(string[, variant = 1]) **Arguments** -- `string` — String of 36 characters or FixedString(36). [String](../../sql-reference/syntax.md#syntax-string-literal). -- `variant` — Integer, representing a variant as specified by [RFC4122](https://datatracker.ietf.org/doc/html/rfc4122#section-4.1.1). 1 = `Big-endian` (default), 2 = `Microsoft`. +- `string` — String of 36 characters or FixedString(36). [String](../../sql-reference/syntax.md#syntax-string-literal). +- `variant` — Integer, representing a variant as specified by [RFC4122](https://datatracker.ietf.org/doc/html/rfc4122#section-4.1.1). 1 = `Big-endian` (default), 2 = `Microsoft`. **Returned value** @@ -310,8 +308,8 @@ UUIDNumToString(binary[, variant = 1]) **Arguments** -- `binary` — [FixedString(16)](../../sql-reference/data-types/fixedstring.md) as a binary representation of a UUID. -- `variant` — Integer, representing a variant as specified by [RFC4122](https://datatracker.ietf.org/doc/html/rfc4122#section-4.1.1). 1 = `Big-endian` (default), 2 = `Microsoft`. +- `binary` — [FixedString(16)](../../sql-reference/data-types/fixedstring.md) as a binary representation of a UUID. +- `variant` — Integer, representing a variant as specified by [RFC4122](https://datatracker.ietf.org/doc/html/rfc4122#section-4.1.1). 1 = `Big-endian` (default), 2 = `Microsoft`. **Returned value** @@ -355,10 +353,10 @@ serverUUID() **Returned value** -- The UUID of the server. +- The UUID of the server. Type: [UUID](../data-types/uuid.md). ## See Also -- [dictGetUUID](../../sql-reference/functions/ext-dict-functions.md#ext_dict_functions-other) +- [dictGetUUID](../../sql-reference/functions/ext-dict-functions.md#ext_dict_functions-other) diff --git a/docs/en/sql-reference/functions/ym-dict-functions.md b/docs/en/sql-reference/functions/ym-dict-functions.md index cf25b67a15a..a516f09d709 100644 --- a/docs/en/sql-reference/functions/ym-dict-functions.md +++ b/docs/en/sql-reference/functions/ym-dict-functions.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/functions/ym-dict-functions -sidebar_position: 59 +sidebar_position: 60 sidebar_label: Embedded Dictionaries --- @@ -118,13 +118,13 @@ regionToTopContinent(id[, geobase]) **Arguments** -- `id` — Region ID from the geobase. [UInt32](../../sql-reference/data-types/int-uint.md). -- `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../../sql-reference/data-types/string.md). Optional. +- `id` — Region ID from the geobase. [UInt32](../../sql-reference/data-types/int-uint.md). +- `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../../sql-reference/data-types/string.md). Optional. **Returned value** -- Identifier of the top level continent (the latter when you climb the hierarchy of regions). -- 0, if there is none. +- Identifier of the top level continent (the last one when you climb the hierarchy of regions). +- 0, if there is none. Type: `UInt32`.
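As a quick sanity check of the `UUIDStringToNum`/`UUIDNumToString` pair documented above, converting a UUID string to its 16-byte binary form and back should return the original string (a minimal sketch; the UUID value is arbitrary):

```sql
-- Round-trip: String -> FixedString(16) -> String.
SELECT UUIDNumToString(UUIDStringToNum('612f3c40-5d3b-217e-707b-6a546a3d7b29')) AS uuid;
-- Expected result: 612f3c40-5d3b-217e-707b-6a546a3d7b29
```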
diff --git a/docs/en/sql-reference/index.md b/docs/en/sql-reference/index.md new file mode 100644 index 00000000000..eddc5b204d9 --- /dev/null +++ b/docs/en/sql-reference/index.md @@ -0,0 +1,22 @@ +--- +keywords: [clickhouse, docs, sql reference, sql statements, sql, syntax] +title: SQL Reference +--- + +import { TwoColumnList } from '/src/components/two_column_list' +import { ClickableSquare } from '/src/components/clickable_square' +import { HorizontalDivide } from '/src/components/horizontal_divide' +import { ViewAllLink } from '/src/components/view_all_link' +import { VideoContainer } from '/src/components/video_container' + +import LinksDeployment from './sql-reference-links.json' + +# ClickHouse SQL Reference + +ClickHouse supports a declarative query language based on SQL that is identical to the ANSI SQL standard in many cases. + +Supported queries include GROUP BY, ORDER BY, subqueries in FROM, JOIN clause, IN operator, window functions and scalar subqueries. + + + + \ No newline at end of file diff --git a/docs/en/sql-reference/operators/exists.md b/docs/en/sql-reference/operators/exists.md index 4bc29389c9c..5e96e11b924 100644 --- a/docs/en/sql-reference/operators/exists.md +++ b/docs/en/sql-reference/operators/exists.md @@ -7,7 +7,7 @@ The `EXISTS` operator checks how many records are in the result of a subquery. I `EXISTS` can be used in a [WHERE](../../sql-reference/statements/select/where.md) clause. -:::warning +:::tip References to main query tables and columns are not supported in a subquery. ::: diff --git a/docs/en/sql-reference/operators/in.md b/docs/en/sql-reference/operators/in.md index 58119cfc4f5..8a8c86624d2 100644 --- a/docs/en/sql-reference/operators/in.md +++ b/docs/en/sql-reference/operators/in.md @@ -158,7 +158,7 @@ Now let’s examine a query with IN: SELECT uniq(UserID) FROM distributed_table WHERE CounterID = 101500 AND UserID IN (SELECT UserID FROM local_table WHERE CounterID = 34) ``` -- Calculation of the intersection of audiences of two sites. +- Calculation of the intersection of audiences of two sites. This query will be sent to all remote servers as @@ -233,8 +233,9 @@ If `some_predicate` is not selective enough, it will return large amount of data ### Distributed Subqueries and max_parallel_replicas -When max_parallel_replicas is greater than 1, distributed queries are further transformed. For example, the following: +When [max_parallel_replicas](#settings-max_parallel_replicas) is greater than 1, distributed queries are further transformed. +For example, the following: ```sql SELECT CounterID, count() FROM distributed_table_1 WHERE UserID IN (SELECT UserID FROM local_table_2 WHERE CounterID < 100) SETTINGS max_parallel_replicas=3 @@ -247,8 +248,12 @@ SELECT CounterID, count() FROM local_table_1 WHERE UserID IN (SELECT UserID FROM SETTINGS parallel_replicas_count=3, parallel_replicas_offset=M ``` -where M is between 1 and 3 depending on which replica the local query is executing on. These settings affect every MergeTree-family table in the query and have the same effect as applying `SAMPLE 1/3 OFFSET (M-1)/3` on each table. +where M is between 1 and 3 depending on which replica the local query is executing on. -Therefore adding the max_parallel_replicas setting will only produce correct results if both tables have the same replication scheme and are sampled by UserID or a subkey of it. In particular, if local_table_2 does not have a sampling key, incorrect results will be produced. The same rule applies to JOIN. 
+These settings affect every MergeTree-family table in the query and have the same effect as applying `SAMPLE 1/3 OFFSET (M-1)/3` on each table. + +Therefore adding the [max_parallel_replicas](#settings-max_parallel_replicas) setting will only produce correct results if both tables have the same replication scheme and are sampled by UserID or a subkey of it. In particular, if local_table_2 does not have a sampling key, incorrect results will be produced. The same rule applies to JOIN. If local_table_2 does not meet the requirements, one workaround is to use `GLOBAL IN` or `GLOBAL JOIN`. + +If a table doesn't have a sampling key, the more flexible [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) options can be used instead, which can produce different and more optimal behaviour. diff --git a/docs/en/sql-reference/operators/index.md b/docs/en/sql-reference/operators/index.md index 0fe7ebbf4b6..c8ed2627e2b 100644 --- a/docs/en/sql-reference/operators/index.md +++ b/docs/en/sql-reference/operators/index.md @@ -159,12 +159,12 @@ Extract parts from a given date. For example, you can retrieve a month from a gi The `part` parameter specifies which part of the date to retrieve. The following values are available: -- `DAY` — The day of the month. Possible values: 1–31. -- `MONTH` — The number of a month. Possible values: 1–12. -- `YEAR` — The year. -- `SECOND` — The second. Possible values: 0–59. -- `MINUTE` — The minute. Possible values: 0–59. -- `HOUR` — The hour. Possible values: 0–23. +- `DAY` — The day of the month. Possible values: 1–31. +- `MONTH` — The number of a month. Possible values: 1–12. +- `YEAR` — The year. +- `SECOND` — The second. Possible values: 0–59. +- `MINUTE` — The minute. Possible values: 0–59. +- `HOUR` — The hour. Possible values: 0–23. The `part` parameter is case-insensitive. @@ -229,7 +229,7 @@ Types of intervals: You can also use a string literal when setting the `INTERVAL` value. For example, `INTERVAL 1 HOUR` is identical to the `INTERVAL '1 hour'` or `INTERVAL '1' hour`. -:::warning +:::tip Intervals with different types can’t be combined. You can’t use expressions like `INTERVAL 4 DAY 1 HOUR`. Specify intervals in units that are smaller or equal to the smallest unit of the interval, for example, `INTERVAL 25 HOUR`. You can use consecutive operations, like in the example below. ::: @@ -285,8 +285,8 @@ SELECT toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul') AS time, time + 60 * 6 **See Also** -- [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type -- [toInterval](../../sql-reference/functions/type-conversion-functions.md#function-tointerval) type conversion functions +- [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type +- [toInterval](../../sql-reference/functions/type-conversion-functions.md#function-tointerval) type conversion functions ## Logical AND Operator @@ -355,10 +355,10 @@ ClickHouse supports the `IS NULL` and `IS NOT NULL` operators. ### IS NULL -- For [Nullable](../../sql-reference/data-types/nullable.md) type values, the `IS NULL` operator returns: - - `1`, if the value is `NULL`. - - `0` otherwise. -- For other values, the `IS NULL` operator always returns `0`. +- For [Nullable](../../sql-reference/data-types/nullable.md) type values, the `IS NULL` operator returns: + - `1`, if the value is `NULL`. + - `0` otherwise. +- For other values, the `IS NULL` operator always returns `0`.
Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [null](../../sql-reference/data-types/nullable.md#finding-null) subcolumn instead of reading and processing the whole column data. The query `SELECT n IS NULL FROM table` transforms to `SELECT n.null FROM TABLE`. @@ -376,10 +376,10 @@ SELECT x+100 FROM t_null WHERE y IS NULL ### IS NOT NULL -- For [Nullable](../../sql-reference/data-types/nullable.md) type values, the `IS NOT NULL` operator returns: - - `0`, if the value is `NULL`. - - `1` otherwise. -- For other values, the `IS NOT NULL` operator always returns `1`. +- For [Nullable](../../sql-reference/data-types/nullable.md) type values, the `IS NOT NULL` operator returns: + - `0`, if the value is `NULL`. + - `1` otherwise. +- For other values, the `IS NOT NULL` operator always returns `1`. diff --git a/docs/en/sql-reference/sql-reference-links.json b/docs/en/sql-reference/sql-reference-links.json new file mode 100644 index 00000000000..3811ad18462 --- /dev/null +++ b/docs/en/sql-reference/sql-reference-links.json @@ -0,0 +1,12 @@ +[ + { + "title": "Statements", + "description": "A list of available SQL statements in ClickHouse", + "url": "/docs/en/sql-reference/statements/" + }, + { + "title": "Database and Table Engines", + "description": "Engines determine where and how your data is stored", + "url": "/docs/en/engines/table-engines" + } +] diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index d580efa4992..378f41c1199 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -18,14 +18,14 @@ Each action is an operation on a column. The following actions are supported: -- [ADD COLUMN](#add-column) — Adds a new column to the table. -- [DROP COLUMN](#drop-column) — Deletes the column. -- [RENAME COLUMN](#rename-column) — Renames an existing column. -- [CLEAR COLUMN](#clear-column) — Resets column values. -- [COMMENT COLUMN](#comment-column) — Adds a text comment to the column. -- [MODIFY COLUMN](#modify-column) — Changes column’s type, default expression and TTL. -- [MODIFY COLUMN REMOVE](#modify-column-remove) — Removes one of the column properties. -- [MATERIALIZE COLUMN](#materialize-column) — Materializes the column in the parts where the column is missing. +- [ADD COLUMN](#add-column) — Adds a new column to the table. +- [DROP COLUMN](#drop-column) — Deletes the column. +- [RENAME COLUMN](#rename-column) — Renames an existing column. +- [CLEAR COLUMN](#clear-column) — Resets column values. +- [COMMENT COLUMN](#comment-column) — Adds a text comment to the column. +- [MODIFY COLUMN](#modify-column) — Changes column’s type, default expression and TTL. +- [MODIFY COLUMN REMOVE](#modify-column-remove) — Removes one of the column properties. +- [MATERIALIZE COLUMN](#materialize-column) — Materializes the column in the parts where the column is missing. These actions are described in detail below. @@ -75,7 +75,7 @@ Deletes the column with the name `name`. If the `IF EXISTS` clause is specified, Deletes data from the file system. Since this deletes entire files, the query is completed almost instantly. -:::warning +:::tip You can’t delete a column if it is referenced by [materialized view](/docs/en/sql-reference/statements/create/view.md/#materialized). Otherwise, it returns an error. 
::: @@ -132,7 +132,7 @@ Comments are stored in the `comment_expression` column returned by the [DESCRIBE Example: ``` sql -ALTER TABLE visits COMMENT COLUMN browser 'The table shows the browser used for accessing the site.' +ALTER TABLE visits COMMENT COLUMN browser 'This column shows the browser used for accessing the site.' ``` ## MODIFY COLUMN @@ -144,13 +144,13 @@ ALTER COLUMN [IF EXISTS] name TYPE [type] [default_expr] [codec] [TTL] [AFTER na This query changes the `name` column properties: -- Type +- Type -- Default expression +- Default expression -- Compression Codec +- Compression Codec -- TTL +- TTL For examples of columns compression CODECS modifying, see [Column Compression Codecs](../create/table.md/#codecs). diff --git a/docs/en/sql-reference/statements/alter/comment.md b/docs/en/sql-reference/statements/alter/comment.md index f8742765619..cc49c6abf80 100644 --- a/docs/en/sql-reference/statements/alter/comment.md +++ b/docs/en/sql-reference/statements/alter/comment.md @@ -16,7 +16,7 @@ ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY COMMENT 'Comment' **Examples** -Creating a table with comment (for more information, see the [COMMENT] clause(../../../sql-reference/statements/create/table.md#comment-table)): +Creating a table with comment (for more information, see the [COMMENT](../../../sql-reference/statements/create/table.md#comment-table) clause): ``` sql CREATE TABLE table_with_comment diff --git a/docs/en/sql-reference/statements/alter/constraint.md b/docs/en/sql-reference/statements/alter/constraint.md index 844b24d7374..7a8f5809320 100644 --- a/docs/en/sql-reference/statements/alter/constraint.md +++ b/docs/en/sql-reference/statements/alter/constraint.md @@ -17,7 +17,7 @@ See more on [constraints](../../../sql-reference/statements/create/table.md#cons Queries will add or remove metadata about constraints from table so they are processed immediately. -:::warning +:::tip Constraint check **will not be executed** on existing data if it was added. 
::: diff --git a/docs/en/sql-reference/statements/alter/delete.md b/docs/en/sql-reference/statements/alter/delete.md index 30ed96c0b9c..b6f45b67d52 100644 --- a/docs/en/sql-reference/statements/alter/delete.md +++ b/docs/en/sql-reference/statements/alter/delete.md @@ -25,6 +25,10 @@ The synchronicity of the query processing is defined by the [mutations_sync](/do **See also** -- [Mutations](/docs/en/sql-reference/statements/alter/index.md#mutations) -- [Synchronicity of ALTER Queries](/docs/en/sql-reference/statements/alter/index.md#synchronicity-of-alter-queries) -- [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting +- [Mutations](/docs/en/sql-reference/statements/alter/index.md#mutations) +- [Synchronicity of ALTER Queries](/docs/en/sql-reference/statements/alter/index.md#synchronicity-of-alter-queries) +- [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting + +## Related content + +- Blog: [Handling Updates and Deletes in ClickHouse](https://clickhouse.com/blog/handling-updates-and-deletes-in-clickhouse) diff --git a/docs/en/sql-reference/statements/alter/index.md b/docs/en/sql-reference/statements/alter/index.md index 5d7b92bd34d..7a687a067aa 100644 --- a/docs/en/sql-reference/statements/alter/index.md +++ b/docs/en/sql-reference/statements/alter/index.md @@ -8,14 +8,14 @@ sidebar_label: ALTER Most `ALTER TABLE` queries modify table settings or data: -- [COLUMN](/docs/en/sql-reference/statements/alter/column.md) -- [PARTITION](/docs/en/sql-reference/statements/alter/partition.md) -- [DELETE](/docs/en/sql-reference/statements/alter/delete.md) -- [UPDATE](/docs/en/sql-reference/statements/alter/update.md) -- [ORDER BY](/docs/en/sql-reference/statements/alter/order-by.md) -- [INDEX](/docs/en/sql-reference/statements/alter/skipping-index.md) -- [CONSTRAINT](/docs/en/sql-reference/statements/alter/constraint.md) -- [TTL](/docs/en/sql-reference/statements/alter/ttl.md) +- [COLUMN](/docs/en/sql-reference/statements/alter/column.md) +- [PARTITION](/docs/en/sql-reference/statements/alter/partition.md) +- [DELETE](/docs/en/sql-reference/statements/alter/delete.md) +- [UPDATE](/docs/en/sql-reference/statements/alter/update.md) +- [ORDER BY](/docs/en/sql-reference/statements/alter/order-by.md) +- [INDEX](/docs/en/sql-reference/statements/alter/skipping-index.md) +- [CONSTRAINT](/docs/en/sql-reference/statements/alter/constraint.md) +- [TTL](/docs/en/sql-reference/statements/alter/ttl.md) :::note Most `ALTER TABLE` queries are supported only for [\*MergeTree](/docs/en/engines/table-engines/mergetree-family/index.md) tables, as well as [Merge](/docs/en/engines/table-engines/special/merge.md) and [Distributed](/docs/en/engines/table-engines/special/distributed.md). @@ -23,16 +23,16 @@ Most `ALTER TABLE` queries are supported only for [\*MergeTree](/docs/en/engines These `ALTER` statements manipulate views: -- [ALTER TABLE ... MODIFY QUERY](/docs/en/sql-reference/statements/alter/view.md) — Modifies a [Materialized view](/docs/en/sql-reference/statements/create/view.md/#materialized) structure. -- [ALTER LIVE VIEW](/docs/en/sql-reference/statements/alter/view.md/#alter-live-view) — Refreshes a [Live view](/docs/en/sql-reference/statements/create/view.md/#live-view). +- [ALTER TABLE ... MODIFY QUERY](/docs/en/sql-reference/statements/alter/view.md) — Modifies a [Materialized view](/docs/en/sql-reference/statements/create/view.md/#materialized) structure. 
+- [ALTER LIVE VIEW](/docs/en/sql-reference/statements/alter/view.md/#alter-live-view) — Refreshes a [Live view](/docs/en/sql-reference/statements/create/view.md/#live-view). These `ALTER` statements modify entities related to role-based access control: -- [USER](/docs/en/sql-reference/statements/alter/user.md) -- [ROLE](/docs/en/sql-reference/statements/alter/role.md) -- [QUOTA](/docs/en/sql-reference/statements/alter/quota.md) -- [ROW POLICY](/docs/en/sql-reference/statements/alter/row-policy.md) -- [SETTINGS PROFILE](/docs/en/sql-reference/statements/alter/settings-profile.md) +- [USER](/docs/en/sql-reference/statements/alter/user.md) +- [ROLE](/docs/en/sql-reference/statements/alter/role.md) +- [QUOTA](/docs/en/sql-reference/statements/alter/quota.md) +- [ROW POLICY](/docs/en/sql-reference/statements/alter/row-policy.md) +- [SETTINGS PROFILE](/docs/en/sql-reference/statements/alter/settings-profile.md) [ALTER TABLE ... MODIFY COMMENT](/docs/en/sql-reference/statements/alter/comment.md) statement adds, modifies, or removes comments to the table, regardless if it was set before or not. @@ -61,3 +61,7 @@ For all `ALTER` queries, if `alter_sync = 2` and some replicas are not active fo ::: For `ALTER TABLE ... UPDATE|DELETE` queries the synchronicity is defined by the [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting. + +## Related content + +- Blog: [Handling Updates and Deletes in ClickHouse](https://clickhouse.com/blog/handling-updates-and-deletes-in-clickhouse) diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index aad52efb39d..a0aa74e6d25 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -7,20 +7,20 @@ title: "Manipulating Partitions and Parts" The following operations with [partitions](/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md) are available: -- [DETACH PARTITION\|PART](#detach-partitionpart) — Moves a partition or part to the `detached` directory and forget it. -- [DROP PARTITION\|PART](#drop-partitionpart) — Deletes a partition or part. -- [ATTACH PARTITION\|PART](#attach-partitionpart) — Adds a partition or part from the `detached` directory to the table. -- [ATTACH PARTITION FROM](#attach-partition-from) — Copies the data partition from one table to another and adds. -- [REPLACE PARTITION](#replace-partition) — Copies the data partition from one table to another and replaces. -- [MOVE PARTITION TO TABLE](#move-partition-to-table) — Moves the data partition from one table to another. -- [CLEAR COLUMN IN PARTITION](#clear-column-in-partition) — Resets the value of a specified column in a partition. -- [CLEAR INDEX IN PARTITION](#clear-index-in-partition) — Resets the specified secondary index in a partition. -- [FREEZE PARTITION](#freeze-partition) — Creates a backup of a partition. -- [UNFREEZE PARTITION](#unfreeze-partition) — Removes a backup of a partition. -- [FETCH PARTITION\|PART](#fetch-partitionpart) — Downloads a part or partition from another server. -- [MOVE PARTITION\|PART](#move-partitionpart) — Move partition/data part to another disk or volume. -- [UPDATE IN PARTITION](#update-in-partition) — Update data inside the partition by condition. -- [DELETE IN PARTITION](#delete-in-partition) — Delete data inside the partition by condition. +- [DETACH PARTITION\|PART](#detach-partitionpart) — Moves a partition or part to the `detached` directory and forgets it.
+- [DROP PARTITION\|PART](#drop-partitionpart) — Deletes a partition or part. +- [ATTACH PARTITION\|PART](#attach-partitionpart) — Adds a partition or part from the `detached` directory to the table. +- [ATTACH PARTITION FROM](#attach-partition-from) — Copies the data partition from one table to another and adds. +- [REPLACE PARTITION](#replace-partition) — Copies the data partition from one table to another and replaces. +- [MOVE PARTITION TO TABLE](#move-partition-to-table) — Moves the data partition from one table to another. +- [CLEAR COLUMN IN PARTITION](#clear-column-in-partition) — Resets the value of a specified column in a partition. +- [CLEAR INDEX IN PARTITION](#clear-index-in-partition) — Resets the specified secondary index in a partition. +- [FREEZE PARTITION](#freeze-partition) — Creates a backup of a partition. +- [UNFREEZE PARTITION](#unfreeze-partition) — Removes a backup of a partition. +- [FETCH PARTITION\|PART](#fetch-partitionpart) — Downloads a part or partition from another server. +- [MOVE PARTITION\|PART](#move-partitionpart) — Move partition/data part to another disk or volume. +- [UPDATE IN PARTITION](#update-in-partition) — Update data inside the partition by condition. +- [DELETE IN PARTITION](#delete-in-partition) — Delete data inside the partition by condition. @@ -103,13 +103,17 @@ ALTER TABLE table2 [ON CLUSTER cluster] ATTACH PARTITION partition_expr FROM tab ``` This query copies the data partition from `table1` to `table2`. -Note that data will be deleted neither from `table1` nor from `table2`. + +Note that: + +- Data will not be deleted from either `table1` or `table2`. +- `table1` may be a temporary table. For the query to run successfully, the following conditions must be met: -- Both tables must have the same structure. -- Both tables must have the same partition key, the same order by key and the same primary key. -- Both tables must have the same storage policy (a disk where the partition is stored should be available for both tables). +- Both tables must have the same structure. +- Both tables must have the same partition key, the same order by key and the same primary key. +- Both tables must have the same storage policy. ## REPLACE PARTITION @@ -117,13 +121,18 @@ ALTER TABLE table2 [ON CLUSTER cluster] REPLACE PARTITION partition_expr FROM table1 ``` -This query copies the data partition from the `table1` to `table2` and replaces existing partition in the `table2`. Note that data won’t be deleted from `table1`. +This query copies the data partition from `table1` to `table2` and replaces the existing partition in `table2`. + +Note that: + +- Data won’t be deleted from `table1`. +- `table1` may be a temporary table. For the query to run successfully, the following conditions must be met: -- Both tables must have the same structure. -- Both tables must have the same partition key, the same order by key and the same primary key. -- Both tables must have the same storage policy (a disk where the partition is stored should be available for both tables). +- Both tables must have the same structure. +- Both tables must have the same partition key, the same order by key and the same primary key. +- Both tables must have the same storage policy.
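A minimal usage sketch of the two copy operations above; the table names and the `toYYYYMM`-style partition value `202301` are hypothetical:

```sql
-- Copies partition 202301 from table1 into table2 alongside table2's existing parts.
ALTER TABLE table2 ATTACH PARTITION 202301 FROM table1;

-- Copies partition 202301 from table1 into table2, replacing table2's existing partition.
ALTER TABLE table2 REPLACE PARTITION 202301 FROM table1;
```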
## MOVE PARTITION TO TABLE @@ -135,10 +144,10 @@ This query moves the data partition from the `table_source` to `table_dest` with For the query to run successfully, the following conditions must be met: -- Both tables must have the same structure. -- Both tables must have the same partition key, the same order by key and the same primary key. -- Both tables must have the same storage policy (a disk where the partition is stored should be available for both tables). -- Both tables must be the same engine family (replicated or non-replicated). +- Both tables must have the same structure. +- Both tables must have the same partition key, the same order by key and the same primary key. +- Both tables must have the same storage policy. +- Both tables must be the same engine family (replicated or non-replicated). ## CLEAR COLUMN IN PARTITION @@ -170,9 +179,9 @@ Note that for old-styled tables you can specify the prefix of the partition name At the time of execution, for a data snapshot, the query creates hardlinks to a table data. Hardlinks are placed in the directory `/var/lib/clickhouse/shadow/N/...`, where: -- `/var/lib/clickhouse/` is the working ClickHouse directory specified in the config. -- `N` is the incremental number of the backup. -- if the `WITH NAME` parameter is specified, then the value of the `'backup_name'` parameter is used instead of the incremental number. +- `/var/lib/clickhouse/` is the working ClickHouse directory specified in the config. +- `N` is the incremental number of the backup. +- if the `WITH NAME` parameter is specified, then the value of the `'backup_name'` parameter is used instead of the incremental number. :::note If you use [a set of disks for data storage in a table](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-multiple-volumes), the `shadow/N` directory appears on every disk, storing data parts that matched by the `PARTITION` expression. @@ -240,8 +249,8 @@ ALTER TABLE users ATTACH PART 201901_2_2_0; Note that: -- The `ALTER ... FETCH PARTITION|PART` query isn’t replicated. It places the part or partition to the `detached` directory only on the local server. -- The `ALTER TABLE ... ATTACH` query is replicated. It adds the data to all replicas. The data is added to one of the replicas from the `detached` directory, and to the others - from neighboring replicas. +- The `ALTER ... FETCH PARTITION|PART` query isn’t replicated. It places the part or partition to the `detached` directory only on the local server. +- The `ALTER TABLE ... ATTACH` query is replicated. It adds the data to all replicas. The data is added to one of the replicas from the `detached` directory, and to the others - from neighboring replicas. Before downloading, the system checks if the partition exists and the table structure matches. The most appropriate replica is selected automatically from the healthy replicas. @@ -257,9 +266,9 @@ ALTER TABLE table_name [ON CLUSTER cluster] MOVE PARTITION|PART partition_expr T The `ALTER TABLE t MOVE` query: -- Not replicated, because different replicas can have different storage policies. -- Returns an error if the specified disk or volume is not configured. Query also returns an error if conditions of data moving, that specified in the storage policy, can’t be applied. -- Can return an error in the case, when data to be moved is already moved by a background process, concurrent `ALTER TABLE t MOVE` query or as a result of background data merging. A user shouldn’t perform any additional actions in this case. 
+- Not replicated, because different replicas can have different storage policies. +- Returns an error if the specified disk or volume is not configured. The query also returns an error if the data-moving conditions specified in the storage policy can’t be applied. +- Can return an error if the data to be moved has already been moved by a background process, a concurrent `ALTER TABLE t MOVE` query, or background data merging. A user shouldn’t perform any additional actions in this case. Example: @@ -275,18 +284,22 @@ Manipulates data in the specifies partition matching the specified filtering exp Syntax: ``` sql -ALTER TABLE [db.]table [ON CLUSTER cluster] UPDATE column1 = expr1 [, ...] [IN PARTITION partition_id] WHERE filter_expr +ALTER TABLE [db.]table [ON CLUSTER cluster] UPDATE column1 = expr1 [, ...] [IN PARTITION partition_expr] WHERE filter_expr ``` ### Example ``` sql +-- using partition name ALTER TABLE mt UPDATE x = x + 1 IN PARTITION 2 WHERE p = 2; + +-- using partition id +ALTER TABLE mt UPDATE x = x + 1 IN PARTITION ID '2' WHERE p = 2; ``` ### See Also -- [UPDATE](/docs/en/sql-reference/statements/alter/update.md/#alter-table-update-statements) +- [UPDATE](/docs/en/sql-reference/statements/alter/update.md/#alter-table-update-statements) ## DELETE IN PARTITION @@ -295,27 +308,31 @@ Deletes data in the specifies partition matching the specified filtering express Syntax: ``` sql -ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE [IN PARTITION partition_id] WHERE filter_expr +ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE [IN PARTITION partition_expr] WHERE filter_expr ``` ### Example ``` sql +-- using partition name ALTER TABLE mt DELETE IN PARTITION 2 WHERE p = 2; + +-- using partition id +ALTER TABLE mt DELETE IN PARTITION ID '2' WHERE p = 2; ``` ### See Also -- [DELETE](/docs/en/sql-reference/statements/alter/delete.md/#alter-mutations) +- [DELETE](/docs/en/sql-reference/statements/alter/delete.md/#alter-mutations) ## How to Set Partition Expression You can specify the partition expression in `ALTER ... PARTITION` queries in different ways: -- As a value from the `partition` column of the `system.parts` table. For example, `ALTER TABLE visits DETACH PARTITION 201901`. -- As a tuple of expressions or constants that matches (in types) the table partitioning keys tuple. In the case of a single element partitioning key, the expression should be wrapped in the `tuple (...)` function. For example, `ALTER TABLE visits DETACH PARTITION tuple(toYYYYMM(toDate('2019-01-25')))`. -- Using the partition ID. Partition ID is a string identifier of the partition (human-readable, if possible) that is used as the names of partitions in the file system and in ZooKeeper. The partition ID must be specified in the `PARTITION ID` clause, in a single quotes. For example, `ALTER TABLE visits DETACH PARTITION ID '201901'`. -- In the [ALTER ATTACH PART](#alter_attach-partition) and [DROP DETACHED PART](#alter_drop-detached) query, to specify the name of a part, use string literal with a value from the `name` column of the [system.detached_parts](/docs/en/operations/system-tables/detached_parts.md/#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`. +- As a value from the `partition` column of the `system.parts` table. For example, `ALTER TABLE visits DETACH PARTITION 201901`. +- As a tuple of expressions or constants that matches (in types) the table partitioning keys tuple.
In the case of a single element partitioning key, the expression should be wrapped in the `tuple (...)` function. For example, `ALTER TABLE visits DETACH PARTITION tuple(toYYYYMM(toDate('2019-01-25')))`. +- Using the partition ID. Partition ID is a string identifier of the partition (human-readable, if possible) that is used as the names of partitions in the file system and in ZooKeeper. The partition ID must be specified in the `PARTITION ID` clause, in single quotes. For example, `ALTER TABLE visits DETACH PARTITION ID '201901'`. +- In the [ALTER ATTACH PART](#alter_attach-partition) and [DROP DETACHED PART](#alter_drop-detached) query, to specify the name of a part, use a string literal with a value from the `name` column of the [system.detached_parts](/docs/en/operations/system-tables/detached_parts.md/#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`. Usage of quotes when specifying the partition depends on the type of partition expression. For example, for the `String` type, you have to specify its name in quotes (`'`). For the `Date` and `Int*` types no quotes are needed. diff --git a/docs/en/sql-reference/statements/alter/projection.md b/docs/en/sql-reference/statements/alter/projection.md index 908d28d7ab1..030e9352a00 100644 --- a/docs/en/sql-reference/statements/alter/projection.md +++ b/docs/en/sql-reference/statements/alter/projection.md @@ -17,7 +17,7 @@ Projections will create internally a new hidden table, this means that more IO a Example, If the projection has defined a different primary key, all the data from the original table will be duplicated. ::: -You can see more technical details about how projections work internally on this [page](/docs/en/guides/improving-query-performance/sparse-primary-indexes/sparse-primary-indexes-multiple.md/#option-3-projections). +You can see more technical details about how projections work internally on this [page](/docs/en/guides/best-practices/sparse-primary-indexes.md/#option-3-projections). ## Example filtering without using primary keys @@ -37,7 +37,7 @@ Using `ALTER TABLE`, we could add the Projection to an existing table: ``` ALTER TABLE visits_order ADD PROJECTION user_name_projection ( SELECT -* +* ORDER BY user_name ) @@ -128,7 +128,7 @@ SELECT user_agent, sum(pages_visited) FROM visits -GROUP BY user_id +GROUP BY user_agent ``` As mentioned before, we could review the `system.query_log` table. On the `projections` field we have the name of the projection used or empty if none has been used: @@ -161,6 +161,6 @@ The commands `ADD`, `DROP` and `CLEAR` are lightweight in a sense that they only Also, they are replicated, syncing projections metadata via ClickHouse Keeper or ZooKeeper. -:::note +:::note Projection manipulation is supported only for tables with [`*MergeTree`](/docs/en/engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](/docs/en/engines/table-engines/mergetree-family/replication.md) variants).
::: diff --git a/docs/en/sql-reference/statements/alter/setting.md b/docs/en/sql-reference/statements/alter/setting.md index f68f035146a..e18ec0cc293 100644 --- a/docs/en/sql-reference/statements/alter/setting.md +++ b/docs/en/sql-reference/statements/alter/setting.md @@ -58,4 +58,4 @@ ALTER TABLE example_table RESET SETTING max_part_loading_threads; **See Also** -- [MergeTree settings](../../../operations/settings/merge-tree-settings.md) +- [MergeTree settings](../../../operations/settings/merge-tree-settings.md) diff --git a/docs/en/sql-reference/statements/alter/skipping-index.md b/docs/en/sql-reference/statements/alter/skipping-index.md index 87124c86eac..67af76986da 100644 --- a/docs/en/sql-reference/statements/alter/skipping-index.md +++ b/docs/en/sql-reference/statements/alter/skipping-index.md @@ -10,11 +10,11 @@ sidebar_label: INDEX The following operations are available: -- `ALTER TABLE [db].table_name [ON CLUSTER cluster] ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name]` - Adds index description to tables metadata. +- `ALTER TABLE [db].table_name [ON CLUSTER cluster] ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name]` - Adds index description to tables metadata. -- `ALTER TABLE [db].table_name [ON CLUSTER cluster] DROP INDEX name` - Removes index description from tables metadata and deletes index files from disk. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). +- `ALTER TABLE [db].table_name [ON CLUSTER cluster] DROP INDEX name` - Removes index description from tables metadata and deletes index files from disk. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). -- `ALTER TABLE [db.]table_name [ON CLUSTER cluster] MATERIALIZE INDEX name [IN PARTITION partition_name]` - Rebuilds the secondary index `name` for the specified `partition_name`. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). If `IN PARTITION` part is omitted then it rebuilds the index for the whole table data. +- `ALTER TABLE [db.]table_name [ON CLUSTER cluster] MATERIALIZE INDEX name [IN PARTITION partition_name]` - Rebuilds the secondary index `name` for the specified `partition_name`. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). If `IN PARTITION` part is omitted then it rebuilds the index for the whole table data. The first two commands are lightweight in a sense that they only change metadata or remove files. 
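A hedged end-to-end sketch of the three index commands listed above; the table `my_table`, the column `user_id`, and the index parameters are illustrative, not taken from the original text:

```sql
-- Add a minmax data-skipping index; this only changes table metadata.
ALTER TABLE my_table ADD INDEX idx_uid user_id TYPE minmax GRANULARITY 4;

-- Build the index for data inserted before the index existed (runs as a mutation).
ALTER TABLE my_table MATERIALIZE INDEX idx_uid;

-- Remove the index metadata and delete its files from disk.
ALTER TABLE my_table DROP INDEX idx_uid;
```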
diff --git a/docs/en/sql-reference/statements/alter/update.md b/docs/en/sql-reference/statements/alter/update.md index 92f0f111b92..ab7d0ca7378 100644 --- a/docs/en/sql-reference/statements/alter/update.md +++ b/docs/en/sql-reference/statements/alter/update.md @@ -24,6 +24,11 @@ The synchronicity of the query processing is defined by the [mutations_sync](/do **See also** -- [Mutations](/docs/en/sql-reference/statements/alter/index.md#mutations) -- [Synchronicity of ALTER Queries](/docs/en/sql-reference/statements/alter/index.md#synchronicity-of-alter-queries) -- [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting +- [Mutations](/docs/en/sql-reference/statements/alter/index.md#mutations) +- [Synchronicity of ALTER Queries](/docs/en/sql-reference/statements/alter/index.md#synchronicity-of-alter-queries) +- [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting + + +## Related content + +- Blog: [Handling Updates and Deletes in ClickHouse](https://clickhouse.com/blog/handling-updates-and-deletes-in-clickhouse) diff --git a/docs/en/sql-reference/statements/alter/user.md b/docs/en/sql-reference/statements/alter/user.md index 31db89164d7..8785610f58a 100644 --- a/docs/en/sql-reference/statements/alter/user.md +++ b/docs/en/sql-reference/statements/alter/user.md @@ -25,10 +25,10 @@ To use `ALTER USER` you must have the [ALTER USER](../../../sql-reference/statem Specifies users or roles which are allowed to receive [privileges](../../../sql-reference/statements/grant.md#grant-privileges) from this user on the condition this user has also all required access granted with [GRANT OPTION](../../../sql-reference/statements/grant.md#grant-privigele-syntax). Options of the `GRANTEES` clause: -- `user` — Specifies a user this user can grant privileges to. -- `role` — Specifies a role this user can grant privileges to. -- `ANY` — This user can grant privileges to anyone. It's the default setting. -- `NONE` — This user can grant privileges to none. +- `user` — Specifies a user this user can grant privileges to. +- `role` — Specifies a role this user can grant privileges to. +- `ANY` — This user can grant privileges to anyone. It's the default setting. +- `NONE` — This user can grant privileges to no one. You can exclude any user or role by using the `EXCEPT` expression. For example, `ALTER USER user1 GRANTEES ANY EXCEPT user2`. It means if `user1` has some privileges granted with `GRANT OPTION` it will be able to grant those privileges to anyone except `user2`. diff --git a/docs/en/sql-reference/statements/alter/view.md b/docs/en/sql-reference/statements/alter/view.md index e382cdace30..5c5bf0355f6 100644 --- a/docs/en/sql-reference/statements/alter/view.md +++ b/docs/en/sql-reference/statements/alter/view.md @@ -6,11 +6,166 @@ sidebar_label: VIEW # ALTER TABLE … MODIFY QUERY Statement -You can modify `SELECT` query that was specified when a [materialized view](../create/view.md#materialized) was created with the `ALTER TABLE … MODIFY QUERY` statement. Use it when the materialized view was created without the `TO [db.]name` clause. The `allow_experimental_alter_materialized_view_structure` setting must be enabled. +You can modify the `SELECT` query that was specified when a [materialized view](../create/view.md#materialized) was created with the `ALTER TABLE … MODIFY QUERY` statement without interrupting the ingestion process.
-If a materialized view uses the `TO [db.]name` construction, you must [DETACH](../detach.md) the view, run [ALTER TABLE](index.md) query for the target table, and then [ATTACH](../attach.md) the previously detached (`DETACH`) view. +The `allow_experimental_alter_materialized_view_structure` setting must be enabled. -**Example** +This command is intended to change a materialized view created with the `TO [db.]name` clause. It does not change the structure of the underlying storage table, and it does not change the column definitions of the materialized view; because of this, the command is of very limited use for materialized views created without the `TO [db.]name` clause. + +**Example with TO table** + +```sql +CREATE TABLE events (ts DateTime, event_type String) +ENGINE = MergeTree ORDER BY (event_type, ts); + +CREATE TABLE events_by_day (ts DateTime, event_type String, events_cnt UInt64) +ENGINE = SummingMergeTree ORDER BY (event_type, ts); + +CREATE MATERIALIZED VIEW mv TO events_by_day AS +SELECT toStartOfDay(ts) ts, event_type, count() events_cnt +FROM events +GROUP BY ts, event_type; + +INSERT INTO events +SELECT Date '2020-01-01' + interval number * 900 second, + ['imp', 'click'][number%2+1] +FROM numbers(100); + +SELECT ts, event_type, sum(events_cnt) +FROM events_by_day +GROUP BY ts, event_type +ORDER BY ts, event_type; + +┌──────────────────ts─┬─event_type─┬─sum(events_cnt)─┐ +│ 2020-01-01 00:00:00 │ click │ 48 │ +│ 2020-01-01 00:00:00 │ imp │ 48 │ +│ 2020-01-02 00:00:00 │ click │ 2 │ +│ 2020-01-02 00:00:00 │ imp │ 2 │ +└─────────────────────┴────────────┴─────────────────┘ + +-- Let's add the new measurement `cost` +-- and the new dimension `browser`. + +ALTER TABLE events + ADD COLUMN browser String, + ADD COLUMN cost Float64; + +-- Columns do not have to match between a materialized view and the TO +-- (destination) table, so the next alter does not break insertion. + +ALTER TABLE events_by_day + ADD COLUMN cost Float64, + ADD COLUMN browser String after event_type, + MODIFY ORDER BY (event_type, ts, browser); + +INSERT INTO events +SELECT Date '2020-01-02' + interval number * 900 second, + ['imp', 'click'][number%2+1], + ['firefox', 'safary', 'chrome'][number%3+1], + 10/(number+1)%33 +FROM numbers(100); + +-- New columns `browser` and `cost` are empty because we did not change the materialized view yet.
+ +SELECT ts, event_type, browser, sum(events_cnt) events_cnt, round(sum(cost),2) cost +FROM events_by_day +GROUP BY ts, event_type, browser +ORDER BY ts, event_type; + +┌──────────────────ts─┬─event_type─┬─browser─┬─events_cnt─┬─cost─┐ +│ 2020-01-01 00:00:00 │ click │ │ 48 │ 0 │ +│ 2020-01-01 00:00:00 │ imp │ │ 48 │ 0 │ +│ 2020-01-02 00:00:00 │ click │ │ 50 │ 0 │ +│ 2020-01-02 00:00:00 │ imp │ │ 50 │ 0 │ +│ 2020-01-03 00:00:00 │ click │ │ 2 │ 0 │ +│ 2020-01-03 00:00:00 │ imp │ │ 2 │ 0 │ +└─────────────────────┴────────────┴─────────┴────────────┴──────┘ + +SET allow_experimental_alter_materialized_view_structure=1; + +ALTER TABLE mv MODIFY QUERY + SELECT toStartOfDay(ts) ts, event_type, browser, + count() events_cnt, + sum(cost) cost + FROM events + GROUP BY ts, event_type, browser; + +INSERT INTO events +SELECT Date '2020-01-03' + interval number * 900 second, + ['imp', 'click'][number%2+1], + ['firefox', 'safary', 'chrome'][number%3+1], + 10/(number+1)%33 +FROM numbers(100); + +SELECT ts, event_type, browser, sum(events_cnt) events_cnt, round(sum(cost),2) cost +FROM events_by_day +GROUP BY ts, event_type, browser +ORDER BY ts, event_type; + +┌──────────────────ts─┬─event_type─┬─browser─┬─events_cnt─┬──cost─┐ +│ 2020-01-01 00:00:00 │ click │ │ 48 │ 0 │ +│ 2020-01-01 00:00:00 │ imp │ │ 48 │ 0 │ +│ 2020-01-02 00:00:00 │ click │ │ 50 │ 0 │ +│ 2020-01-02 00:00:00 │ imp │ │ 50 │ 0 │ +│ 2020-01-03 00:00:00 │ click │ firefox │ 16 │ 6.84 │ +│ 2020-01-03 00:00:00 │ click │ │ 2 │ 0 │ +│ 2020-01-03 00:00:00 │ click │ safary │ 16 │ 9.82 │ +│ 2020-01-03 00:00:00 │ click │ chrome │ 16 │ 5.63 │ +│ 2020-01-03 00:00:00 │ imp │ │ 2 │ 0 │ +│ 2020-01-03 00:00:00 │ imp │ firefox │ 16 │ 15.14 │ +│ 2020-01-03 00:00:00 │ imp │ safary │ 16 │ 6.14 │ +│ 2020-01-03 00:00:00 │ imp │ chrome │ 16 │ 7.89 │ +│ 2020-01-04 00:00:00 │ click │ safary │ 1 │ 0.1 │ +│ 2020-01-04 00:00:00 │ click │ firefox │ 1 │ 0.1 │ +│ 2020-01-04 00:00:00 │ imp │ firefox │ 1 │ 0.1 │ +│ 2020-01-04 00:00:00 │ imp │ chrome │ 1 │ 0.1 │ +└─────────────────────┴────────────┴─────────┴────────────┴───────┘ + +-- !!! During `MODIFY ORDER BY`, a PRIMARY KEY was implicitly introduced. + +SHOW CREATE TABLE events_by_day FORMAT TSVRaw + +CREATE TABLE test.events_by_day +( + `ts` DateTime, + `event_type` String, + `browser` String, + `events_cnt` UInt64, + `cost` Float64 +) +ENGINE = SummingMergeTree +PRIMARY KEY (event_type, ts) +ORDER BY (event_type, ts, browser) +SETTINGS index_granularity = 8192 + +-- !!! The column definitions are unchanged, but it does not matter: we are not querying +-- the MATERIALIZED VIEW, we are querying the TO (storage) table. +-- The SELECT section is updated. + +SHOW CREATE TABLE mv FORMAT TSVRaw; + +CREATE MATERIALIZED VIEW test.mv TO test.events_by_day +( + `ts` DateTime, + `event_type` String, + `events_cnt` UInt64 +) AS +SELECT + toStartOfDay(ts) AS ts, + event_type, + browser, + count() AS events_cnt, + sum(cost) AS cost +FROM test.events +GROUP BY + ts, + event_type, + browser +``` + +**Example without TO table** + +This form is of very limited use because you can only change the `SELECT` section without adding new columns.
```sql CREATE TABLE src_table (`a` UInt32) ENGINE = MergeTree ORDER BY a; @@ -25,6 +180,7 @@ SELECT * FROM mv; └───┘ ``` ```sql +set allow_experimental_alter_materialized_view_structure=1; ALTER TABLE mv MODIFY QUERY SELECT a * 2 as a FROM src_table; INSERT INTO src_table (a) VALUES (3), (4); SELECT * FROM mv; diff --git a/docs/en/sql-reference/statements/check-table.md b/docs/en/sql-reference/statements/check-table.md index 8c4b8ab90a2..0209d59b018 100644 --- a/docs/en/sql-reference/statements/check-table.md +++ b/docs/en/sql-reference/statements/check-table.md @@ -16,15 +16,15 @@ The `CHECK TABLE` query compares actual file sizes with the expected values whic The query response contains the `result` column with a single row. The row has a value of [Boolean](../../sql-reference/data-types/boolean.md) type: -- 0 - The data in the table is corrupted. -- 1 - The data maintains integrity. +- 0 - The data in the table is corrupted. +- 1 - The data maintains integrity. The `CHECK TABLE` query supports the following table engines: -- [Log](../../engines/table-engines/log-family/log.md) -- [TinyLog](../../engines/table-engines/log-family/tinylog.md) -- [StripeLog](../../engines/table-engines/log-family/stripelog.md) -- [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md) +- [Log](../../engines/table-engines/log-family/log.md) +- [TinyLog](../../engines/table-engines/log-family/tinylog.md) +- [StripeLog](../../engines/table-engines/log-family/stripelog.md) +- [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md) Performed over the tables with another table engines causes an exception. diff --git a/docs/en/sql-reference/statements/create/database.md b/docs/en/sql-reference/statements/create/database.md index 7954d1362f1..a2f5b2b9fba 100644 --- a/docs/en/sql-reference/statements/create/database.md +++ b/docs/en/sql-reference/statements/create/database.md @@ -18,8 +18,8 @@ CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] [ENGINE = engine(.. If the `db_name` database already exists, then ClickHouse does not create a new database and: -- Doesn’t throw an exception if clause is specified. -- Throws an exception if clause isn’t specified. +- Doesn’t throw an exception if clause is specified. +- Throws an exception if clause isn’t specified. ### ON CLUSTER diff --git a/docs/en/sql-reference/statements/create/dictionary.md b/docs/en/sql-reference/statements/create/dictionary.md index e789dd9257f..29c72d62f24 100644 --- a/docs/en/sql-reference/statements/create/dictionary.md +++ b/docs/en/sql-reference/statements/create/dictionary.md @@ -5,7 +5,7 @@ sidebar_label: DICTIONARY title: "CREATE DICTIONARY" --- -Creates a new [dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) with given [structure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [layout](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) and [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). 
+Creates a new [dictionary](../../../sql-reference/dictionaries/index.md) with given [structure](../../../sql-reference/dictionaries/index.md#dictionary-key-and-fields), [source](../../../sql-reference/dictionaries/index.md#dictionary-sources), [layout](../../../sql-reference/dictionaries/index.md#storig-dictionaries-in-memory) and [lifetime](../../../sql-reference/dictionaries/index.md#dictionary-updates). ## Syntax @@ -29,7 +29,7 @@ The dictionary structure consists of attributes. Dictionary attributes are speci `ON CLUSTER` clause allows creating dictionary on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md). -Depending on dictionary [layout](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) one or more attributes can be specified as dictionary keys. +Depending on dictionary [layout](../../../sql-reference/dictionaries/index.md#storig-dictionaries-in-memory) one or more attributes can be specified as dictionary keys. ## SOURCE @@ -125,9 +125,9 @@ LAYOUT(HASHED()) ### Create a dictionary from another database -Please see the details in [Dictionary sources](/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md/#dbms). +Please see the details in [Dictionary sources](/docs/en/sql-reference/dictionaries/index.md#dictionary-sources/#dbms). **See Also** -- For more information, see the [Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) section. -- [system.dictionaries](../../../operations/system-tables/dictionaries.md) — This table contains information about [Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md). +- For more information, see the [Dictionaries](../../../sql-reference/dictionaries/index.md) section. +- [system.dictionaries](../../../operations/system-tables/dictionaries.md) — This table contains information about [Dictionaries](../../../sql-reference/dictionaries/index.md). diff --git a/docs/en/sql-reference/statements/create/function.md b/docs/en/sql-reference/statements/create/function.md index 80d20e8ccad..db65cb4448c 100644 --- a/docs/en/sql-reference/statements/create/function.md +++ b/docs/en/sql-reference/statements/create/function.md @@ -2,11 +2,10 @@ slug: /en/sql-reference/statements/create/function sidebar_position: 38 sidebar_label: FUNCTION +title: "CREATE FUNCTION - user defined function (UDF)" --- -# CREATE FUNCTION — user defined function (UDF) - -Creates a user defined function from a lambda expression. The expression must consist of function parameters, constants, operators, or other function calls. +Creates a user defined function (UDF) from a lambda expression. The expression must consist of function parameters, constants, operators, or other function calls. **Syntax** @@ -17,9 +16,9 @@ A function can have an arbitrary number of parameters. There are a few restrictions: -- The name of a function must be unique among user defined and system functions. -- Recursive functions are not allowed. -- All variables used by a function must be specified in its parameter list. +- The name of a function must be unique among user defined and system functions. +- Recursive functions are not allowed. +- All variables used by a function must be specified in its parameter list. If any restriction is violated then an exception is raised.
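A small sketch that satisfies all three restrictions above (the function name and parameters are illustrative):

```sql
-- Every variable used in the body (x, k, b) appears in the parameter list,
-- and the body is non-recursive.
CREATE FUNCTION linear_equation AS (x, k, b) -> k*x + b;

SELECT linear_equation(number, 2, 1) FROM numbers(3);
-- Returns 1, 3, 5
```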
@@ -61,4 +60,6 @@ Result: ## Related Content -- [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs) +### [Executable UDFs](/docs/en/sql-reference/functions/udf.md). + +### [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs) diff --git a/docs/en/sql-reference/statements/create/index.md b/docs/en/sql-reference/statements/create/index.md index b9062249f65..14e29d051d7 100644 --- a/docs/en/sql-reference/statements/create/index.md +++ b/docs/en/sql-reference/statements/create/index.md @@ -8,13 +8,13 @@ sidebar_label: CREATE Create queries make a new entity of one of the following kinds: -- [DATABASE](../../../sql-reference/statements/create/database.md) -- [TABLE](../../../sql-reference/statements/create/table.md) -- [VIEW](../../../sql-reference/statements/create/view.md) -- [DICTIONARY](../../../sql-reference/statements/create/dictionary.md) -- [FUNCTION](../../../sql-reference/statements/create/function.md) -- [USER](../../../sql-reference/statements/create/user.md) -- [ROLE](../../../sql-reference/statements/create/role.md) -- [ROW POLICY](../../../sql-reference/statements/create/row-policy.md) -- [QUOTA](../../../sql-reference/statements/create/quota.md) -- [SETTINGS PROFILE](../../../sql-reference/statements/create/settings-profile.md) +- [DATABASE](../../../sql-reference/statements/create/database.md) +- [TABLE](../../../sql-reference/statements/create/table.md) +- [VIEW](../../../sql-reference/statements/create/view.md) +- [DICTIONARY](../../../sql-reference/statements/create/dictionary.md) +- [FUNCTION](../../../sql-reference/statements/create/function.md) +- [USER](../../../sql-reference/statements/create/user.md) +- [ROLE](../../../sql-reference/statements/create/role.md) +- [ROW POLICY](../../../sql-reference/statements/create/row-policy.md) +- [QUOTA](../../../sql-reference/statements/create/quota.md) +- [SETTINGS PROFILE](../../../sql-reference/statements/create/settings-profile.md) diff --git a/docs/en/sql-reference/statements/create/quota.md b/docs/en/sql-reference/statements/create/quota.md index 3952743b480..7c31f93fff7 100644 --- a/docs/en/sql-reference/statements/create/quota.md +++ b/docs/en/sql-reference/statements/create/quota.md @@ -5,7 +5,7 @@ sidebar_label: QUOTA title: "CREATE QUOTA" --- -Creates a [quota](../../../operations/access-rights.md#quotas-management) that can be assigned to a user or a role. +Creates a [quota](../../../guides/sre/user-management/index.md#quotas-management) that can be assigned to a user or a role. Syntax: diff --git a/docs/en/sql-reference/statements/create/role.md b/docs/en/sql-reference/statements/create/role.md index 68fdd51e957..9b14e220e1f 100644 --- a/docs/en/sql-reference/statements/create/role.md +++ b/docs/en/sql-reference/statements/create/role.md @@ -5,7 +5,7 @@ sidebar_label: ROLE title: "CREATE ROLE" --- -Creates new [roles](../../../operations/access-rights.md#role-management). Role is a set of [privileges](../../../sql-reference/statements/grant.md#grant-privileges). A [user](../../../sql-reference/statements/create/user.md) assigned a role gets all the privileges of this role. +Creates new [roles](../../../guides/sre/user-management/index.md#role-management). Role is a set of [privileges](../../../sql-reference/statements/grant.md#grant-privileges). A [user](../../../sql-reference/statements/create/user.md) assigned a role gets all the privileges of this role. 
Syntax: @@ -22,7 +22,7 @@ User can have default roles which apply at user login. To set default roles, use To revoke a role, use the [REVOKE](../../../sql-reference/statements/revoke.md) statement. -To delete role, use the [DROP ROLE](../../../sql-reference/statements/drop#drop-role-statement) statement. The deleted role is being automatically revoked from all the users and roles to which it was assigned. +To delete a role, use the [DROP ROLE](../../../sql-reference/statements/drop.md#drop-role-statement) statement. The deleted role is automatically revoked from all the users and roles to which it was assigned. ## Examples diff --git a/docs/en/sql-reference/statements/create/row-policy.md b/docs/en/sql-reference/statements/create/row-policy.md index 31ce9221eea..83bb2e6bb9a 100644 --- a/docs/en/sql-reference/statements/create/row-policy.md +++ b/docs/en/sql-reference/statements/create/row-policy.md @@ -5,17 +5,17 @@ sidebar_label: ROW POLICY title: "CREATE ROW POLICY" --- -Creates a [row policy](../../../operations/access-rights.md#row-policy-management), i.e. a filter used to determine which rows a user can read from a table. +Creates a [row policy](../../../guides/sre/user-management/index.md#row-policy-management), i.e. a filter used to determine which rows a user can read from a table. -:::warning +:::tip Row policies makes sense only for users with readonly access. If user can modify table or copy partitions between tables, it defeats the restrictions of row policies. ::: Syntax: ``` sql -CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE] policy_name1 [ON CLUSTER cluster_name1] ON [db1.]table1 - [, policy_name2 [ON CLUSTER cluster_name2] ON [db2.]table2 ...] +CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE] policy_name1 [ON CLUSTER cluster_name1] ON [db1.]table1|db1.* + [, policy_name2 [ON CLUSTER cluster_name2] ON [db2.]table2|db2.* ...] [FOR SELECT] USING condition [AS {PERMISSIVE | RESTRICTIVE}] [TO {role1 [, role2 ...] | ALL | ALL EXCEPT role1 [, role2 ...]}] @@ -31,7 +31,7 @@ In the section `TO` you can provide a list of users and roles this policy should Keyword `ALL` means all the ClickHouse users including current user. Keyword `ALL EXCEPT` allow to exclude some users from the all users list, for example, `CREATE ROW POLICY ... TO ALL EXCEPT accountant, john@localhost` -:::note +:::note If there are no row policies defined for a table then any user can `SELECT` all the row from the table. Defining one or more row policies for the table makes the access to the table depending on the row policies no matter if those row policies are defined for the current user or not. For example, the following policy `CREATE ROW POLICY pol1 ON mydb.table1 USING b=1 TO mira, peter` @@ -76,6 +76,20 @@ CREATE ROW POLICY pol2 ON mydb.table1 USING c=2 AS RESTRICTIVE TO peter, antonio enables the user `peter` to see rows only if both `b=1` AND `c=2`. +Database policies are combined with table policies. + +For example, the following policies + +``` sql +CREATE ROW POLICY pol1 ON mydb.* USING b=1 TO mira, peter +CREATE ROW POLICY pol2 ON mydb.table1 USING c=2 AS RESTRICTIVE TO peter, antonio +``` + +enable the user `peter` to see table1 rows only if both `b=1` AND `c=2`, although +any other table in mydb would have only the `b=1` policy applied for the user. + + + ## ON CLUSTER Clause Allows creating row policies on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).
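+For example (an illustrative sketch; the cluster name `mycluster` is an assumption), a single statement can create the same policy on every server of a cluster: + +``` sql +CREATE ROW POLICY pol_cluster ON CLUSTER mycluster ON mydb.table1 USING b=1 TO ALL; +```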
@@ -88,3 +102,5 @@ Allows creating row policies on a cluster, see [Distributed DDL](../../../sql-re `CREATE ROW POLICY filter2 ON mydb.mytable USING a<1000 AND b=5 TO ALL EXCEPT mira` `CREATE ROW POLICY filter3 ON mydb.mytable USING 1 TO admin` + +`CREATE ROW POLICY filter4 ON mydb.* USING 1 TO admin` diff --git a/docs/en/sql-reference/statements/create/settings-profile.md b/docs/en/sql-reference/statements/create/settings-profile.md index c2424ff6046..8e221a4d82f 100644 --- a/docs/en/sql-reference/statements/create/settings-profile.md +++ b/docs/en/sql-reference/statements/create/settings-profile.md @@ -5,7 +5,7 @@ sidebar_label: SETTINGS PROFILE title: "CREATE SETTINGS PROFILE" --- -Creates [settings profiles](../../../operations/access-rights.md#settings-profiles-management) that can be assigned to a user or a role. +Creates [settings profiles](../../../guides/sre/user-management/index.md#settings-profiles-management) that can be assigned to a user or a role. Syntax: @@ -27,7 +27,7 @@ CREATE USER robin IDENTIFIED BY 'password'; Create the `max_memory_usage_profile` settings profile with value and constraints for the `max_memory_usage` setting and assign it to user `robin`: ``` sql -CREATE -SETTINGS PROFILE max_memory_usage_profile SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000 +CREATE +SETTINGS PROFILE max_memory_usage_profile SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000 TO robin ``` diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 54977e1b0ab..de44a001472 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -110,25 +110,23 @@ If the type is not `Nullable` and if `NULL` is specified, it will be treated as See also [data_type_default_nullable](../../../operations/settings/settings.md#data_type_default_nullable) setting. -## Default Values +## Default Values {#default_values} -The column description can specify an expression for a default value, in one of the following ways: `DEFAULT expr`, `MATERIALIZED expr`, `ALIAS expr`. +The column description can specify a default value expression in the form of `DEFAULT expr`, `MATERIALIZED expr`, or `ALIAS expr`. Example: `URLDomain String DEFAULT domain(URL)`. -Example: `URLDomain String DEFAULT domain(URL)`. +The expression `expr` is optional. If it is omitted, the column type must be specified explicitly and the default value will be `0` for numeric columns, `''` (the empty string) for string columns, `[]` (the empty array) for array columns, `1970-01-01` for date columns, or `NULL` for nullable columns. -If an expression for the default value is not defined, the default values will be set to zeros for numbers, empty strings for strings, empty arrays for arrays, and `1970-01-01` for dates or zero unix timestamp for DateTime, NULL for Nullable. +The column type of a default value column can be omitted, in which case it is inferred from `expr`'s type. For example, the type of column `EventDate DEFAULT toDate(EventTime)` will be `Date`. -If the default expression is defined, the column type is optional. If there isn’t an explicitly defined type, the default expression type is used. Example: `EventDate DEFAULT toDate(EventTime)` – the ‘Date’ type will be used for the ‘EventDate’ column. +If both a data type and a default value expression are specified, an implicit type casting function is inserted that converts the expression to the specified type.
Example: `Hits UInt32 DEFAULT 0` is internally represented as `Hits UInt32 DEFAULT toUInt32(0)`. -If the data type and default expression are defined explicitly, this expression will be cast to the specified type using type casting functions. Example: `Hits UInt32 DEFAULT 0` means the same thing as `Hits UInt32 DEFAULT toUInt32(0)`. - -Default expressions may be defined as an arbitrary expression from table constants and columns. When creating and changing the table structure, it checks that expressions do not contain loops. For INSERT, it checks that expressions are resolvable – that all columns they can be calculated from have been passed. +A default value expression `expr` may reference arbitrary table columns and constants. ClickHouse checks that changes of the table structure do not introduce loops in the expression calculation. For INSERT, it checks that expressions are resolvable – that all columns they can be calculated from have been passed. ### DEFAULT `DEFAULT expr` -Normal default value. If the INSERT query does not specify the corresponding column, it will be filled in by computing the corresponding expression. +Normal default value. If the value of such a column is not specified in an INSERT query, it is computed from `expr`. Example: @@ -154,9 +152,9 @@ SELECT * FROM test; `MATERIALIZED expr` -Materialized expression. Such a column can’t be specified for INSERT, because it is always calculated. -For an INSERT without a list of columns, these columns are not considered. -In addition, this column is not substituted when using an asterisk in a SELECT query. This is to preserve the invariant that the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns. +Materialized expression. Values of such columns are always calculated; they cannot be specified in INSERT queries. + +Also, default value columns of this type are not included in the result of `SELECT *`. This is to preserve the invariant that the result of a `SELECT *` can always be inserted back into the table using `INSERT`. This behavior can be disabled with setting `asterisk_include_materialized_columns`. Example: @@ -192,8 +190,9 @@ SELECT * FROM test SETTINGS asterisk_include_materialized_columns=1; `EPHEMERAL [expr]` -Ephemeral column. Such a column isn't stored in the table and cannot be SELECTed, but can be referenced in the defaults of CREATE statement. If `expr` is omitted type for column is required. -INSERT without list of columns will skip such column, so SELECT/INSERT invariant is preserved - the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns. +Ephemeral column. Columns of this type are not stored in the table and it is not possible to SELECT from them. The only purpose of ephemeral columns is to build default value expressions of other columns from them. + +An insert without explicitly specified columns will skip columns of this type. This is to preserve the invariant that the result of a `SELECT *` can always be inserted back into the table using `INSERT`. Example: @@ -205,7 +204,7 @@ CREATE OR REPLACE TABLE test hexed FixedString(4) DEFAULT unhex(unhexed) ) ENGINE = MergeTree -ORDER BY id +ORDER BY id; INSERT INTO test (id, unhexed) Values (1, '5a90b714'); @@ -227,9 +226,9 @@ hex(hexed): 5A90B714 `ALIAS expr` -Synonym. Such a column isn’t stored in the table at all. -Its values can’t be inserted in a table, and it is not substituted when using an asterisk in a SELECT query. 
-It can be used in SELECTs if the alias is expanded during query parsing. +Calculated columns (synonym). Columns of this type are not stored in the table and it is not possible to INSERT values into them. + +When SELECT queries explicitly reference columns of this type, the value is computed at query time from `expr`. By default, `SELECT *` excludes ALIAS columns. This behavior can be disabled with setting `asterisk_include_alias_columns`. When using the ALTER query to add new columns, old data for these columns is not written. Instead, when reading old data that does not have values for the new columns, expressions are computed on the fly by default. However, if running the expressions requires different columns that are not indicated in the query, these columns will additionally be read, but only for the blocks of data that need it. @@ -286,7 +285,7 @@ ENGINE = engine PRIMARY KEY(expr1[, expr2,...]); ``` -:::warning +:::tip You can't combine both ways in one query. ::: @@ -314,7 +313,9 @@ Defines storage time for values. Can be specified only for MergeTree-family tabl ## Column Compression Codecs -By default, ClickHouse applies the `lz4` compression method. For `MergeTree`-engine family you can change the default compression method in the [compression](../../../operations/server-configuration-parameters/settings.md#server-settings-compression) section of a server configuration. +By default, ClickHouse applies `lz4` compression in the self-managed version, and `zstd` in ClickHouse Cloud. + +For `MergeTree`-engine family you can change the default compression method in the [compression](../../../operations/server-configuration-parameters/settings.md#server-settings-compression) section of a server configuration. You can also define the compression method for each individual column in the `CREATE TABLE` query. @@ -342,16 +343,16 @@ ALTER TABLE codec_example MODIFY COLUMN float_value CODEC(Default); Codecs can be combined in a pipeline, for example, `CODEC(Delta, Default)`. -:::warning +:::tip You can’t decompress ClickHouse database files with external utilities like `lz4`. Instead, use the special [clickhouse-compressor](https://github.com/ClickHouse/ClickHouse/tree/master/programs/compressor) utility. ::: Compression is supported for the following table engines: -- [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family. Supports column compression codecs and selecting the default compression method by [compression](../../../operations/server-configuration-parameters/settings.md#server-settings-compression) settings. -- [Log](../../../engines/table-engines/log-family/index.md) family. Uses the `lz4` compression method by default and supports column compression codecs. -- [Set](../../../engines/table-engines/special/set.md). Only supported the default compression. -- [Join](../../../engines/table-engines/special/join.md). Only supported the default compression. +- [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family. Supports column compression codecs and selecting the default compression method by [compression](../../../operations/server-configuration-parameters/settings.md#server-settings-compression) settings. +- [Log](../../../engines/table-engines/log-family/index.md) family. Uses the `lz4` compression method by default and supports column compression codecs. +- [Set](../../../engines/table-engines/special/set.md). Only supported the default compression. +- [Join](../../../engines/table-engines/special/join.md). 
Only supported the default compression. ClickHouse supports general purpose codecs and specialized codecs. @@ -379,10 +380,10 @@ High compression levels are useful for asymmetric scenarios, like compress once, `DEFLATE_QPL` — [Deflate compression algorithm](https://github.com/intel/qpl) implemented by Intel® Query Processing Library. Some limitations apply: -- DEFLATE_QPL is experimental and can only be used after setting configuration parameter `allow_experimental_codecs=1`. -- DEFLATE_QPL only works if ClickHouse was compiled with support for AVX2 or AVX512 instructions -- DEFLATE_QPL works best if the system has a Intel® IAA (In-Memory Analytics Accelerator) offloading device -- DEFLATE_QPL-compressed data can only be transferred between ClickHouse nodes compiled with support for AVX2/AVX512 +- DEFLATE_QPL is experimental and can only be used after setting configuration parameter `allow_experimental_codecs=1`. +- DEFLATE_QPL requires a ClickHouse build compiled with SSE 4.2 instructions (by default, this is the case). Refer to [Build Clickhouse with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Build-Clickhouse-with-DEFLATE_QPL) for more details. +- DEFLATE_QPL works best if the system has a Intel® IAA (In-Memory Analytics Accelerator) offloading device. Refer to [Accelerator Configuration](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#accelerator-configuration) and [Benchmark with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Run-Benchmark-with-DEFLATE_QPL) for more details. +- DEFLATE_QPL-compressed data can only be transferred between ClickHouse nodes compiled with SSE 4.2 enabled. ### Specialized Codecs @@ -394,15 +395,15 @@ These codecs are designed to make compression more effective by using specific f #### DoubleDelta -`DoubleDelta` — Calculates delta of deltas and writes it in compact binary form. Optimal compression rates are achieved for monotonic sequences with a constant stride, such as time series data. Can be used with any fixed-width type. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. Uses 1 extra bit for 32-byte deltas: 5-bit prefixes instead of 4-bit prefixes. For additional information, see Compressing Time Stamps in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf). +`DoubleDelta(bytes_size)` — Calculates delta of deltas and writes it in compact binary form. Possible `bytes_size` values: 1, 2, 4, 8, the default value is `sizeof(type)` if equal to 1, 2, 4, or 8. In all other cases, it’s 1. Optimal compression rates are achieved for monotonic sequences with a constant stride, such as time series data. Can be used with any fixed-width type. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. Uses 1 extra bit for 32-bit deltas: 5-bit prefixes instead of 4-bit prefixes. For additional information, see Compressing Time Stamps in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf). #### Gorilla -`Gorilla` — Calculates XOR between current and previous floating point value and writes it in compact binary form. The smaller the difference between consecutive values is, i.e. the slower the values of the series changes, the better the compression rate. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. 
For additional information, see section 4.1 in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](https://doi.org/10.14778/2824032.2824078). +`Gorilla(bytes_size)` — Calculates XOR between current and previous floating point value and writes it in compact binary form. The smaller the difference between consecutive values is, i.e. the slower the values of the series changes, the better the compression rate. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. Possible `bytes_size` values: 1, 2, 4, 8, the default value is `sizeof(type)` if equal to 1, 2, 4, or 8. In all other cases, it’s 1. For additional information, see section 4.1 in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](https://doi.org/10.14778/2824032.2824078). #### FPC -`FPC` - Repeatedly predicts the next floating point value in the sequence using the better of two predictors, then XORs the actual with the predicted value, and leading-zero compresses the result. Similar to Gorilla, this is efficient when storing a series of floating point values that change slowly. For 64-bit values (double), FPC is faster than Gorilla, for 32-bit values your mileage may vary. For a detailed description of the algorithm see [High Throughput Compression of Double-Precision Floating-Point Data](https://userweb.cs.txstate.edu/~burtscher/papers/dcc07a.pdf). +`FPC(level, float_size)` - Repeatedly predicts the next floating point value in the sequence using the better of two predictors, then XORs the actual with the predicted value, and leading-zero compresses the result. Similar to Gorilla, this is efficient when storing a series of floating point values that change slowly. For 64-bit values (double), FPC is faster than Gorilla, for 32-bit values your mileage may vary. Possible `level` values: 1-28, the default value is 12. Possible `float_size` values: 4, 8, the default value is `sizeof(type)` if type is Float. In all other cases, it’s 4. For a detailed description of the algorithm see [High Throughput Compression of Double-Precision Floating-Point Data](https://userweb.cs.txstate.edu/~burtscher/papers/dcc07a.pdf). #### T64 @@ -437,11 +438,11 @@ Encryption codecs: These codecs use a fixed nonce and encryption is therefore deterministic. This makes it compatible with deduplicating engines such as [ReplicatedMergeTree](../../../engines/table-engines/mergetree-family/replication.md) but has a weakness: when the same data block is encrypted twice, the resulting ciphertext will be exactly the same so an adversary who can read the disk can see this equivalence (although only the equivalence, without getting its content). -:::warning +:::note Most engines including the "\*MergeTree" family create index files on disk without applying codecs. This means plaintext will appear on disk if an encrypted column is indexed. ::: -:::warning +:::note If you perform a SELECT query mentioning a specific value in an encrypted column (such as in its WHERE clause), the value may appear in [system.query_log](../../../operations/system-tables/query_log.md). You may want to disable the logging. ::: @@ -473,12 +474,12 @@ ENGINE = MergeTree ORDER BY x; ClickHouse supports temporary tables which have the following characteristics: -- Temporary tables disappear when the session ends, including if the connection is lost. -- A temporary table uses the Memory engine only. -- The DB can’t be specified for a temporary table. It is created outside of databases. 
-- Impossible to create a temporary table with distributed DDL query on all cluster servers (by using `ON CLUSTER`): this table exists only in the current session. -- If a temporary table has the same name as another one and a query specifies the table name without specifying the DB, the temporary table will be used. -- For distributed query processing, temporary tables used in a query are passed to remote servers. +- Temporary tables disappear when the session ends, including if the connection is lost. +- A temporary table uses the Memory table engine when the engine is not specified, and it may use any table engine except `Replicated` and `KeeperMap` engines. +- The DB can’t be specified for a temporary table. It is created outside of databases. +- Impossible to create a temporary table with distributed DDL query on all cluster servers (by using `ON CLUSTER`): this table exists only in the current session. +- If a temporary table has the same name as another one and a query specifies the table name without specifying the DB, the temporary table will be used. +- For distributed query processing, temporary tables used in a query are passed to remote servers. To create a temporary table, use the following syntax: @@ -488,7 +489,7 @@ CREATE TEMPORARY TABLE [IF NOT EXISTS] table_name name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], ... -) +) [ENGINE = engine] ``` In most cases, temporary tables are not created manually, but when using external data for a query, or for distributed `(GLOBAL) IN`. For more information, see the appropriate sections @@ -576,7 +577,7 @@ SELECT * FROM base.t1; You can add a comment to the table when you creating it. :::note -The comment is supported for all table engines except [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) and [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md). +The comment clause is supported by all table engines except [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) and [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md). ::: diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index a756b3d4a0d..d168be63c36 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -5,7 +5,7 @@ sidebar_label: USER title: "CREATE USER" --- -Creates [user accounts](../../../operations/access-rights.md#user-account-management). +Creates [user accounts](../../../guides/sre/user-management/index.md#user-account-management). 
Syntax: @@ -26,36 +26,112 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] There are multiple ways of user identification: -- `IDENTIFIED WITH no_password` -- `IDENTIFIED WITH plaintext_password BY 'qwerty'` -- `IDENTIFIED WITH sha256_password BY 'qwerty'` or `IDENTIFIED BY 'password'` -- `IDENTIFIED WITH sha256_hash BY 'hash'` or `IDENTIFIED WITH sha256_hash BY 'hash' SALT 'salt'` -- `IDENTIFIED WITH double_sha1_password BY 'qwerty'` -- `IDENTIFIED WITH double_sha1_hash BY 'hash'` -- `IDENTIFIED WITH ldap SERVER 'server_name'` -- `IDENTIFIED WITH kerberos` or `IDENTIFIED WITH kerberos REALM 'realm'` -- `IDENTIFIED WITH ssl_certificate CN 'mysite.com:user'` +- `IDENTIFIED WITH no_password` +- `IDENTIFIED WITH plaintext_password BY 'qwerty'` +- `IDENTIFIED WITH sha256_password BY 'qwerty'` or `IDENTIFIED BY 'password'` +- `IDENTIFIED WITH sha256_hash BY 'hash'` or `IDENTIFIED WITH sha256_hash BY 'hash' SALT 'salt'` +- `IDENTIFIED WITH double_sha1_password BY 'qwerty'` +- `IDENTIFIED WITH double_sha1_hash BY 'hash'` +- `IDENTIFIED WITH bcrypt_password BY 'qwerty'` +- `IDENTIFIED WITH bcrypt_hash BY 'hash'` +- `IDENTIFIED WITH ldap SERVER 'server_name'` +- `IDENTIFIED WITH kerberos` or `IDENTIFIED WITH kerberos REALM 'realm'` +- `IDENTIFIED WITH ssl_certificate CN 'mysite.com:user'` +- `IDENTIFIED BY 'qwerty'` -For identification with sha256_hash using `SALT` - hash must be calculated from concatination of 'password' and 'salt'. +## Examples + +1. The following username is `name1` and does not require a password - which obviously doesn't provide much security: + + ```sql + CREATE USER name1 NOT IDENTIFIED + ``` + +2. To specify a plaintext password: + + ```sql + CREATE USER name2 IDENTIFIED WITH plaintext_password BY 'my_password' + ``` + + :::tip + The password is stored in a SQL text file in `/var/lib/clickhouse/access`, so it's not a good idea to use `plaintext_password`. Try `sha256_password` instead, as demonstrated next... + ::: + +3. The most common option is to use a password that is hashed using SHA-256. ClickHouse will hash the password for you when you specify `IDENTIFIED WITH sha256_password`. For example: + + ```sql + CREATE USER name3 IDENTIFIED WITH sha256_password BY 'my_password' + ``` + + The `name3` user can now log in using `my_password`, but the password is stored as the hashed value above. The following SQL file was created in `/var/lib/clickhouse/access` and gets executed at server startup: + + ```bash + /var/lib/clickhouse/access $ cat 3843f510-6ebd-a52d-72ac-e021686d8a93.sql + ATTACH USER name3 IDENTIFIED WITH sha256_hash BY '0C268556C1680BEF0640AAC1E7187566704208398DA31F03D18C74F5C5BE5053' SALT '4FB16307F5E10048196966DD7E6876AE53DE6A1D1F625488482C75F14A5097C7'; + ``` + + :::tip + If you have already created a hash value and corresponding salt value for a username, then you can use `IDENTIFIED WITH sha256_hash BY 'hash'` or `IDENTIFIED WITH sha256_hash BY 'hash' SALT 'salt'`. For identification with `sha256_hash` using `SALT` - hash must be calculated from concatenation of 'password' and 'salt'. + ::: + +4. The `double_sha1_password` is not typically needed, but comes in handy when working with clients that require it (like the MySQL interface): + + ```sql + CREATE USER name4 IDENTIFIED WITH double_sha1_password BY 'my_password' + ``` + + ClickHouse generates and runs the following query: + + ```response + CREATE USER name4 IDENTIFIED WITH double_sha1_hash BY 'CCD3A959D6A004B9C3807B728BC2E55B67E10518' + ``` + +5. 
The `bcrypt_password` is the most secure option for storing passwords. It uses the [bcrypt](https://en.wikipedia.org/wiki/Bcrypt) algorithm, which is resilient against brute force attacks even if the password hash is compromised. + + ```sql + CREATE USER name5 IDENTIFIED WITH bcrypt_password BY 'my_password' + ``` + + The length of the password is limited to 72 characters with this method. The bcrypt work factor parameter, which defines the amount of computations and time needed to compute the hash and verify the password, can be modified in the server configuration: + + ```xml + <bcrypt_workfactor>12</bcrypt_workfactor> + ``` + + The work factor must be between 4 and 31, with a default value of 12. + +6. The type of the password can also be omitted: + + ```sql + CREATE USER name6 IDENTIFIED BY 'my_password' + ``` + + In this case, ClickHouse will use the default password type specified in the server configuration: + + ```xml + <default_password_type>sha256_password</default_password_type> + ``` + + The available password types are: `plaintext_password`, `sha256_password`, `double_sha1_password`. ## User Host User host is a host from which a connection to ClickHouse server could be established. The host can be specified in the `HOST` query section in the following ways: -- `HOST IP 'ip_address_or_subnetwork'` — User can connect to ClickHouse server only from the specified IP address or a [subnetwork](https://en.wikipedia.org/wiki/Subnetwork). Examples: `HOST IP '192.168.0.0/16'`, `HOST IP '2001:DB8::/32'`. For use in production, only specify `HOST IP` elements (IP addresses and their masks), since using `host` and `host_regexp` might cause extra latency. -- `HOST ANY` — User can connect from any location. This is a default option. -- `HOST LOCAL` — User can connect only locally. -- `HOST NAME 'fqdn'` — User host can be specified as FQDN. For example, `HOST NAME 'mysite.com'`. -- `HOST REGEXP 'regexp'` — You can use [pcre](http://www.pcre.org/) regular expressions when specifying user hosts. For example, `HOST REGEXP '.*\.mysite\.com'`. -- `HOST LIKE 'template'` — Allows you to use the [LIKE](../../../sql-reference/functions/string-search-functions.md#function-like) operator to filter the user hosts. For example, `HOST LIKE '%'` is equivalent to `HOST ANY`, `HOST LIKE '%.mysite.com'` filters all the hosts in the `mysite.com` domain. +- `HOST IP 'ip_address_or_subnetwork'` — User can connect to ClickHouse server only from the specified IP address or a [subnetwork](https://en.wikipedia.org/wiki/Subnetwork). Examples: `HOST IP '192.168.0.0/16'`, `HOST IP '2001:DB8::/32'`. For use in production, only specify `HOST IP` elements (IP addresses and their masks), since using `host` and `host_regexp` might cause extra latency. +- `HOST ANY` — User can connect from any location. This is a default option. +- `HOST LOCAL` — User can connect only locally. +- `HOST NAME 'fqdn'` — User host can be specified as FQDN. For example, `HOST NAME 'mysite.com'`. +- `HOST REGEXP 'regexp'` — You can use [pcre](http://www.pcre.org/) regular expressions when specifying user hosts. For example, `HOST REGEXP '.*\.mysite\.com'`. +- `HOST LIKE 'template'` — Allows you to use the [LIKE](../../../sql-reference/functions/string-search-functions.md#function-like) operator to filter the user hosts. For example, `HOST LIKE '%'` is equivalent to `HOST ANY`, `HOST LIKE '%.mysite.com'` filters all the hosts in the `mysite.com` domain. Another way of specifying host is to use `@` syntax following the username. Examples: -- `CREATE USER mira@'127.0.0.1'` — Equivalent to the `HOST IP` syntax. 
-- `CREATE USER mira@'localhost'` — Equivalent to the `HOST LOCAL` syntax. -- `CREATE USER mira@'192.168.%.%'` — Equivalent to the `HOST LIKE` syntax. +- `CREATE USER mira@'127.0.0.1'` — Equivalent to the `HOST IP` syntax. +- `CREATE USER mira@'localhost'` — Equivalent to the `HOST LOCAL` syntax. +- `CREATE USER mira@'192.168.%.%'` — Equivalent to the `HOST LIKE` syntax. -:::warning +:::tip ClickHouse treats `user_name@'address'` as a username as a whole. Thus, technically you can create multiple users with the same `user_name` and different constructions after `@`. However, we do not recommend to do so. ::: @@ -63,10 +139,10 @@ ClickHouse treats `user_name@'address'` as a username as a whole. Thus, technica Specifies users or roles which are allowed to receive [privileges](../../../sql-reference/statements/grant.md#grant-privileges) from this user on the condition this user has also all required access granted with [GRANT OPTION](../../../sql-reference/statements/grant.md#grant-privigele-syntax). Options of the `GRANTEES` clause: -- `user` — Specifies a user this user can grant privileges to. -- `role` — Specifies a role this user can grant privileges to. -- `ANY` — This user can grant privileges to anyone. It's the default setting. -- `NONE` — This user can grant privileges to none. +- `user` — Specifies a user this user can grant privileges to. +- `role` — Specifies a role this user can grant privileges to. +- `ANY` — This user can grant privileges to anyone. It's the default setting. +- `NONE` — This user can grant privileges to none. You can exclude any user or role by using the `EXCEPT` expression. For example, `CREATE USER user1 GRANTEES ANY EXCEPT user2`. It means if `user1` has some privileges granted with `GRANT OPTION` it will be able to grant those privileges to anyone except `user2`. diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index acdede3c673..10b15638152 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -70,6 +70,12 @@ A materialized view is implemented as follows: when inserting data to the table Materialized views in ClickHouse use **column names** instead of column order during insertion into destination table. If some column names are not present in the `SELECT` query result, ClickHouse uses a default value, even if the column is not [Nullable](../../data-types/nullable.md). A safe practice would be to add aliases for every column when using Materialized views. Materialized views in ClickHouse are implemented more like insert triggers. If there’s some aggregation in the view query, it’s applied only to the batch of freshly inserted data. Any changes to existing data of source table (like update, delete, drop partition, etc.) does not change the materialized view. + +Materialized views in ClickHouse do not have deterministic behaviour in case of errors. This means that blocks that were already written will be preserved in the destination table, but blocks written after the error occurred will not. + +By default, if pushing to one of the views fails, the INSERT query fails too, and some blocks may not be written to the destination table. This can be changed using the `materialized_views_ignore_errors` setting (set it for the `INSERT` query): if you set `materialized_views_ignore_errors=true`, any errors while pushing to views are ignored and all blocks will be written to the destination table. 
+ +Also note that `materialized_views_ignore_errors` is set to `true` by default for `system.*_log` tables. ::: If you specify `POPULATE`, the existing table data is inserted into the view when creating it, as if making a `CREATE TABLE ... AS SELECT ...` . Otherwise, the query contains only the data inserted in the table after creating the view. We **do not recommend** using `POPULATE`, since data inserted in the table during the view creation will not be inserted in it. @@ -229,7 +235,7 @@ Most common uses of live view tables include: - Watching metrics from system tables using periodic refresh. **See Also** -- [ALTER LIVE VIEW](../alter/view.md#alter-live-view) +- [ALTER LIVE VIEW](../alter/view.md#alter-live-view) ## Window View [Experimental] @@ -358,3 +364,4 @@ The window view is useful in the following scenarios: ## Related Content - Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) +- Blog: [Building an Observability Solution with ClickHouse - Part 2 - Traces](https://clickhouse.com/blog/storing-traces-and-spans-open-telemetry-in-clickhouse) diff --git a/docs/en/sql-reference/statements/delete.md b/docs/en/sql-reference/statements/delete.md index e1987e50af4..fa9f08e225f 100644 --- a/docs/en/sql-reference/statements/delete.md +++ b/docs/en/sql-reference/statements/delete.md @@ -8,7 +8,7 @@ title: DELETE Statement --- ``` sql -DELETE FROM [db.]table [ON CLUSTER cluster] [WHERE expr] +DELETE FROM [db.]table [ON CLUSTER cluster] WHERE expr ``` `DELETE FROM` removes rows from the table `[db.]table` that match the expression `expr`. The deleted rows are marked as deleted immediately and will be automatically filtered out of all subsequent queries. Cleanup of data happens asynchronously in the background. This feature is only available for the MergeTree table engine family. @@ -21,21 +21,6 @@ DELETE FROM hits WHERE Title LIKE '%hello%'; Lightweight deletes are asynchronous by default. Set `mutations_sync` equal to 1 to wait for one replica to process the statement, and set `mutations_sync` to 2 to wait for all replicas. -:::note -This feature is experimental and requires you to set `allow_experimental_lightweight_delete` to true: - -```sql -SET allow_experimental_lightweight_delete = true; -``` - -::: - -An [alternative way to delete rows](./alter/delete.md) in ClickHouse is `ALTER TABLE ... DELETE`, which might be more efficient if you do bulk deletes only occasionally and don't need the operation to be applied instantly. In most use cases the new lightweight `DELETE FROM` behavior will be considerably faster. - -:::warning -Even though deletes are becoming more lightweight in ClickHouse, they should still not be used as aggressively as on an OLTP system. Lightweight deletes are currently efficient for wide parts, but for compact parts, they can be a heavyweight operation, and it may be better to use `ALTER TABLE` for some scenarios. -::: - :::note `DELETE FROM` requires the `ALTER DELETE` privilege: ```sql @@ -51,7 +36,7 @@ The idea behind Lightweight Delete is that when a `DELETE FROM table ...` query The mask is implemented as a hidden `_row_exists` system column that stores True for all visible rows and False for deleted ones. This column is only present in a part if some rows in this part were deleted. In other words, the column is not persisted when it has all values equal to True. ## SELECT query -When the column is present `SELECT ... 
FROM table WHERE condition` query internally is extended by an additional predicate on `_row_exists` and becomes similar to +When the column is present `SELECT ... FROM table WHERE condition` query internally is extended by an additional predicate on `_row_exists` and becomes similar to ```sql SELECT ... FROM table PREWHERE _row_exists WHERE condition ``` @@ -70,6 +55,7 @@ With the described implementation now we can see what can negatively affect 'DEL - Table having a very large number of data parts - Having a lot of data in Compact parts—in a Compact part, all columns are stored in one file. -:::note -This implementation might change in the future. -::: + +## Related content + +- Blog: [Handling Updates and Deletes in ClickHouse](https://clickhouse.com/blog/handling-updates-and-deletes-in-clickhouse) diff --git a/docs/en/sql-reference/statements/describe-table.md b/docs/en/sql-reference/statements/describe-table.md index 4864743abbc..20f7061dedd 100644 --- a/docs/en/sql-reference/statements/describe-table.md +++ b/docs/en/sql-reference/statements/describe-table.md @@ -15,18 +15,18 @@ DESC|DESCRIBE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format] The `DESCRIBE` statement returns a row for each table column with the following [String](../../sql-reference/data-types/string.md) values: -- `name` — A column name. -- `type` — A column type. -- `default_type` — A clause that is used in the column [default expression](../../sql-reference/statements/create/table.md#create-default-values): `DEFAULT`, `MATERIALIZED` or `ALIAS`. If there is no default expression, then empty string is returned. -- `default_expression` — An expression specified after the `DEFAULT` clause. -- `comment` — A [column comment](../../sql-reference/statements/alter/column.md#alter_comment-column). -- `codec_expression` — A [codec](../../sql-reference/statements/create/table.md#codecs) that is applied to the column. -- `ttl_expression` — A [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) expression. -- `is_subcolumn` — A flag that equals `1` for internal subcolumns. It is included into the result only if subcolumn description is enabled by the [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting. +- `name` — A column name. +- `type` — A column type. +- `default_type` — A clause that is used in the column [default expression](../../sql-reference/statements/create/table.md#create-default-values): `DEFAULT`, `MATERIALIZED` or `ALIAS`. If there is no default expression, then empty string is returned. +- `default_expression` — An expression specified after the `DEFAULT` clause. +- `comment` — A [column comment](../../sql-reference/statements/alter/column.md#alter_comment-column). +- `codec_expression` — A [codec](../../sql-reference/statements/create/table.md#codecs) that is applied to the column. +- `ttl_expression` — A [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) expression. +- `is_subcolumn` — A flag that equals `1` for internal subcolumns. It is included into the result only if subcolumn description is enabled by the [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting. -All columns in [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) data structures are described separately. The name of each column is prefixed with a parent column name and a dot. 
+All columns in [Nested](../../sql-reference/data-types/nested-data-structures/index.md) data structures are described separately. The name of each column is prefixed with a parent column name and a dot. -To show internal subcolumns of other data types, use the [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting. +To show internal subcolumns of other data types, use the [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting. **Example** @@ -66,4 +66,4 @@ The second query additionally shows subcolumns: **See Also** -- [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting. +- [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting. diff --git a/docs/en/sql-reference/statements/detach.md b/docs/en/sql-reference/statements/detach.md index aa87b1ef613..6f08168bbef 100644 --- a/docs/en/sql-reference/statements/detach.md +++ b/docs/en/sql-reference/statements/detach.md @@ -22,7 +22,7 @@ System log tables can be also attached back (e.g. `query_log`, `text_log`, etc). Note that you can not detach permanently the table which is already detached (temporary). But you can attach it back and then detach permanently again. -Also you can not [DROP](../../sql-reference/statements/drop#drop-table) the detached table, or [CREATE TABLE](../../sql-reference/statements/create/table.md) with the same name as detached permanently, or replace it with the other table with [RENAME TABLE](../../sql-reference/statements/rename.md) query. +Also you can not [DROP](../../sql-reference/statements/drop.md#drop-table) the detached table, or [CREATE TABLE](../../sql-reference/statements/create/table.md) with the same name as detached permanently, or replace it with the other table with [RENAME TABLE](../../sql-reference/statements/rename.md) query. The `SYNC` modifier executes the action without delay. @@ -72,5 +72,5 @@ Code: 60. DB::Exception: Received from localhost:9000. DB::Exception: Table defa **See Also** -- [Materialized View](../../sql-reference/statements/create/view.md#materialized) -- [Dictionaries](../../sql-reference/dictionaries/index.md) +- [Materialized View](../../sql-reference/statements/create/view.md#materialized) +- [Dictionaries](../../sql-reference/dictionaries/index.md) diff --git a/docs/en/sql-reference/statements/drop.md b/docs/en/sql-reference/statements/drop.md index 8a83a8fae1d..b6208c2fd52 100644 --- a/docs/en/sql-reference/statements/drop.md +++ b/docs/en/sql-reference/statements/drop.md @@ -22,6 +22,10 @@ DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster] [SYNC] Deletes the table. 
+:::tip +Also see [UNDROP TABLE](/docs/en/sql-reference/statements/undrop.md) +::: + Syntax: ``` sql diff --git a/docs/en/sql-reference/statements/exchange.md b/docs/en/sql-reference/statements/exchange.md index 33f3e08d547..babb29ae977 100644 --- a/docs/en/sql-reference/statements/exchange.md +++ b/docs/en/sql-reference/statements/exchange.md @@ -41,4 +41,4 @@ EXCHANGE DICTIONARIES [db0.]dict_A AND [db1.]dict_B [ON CLUSTER cluster] **See Also** -- [Dictionaries](../../sql-reference/dictionaries/index.md) +- [Dictionaries](../../sql-reference/dictionaries/index.md) diff --git a/docs/en/sql-reference/statements/explain.md b/docs/en/sql-reference/statements/explain.md index 5081abf2fb8..1c93707402f 100644 --- a/docs/en/sql-reference/statements/explain.md +++ b/docs/en/sql-reference/statements/explain.md @@ -45,11 +45,11 @@ Union ## EXPLAIN Types -- `AST` — Abstract syntax tree. -- `SYNTAX` — Query text after AST-level optimizations. -- `QUERY TREE` — Query tree after Query Tree level optimizations. -- `PLAN` — Query execution plan. -- `PIPELINE` — Query execution pipeline. +- `AST` — Abstract syntax tree. +- `SYNTAX` — Query text after AST-level optimizations. +- `QUERY TREE` — Query tree after Query Tree level optimizations. +- `PLAN` — Query execution plan. +- `PIPELINE` — Query execution pipeline. ### EXPLAIN AST @@ -115,9 +115,9 @@ CROSS JOIN system.numbers AS c Settings: -- `run_passes` — Run all query tree passes before dumping the query tree. Defaul: `1`. -- `dump_passes` — Dump information about used passes before dumping the query tree. Default: `0`. -- `passes` — Specifies how many passes to run. If set to `-1`, runs all the passes. Default: `-1`. +- `run_passes` — Run all query tree passes before dumping the query tree. Default: `1`. +- `dump_passes` — Dump information about used passes before dumping the query tree. Default: `0`. +- `passes` — Specifies how many passes to run. If set to `-1`, runs all the passes. Default: `-1`. Example: ```sql @@ -143,11 +143,11 @@ Dump query plan steps. Settings: -- `header` — Prints output header for step. Default: 0. -- `description` — Prints step description. Default: 1. -- `indexes` — Shows used indexes, the number of filtered parts and the number of filtered granules for every index applied. Default: 0. Supported for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables. -- `actions` — Prints detailed information about step actions. Default: 0. -- `json` — Prints query plan steps as a row in [JSON](../../interfaces/formats.md#json) format. Default: 0. It is recommended to use [TSVRaw](../../interfaces/formats.md#tabseparatedraw) format to avoid unnecessary escaping. +- `header` — Prints output header for step. Default: 0. +- `description` — Prints step description. Default: 1. +- `indexes` — Shows used indexes, the number of filtered parts and the number of filtered granules for every index applied. Default: 0. Supported for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables. +- `actions` — Prints detailed information about step actions. Default: 0. +- `json` — Prints query plan steps as a row in [JSON](../../interfaces/formats.md#json) format. Default: 0. It is recommended to use [TSVRaw](../../interfaces/formats.md#tabseparatedraw) format to avoid unnecessary escaping. Example: @@ -276,12 +276,12 @@ EXPLAIN json = 1, description = 0, header = 1 SELECT 1, 2 + dummy; With `indexes` = 1, the `Indexes` key is added. It contains an array of used indexes. 
Each index is described as JSON with `Type` key (a string `MinMax`, `Partition`, `PrimaryKey` or `Skip`) and optional keys: -- `Name` — The index name (currently only used for `Skip` indexes). -- `Keys` — The array of columns used by the index. -- `Condition` — The used condition. -- `Description` — The index description (currently only used for `Skip` indexes). -- `Parts` — The number of parts before/after the index is applied. -- `Granules` — The number of granules before/after the index is applied. +- `Name` — The index name (currently only used for `Skip` indexes). +- `Keys` — The array of columns used by the index. +- `Condition` — The used condition. +- `Description` — The index description (currently only used for `Skip` indexes). +- `Parts` — The number of parts before/after the index is applied. +- `Granules` — The number of granules before/after the index is applied. Example: @@ -380,9 +380,9 @@ EXPLAIN json = 1, actions = 1, description = 0 SELECT 1 FORMAT TSVRaw; Settings: -- `header` — Prints header for each output port. Default: 0. -- `graph` — Prints a graph described in the [DOT](https://en.wikipedia.org/wiki/DOT_(graph_description_language)) graph description language. Default: 0. -- `compact` — Prints graph in compact mode if `graph` setting is enabled. Default: 1. +- `header` — Prints header for each output port. Default: 0. +- `graph` — Prints a graph described in the [DOT](https://en.wikipedia.org/wiki/DOT_(graph_description_language)) graph description language. Default: 0. +- `compact` — Prints graph in compact mode if `graph` setting is enabled. Default: 1. Example: diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md index 3383ea70a2b..e6073f3523a 100644 --- a/docs/en/sql-reference/statements/grant.md +++ b/docs/en/sql-reference/statements/grant.md @@ -6,10 +6,10 @@ sidebar_label: GRANT # GRANT Statement -- Grants [privileges](#grant-privileges) to ClickHouse user accounts or roles. -- Assigns roles to user accounts or to the other roles. +- Grants [privileges](#grant-privileges) to ClickHouse user accounts or roles. +- Assigns roles to user accounts or to the other roles. -To revoke privileges, use the [REVOKE](../../sql-reference/statements/revoke.md) statement. Also you can list granted privileges with the [SHOW GRANTS](../../sql-reference/statements/show.md#show-grants-statement) statement. +To revoke privileges, use the [REVOKE](../../sql-reference/statements/revoke.md) statement. Also you can list granted privileges with the [SHOW GRANTS](../../sql-reference/statements/show.md#show-grants) statement. ## Granting Privilege Syntax @@ -17,9 +17,9 @@ To revoke privileges, use the [REVOKE](../../sql-reference/statements/revoke.md) GRANT [ON CLUSTER cluster_name] privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*} TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION] [WITH REPLACE OPTION] ``` -- `privilege` — Type of privilege. -- `role` — ClickHouse user role. -- `user` — ClickHouse user account. +- `privilege` — Type of privilege. +- `role` — ClickHouse user role. +- `user` — ClickHouse user account. The `WITH GRANT OPTION` clause grants `user` or `role` with permission to execute the `GRANT` query. Users can grant privileges of the same scope they have and less. The `WITH REPLACE OPTION` clause replace old privileges by new privileges for the `user` or `role`, if is not specified it appends privileges. 
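+As a hedged sketch of the difference (the user and table names are illustrative): without `WITH REPLACE OPTION` each grant is appended to the existing ones, while with it the new grant replaces them: + +``` sql +GRANT SELECT ON db.table TO john; +-- Appends by default: john now has both SELECT and INSERT +GRANT INSERT ON db.table TO john; +-- Replaces: john is left with only INSERT on db.table +GRANT INSERT ON db.table TO john WITH REPLACE OPTION; +```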
@@ -30,12 +30,24 @@ The `WITH REPLACE OPTION` clause replace old privileges by new privileges for th GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_USER} [,...] [WITH ADMIN OPTION] [WITH REPLACE OPTION] ``` -- `role` — ClickHouse user role. -- `user` — ClickHouse user account. +- `role` — ClickHouse user role. +- `user` — ClickHouse user account. The `WITH ADMIN OPTION` clause grants [ADMIN OPTION](#admin-option-privilege) privilege to `user` or `role`. The `WITH REPLACE OPTION` clause replace old roles by new role for the `user` or `role`, if is not specified it appends roles. +## Grant Current Grants Syntax +``` sql +GRANT CURRENT GRANTS{(privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*}) | ON {db.table|db.*|*.*|table|*}} TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION] [WITH REPLACE OPTION] +``` + +- `privilege` — Type of privilege. +- `role` — ClickHouse user role. +- `user` — ClickHouse user account. + +Using the `CURRENT GRANTS` statement allows you to give all specified privileges to the given user or role. +If none of the privileges were specified, then the given user or role will receive all available privileges for `CURRENT_USER`. + ## Usage To use `GRANT`, your account must have the `GRANT OPTION` privilege. You can grant privileges only inside the scope of your account privileges. @@ -48,9 +60,9 @@ GRANT SELECT(x,y) ON db.table TO john WITH GRANT OPTION It means that `john` has the permission to execute: -- `SELECT x,y FROM db.table`. -- `SELECT x FROM db.table`. -- `SELECT y FROM db.table`. +- `SELECT x,y FROM db.table`. +- `SELECT x FROM db.table`. +- `SELECT y FROM db.table`. `john` can’t execute `SELECT z FROM db.table`. The `SELECT * FROM db.table` also is not available. Processing this query, ClickHouse does not return any data, even `x` and `y`. The only exception is if a table contains only `x` and `y` columns. In this case ClickHouse returns all the data. @@ -70,150 +82,152 @@ Privileges have a hierarchical structure. 
A set of permitted queries depends on Hierarchy of privileges: -- [SELECT](#grant-select) -- [INSERT](#grant-insert) -- [ALTER](#grant-alter) - - `ALTER TABLE` - - `ALTER UPDATE` - - `ALTER DELETE` - - `ALTER COLUMN` - - `ALTER ADD COLUMN` - - `ALTER DROP COLUMN` - - `ALTER MODIFY COLUMN` - - `ALTER COMMENT COLUMN` - - `ALTER CLEAR COLUMN` - - `ALTER RENAME COLUMN` - - `ALTER INDEX` - - `ALTER ORDER BY` - - `ALTER SAMPLE BY` - - `ALTER ADD INDEX` - - `ALTER DROP INDEX` - - `ALTER MATERIALIZE INDEX` - - `ALTER CLEAR INDEX` - - `ALTER CONSTRAINT` - - `ALTER ADD CONSTRAINT` - - `ALTER DROP CONSTRAINT` - - `ALTER TTL` - - `ALTER MATERIALIZE TTL` - - `ALTER SETTINGS` - - `ALTER MOVE PARTITION` - - `ALTER FETCH PARTITION` - - `ALTER FREEZE PARTITION` - - `ALTER VIEW` - - `ALTER VIEW REFRESH` - - `ALTER VIEW MODIFY QUERY` -- [CREATE](#grant-create) - - `CREATE DATABASE` - - `CREATE TABLE` - - `CREATE TEMPORARY TABLE` - - `CREATE VIEW` - - `CREATE DICTIONARY` - - `CREATE FUNCTION` -- [DROP](#grant-drop) - - `DROP DATABASE` - - `DROP TABLE` - - `DROP VIEW` - - `DROP DICTIONARY` - - `DROP FUNCTION` -- [TRUNCATE](#grant-truncate) -- [OPTIMIZE](#grant-optimize) -- [SHOW](#grant-show) - - `SHOW DATABASES` - - `SHOW TABLES` - - `SHOW COLUMNS` - - `SHOW DICTIONARIES` -- [KILL QUERY](#grant-kill-query) -- [ACCESS MANAGEMENT](#grant-access-management) - - `CREATE USER` - - `ALTER USER` - - `DROP USER` - - `CREATE ROLE` - - `ALTER ROLE` - - `DROP ROLE` - - `CREATE ROW POLICY` - - `ALTER ROW POLICY` - - `DROP ROW POLICY` - - `CREATE QUOTA` - - `ALTER QUOTA` - - `DROP QUOTA` - - `CREATE SETTINGS PROFILE` - - `ALTER SETTINGS PROFILE` - - `DROP SETTINGS PROFILE` - - `SHOW ACCESS` - - `SHOW_USERS` - - `SHOW_ROLES` - - `SHOW_ROW_POLICIES` - - `SHOW_QUOTAS` - - `SHOW_SETTINGS_PROFILES` - - `ROLE ADMIN` -- [SYSTEM](#grant-system) - - `SYSTEM SHUTDOWN` - - `SYSTEM DROP CACHE` - - `SYSTEM DROP DNS CACHE` - - `SYSTEM DROP MARK CACHE` - - `SYSTEM DROP UNCOMPRESSED CACHE` - - `SYSTEM RELOAD` - - `SYSTEM RELOAD CONFIG` - - `SYSTEM RELOAD DICTIONARY` - - `SYSTEM RELOAD EMBEDDED DICTIONARIES` - - `SYSTEM RELOAD FUNCTION` - - `SYSTEM RELOAD FUNCTIONS` - - `SYSTEM MERGES` - - `SYSTEM TTL MERGES` - - `SYSTEM FETCHES` - - `SYSTEM MOVES` - - `SYSTEM SENDS` - - `SYSTEM DISTRIBUTED SENDS` - - `SYSTEM REPLICATED SENDS` - - `SYSTEM REPLICATION QUEUES` - - `SYSTEM SYNC REPLICA` - - `SYSTEM RESTART REPLICA` - - `SYSTEM FLUSH` - - `SYSTEM FLUSH DISTRIBUTED` - - `SYSTEM FLUSH LOGS` - - `CLUSTER` (see also `access_control_improvements.on_cluster_queries_require_cluster_grant` configuration directive) -- [INTROSPECTION](#grant-introspection) - - `addressToLine` - - `addressToLineWithInlines` - - `addressToSymbol` - - `demangle` -- [SOURCES](#grant-sources) - - `FILE` - - `URL` - - `REMOTE` - - `YSQL` - - `ODBC` - - `JDBC` - - `HDFS` - - `S3` -- [dictGet](#grant-dictget) +- [SELECT](#grant-select) +- [INSERT](#grant-insert) +- [ALTER](#grant-alter) + - `ALTER TABLE` + - `ALTER UPDATE` + - `ALTER DELETE` + - `ALTER COLUMN` + - `ALTER ADD COLUMN` + - `ALTER DROP COLUMN` + - `ALTER MODIFY COLUMN` + - `ALTER COMMENT COLUMN` + - `ALTER CLEAR COLUMN` + - `ALTER RENAME COLUMN` + - `ALTER INDEX` + - `ALTER ORDER BY` + - `ALTER SAMPLE BY` + - `ALTER ADD INDEX` + - `ALTER DROP INDEX` + - `ALTER MATERIALIZE INDEX` + - `ALTER CLEAR INDEX` + - `ALTER CONSTRAINT` + - `ALTER ADD CONSTRAINT` + - `ALTER DROP CONSTRAINT` + - `ALTER TTL` + - `ALTER MATERIALIZE TTL` + - `ALTER SETTINGS` + - `ALTER MOVE PARTITION` + - `ALTER FETCH PARTITION` + - `ALTER FREEZE 
PARTITION` + - `ALTER VIEW` + - `ALTER VIEW REFRESH` + - `ALTER VIEW MODIFY QUERY` +- [CREATE](#grant-create) + - `CREATE DATABASE` + - `CREATE TABLE` + - `CREATE ARBITRARY TEMPORARY TABLE` + - `CREATE TEMPORARY TABLE` + - `CREATE VIEW` + - `CREATE DICTIONARY` + - `CREATE FUNCTION` +- [DROP](#grant-drop) + - `DROP DATABASE` + - `DROP TABLE` + - `DROP VIEW` + - `DROP DICTIONARY` + - `DROP FUNCTION` +- [TRUNCATE](#grant-truncate) +- [OPTIMIZE](#grant-optimize) +- [SHOW](#grant-show) + - `SHOW DATABASES` + - `SHOW TABLES` + - `SHOW COLUMNS` + - `SHOW DICTIONARIES` +- [KILL QUERY](#grant-kill-query) +- [ACCESS MANAGEMENT](#grant-access-management) + - `CREATE USER` + - `ALTER USER` + - `DROP USER` + - `CREATE ROLE` + - `ALTER ROLE` + - `DROP ROLE` + - `CREATE ROW POLICY` + - `ALTER ROW POLICY` + - `DROP ROW POLICY` + - `CREATE QUOTA` + - `ALTER QUOTA` + - `DROP QUOTA` + - `CREATE SETTINGS PROFILE` + - `ALTER SETTINGS PROFILE` + - `DROP SETTINGS PROFILE` + - `SHOW ACCESS` + - `SHOW_USERS` + - `SHOW_ROLES` + - `SHOW_ROW_POLICIES` + - `SHOW_QUOTAS` + - `SHOW_SETTINGS_PROFILES` + - `ROLE ADMIN` +- [SYSTEM](#grant-system) + - `SYSTEM SHUTDOWN` + - `SYSTEM DROP CACHE` + - `SYSTEM DROP DNS CACHE` + - `SYSTEM DROP MARK CACHE` + - `SYSTEM DROP UNCOMPRESSED CACHE` + - `SYSTEM RELOAD` + - `SYSTEM RELOAD CONFIG` + - `SYSTEM RELOAD DICTIONARY` + - `SYSTEM RELOAD EMBEDDED DICTIONARIES` + - `SYSTEM RELOAD FUNCTION` + - `SYSTEM RELOAD FUNCTIONS` + - `SYSTEM MERGES` + - `SYSTEM TTL MERGES` + - `SYSTEM FETCHES` + - `SYSTEM MOVES` + - `SYSTEM SENDS` + - `SYSTEM DISTRIBUTED SENDS` + - `SYSTEM REPLICATED SENDS` + - `SYSTEM REPLICATION QUEUES` + - `SYSTEM SYNC REPLICA` + - `SYSTEM RESTART REPLICA` + - `SYSTEM FLUSH` + - `SYSTEM FLUSH DISTRIBUTED` + - `SYSTEM FLUSH LOGS` + - `CLUSTER` (see also `access_control_improvements.on_cluster_queries_require_cluster_grant` configuration directive) +- [INTROSPECTION](#grant-introspection) + - `addressToLine` + - `addressToLineWithInlines` + - `addressToSymbol` + - `demangle` +- [SOURCES](#grant-sources) + - `FILE` + - `URL` + - `REMOTE` + - `YSQL` + - `ODBC` + - `JDBC` + - `HDFS` + - `S3` +- [dictGet](#grant-dictget) +- [displaySecretsInShowAndSelect](#grant-display-secrets) Examples of how this hierarchy is treated: -- The `ALTER` privilege includes all other `ALTER*` privileges. -- `ALTER CONSTRAINT` includes `ALTER ADD CONSTRAINT` and `ALTER DROP CONSTRAINT` privileges. +- The `ALTER` privilege includes all other `ALTER*` privileges. +- `ALTER CONSTRAINT` includes `ALTER ADD CONSTRAINT` and `ALTER DROP CONSTRAINT` privileges. Privileges are applied at different levels. Knowing of a level suggests syntax available for privilege. Levels (from lower to higher): -- `COLUMN` — Privilege can be granted for column, table, database, or globally. -- `TABLE` — Privilege can be granted for table, database, or globally. -- `VIEW` — Privilege can be granted for view, database, or globally. -- `DICTIONARY` — Privilege can be granted for dictionary, database, or globally. -- `DATABASE` — Privilege can be granted for database or globally. -- `GLOBAL` — Privilege can be granted only globally. -- `GROUP` — Groups privileges of different levels. When `GROUP`-level privilege is granted, only that privileges from the group are granted which correspond to the used syntax. +- `COLUMN` — Privilege can be granted for column, table, database, or globally. +- `TABLE` — Privilege can be granted for table, database, or globally. +- `VIEW` — Privilege can be granted for view, database, or globally. 
+- `DICTIONARY` — Privilege can be granted for dictionary, database, or globally. +- `DATABASE` — Privilege can be granted for database or globally. +- `GLOBAL` — Privilege can be granted only globally. +- `GROUP` — Groups privileges of different levels. When `GROUP`-level privilege is granted, only that privileges from the group are granted which correspond to the used syntax. Examples of allowed syntax: -- `GRANT SELECT(x) ON db.table TO user` -- `GRANT SELECT ON db.* TO user` +- `GRANT SELECT(x) ON db.table TO user` +- `GRANT SELECT ON db.* TO user` Examples of disallowed syntax: -- `GRANT CREATE USER(x) ON db.table TO user` -- `GRANT CREATE USER ON db.* TO user` +- `GRANT CREATE USER(x) ON db.table TO user` +- `GRANT CREATE USER ON db.* TO user` The special privilege [ALL](#grant-all) grants all the privileges to a user account or a role. @@ -263,73 +277,74 @@ The granted privilege allows `john` to insert data to the `x` and/or `y` columns Allows executing [ALTER](../../sql-reference/statements/alter/index.md) queries according to the following hierarchy of privileges: -- `ALTER`. Level: `COLUMN`. - - `ALTER TABLE`. Level: `GROUP` - - `ALTER UPDATE`. Level: `COLUMN`. Aliases: `UPDATE` - - `ALTER DELETE`. Level: `COLUMN`. Aliases: `DELETE` - - `ALTER COLUMN`. Level: `GROUP` - - `ALTER ADD COLUMN`. Level: `COLUMN`. Aliases: `ADD COLUMN` - - `ALTER DROP COLUMN`. Level: `COLUMN`. Aliases: `DROP COLUMN` - - `ALTER MODIFY COLUMN`. Level: `COLUMN`. Aliases: `MODIFY COLUMN` - - `ALTER COMMENT COLUMN`. Level: `COLUMN`. Aliases: `COMMENT COLUMN` - - `ALTER CLEAR COLUMN`. Level: `COLUMN`. Aliases: `CLEAR COLUMN` - - `ALTER RENAME COLUMN`. Level: `COLUMN`. Aliases: `RENAME COLUMN` - - `ALTER INDEX`. Level: `GROUP`. Aliases: `INDEX` - - `ALTER ORDER BY`. Level: `TABLE`. Aliases: `ALTER MODIFY ORDER BY`, `MODIFY ORDER BY` - - `ALTER SAMPLE BY`. Level: `TABLE`. Aliases: `ALTER MODIFY SAMPLE BY`, `MODIFY SAMPLE BY` - - `ALTER ADD INDEX`. Level: `TABLE`. Aliases: `ADD INDEX` - - `ALTER DROP INDEX`. Level: `TABLE`. Aliases: `DROP INDEX` - - `ALTER MATERIALIZE INDEX`. Level: `TABLE`. Aliases: `MATERIALIZE INDEX` - - `ALTER CLEAR INDEX`. Level: `TABLE`. Aliases: `CLEAR INDEX` - - `ALTER CONSTRAINT`. Level: `GROUP`. Aliases: `CONSTRAINT` - - `ALTER ADD CONSTRAINT`. Level: `TABLE`. Aliases: `ADD CONSTRAINT` - - `ALTER DROP CONSTRAINT`. Level: `TABLE`. Aliases: `DROP CONSTRAINT` - - `ALTER TTL`. Level: `TABLE`. Aliases: `ALTER MODIFY TTL`, `MODIFY TTL` - - `ALTER MATERIALIZE TTL`. Level: `TABLE`. Aliases: `MATERIALIZE TTL` - - `ALTER SETTINGS`. Level: `TABLE`. Aliases: `ALTER SETTING`, `ALTER MODIFY SETTING`, `MODIFY SETTING` - - `ALTER MOVE PARTITION`. Level: `TABLE`. Aliases: `ALTER MOVE PART`, `MOVE PARTITION`, `MOVE PART` - - `ALTER FETCH PARTITION`. Level: `TABLE`. Aliases: `ALTER FETCH PART`, `FETCH PARTITION`, `FETCH PART` - - `ALTER FREEZE PARTITION`. Level: `TABLE`. Aliases: `FREEZE PARTITION` - - `ALTER VIEW` Level: `GROUP` - - `ALTER VIEW REFRESH`. Level: `VIEW`. Aliases: `ALTER LIVE VIEW REFRESH`, `REFRESH VIEW` - - `ALTER VIEW MODIFY QUERY`. Level: `VIEW`. Aliases: `ALTER TABLE MODIFY QUERY` +- `ALTER`. Level: `COLUMN`. + - `ALTER TABLE`. Level: `GROUP` + - `ALTER UPDATE`. Level: `COLUMN`. Aliases: `UPDATE` + - `ALTER DELETE`. Level: `COLUMN`. Aliases: `DELETE` + - `ALTER COLUMN`. Level: `GROUP` + - `ALTER ADD COLUMN`. Level: `COLUMN`. Aliases: `ADD COLUMN` + - `ALTER DROP COLUMN`. Level: `COLUMN`. Aliases: `DROP COLUMN` + - `ALTER MODIFY COLUMN`. Level: `COLUMN`. 
Aliases: `MODIFY COLUMN` + - `ALTER COMMENT COLUMN`. Level: `COLUMN`. Aliases: `COMMENT COLUMN` + - `ALTER CLEAR COLUMN`. Level: `COLUMN`. Aliases: `CLEAR COLUMN` + - `ALTER RENAME COLUMN`. Level: `COLUMN`. Aliases: `RENAME COLUMN` + - `ALTER INDEX`. Level: `GROUP`. Aliases: `INDEX` + - `ALTER ORDER BY`. Level: `TABLE`. Aliases: `ALTER MODIFY ORDER BY`, `MODIFY ORDER BY` + - `ALTER SAMPLE BY`. Level: `TABLE`. Aliases: `ALTER MODIFY SAMPLE BY`, `MODIFY SAMPLE BY` + - `ALTER ADD INDEX`. Level: `TABLE`. Aliases: `ADD INDEX` + - `ALTER DROP INDEX`. Level: `TABLE`. Aliases: `DROP INDEX` + - `ALTER MATERIALIZE INDEX`. Level: `TABLE`. Aliases: `MATERIALIZE INDEX` + - `ALTER CLEAR INDEX`. Level: `TABLE`. Aliases: `CLEAR INDEX` + - `ALTER CONSTRAINT`. Level: `GROUP`. Aliases: `CONSTRAINT` + - `ALTER ADD CONSTRAINT`. Level: `TABLE`. Aliases: `ADD CONSTRAINT` + - `ALTER DROP CONSTRAINT`. Level: `TABLE`. Aliases: `DROP CONSTRAINT` + - `ALTER TTL`. Level: `TABLE`. Aliases: `ALTER MODIFY TTL`, `MODIFY TTL` + - `ALTER MATERIALIZE TTL`. Level: `TABLE`. Aliases: `MATERIALIZE TTL` + - `ALTER SETTINGS`. Level: `TABLE`. Aliases: `ALTER SETTING`, `ALTER MODIFY SETTING`, `MODIFY SETTING` + - `ALTER MOVE PARTITION`. Level: `TABLE`. Aliases: `ALTER MOVE PART`, `MOVE PARTITION`, `MOVE PART` + - `ALTER FETCH PARTITION`. Level: `TABLE`. Aliases: `ALTER FETCH PART`, `FETCH PARTITION`, `FETCH PART` + - `ALTER FREEZE PARTITION`. Level: `TABLE`. Aliases: `FREEZE PARTITION` + - `ALTER VIEW` Level: `GROUP` + - `ALTER VIEW REFRESH`. Level: `VIEW`. Aliases: `ALTER LIVE VIEW REFRESH`, `REFRESH VIEW` + - `ALTER VIEW MODIFY QUERY`. Level: `VIEW`. Aliases: `ALTER TABLE MODIFY QUERY` Examples of how this hierarchy is treated: -- The `ALTER` privilege includes all other `ALTER*` privileges. -- `ALTER CONSTRAINT` includes `ALTER ADD CONSTRAINT` and `ALTER DROP CONSTRAINT` privileges. +- The `ALTER` privilege includes all other `ALTER*` privileges. +- `ALTER CONSTRAINT` includes `ALTER ADD CONSTRAINT` and `ALTER DROP CONSTRAINT` privileges. **Notes** -- The `MODIFY SETTING` privilege allows modifying table engine settings. It does not affect settings or server configuration parameters. -- The `ATTACH` operation needs the [CREATE](#grant-create) privilege. -- The `DETACH` operation needs the [DROP](#grant-drop) privilege. -- To stop mutation by the [KILL MUTATION](../../sql-reference/statements/kill.md#kill-mutation) query, you need to have a privilege to start this mutation. For example, if you want to stop the `ALTER UPDATE` query, you need the `ALTER UPDATE`, `ALTER TABLE`, or `ALTER` privilege. +- The `MODIFY SETTING` privilege allows modifying table engine settings. It does not affect settings or server configuration parameters. +- The `ATTACH` operation needs the [CREATE](#grant-create) privilege. +- The `DETACH` operation needs the [DROP](#grant-drop) privilege. +- To stop mutation by the [KILL MUTATION](../../sql-reference/statements/kill.md#kill-mutation) query, you need to have a privilege to start this mutation. For example, if you want to stop the `ALTER UPDATE` query, you need the `ALTER UPDATE`, `ALTER TABLE`, or `ALTER` privilege. ### CREATE Allows executing [CREATE](../../sql-reference/statements/create/index.md) and [ATTACH](../../sql-reference/statements/attach.md) DDL-queries according to the following hierarchy of privileges: -- `CREATE`. Level: `GROUP` - - `CREATE DATABASE`. Level: `DATABASE` - - `CREATE TABLE`. Level: `TABLE` - - `CREATE TEMPORARY TABLE`. Level: `GLOBAL` - - `CREATE VIEW`. 
Level: `VIEW` - - `CREATE DICTIONARY`. Level: `DICTIONARY` +- `CREATE`. Level: `GROUP` + - `CREATE DATABASE`. Level: `DATABASE` + - `CREATE TABLE`. Level: `TABLE` + - `CREATE ARBITRARY TEMPORARY TABLE`. Level: `GLOBAL` + - `CREATE TEMPORARY TABLE`. Level: `GLOBAL` + - `CREATE VIEW`. Level: `VIEW` + - `CREATE DICTIONARY`. Level: `DICTIONARY` **Notes** -- To delete the created table, a user needs [DROP](#grant-drop). +- To delete the created table, a user needs [DROP](#grant-drop). ### DROP Allows executing [DROP](../../sql-reference/statements/drop.md) and [DETACH](../../sql-reference/statements/detach.md) queries according to the following hierarchy of privileges: -- `DROP`. Level: `GROUP` - - `DROP DATABASE`. Level: `DATABASE` - - `DROP TABLE`. Level: `TABLE` - - `DROP VIEW`. Level: `VIEW` - - `DROP DICTIONARY`. Level: `DICTIONARY` +- `DROP`. Level: `GROUP` + - `DROP DATABASE`. Level: `DATABASE` + - `DROP TABLE`. Level: `TABLE` + - `DROP VIEW`. Level: `VIEW` + - `DROP DICTIONARY`. Level: `DICTIONARY` ### TRUNCATE @@ -347,11 +362,11 @@ Privilege level: `TABLE`. Allows executing `SHOW`, `DESCRIBE`, `USE`, and `EXISTS` queries according to the following hierarchy of privileges: -- `SHOW`. Level: `GROUP` - - `SHOW DATABASES`. Level: `DATABASE`. Allows to execute `SHOW DATABASES`, `SHOW CREATE DATABASE`, `USE ` queries. - - `SHOW TABLES`. Level: `TABLE`. Allows to execute `SHOW TABLES`, `EXISTS `, `CHECK
` queries. - - `SHOW COLUMNS`. Level: `COLUMN`. Allows to execute `SHOW CREATE TABLE`, `DESCRIBE` queries. - - `SHOW DICTIONARIES`. Level: `DICTIONARY`. Allows to execute `SHOW DICTIONARIES`, `SHOW CREATE DICTIONARY`, `EXISTS ` queries. +- `SHOW`. Level: `GROUP` + - `SHOW DATABASES`. Level: `DATABASE`. Allows to execute `SHOW DATABASES`, `SHOW CREATE DATABASE`, `USE ` queries. + - `SHOW TABLES`. Level: `TABLE`. Allows to execute `SHOW TABLES`, `EXISTS
<table>`, `CHECK <table>
` queries. + - `SHOW COLUMNS`. Level: `COLUMN`. Allows to execute `SHOW CREATE TABLE`, `DESCRIBE` queries. + - `SHOW DICTIONARIES`. Level: `DICTIONARY`. Allows to execute `SHOW DICTIONARIES`, `SHOW CREATE DICTIONARY`, `EXISTS ` queries. **Notes** @@ -371,29 +386,29 @@ Privilege level: `GLOBAL`. Allows a user to execute queries that manage users, roles and row policies. -- `ACCESS MANAGEMENT`. Level: `GROUP` - - `CREATE USER`. Level: `GLOBAL` - - `ALTER USER`. Level: `GLOBAL` - - `DROP USER`. Level: `GLOBAL` - - `CREATE ROLE`. Level: `GLOBAL` - - `ALTER ROLE`. Level: `GLOBAL` - - `DROP ROLE`. Level: `GLOBAL` - - `ROLE ADMIN`. Level: `GLOBAL` - - `CREATE ROW POLICY`. Level: `GLOBAL`. Aliases: `CREATE POLICY` - - `ALTER ROW POLICY`. Level: `GLOBAL`. Aliases: `ALTER POLICY` - - `DROP ROW POLICY`. Level: `GLOBAL`. Aliases: `DROP POLICY` - - `CREATE QUOTA`. Level: `GLOBAL` - - `ALTER QUOTA`. Level: `GLOBAL` - - `DROP QUOTA`. Level: `GLOBAL` - - `CREATE SETTINGS PROFILE`. Level: `GLOBAL`. Aliases: `CREATE PROFILE` - - `ALTER SETTINGS PROFILE`. Level: `GLOBAL`. Aliases: `ALTER PROFILE` - - `DROP SETTINGS PROFILE`. Level: `GLOBAL`. Aliases: `DROP PROFILE` - - `SHOW ACCESS`. Level: `GROUP` - - `SHOW_USERS`. Level: `GLOBAL`. Aliases: `SHOW CREATE USER` - - `SHOW_ROLES`. Level: `GLOBAL`. Aliases: `SHOW CREATE ROLE` - - `SHOW_ROW_POLICIES`. Level: `GLOBAL`. Aliases: `SHOW POLICIES`, `SHOW CREATE ROW POLICY`, `SHOW CREATE POLICY` - - `SHOW_QUOTAS`. Level: `GLOBAL`. Aliases: `SHOW CREATE QUOTA` - - `SHOW_SETTINGS_PROFILES`. Level: `GLOBAL`. Aliases: `SHOW PROFILES`, `SHOW CREATE SETTINGS PROFILE`, `SHOW CREATE PROFILE` +- `ACCESS MANAGEMENT`. Level: `GROUP` + - `CREATE USER`. Level: `GLOBAL` + - `ALTER USER`. Level: `GLOBAL` + - `DROP USER`. Level: `GLOBAL` + - `CREATE ROLE`. Level: `GLOBAL` + - `ALTER ROLE`. Level: `GLOBAL` + - `DROP ROLE`. Level: `GLOBAL` + - `ROLE ADMIN`. Level: `GLOBAL` + - `CREATE ROW POLICY`. Level: `GLOBAL`. Aliases: `CREATE POLICY` + - `ALTER ROW POLICY`. Level: `GLOBAL`. Aliases: `ALTER POLICY` + - `DROP ROW POLICY`. Level: `GLOBAL`. Aliases: `DROP POLICY` + - `CREATE QUOTA`. Level: `GLOBAL` + - `ALTER QUOTA`. Level: `GLOBAL` + - `DROP QUOTA`. Level: `GLOBAL` + - `CREATE SETTINGS PROFILE`. Level: `GLOBAL`. Aliases: `CREATE PROFILE` + - `ALTER SETTINGS PROFILE`. Level: `GLOBAL`. Aliases: `ALTER PROFILE` + - `DROP SETTINGS PROFILE`. Level: `GLOBAL`. Aliases: `DROP PROFILE` + - `SHOW ACCESS`. Level: `GROUP` + - `SHOW_USERS`. Level: `GLOBAL`. Aliases: `SHOW CREATE USER` + - `SHOW_ROLES`. Level: `GLOBAL`. Aliases: `SHOW CREATE ROLE` + - `SHOW_ROW_POLICIES`. Level: `GLOBAL`. Aliases: `SHOW POLICIES`, `SHOW CREATE ROW POLICY`, `SHOW CREATE POLICY` + - `SHOW_QUOTAS`. Level: `GLOBAL`. Aliases: `SHOW CREATE QUOTA` + - `SHOW_SETTINGS_PROFILES`. Level: `GLOBAL`. Aliases: `SHOW PROFILES`, `SHOW CREATE SETTINGS PROFILE`, `SHOW CREATE PROFILE` The `ROLE ADMIN` privilege allows a user to assign and revoke any roles including those which are not assigned to the user with the admin option. @@ -401,29 +416,29 @@ The `ROLE ADMIN` privilege allows a user to assign and revoke any roles includin Allows a user to execute [SYSTEM](../../sql-reference/statements/system.md) queries according to the following hierarchy of privileges. -- `SYSTEM`. Level: `GROUP` - - `SYSTEM SHUTDOWN`. Level: `GLOBAL`. Aliases: `SYSTEM KILL`, `SHUTDOWN` - - `SYSTEM DROP CACHE`. Aliases: `DROP CACHE` - - `SYSTEM DROP DNS CACHE`. Level: `GLOBAL`. 
Aliases: `SYSTEM DROP DNS`, `DROP DNS CACHE`, `DROP DNS` - - `SYSTEM DROP MARK CACHE`. Level: `GLOBAL`. Aliases: `SYSTEM DROP MARK`, `DROP MARK CACHE`, `DROP MARKS` - - `SYSTEM DROP UNCOMPRESSED CACHE`. Level: `GLOBAL`. Aliases: `SYSTEM DROP UNCOMPRESSED`, `DROP UNCOMPRESSED CACHE`, `DROP UNCOMPRESSED` - - `SYSTEM RELOAD`. Level: `GROUP` - - `SYSTEM RELOAD CONFIG`. Level: `GLOBAL`. Aliases: `RELOAD CONFIG` - - `SYSTEM RELOAD DICTIONARY`. Level: `GLOBAL`. Aliases: `SYSTEM RELOAD DICTIONARIES`, `RELOAD DICTIONARY`, `RELOAD DICTIONARIES` - - `SYSTEM RELOAD EMBEDDED DICTIONARIES`. Level: `GLOBAL`. Aliases: `RELOAD EMBEDDED DICTIONARIES` - - `SYSTEM MERGES`. Level: `TABLE`. Aliases: `SYSTEM STOP MERGES`, `SYSTEM START MERGES`, `STOP MERGES`, `START MERGES` - - `SYSTEM TTL MERGES`. Level: `TABLE`. Aliases: `SYSTEM STOP TTL MERGES`, `SYSTEM START TTL MERGES`, `STOP TTL MERGES`, `START TTL MERGES` - - `SYSTEM FETCHES`. Level: `TABLE`. Aliases: `SYSTEM STOP FETCHES`, `SYSTEM START FETCHES`, `STOP FETCHES`, `START FETCHES` - - `SYSTEM MOVES`. Level: `TABLE`. Aliases: `SYSTEM STOP MOVES`, `SYSTEM START MOVES`, `STOP MOVES`, `START MOVES` - - `SYSTEM SENDS`. Level: `GROUP`. Aliases: `SYSTEM STOP SENDS`, `SYSTEM START SENDS`, `STOP SENDS`, `START SENDS` - - `SYSTEM DISTRIBUTED SENDS`. Level: `TABLE`. Aliases: `SYSTEM STOP DISTRIBUTED SENDS`, `SYSTEM START DISTRIBUTED SENDS`, `STOP DISTRIBUTED SENDS`, `START DISTRIBUTED SENDS` - - `SYSTEM REPLICATED SENDS`. Level: `TABLE`. Aliases: `SYSTEM STOP REPLICATED SENDS`, `SYSTEM START REPLICATED SENDS`, `STOP REPLICATED SENDS`, `START REPLICATED SENDS` - - `SYSTEM REPLICATION QUEUES`. Level: `TABLE`. Aliases: `SYSTEM STOP REPLICATION QUEUES`, `SYSTEM START REPLICATION QUEUES`, `STOP REPLICATION QUEUES`, `START REPLICATION QUEUES` - - `SYSTEM SYNC REPLICA`. Level: `TABLE`. Aliases: `SYNC REPLICA` - - `SYSTEM RESTART REPLICA`. Level: `TABLE`. Aliases: `RESTART REPLICA` - - `SYSTEM FLUSH`. Level: `GROUP` - - `SYSTEM FLUSH DISTRIBUTED`. Level: `TABLE`. Aliases: `FLUSH DISTRIBUTED` - - `SYSTEM FLUSH LOGS`. Level: `GLOBAL`. Aliases: `FLUSH LOGS` +- `SYSTEM`. Level: `GROUP` + - `SYSTEM SHUTDOWN`. Level: `GLOBAL`. Aliases: `SYSTEM KILL`, `SHUTDOWN` + - `SYSTEM DROP CACHE`. Aliases: `DROP CACHE` + - `SYSTEM DROP DNS CACHE`. Level: `GLOBAL`. Aliases: `SYSTEM DROP DNS`, `DROP DNS CACHE`, `DROP DNS` + - `SYSTEM DROP MARK CACHE`. Level: `GLOBAL`. Aliases: `SYSTEM DROP MARK`, `DROP MARK CACHE`, `DROP MARKS` + - `SYSTEM DROP UNCOMPRESSED CACHE`. Level: `GLOBAL`. Aliases: `SYSTEM DROP UNCOMPRESSED`, `DROP UNCOMPRESSED CACHE`, `DROP UNCOMPRESSED` + - `SYSTEM RELOAD`. Level: `GROUP` + - `SYSTEM RELOAD CONFIG`. Level: `GLOBAL`. Aliases: `RELOAD CONFIG` + - `SYSTEM RELOAD DICTIONARY`. Level: `GLOBAL`. Aliases: `SYSTEM RELOAD DICTIONARIES`, `RELOAD DICTIONARY`, `RELOAD DICTIONARIES` + - `SYSTEM RELOAD EMBEDDED DICTIONARIES`. Level: `GLOBAL`. Aliases: `RELOAD EMBEDDED DICTIONARIES` + - `SYSTEM MERGES`. Level: `TABLE`. Aliases: `SYSTEM STOP MERGES`, `SYSTEM START MERGES`, `STOP MERGES`, `START MERGES` + - `SYSTEM TTL MERGES`. Level: `TABLE`. Aliases: `SYSTEM STOP TTL MERGES`, `SYSTEM START TTL MERGES`, `STOP TTL MERGES`, `START TTL MERGES` + - `SYSTEM FETCHES`. Level: `TABLE`. Aliases: `SYSTEM STOP FETCHES`, `SYSTEM START FETCHES`, `STOP FETCHES`, `START FETCHES` + - `SYSTEM MOVES`. Level: `TABLE`. Aliases: `SYSTEM STOP MOVES`, `SYSTEM START MOVES`, `STOP MOVES`, `START MOVES` + - `SYSTEM SENDS`. Level: `GROUP`. 
Aliases: `SYSTEM STOP SENDS`, `SYSTEM START SENDS`, `STOP SENDS`, `START SENDS` + - `SYSTEM DISTRIBUTED SENDS`. Level: `TABLE`. Aliases: `SYSTEM STOP DISTRIBUTED SENDS`, `SYSTEM START DISTRIBUTED SENDS`, `STOP DISTRIBUTED SENDS`, `START DISTRIBUTED SENDS` + - `SYSTEM REPLICATED SENDS`. Level: `TABLE`. Aliases: `SYSTEM STOP REPLICATED SENDS`, `SYSTEM START REPLICATED SENDS`, `STOP REPLICATED SENDS`, `START REPLICATED SENDS` + - `SYSTEM REPLICATION QUEUES`. Level: `TABLE`. Aliases: `SYSTEM STOP REPLICATION QUEUES`, `SYSTEM START REPLICATION QUEUES`, `STOP REPLICATION QUEUES`, `START REPLICATION QUEUES` + - `SYSTEM SYNC REPLICA`. Level: `TABLE`. Aliases: `SYNC REPLICA` + - `SYSTEM RESTART REPLICA`. Level: `TABLE`. Aliases: `RESTART REPLICA` + - `SYSTEM FLUSH`. Level: `GROUP` + - `SYSTEM FLUSH DISTRIBUTED`. Level: `TABLE`. Aliases: `FLUSH DISTRIBUTED` + - `SYSTEM FLUSH LOGS`. Level: `GLOBAL`. Aliases: `FLUSH LOGS` The `SYSTEM RELOAD EMBEDDED DICTIONARIES` privilege implicitly granted by the `SYSTEM RELOAD DICTIONARY ON *.*` privilege. @@ -431,36 +446,36 @@ The `SYSTEM RELOAD EMBEDDED DICTIONARIES` privilege implicitly granted by the `S Allows using [introspection](../../operations/optimizing-performance/sampling-query-profiler.md) functions. -- `INTROSPECTION`. Level: `GROUP`. Aliases: `INTROSPECTION FUNCTIONS` - - `addressToLine`. Level: `GLOBAL` - - `addressToLineWithInlines`. Level: `GLOBAL` - - `addressToSymbol`. Level: `GLOBAL` - - `demangle`. Level: `GLOBAL` +- `INTROSPECTION`. Level: `GROUP`. Aliases: `INTROSPECTION FUNCTIONS` + - `addressToLine`. Level: `GLOBAL` + - `addressToLineWithInlines`. Level: `GLOBAL` + - `addressToSymbol`. Level: `GLOBAL` + - `demangle`. Level: `GLOBAL` ### SOURCES Allows using external data sources. Applies to [table engines](../../engines/table-engines/index.md) and [table functions](../../sql-reference/table-functions/index.md#table-functions). -- `SOURCES`. Level: `GROUP` - - `FILE`. Level: `GLOBAL` - - `URL`. Level: `GLOBAL` - - `REMOTE`. Level: `GLOBAL` - - `YSQL`. Level: `GLOBAL` - - `ODBC`. Level: `GLOBAL` - - `JDBC`. Level: `GLOBAL` - - `HDFS`. Level: `GLOBAL` - - `S3`. Level: `GLOBAL` +- `SOURCES`. Level: `GROUP` + - `FILE`. Level: `GLOBAL` + - `URL`. Level: `GLOBAL` + - `REMOTE`. Level: `GLOBAL` + - `YSQL`. Level: `GLOBAL` + - `ODBC`. Level: `GLOBAL` + - `JDBC`. Level: `GLOBAL` + - `HDFS`. Level: `GLOBAL` + - `S3`. Level: `GLOBAL` The `SOURCES` privilege enables use of all the sources. Also you can grant a privilege for each source individually. To use sources, you need additional privileges. Examples: -- To create a table with the [MySQL table engine](../../engines/table-engines/integrations/mysql.md), you need `CREATE TABLE (ON db.table_name)` and `MYSQL` privileges. -- To use the [mysql table function](../../sql-reference/table-functions/mysql.md), you need `CREATE TEMPORARY TABLE` and `MYSQL` privileges. +- To create a table with the [MySQL table engine](../../engines/table-engines/integrations/mysql.md), you need `CREATE TABLE (ON db.table_name)` and `MYSQL` privileges. +- To use the [mysql table function](../../sql-reference/table-functions/mysql.md), you need `CREATE TEMPORARY TABLE` and `MYSQL` privileges. ### dictGet -- `dictGet`. Aliases: `dictHas`, `dictGetHierarchy`, `dictIsIn` +- `dictGet`. 
Aliases: `dictHas`, `dictGetHierarchy`, `dictIsIn` Allows a user to execute [dictGet](../../sql-reference/functions/ext-dict-functions.md#dictget), [dictHas](../../sql-reference/functions/ext-dict-functions.md#dicthas), [dictGetHierarchy](../../sql-reference/functions/ext-dict-functions.md#dictgethierarchy), [dictIsIn](../../sql-reference/functions/ext-dict-functions.md#dictisin) functions. @@ -468,8 +483,17 @@ Privilege level: `DICTIONARY`. **Examples** -- `GRANT dictGet ON mydb.mydictionary TO john` -- `GRANT dictGet ON mydictionary TO john` +- `GRANT dictGet ON mydb.mydictionary TO john` +- `GRANT dictGet ON mydictionary TO john` + + +### displaySecretsInShowAndSelect {#grant-display-secrets} + +Allows a user to view secrets in `SHOW` and `SELECT` queries if both +[`display_secrets_in_show_and_select` server setting](../../operations/server-configuration-parameters/settings#display_secrets_in_show_and_select) +and +[`format_display_secrets_in_show_and_select` format setting](../../operations/settings/formats#format_display_secrets_in_show_and_select) +are turned on. ### ALL diff --git a/docs/en/sql-reference/statements/index.md b/docs/en/sql-reference/statements/index.md index b286d8c932d..5aa61cf8d21 100644 --- a/docs/en/sql-reference/statements/index.md +++ b/docs/en/sql-reference/statements/index.md @@ -1,32 +1,32 @@ --- slug: /en/sql-reference/statements/ sidebar_position: 1 -sidebar_label: Statements +sidebar_label: List of statements --- -# ClickHouse SQL Statements +# ClickHouse SQL Statements Statements represent various kinds of actions you can perform using SQL queries. Each kind of statement has its own syntax and usage details that are described separately: -- [SELECT](/docs/en/sql-reference/statements/select/index.md) -- [INSERT INTO](/docs/en/sql-reference/statements/insert-into.md) -- [CREATE](/docs/en/sql-reference/statements/create/index.md) -- [ALTER](/docs/en/sql-reference/statements/alter/index.md) -- [SYSTEM](/docs/en/sql-reference/statements/system.md) -- [SHOW](/docs/en/sql-reference/statements/show.md) -- [GRANT](/docs/en/sql-reference/statements/grant.md) -- [REVOKE](/docs/en/sql-reference/statements/revoke.md) -- [ATTACH](/docs/en/sql-reference/statements/attach.md) -- [CHECK TABLE](/docs/en/sql-reference/statements/check-table.md) -- [DESCRIBE TABLE](/docs/en/sql-reference/statements/describe-table.md) -- [DETACH](/docs/en/sql-reference/statements/detach.md) -- [DROP](/docs/en/sql-reference/statements/drop.md) -- [EXISTS](/docs/en/sql-reference/statements/exists.md) -- [KILL](/docs/en/sql-reference/statements/kill.md) -- [OPTIMIZE](/docs/en/sql-reference/statements/optimize.md) -- [RENAME](/docs/en/sql-reference/statements/rename.md) -- [SET](/docs/en/sql-reference/statements/set.md) -- [SET ROLE](/docs/en/sql-reference/statements/set-role.md) -- [TRUNCATE](/docs/en/sql-reference/statements/truncate.md) -- [USE](/docs/en/sql-reference/statements/use.md) -- [EXPLAIN](/docs/en/sql-reference/statements/explain.md) +- [SELECT](/docs/en/sql-reference/statements/select/index.md) +- [INSERT INTO](/docs/en/sql-reference/statements/insert-into.md) +- [CREATE](/docs/en/sql-reference/statements/create/index.md) +- [ALTER](/docs/en/sql-reference/statements/alter/index.md) +- [SYSTEM](/docs/en/sql-reference/statements/system.md) +- [SHOW](/docs/en/sql-reference/statements/show.md) +- [GRANT](/docs/en/sql-reference/statements/grant.md) +- [REVOKE](/docs/en/sql-reference/statements/revoke.md) +- [ATTACH](/docs/en/sql-reference/statements/attach.md) +- [CHECK
TABLE](/docs/en/sql-reference/statements/check-table.md) +- [DESCRIBE TABLE](/docs/en/sql-reference/statements/describe-table.md) +- [DETACH](/docs/en/sql-reference/statements/detach.md) +- [DROP](/docs/en/sql-reference/statements/drop.md) +- [EXISTS](/docs/en/sql-reference/statements/exists.md) +- [KILL](/docs/en/sql-reference/statements/kill.md) +- [OPTIMIZE](/docs/en/sql-reference/statements/optimize.md) +- [RENAME](/docs/en/sql-reference/statements/rename.md) +- [SET](/docs/en/sql-reference/statements/set.md) +- [SET ROLE](/docs/en/sql-reference/statements/set-role.md) +- [TRUNCATE](/docs/en/sql-reference/statements/truncate.md) +- [USE](/docs/en/sql-reference/statements/use.md) +- [EXPLAIN](/docs/en/sql-reference/statements/explain.md) diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md index 03a4ab3453c..d6e30827f9b 100644 --- a/docs/en/sql-reference/statements/insert-into.md +++ b/docs/en/sql-reference/statements/insert-into.md @@ -4,7 +4,7 @@ sidebar_position: 33 sidebar_label: INSERT INTO --- -# INSERT INTO Statement +# INSERT INTO Statement Inserts data into a table. @@ -64,8 +64,8 @@ INSERT INTO insert_select_testtable VALUES (1, DEFAULT, 1) ; If a list of columns does not include all existing columns, the rest of the columns are filled with: -- The values calculated from the `DEFAULT` expressions specified in the table definition. -- Zeros and empty strings, if `DEFAULT` expressions are not defined. +- The values calculated from the `DEFAULT` expressions specified in the table definition. +- Zeros and empty strings, if `DEFAULT` expressions are not defined. Data can be passed to the INSERT in any [format](../../interfaces/formats.md#formats) supported by ClickHouse. The format must be specified explicitly in the query: @@ -89,7 +89,14 @@ INSERT INTO t FORMAT TabSeparated 22 Qwerty ``` -You can insert data separately from the query by using the command-line client or the HTTP interface. For more information, see the section “[Interfaces](../../interfaces)”. +You can insert data separately from the query by using the [command-line client](/docs/en/integrations/sql-clients/clickhouse-client-local) or the [HTTP interface](/docs/en/interfaces/http/). + +:::note +If you want to specify `SETTINGS` for `INSERT` query then you have to do it _before_ `FORMAT` clause since everything after `FORMAT format_name` is treated as data. For example: +```sql +INSERT INTO table SETTINGS ... FORMAT format_name data_set +``` +::: ## Constraints @@ -122,7 +129,7 @@ To insert a default value instead of `NULL` into a column with not nullable data INSERT INTO [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name ``` -Use the syntax above to insert data from a file, or files, stored on the **client** side. `file_name` and `type` are string literals. Input file [format](../../interfaces/formats.md) must be set in the `FORMAT` clause. +Use the syntax above to insert data from a file, or files, stored on the **client** side. `file_name` and `type` are string literals. Input file [format](../../interfaces/formats.md) must be set in the `FORMAT` clause. Compressed files are supported. The compression type is detected by the extension of the file name. Or it can be explicitly specified in a `COMPRESSION` clause. Supported types are: `'none'`, `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`. @@ -184,7 +191,7 @@ INSERT INTO [TABLE] FUNCTION table_func ... 
``` sql CREATE TABLE simple_table (id UInt32, text String) ENGINE=MergeTree() ORDER BY id; -INSERT INTO TABLE FUNCTION remote('localhost', default.simple_table) +INSERT INTO TABLE FUNCTION remote('localhost', default.simple_table) VALUES (100, 'inserted via remote()'); SELECT * FROM simple_table; ``` @@ -201,22 +208,22 @@ Result: `INSERT` sorts the input data by primary key and splits them into partitions by a partition key. If you insert data into several partitions at once, it can significantly reduce the performance of the `INSERT` query. To avoid this: -- Add data in fairly large batches, such as 100,000 rows at a time. -- Group data by a partition key before uploading it to ClickHouse. +- Add data in fairly large batches, such as 100,000 rows at a time. +- Group data by a partition key before uploading it to ClickHouse. Performance will not decrease if: -- Data is added in real time. -- You upload data that is usually sorted by time. +- Data is added in real time. +- You upload data that is usually sorted by time. It's also possible to asynchronously insert data in small but frequent inserts. The data from such insertions is combined into batches and then safely inserted into a table. To enable the asynchronous mode, switch on the [async_insert](../../operations/settings/settings.md#async-insert) setting. Note that asynchronous insertions are supported only over HTTP protocol, and deduplication is not supported for them. **See Also** -- [async_insert](../../operations/settings/settings.md#async-insert) -- [async_insert_threads](../../operations/settings/settings.md#async-insert-threads) -- [wait_for_async_insert](../../operations/settings/settings.md#wait-for-async-insert) -- [wait_for_async_insert_timeout](../../operations/settings/settings.md#wait-for-async-insert-timeout) -- [async_insert_max_data_size](../../operations/settings/settings.md#async-insert-max-data-size) -- [async_insert_busy_timeout_ms](../../operations/settings/settings.md#async-insert-busy-timeout-ms) -- [async_insert_stale_timeout_ms](../../operations/settings/settings.md#async-insert-stale-timeout-ms) +- [async_insert](../../operations/settings/settings.md#async-insert) +- [async_insert_threads](../../operations/settings/settings.md#async-insert-threads) +- [wait_for_async_insert](../../operations/settings/settings.md#wait-for-async-insert) +- [wait_for_async_insert_timeout](../../operations/settings/settings.md#wait-for-async-insert-timeout) +- [async_insert_max_data_size](../../operations/settings/settings.md#async-insert-max-data-size) +- [async_insert_busy_timeout_ms](../../operations/settings/settings.md#async-insert-busy-timeout-ms) +- [async_insert_stale_timeout_ms](../../operations/settings/settings.md#async-insert-stale-timeout-ms) diff --git a/docs/en/sql-reference/statements/optimize.md b/docs/en/sql-reference/statements/optimize.md index 78615a2f9ad..45d336c42f2 100644 --- a/docs/en/sql-reference/statements/optimize.md +++ b/docs/en/sql-reference/statements/optimize.md @@ -7,7 +7,7 @@ title: "OPTIMIZE Statement" This query tries to initialize an unscheduled merge of data parts for tables. -:::warning +:::note `OPTIMIZE` can’t fix the `Too many parts` error. 
::: @@ -21,10 +21,10 @@ The `OPTIMIZE` query is supported for [MergeTree](../../engines/table-engines/me When `OPTIMIZE` is used with the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md) family of table engines, ClickHouse creates a task for merging and waits for execution on all replicas (if the [alter_sync](../../operations/settings/settings.md#alter-sync) setting is set to `2`) or on the current replica (if the [alter_sync](../../operations/settings/settings.md#alter-sync) setting is set to `1`). -- If `OPTIMIZE` does not perform a merge for any reason, it does not notify the client. To enable notifications, use the [optimize_throw_if_noop](../../operations/settings/settings.md#setting-optimize_throw_if_noop) setting. -- If you specify a `PARTITION`, only the specified partition is optimized. [How to set partition expression](alter/partition.md#how-to-set-partition-expression). -- If you specify `FINAL`, optimization is performed even when all the data is already in one part. You can control this behaviour with [optimize_skip_merged_partitions](../../operations/settings/settings.md#optimize-skip-merged-partitions). Also, the merge is forced even if concurrent merges are performed. -- If you specify `DEDUPLICATE`, then completely identical rows (unless by-clause is specified) will be deduplicated (all columns are compared), it makes sense only for the MergeTree engine. +- If `OPTIMIZE` does not perform a merge for any reason, it does not notify the client. To enable notifications, use the [optimize_throw_if_noop](../../operations/settings/settings.md#setting-optimize_throw_if_noop) setting. +- If you specify a `PARTITION`, only the specified partition is optimized. [How to set partition expression](alter/partition.md#how-to-set-partition-expression). +- If you specify `FINAL`, optimization is performed even when all the data is already in one part. You can control this behaviour with [optimize_skip_merged_partitions](../../operations/settings/settings.md#optimize-skip-merged-partitions). Also, the merge is forced even if concurrent merges are performed. +- If you specify `DEDUPLICATE`, then completely identical rows (unless a `BY` clause is specified) will be deduplicated (all columns are compared); this makes sense only for the MergeTree engine. You can specify how long (in seconds) to wait for inactive replicas to execute `OPTIMIZE` queries by the [replication_wait_for_inactive_replica_timeout](../../operations/settings/settings.md#replication-wait-for-inactive-replica-timeout) setting. @@ -34,7 +34,7 @@ If the `alter_sync` is set to `2` and some replicas are not active for more than ## BY expression
+If you want to perform deduplication on custom set of columns rather than on all, you can specify list of columns explicitly or use any combination of [`*`](../../sql-reference/statements/select/index.md#asterisk), [`COLUMNS`](../../sql-reference/statements/select/index.md#columns-expression) or [`EXCEPT`](../../sql-reference/statements/select/index.md#except-modifier) expressions. The explicitly written or implicitly expanded list of columns must include all columns specified in row ordering expression (both primary and sorting keys) and partitioning expression (partitioning key). :::note Notice that `*` behaves just like in `SELECT`: [MATERIALIZED](../../sql-reference/statements/create/table.md#materialized) and [ALIAS](../../sql-reference/statements/create/table.md#alias) columns are not used for expansion. diff --git a/docs/en/sql-reference/statements/rename.md b/docs/en/sql-reference/statements/rename.md index cc33a7c41d4..a2d4b15df13 100644 --- a/docs/en/sql-reference/statements/rename.md +++ b/docs/en/sql-reference/statements/rename.md @@ -60,4 +60,4 @@ RENAME DICTIONARY [db0.]dict_A TO [db1.]dict_B [,...] [ON CLUSTER cluster] **See Also** -- [Dictionaries](../../sql-reference/dictionaries/index.md) +- [Dictionaries](../../sql-reference/dictionaries/index.md) diff --git a/docs/en/sql-reference/statements/select/array-join.md b/docs/en/sql-reference/statements/select/array-join.md index a1b5e0cdb36..9045ec4aba3 100644 --- a/docs/en/sql-reference/statements/select/array-join.md +++ b/docs/en/sql-reference/statements/select/array-join.md @@ -23,8 +23,8 @@ You can specify only one `ARRAY JOIN` clause in a `SELECT` query. Supported types of `ARRAY JOIN` are listed below: -- `ARRAY JOIN` - In base case, empty arrays are not included in the result of `JOIN`. -- `LEFT ARRAY JOIN` - The result of `JOIN` contains rows with empty arrays. The value for an empty array is set to the default value for the array element type (usually 0, empty string or NULL). +- `ARRAY JOIN` - In base case, empty arrays are not included in the result of `JOIN`. +- `LEFT ARRAY JOIN` - The result of `JOIN` contains rows with empty arrays. The value for an empty array is set to the default value for the array element type (usually 0, empty string or NULL). ## Basic ARRAY JOIN Examples @@ -146,7 +146,7 @@ ARRAY JOIN arr AS a, arrayEnumerate(arr) AS num, arrayMap(x -> x + 1, arr) AS ma └───────┴─────────┴───┴─────┴────────┘ ``` -The example below uses the [arrayEnumerate](../../../sql-reference/functions/array-functions#array_functions-arrayenumerate) function: +The example below uses the [arrayEnumerate](../../../sql-reference/functions/array-functions.md#array_functions-arrayenumerate) function: ``` sql SELECT s, arr, a, num, arrayEnumerate(arr) @@ -166,8 +166,8 @@ ARRAY JOIN arr AS a, arrayEnumerate(arr) AS num; Multiple arrays with different sizes can be joined by using: `SETTINGS enable_unaligned_array_join = 1`. 
Example: ```sql -SELECT s, arr, a, b -FROM arrays_test ARRAY JOIN arr as a, [['a','b'],['c']] as b +SELECT s, arr, a, b +FROM arrays_test ARRAY JOIN arr as a, [['a','b'],['c']] as b SETTINGS enable_unaligned_array_join = 1; ``` @@ -185,7 +185,7 @@ SETTINGS enable_unaligned_array_join = 1; ## ARRAY JOIN with Nested Data Structure -`ARRAY JOIN` also works with [nested data structures](../../../sql-reference/data-types/nested-data-structures/nested.md): +`ARRAY JOIN` also works with [nested data structures](../../../sql-reference/data-types/nested-data-structures/index.md): ``` sql CREATE TABLE nested_test @@ -278,7 +278,7 @@ ARRAY JOIN nest AS n; └───────┴─────┴─────┴─────────┴────────────┘ ``` -Example of using the [arrayEnumerate](../../../sql-reference/functions/array-functions#array_functions-arrayenumerate) function: +Example of using the [arrayEnumerate](../../../sql-reference/functions/array-functions.md#array_functions-arrayenumerate) function: ``` sql SELECT s, `n.x`, `n.y`, `nest.x`, `nest.y`, num diff --git a/docs/en/sql-reference/statements/select/distinct.md b/docs/en/sql-reference/statements/select/distinct.md index b2d940af3bb..10326b0ef8f 100644 --- a/docs/en/sql-reference/statements/select/distinct.md +++ b/docs/en/sql-reference/statements/select/distinct.md @@ -105,6 +105,6 @@ Take this implementation specificity into account when programming queries. It is possible to obtain the same result by applying [GROUP BY](../../../sql-reference/statements/select/group-by.md) across the same set of values as specified as `SELECT` clause, without using any aggregate functions. But there are few differences from `GROUP BY` approach: -- `DISTINCT` can be applied together with `GROUP BY`. -- When [ORDER BY](../../../sql-reference/statements/select/order-by.md) is omitted and [LIMIT](../../../sql-reference/statements/select/limit.md) is defined, the query stops running immediately after the required number of different rows has been read. -- Data blocks are output as they are processed, without waiting for the entire query to finish running. +- `DISTINCT` can be applied together with `GROUP BY`. +- When [ORDER BY](../../../sql-reference/statements/select/order-by.md) is omitted and [LIMIT](../../../sql-reference/statements/select/limit.md) is defined, the query stops running immediately after the required number of different rows has been read. +- Data blocks are output as they are processed, without waiting for the entire query to finish running. 
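To make the comparison concrete, here is a minimal sketch; the `visits` table and its rows are invented for this illustration and are not part of the patch above:

```sql
-- Hypothetical table and data, for illustration only.
CREATE TABLE visits (user_id UInt32, url String) ENGINE = MergeTree ORDER BY user_id;
INSERT INTO visits VALUES (1, '/a'), (1, '/a'), (2, '/b');

-- Both queries return the same two rows, with no aggregate functions involved.
SELECT DISTINCT user_id, url FROM visits;
SELECT user_id, url FROM visits GROUP BY user_id, url;

-- With LIMIT and no ORDER BY, the DISTINCT form can stop reading as soon
-- as the required number of distinct rows has been found.
SELECT DISTINCT user_id, url FROM visits LIMIT 2;
```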
diff --git a/docs/en/sql-reference/statements/select/except.md b/docs/en/sql-reference/statements/select/except.md index f73cbff9819..cc4bb9d1c24 100644 --- a/docs/en/sql-reference/statements/select/except.md +++ b/docs/en/sql-reference/statements/select/except.md @@ -149,5 +149,5 @@ Result: **See Also** -- [UNION](union.md#union-clause) -- [INTERSECT](intersect.md#intersect-clause) +- [UNION](union.md#union-clause) +- [INTERSECT](intersect.md#intersect-clause) diff --git a/docs/en/sql-reference/statements/select/from.md b/docs/en/sql-reference/statements/select/from.md index fb6c1f94902..4ca8e8287c0 100644 --- a/docs/en/sql-reference/statements/select/from.md +++ b/docs/en/sql-reference/statements/select/from.md @@ -7,9 +7,9 @@ sidebar_label: FROM The `FROM` clause specifies the source to read data from: -- [Table](../../../engines/table-engines/index.md) -- [Subquery](../../../sql-reference/statements/select/index.md) -- [Table function](../../../sql-reference/table-functions/index.md#table-functions) +- [Table](../../../engines/table-engines/index.md) +- [Subquery](../../../sql-reference/statements/select/index.md) +- [Table function](../../../sql-reference/table-functions/index.md#table-functions) [JOIN](../../../sql-reference/statements/select/join.md) and [ARRAY JOIN](../../../sql-reference/statements/select/array-join.md) clauses may also be used to extend the functionality of the `FROM` clause. @@ -31,8 +31,8 @@ There are drawbacks to using `FINAL` (see below). Queries that use `FINAL` are executed slightly slower than similar queries that do not, because: -- Data is merged during query execution. -- Queries with `FINAL` read primary key columns in addition to the columns specified in the query. +- Data is merged during query execution. +- Queries with `FINAL` read primary key columns in addition to the columns specified in the query. **In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine haven’t happened yet and deal with it by applying aggregation (for example, to discard duplicates). diff --git a/docs/en/sql-reference/statements/select/group-by.md b/docs/en/sql-reference/statements/select/group-by.md index 2a4b06660c7..36d401ba04a 100644 --- a/docs/en/sql-reference/statements/select/group-by.md +++ b/docs/en/sql-reference/statements/select/group-by.md @@ -7,13 +7,13 @@ sidebar_label: GROUP BY `GROUP BY` clause switches the `SELECT` query into an aggregation mode, which works as follows: -- `GROUP BY` clause contains a list of expressions (or a single expression, which is considered to be the list of length one). This list acts as a “grouping key”, while each individual expression will be referred to as a “key expression”. -- All the expressions in the [SELECT](../../../sql-reference/statements/select/index.md), [HAVING](../../../sql-reference/statements/select/having), and [ORDER BY](../../../sql-reference/statements/select/order-by.md) clauses **must** be calculated based on key expressions **or** on [aggregate functions](../../../sql-reference/aggregate-functions/index.md) over non-key expressions (including plain columns). In other words, each column selected from the table must be used either in a key expression or inside an aggregate function, but not both. -- Result of aggregating `SELECT` query will contain as many rows as there were unique values of “grouping key” in source table.
Usually, this significantly reduces the row count, often by orders of magnitude, but not necessarily: row count stays the same if all “grouping key” values were distinct. +- `GROUP BY` clause contains a list of expressions (or a single expression, which is considered to be the list of length one). This list acts as a “grouping key”, while each individual expression will be referred to as a “key expression”. +- All the expressions in the [SELECT](../../../sql-reference/statements/select/index.md), [HAVING](../../../sql-reference/statements/select/having.md), and [ORDER BY](../../../sql-reference/statements/select/order-by.md) clauses **must** be calculated based on key expressions **or** on [aggregate functions](../../../sql-reference/aggregate-functions/index.md) over non-key expressions (including plain columns). In other words, each column selected from the table must be used either in a key expression or inside an aggregate function, but not both. +- Result of aggregating `SELECT` query will contain as many rows as there were unique values of “grouping key” in source table. Usually, this significantly reduces the row count, often by orders of magnitude, but not necessarily: row count stays the same if all “grouping key” values were distinct. When you want to group data in the table by column numbers instead of column names, enable the setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments). -:::note +:::note There’s an additional way to run aggregation over a table. If a query contains table columns only inside aggregate functions, the `GROUP BY clause` can be omitted, and aggregation by an empty set of keys is assumed. Such queries always return exactly one row. ::: @@ -57,8 +57,8 @@ The subtotals are calculated in the reverse order: at first subtotals are calcul In the subtotals rows the values of already "grouped" key expressions are set to `0` or empty line. -:::note -Mind that [HAVING](../../../sql-reference/statements/select/having) clause can affect the subtotals results. +:::note +Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results. ::: **Example** @@ -125,8 +125,8 @@ SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP; In the subtotals rows the values of all "grouped" key expressions are set to `0` or empty line. -:::note -Mind that [HAVING](../../../sql-reference/statements/select/having) clause can affect the subtotals results. +:::note +Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results. ::: **Example** @@ -220,17 +220,17 @@ If the `WITH TOTALS` modifier is specified, another row will be calculated. This This extra row is only produced in `JSON*`, `TabSeparated*`, and `Pretty*` formats, separately from the other rows: -- In `XML` and `JSON*` formats, this row is output as a separate ‘totals’ field. -- In `TabSeparated*`, `CSV*` and `Vertical` formats, the row comes after the main result, preceded by an empty row (after the other data). -- In `Pretty*` formats, the row is output as a separate table after the main result. -- In `Template` format, the row is output according to specified template. -- In the other formats it is not available. +- In `XML` and `JSON*` formats, this row is output as a separate ‘totals’ field. +- In `TabSeparated*`, `CSV*` and `Vertical` formats, the row comes after the main result, preceded by an empty row (after the other data). 
+- In `Pretty*` formats, the row is output as a separate table after the main result. +- In `Template` format, the row is output according to specified template. +- In the other formats it is not available. -:::note -totals is output in the results of `SELECT` queries, and is not output in `INSERT INTO ... SELECT`. +:::note +totals is output in the results of `SELECT` queries, and is not output in `INSERT INTO ... SELECT`. ::: -`WITH TOTALS` can be run in different ways when [HAVING](../../../sql-reference/statements/select/having) is present. The behavior depends on the `totals_mode` setting. +`WITH TOTALS` can be run in different ways when [HAVING](../../../sql-reference/statements/select/having.md) is present. The behavior depends on the `totals_mode` setting. ### Configuring Totals Processing diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md index 5a8893f6f28..2863c5c0116 100644 --- a/docs/en/sql-reference/statements/select/index.md +++ b/docs/en/sql-reference/statements/select/index.md @@ -4,7 +4,7 @@ sidebar_position: 32 sidebar_label: SELECT --- -# SELECT Query +# SELECT Query `SELECT` queries perform data retrieval. By default, the requested data is returned to the client, while in conjunction with [INSERT INTO](../../../sql-reference/statements/insert-into.md) it can be forwarded to a different table. @@ -34,24 +34,24 @@ All clauses are optional, except for the required list of expressions immediatel Specifics of each optional clause are covered in separate sections, which are listed in the same order as they are executed: -- [WITH clause](../../../sql-reference/statements/select/with.md) -- [SELECT clause](#select-clause) -- [DISTINCT clause](../../../sql-reference/statements/select/distinct.md) -- [FROM clause](../../../sql-reference/statements/select/from.md) -- [SAMPLE clause](../../../sql-reference/statements/select/sample.md) -- [JOIN clause](../../../sql-reference/statements/select/join.md) -- [PREWHERE clause](../../../sql-reference/statements/select/prewhere.md) -- [WHERE clause](../../../sql-reference/statements/select/where.md) -- [GROUP BY clause](../../../sql-reference/statements/select/group-by.md) -- [LIMIT BY clause](../../../sql-reference/statements/select/limit-by.md) -- [HAVING clause](../../../sql-reference/statements/select/having) -- [LIMIT clause](../../../sql-reference/statements/select/limit.md) -- [OFFSET clause](../../../sql-reference/statements/select/offset.md) -- [UNION clause](../../../sql-reference/statements/select/union.md) -- [INTERSECT clause](../../../sql-reference/statements/select/intersect.md) -- [EXCEPT clause](../../../sql-reference/statements/select/except.md) -- [INTO OUTFILE clause](../../../sql-reference/statements/select/into-outfile.md) -- [FORMAT clause](../../../sql-reference/statements/select/format.md) +- [WITH clause](../../../sql-reference/statements/select/with.md) +- [SELECT clause](#select-clause) +- [DISTINCT clause](../../../sql-reference/statements/select/distinct.md) +- [FROM clause](../../../sql-reference/statements/select/from.md) +- [SAMPLE clause](../../../sql-reference/statements/select/sample.md) +- [JOIN clause](../../../sql-reference/statements/select/join.md) +- [PREWHERE clause](../../../sql-reference/statements/select/prewhere.md) +- [WHERE clause](../../../sql-reference/statements/select/where.md) +- [GROUP BY clause](../../../sql-reference/statements/select/group-by.md) +- [LIMIT BY clause](../../../sql-reference/statements/select/limit-by.md) +- 
[HAVING clause](../../../sql-reference/statements/select/having.md) +- [LIMIT clause](../../../sql-reference/statements/select/limit.md) +- [OFFSET clause](../../../sql-reference/statements/select/offset.md) +- [UNION clause](../../../sql-reference/statements/select/union.md) +- [INTERSECT clause](../../../sql-reference/statements/select/intersect.md) +- [EXCEPT clause](../../../sql-reference/statements/select/except.md) +- [INTO OUTFILE clause](../../../sql-reference/statements/select/into-outfile.md) +- [FORMAT clause](../../../sql-reference/statements/select/format.md) ## SELECT Clause @@ -123,11 +123,11 @@ Columns that matched the `COLUMNS` expression can have different data types. If You can put an asterisk in any part of a query instead of an expression. When the query is analyzed, the asterisk is expanded to a list of all table columns (excluding the `MATERIALIZED` and `ALIAS` columns). There are only a few cases when using an asterisk is justified: -- When creating a table dump. -- For tables containing just a few columns, such as system tables. -- For getting information about what columns are in a table. In this case, set `LIMIT 1`. But it is better to use the `DESC TABLE` query. -- When there is strong filtration on a small number of columns using `PREWHERE`. -- In subqueries (since columns that aren’t needed for the external query are excluded from subqueries). +- When creating a table dump. +- For tables containing just a few columns, such as system tables. +- For getting information about what columns are in a table. In this case, set `LIMIT 1`. But it is better to use the `DESC TABLE` query. +- When there is strong filtration on a small number of columns using `PREWHERE`. +- In subqueries (since columns that aren’t needed for the external query are excluded from subqueries). In all other cases, we do not recommend using the asterisk, since it only gives you the drawbacks of a columnar DBMS instead of the advantages. In other words using the asterisk is not recommended. @@ -151,17 +151,17 @@ The `GROUP BY`, `ORDER BY`, and `LIMIT BY` clauses can support positional argume If the query omits the `DISTINCT`, `GROUP BY` and `ORDER BY` clauses and the `IN` and `JOIN` subqueries, the query will be completely stream processed, using O(1) amount of RAM. Otherwise, the query might consume a lot of RAM if the appropriate restrictions are not specified: -- `max_memory_usage` -- `max_rows_to_group_by` -- `max_rows_to_sort` -- `max_rows_in_distinct` -- `max_bytes_in_distinct` -- `max_rows_in_set` -- `max_bytes_in_set` -- `max_rows_in_join` -- `max_bytes_in_join` -- `max_bytes_before_external_sort` -- `max_bytes_before_external_group_by` +- `max_memory_usage` +- `max_rows_to_group_by` +- `max_rows_to_sort` +- `max_rows_in_distinct` +- `max_bytes_in_distinct` +- `max_rows_in_set` +- `max_bytes_in_set` +- `max_rows_in_join` +- `max_bytes_in_join` +- `max_bytes_before_external_sort` +- `max_bytes_before_external_group_by` For more information, see the section “Settings”. It is possible to use external sorting (saving temporary tables to a disk) and external aggregation. 
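As a rough illustration of those restrictions, the settings can be attached to a single query; the values below are placeholders, and the query reuses the hypothetical `visits` table from the earlier sketch:

```sql
-- Cap the query at ~10 GB of RAM and let GROUP BY spill its
-- aggregation state to disk after ~5 GB instead of failing.
SELECT user_id, count() AS c
FROM visits
GROUP BY user_id
SETTINGS
    max_memory_usage = 10000000000,
    max_bytes_before_external_group_by = 5000000000;
```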
diff --git a/docs/en/sql-reference/statements/select/intersect.md b/docs/en/sql-reference/statements/select/intersect.md index ea7a39421a5..8c0ee6b2e96 100644 --- a/docs/en/sql-reference/statements/select/intersect.md +++ b/docs/en/sql-reference/statements/select/intersect.md @@ -153,5 +153,5 @@ Result: **See Also** -- [UNION](union.md#union-clause) -- [EXCEPT](except.md#except-clause) +- [UNION](union.md#union-clause) +- [EXCEPT](except.md#except-clause) diff --git a/docs/en/sql-reference/statements/select/into-outfile.md b/docs/en/sql-reference/statements/select/into-outfile.md index a14b23f6689..352af16042a 100644 --- a/docs/en/sql-reference/statements/select/into-outfile.md +++ b/docs/en/sql-reference/statements/select/into-outfile.md @@ -12,7 +12,7 @@ Compressed files are supported. Compression type is detected by the extension of **Syntax** ```sql -SELECT INTO OUTFILE file_name [AND STDOUT] [COMPRESSION type [LEVEL level]] +SELECT INTO OUTFILE file_name [AND STDOUT] [APPEND] [COMPRESSION type [LEVEL level]] ``` `file_name` and `type` are string literals. Supported compression types are: `'none'`, `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`. @@ -21,10 +21,11 @@ SELECT INTO OUTFILE file_name [AND STDOUT] [COMPRESSION type [LEVEL ## Implementation Details -- This functionality is available in the [command-line client](../../../interfaces/cli.md) and [clickhouse-local](../../../operations/utilities/clickhouse-local.md). Thus a query sent via [HTTP interface](../../../interfaces/http.md) will fail. -- The query will fail if a file with the same file name already exists. -- The default [output format](../../../interfaces/formats.md) is `TabSeparated` (like in the command-line client batch mode). Use [FORMAT](format.md) clause to change it. -- If `AND STDOUT` is mentioned in the query then the output that is written to the file is also displayed on standard output. If used with compression, the plaintext is displayed on standard output. +- This functionality is available in the [command-line client](../../../interfaces/cli.md) and [clickhouse-local](../../../operations/utilities/clickhouse-local.md). Thus a query sent via [HTTP interface](../../../interfaces/http.md) will fail. +- The query will fail if a file with the same file name already exists. +- The default [output format](../../../interfaces/formats.md) is `TabSeparated` (like in the command-line client batch mode). Use [FORMAT](format.md) clause to change it. +- If `AND STDOUT` is mentioned in the query then the output that is written to the file is also displayed on standard output. If used with compression, the plaintext is displayed on standard output. +- If `APPEND` is mentioned in the query then the output is appended to an existing file. If compression is used, append cannot be used. **Example** diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md index 62d3e9fd69a..08ffae838f8 100644 --- a/docs/en/sql-reference/statements/select/join.md +++ b/docs/en/sql-reference/statements/select/join.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/statements/select/join -sidebar_label: JOIN +sidebar_label: Joining Tables --- # JOIN Clause @@ -18,24 +18,28 @@ FROM Expressions from `ON` clause and columns from `USING` clause are called “join keys”. Unless otherwise stated, join produces a [Cartesian product](https://en.wikipedia.org/wiki/Cartesian_product) from rows with matching “join keys”, which might produce results with much more rows than the source tables. 
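A minimal sketch of the basic form, with hypothetical `orders` and `customers` tables; the equality in the `ON` section names the “join keys”:

```sql
-- Hypothetical tables, for illustration only.
CREATE TABLE customers (customer_id UInt32, name String) ENGINE = Memory;
CREATE TABLE orders (order_id UInt32, customer_id UInt32, amount Decimal(10, 2)) ENGINE = Memory;
INSERT INTO customers VALUES (1, 'Alice');
INSERT INTO orders VALUES (10, 1, 9.99), (11, 2, 5.00);

-- INNER keeps only rows with matching join keys, so order 11
-- (customer 2, unknown) is dropped from the result.
SELECT o.order_id, o.amount, c.name
FROM orders AS o
INNER JOIN customers AS c ON o.customer_id = c.customer_id;
```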
+## Related Content + +- Blog: [ClickHouse: A Blazingly Fast DBMS with Full SQL Join Support - Part 1](https://clickhouse.com/blog/clickhouse-fully-supports-joins) + ## Supported Types of JOIN All standard [SQL JOIN](https://en.wikipedia.org/wiki/Join_(SQL)) types are supported: -- `INNER JOIN`, only matching rows are returned. -- `LEFT OUTER JOIN`, non-matching rows from left table are returned in addition to matching rows. -- `RIGHT OUTER JOIN`, non-matching rows from right table are returned in addition to matching rows. -- `FULL OUTER JOIN`, non-matching rows from both tables are returned in addition to matching rows. -- `CROSS JOIN`, produces cartesian product of whole tables, “join keys” are **not** specified. +- `INNER JOIN`, only matching rows are returned. +- `LEFT OUTER JOIN`, non-matching rows from left table are returned in addition to matching rows. +- `RIGHT OUTER JOIN`, non-matching rows from right table are returned in addition to matching rows. +- `FULL OUTER JOIN`, non-matching rows from both tables are returned in addition to matching rows. +- `CROSS JOIN`, produces cartesian product of whole tables, “join keys” are **not** specified. `JOIN` without specified type implies `INNER`. Keyword `OUTER` can be safely omitted. Alternative syntax for `CROSS JOIN` is specifying multiple tables in [FROM clause](../../../sql-reference/statements/select/from.md) separated by commas. Additional join types available in ClickHouse: -- `LEFT SEMI JOIN` and `RIGHT SEMI JOIN`, a whitelist on “join keys”, without producing a cartesian product. -- `LEFT ANTI JOIN` and `RIGHT ANTI JOIN`, a blacklist on “join keys”, without producing a cartesian product. -- `LEFT ANY JOIN`, `RIGHT ANY JOIN` and `INNER ANY JOIN`, partially (for opposite side of `LEFT` and `RIGHT`) or completely (for `INNER` and `FULL`) disables the cartesian product for standard `JOIN` types. -- `ASOF JOIN` and `LEFT ASOF JOIN`, joining sequences with a non-exact match. `ASOF JOIN` usage is described below. +- `LEFT SEMI JOIN` and `RIGHT SEMI JOIN`, a whitelist on “join keys”, without producing a cartesian product. +- `LEFT ANTI JOIN` and `RIGHT ANTI JOIN`, a blacklist on “join keys”, without producing a cartesian product. +- `LEFT ANY JOIN`, `RIGHT ANY JOIN` and `INNER ANY JOIN`, partially (for opposite side of `LEFT` and `RIGHT`) or completely (for `INNER` and `FULL`) disables the cartesian product for standard `JOIN` types. +- `ASOF JOIN` and `LEFT ASOF JOIN`, joining sequences with a non-exact match. `ASOF JOIN` usage is described below. :::note When [join_algorithm](../../../operations/settings/settings.md#settings-join_algorithm) is set to `partial_merge`, `RIGHT JOIN` and `FULL JOIN` are supported only with `ALL` strictness (`SEMI`, `ANTI`, `ANY`, and `ASOF` are not supported). @@ -47,6 +51,7 @@ The default join type can be overridden using [join_default_strictness](../../.. The behavior of ClickHouse server for `ANY JOIN` operations depends on the [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys) setting. 
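A short sketch of the ClickHouse-specific `ANY` strictness, with invented `lhs` and `rhs` tables: at most one right-hand match is kept per left-hand row, so duplicated keys on the right do not multiply the result:

```sql
-- Invented tables; rhs deliberately carries a duplicated key.
CREATE TABLE lhs (k UInt32) ENGINE = Memory;
CREATE TABLE rhs (k UInt32, v String) ENGINE = Memory;
INSERT INTO lhs VALUES (1), (2);
INSERT INTO rhs VALUES (1, 'a'), (1, 'b');

-- A plain LEFT JOIN would return three rows here; ANY strictness keeps
-- at most one match per row of lhs, so exactly two rows come back.
SELECT l.k, r.v
FROM lhs AS l
LEFT ANY JOIN rhs AS r ON l.k = r.k;
```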
+ **See also** - [join_algorithm](../../../operations/settings/settings.md#settings-join_algorithm) @@ -57,6 +62,8 @@ The behavior of ClickHouse server for `ANY JOIN` operations depends on the [any_ - [join_on_disk_max_files_to_merge](../../../operations/settings/settings.md#join_on_disk_max_files_to_merge) - [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys) +Use the `cross_to_inner_join_rewrite` setting to define the behavior when ClickHouse fails to rewrite a `CROSS JOIN` as an `INNER JOIN`. The default value is `1`, which allows the join to continue, but it will be slower. Set `cross_to_inner_join_rewrite` to `0` if you want an error to be thrown, or set it to `2` to force a rewrite of all comma/cross joins instead of running them. If the rewriting fails when the value is `2`, you will receive an error message stating "Please, try to simplify `WHERE` section". + ## ON Section Conditions An `ON` section can contain several conditions combined using the `AND` and `OR` operators. Conditions specifying join keys must refer to both the left and right tables and must use the equality operator. Other conditions may use other logical operators, but they must refer to either the left or the right table of a query. @@ -159,9 +166,9 @@ Result: The algorithm requires a special column in the tables. This column: -- Must contain an ordered sequence. -- Can be one of the following types: [Int, UInt](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), [Date](../../../sql-reference/data-types/date.md), [DateTime](../../../sql-reference/data-types/datetime.md), [Decimal](../../../sql-reference/data-types/decimal.md). -- Can’t be the only column in the `JOIN` clause. +- Must contain an ordered sequence. +- Can be one of the following types: [Int, UInt](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), [Date](../../../sql-reference/data-types/date.md), [DateTime](../../../sql-reference/data-types/datetime.md), [Decimal](../../../sql-reference/data-types/decimal.md). +- Can’t be the only column in the `JOIN` clause. Syntax `ASOF JOIN ... ON`: @@ -191,7 +198,7 @@ For example, consider the following tables: table_1 table_2 event | ev_time | user_id event | ev_time | user_id - ----------|---------|---------- ----------|---------|---------- + ----------|---------|---------- ----------|---------|---------- ... ... event_1_1 | 12:00 | 42 event_2_1 | 11:59 | 42 ... event_2_2 | 12:30 | 42 @@ -208,8 +215,8 @@ For example, consider the following tables: There are two ways to execute a join involving distributed tables: -- When using a normal `JOIN`, the query is sent to remote servers. Subqueries are run on each of them in order to make the right table, and the join is performed with this table. In other words, the right table is formed on each server separately. -- When using `GLOBAL ... JOIN`, first the requestor server runs a subquery to calculate the right table. This temporary table is passed to each remote server, and queries are run on them using the temporary data that was transmitted. +- When using a normal `JOIN`, the query is sent to remote servers. Subqueries are run on each of them in order to make the right table, and the join is performed with this table. In other words, the right table is formed on each server separately. +- When using `GLOBAL ... JOIN`, first the requestor server runs a subquery to calculate the right table.
This temporary table is passed to each remote server, and queries are run on them using the temporary data that was transmitted. Be careful when using `GLOBAL`. For more information, see the [Distributed subqueries](../../../sql-reference/operators/in.md#select-distributed-subqueries) section. @@ -267,12 +274,12 @@ The `USING` clause specifies one or more columns to join, which establishes the For multiple `JOIN` clauses in a single `SELECT` query: -- Taking all the columns via `*` is available only if tables are joined, not subqueries. -- The `PREWHERE` clause is not available. +- Taking all the columns via `*` is available only if tables are joined, not subqueries. +- The `PREWHERE` clause is not available. For `ON`, `WHERE`, and `GROUP BY` clauses: -- Arbitrary expressions cannot be used in `ON`, `WHERE`, and `GROUP BY` clauses, but you can define an expression in a `SELECT` clause and then use it in these clauses via an alias. +- Arbitrary expressions cannot be used in `ON`, `WHERE`, and `GROUP BY` clauses, but you can define an expression in a `SELECT` clause and then use it in these clauses via an alias. ### Performance @@ -282,7 +289,7 @@ Each time a query is run with the same `JOIN`, the subquery is run again because In some cases, it is more efficient to use [IN](../../../sql-reference/operators/in.md) instead of `JOIN`. -If you need a `JOIN` for joining with dimension tables (these are relatively small tables that contain dimension properties, such as names for advertising campaigns), a `JOIN` might not be very convenient due to the fact that the right table is re-accessed for every query. For such cases, there is a “dictionaries” feature that you should use instead of `JOIN`. For more information, see the [Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) section. +If you need a `JOIN` for joining with dimension tables (these are relatively small tables that contain dimension properties, such as names for advertising campaigns), a `JOIN` might not be very convenient due to the fact that the right table is re-accessed for every query. For such cases, there is a “dictionaries” feature that you should use instead of `JOIN`. For more information, see the [Dictionaries](../../../sql-reference/dictionaries/index.md) section. ### Memory Limitations @@ -290,8 +297,8 @@ By default, ClickHouse uses the [hash join](https://en.wikipedia.org/wiki/Hash_j If you need to restrict `JOIN` operation memory consumption use the following settings: -- [max_rows_in_join](../../../operations/settings/query-complexity.md#settings-max_rows_in_join) — Limits number of rows in the hash table. -- [max_bytes_in_join](../../../operations/settings/query-complexity.md#settings-max_bytes_in_join) — Limits size of the hash table. +- [max_rows_in_join](../../../operations/settings/query-complexity.md#settings-max_rows_in_join) — Limits number of rows in the hash table. +- [max_bytes_in_join](../../../operations/settings/query-complexity.md#settings-max_bytes_in_join) — Limits size of the hash table. When any of these limits is reached, ClickHouse acts as the [join_overflow_mode](../../../operations/settings/query-complexity.md#settings-join_overflow_mode) setting instructs. 
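As a hedged illustration of these limits (the values and table names are arbitrary), one can cap the hash table per query and pick the overflow behavior:

```sql
SET max_rows_in_join = 1000000;    -- cap on rows in the right-table hash table
SET join_overflow_mode = 'break';  -- 'throw' (default) raises an error, 'break' returns a partial result
SELECT count() FROM t1 INNER JOIN t2 ON t1.id = t2.id;
```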
diff --git a/docs/en/sql-reference/statements/select/limit-by.md b/docs/en/sql-reference/statements/select/limit-by.md index 28f3d7e86d7..4cfc56ecbf9 100644 --- a/docs/en/sql-reference/statements/select/limit-by.md +++ b/docs/en/sql-reference/statements/select/limit-by.md @@ -9,8 +9,8 @@ A query with the `LIMIT n BY expressions` clause selects the first `n` rows for ClickHouse supports the following syntax variants: -- `LIMIT [offset_value, ]n BY expressions` -- `LIMIT n OFFSET offset_value BY expressions` +- `LIMIT [offset_value, ]n BY expressions` +- `LIMIT n OFFSET offset_value BY expressions` During query processing, ClickHouse selects data ordered by sorting key. The sorting key is set explicitly using an [ORDER BY](order-by.md#select-order-by) clause or implicitly as a property of the table engine (row order is only guaranteed when using [ORDER BY](order-by.md#select-order-by), otherwise the row blocks will not be ordered due to multi-threading). Then ClickHouse applies `LIMIT n BY expressions` and returns the first `n` rows for each distinct combination of `expressions`. If `OFFSET` is specified, then for each data block that belongs to a distinct combination of `expressions`, ClickHouse skips `offset_value` number of rows from the beginning of the block and returns a maximum of `n` rows as a result. If `offset_value` is bigger than the number of rows in the data block, ClickHouse returns zero rows from the block. diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md index e231a1cc72c..712395a0357 100644 --- a/docs/en/sql-reference/statements/select/order-by.md +++ b/docs/en/sql-reference/statements/select/order-by.md @@ -16,8 +16,8 @@ If the ORDER BY clause is omitted, the order of the rows is also undefined, and There are two approaches to `NaN` and `NULL` sorting order: -- By default or with the `NULLS LAST` modifier: first the values, then `NaN`, then `NULL`. -- With the `NULLS FIRST` modifier: first `NULL`, then `NaN`, then other values. +- By default or with the `NULLS LAST` modifier: first the values, then `NaN`, then `NULL`. +- With the `NULLS FIRST` modifier: first `NULL`, then `NaN`, then other values. ### Example @@ -544,6 +544,54 @@ Result: └─────┴──────────┴───────┘ ``` +## Filling grouped by sorting prefix + +It is sometimes useful to fill rows that have the same values in particular columns independently of each other; a good example is filling missing values in time series. +Assume there is the following time series table: +``` sql +CREATE TABLE timeseries +( + `sensor_id` UInt64, + `timestamp` DateTime64(3, 'UTC'), + `value` Float64 +) +ENGINE = Memory; + +SELECT * FROM timeseries; + +┌─sensor_id─┬───────────────timestamp─┬─value─┐ +│ 234 │ 2021-12-01 00:00:03.000 │ 3 │ +│ 432 │ 2021-12-01 00:00:01.000 │ 1 │ +│ 234 │ 2021-12-01 00:00:07.000 │ 7 │ +│ 432 │ 2021-12-01 00:00:05.000 │ 5 │ +└───────────┴─────────────────────────┴───────┘ +``` +We'd like to fill the missing values for each sensor independently, with a 1 second interval.
+The way to achieve this is to use the `sensor_id` column as a sorting prefix for the filling column `timestamp`: +``` sql +SELECT * +FROM timeseries +ORDER BY + sensor_id, + timestamp WITH FILL +INTERPOLATE ( value AS 9999 ) + +┌─sensor_id─┬───────────────timestamp─┬─value─┐ +│ 234 │ 2021-12-01 00:00:03.000 │ 3 │ +│ 234 │ 2021-12-01 00:00:04.000 │ 9999 │ +│ 234 │ 2021-12-01 00:00:05.000 │ 9999 │ +│ 234 │ 2021-12-01 00:00:06.000 │ 9999 │ +│ 234 │ 2021-12-01 00:00:07.000 │ 7 │ +│ 432 │ 2021-12-01 00:00:01.000 │ 1 │ +│ 432 │ 2021-12-01 00:00:02.000 │ 9999 │ +│ 432 │ 2021-12-01 00:00:03.000 │ 9999 │ +│ 432 │ 2021-12-01 00:00:04.000 │ 9999 │ +│ 432 │ 2021-12-01 00:00:05.000 │ 5 │ +└───────────┴─────────────────────────┴───────┘ +``` +Here, the `value` column was interpolated with `9999` just to make the filled rows more noticeable. +This behavior is controlled by the setting `use_with_fill_by_sorting_prefix` (enabled by default). + ## Related content - Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) diff --git a/docs/en/sql-reference/statements/select/sample.md b/docs/en/sql-reference/statements/select/sample.md index fb44d7c5a44..137f86cc8b9 100644 --- a/docs/en/sql-reference/statements/select/sample.md +++ b/docs/en/sql-reference/statements/select/sample.md @@ -11,9 +11,9 @@ When data sampling is enabled, the query is not performed on all the data, but o Approximated query processing can be useful in the following cases: -- When you have strict latency requirements (like below 100ms) but you can’t justify the cost of additional hardware resources to meet them. -- When your raw data is not accurate, so approximation does not noticeably degrade the quality. -- Business requirements target approximate results (for cost-effectiveness, or to market exact results to premium users). +- When you have strict latency requirements (like below 100ms) but you can’t justify the cost of additional hardware resources to meet them. +- When your raw data is not accurate, so approximation does not noticeably degrade the quality. +- Business requirements target approximate results (for cost-effectiveness, or to market exact results to premium users). :::note You can only use sampling with the tables in the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family, and only if the sampling expression was specified during table creation (see [MergeTree engine](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table)). @@ -21,9 +21,9 @@ You can only use sampling with the tables in the [MergeTree](../../../engines/ta The features of data sampling are listed below: -- Data sampling is a deterministic mechanism. The result of the same `SELECT .. SAMPLE` query is always the same. -- Sampling works consistently for different tables. For tables with a single sampling key, a sample with the same coefficient always selects the same subset of possible data. For example, a sample of user IDs takes rows with the same subset of all the possible user IDs from different tables. This means that you can use the sample in subqueries in the [IN](../../../sql-reference/operators/in.md) clause. Also, you can join samples using the [JOIN](../../../sql-reference/statements/select/join.md) clause. -- Sampling allows reading less data from a disk. Note that you must specify the sampling key correctly.
For more information, see [Creating a MergeTree Table](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table). +- Data sampling is a deterministic mechanism. The result of the same `SELECT .. SAMPLE` query is always the same. +- Sampling works consistently for different tables. For tables with a single sampling key, a sample with the same coefficient always selects the same subset of possible data. For example, a sample of user IDs takes rows with the same subset of all the possible user IDs from different tables. This means that you can use the sample in subqueries in the [IN](../../../sql-reference/operators/in.md) clause. Also, you can join samples using the [JOIN](../../../sql-reference/statements/select/join.md) clause. +- Sampling allows reading less data from a disk. Note that you must specify the sampling key correctly. For more information, see [Creating a MergeTree Table](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table). For the `SAMPLE` clause the following syntax is supported: @@ -34,7 +34,7 @@ For the `SAMPLE` clause the following syntax is supported: | `SAMPLE k OFFSET m` | Here `k` and `m` are the numbers from 0 to 1. The query is executed on a sample of `k` fraction of the data. The data used for the sample is offset by `m` fraction. [Read more](#select-sample-offset) | -## SAMPLE K +## SAMPLE K {#select-sample-k} Here `k` is the number from 0 to 1 (both fractional and decimal notations are supported). For example, `SAMPLE 1/2` or `SAMPLE 0.5`. @@ -54,7 +54,7 @@ ORDER BY PageViews DESC LIMIT 1000 In this example, the query is executed on a sample from 0.1 (10%) of data. Values of aggregate functions are not corrected automatically, so to get an approximate result, the value `count()` is manually multiplied by 10. -## SAMPLE N +## SAMPLE N {#select-sample-n} Here `n` is a sufficiently large integer. For example, `SAMPLE 10000000`. @@ -90,7 +90,7 @@ FROM visits SAMPLE 10000000 ``` -## SAMPLE K OFFSET M +## SAMPLE K OFFSET M {#select-sample-offset} Here `k` and `m` are numbers from 0 to 1. Examples are shown below. diff --git a/docs/en/sql-reference/statements/select/union.md b/docs/en/sql-reference/statements/select/union.md index 002aeaa4488..92a4ed1bb20 100644 --- a/docs/en/sql-reference/statements/select/union.md +++ b/docs/en/sql-reference/statements/select/union.md @@ -81,8 +81,8 @@ Queries that are parts of `UNION/UNION ALL/UNION DISTINCT` can be run simultaneo **See Also** -- [insert_null_as_default](../../../operations/settings/settings.md#insert_null_as_default) setting. -- [union_default_mode](../../../operations/settings/settings.md#union-default-mode) setting. +- [insert_null_as_default](../../../operations/settings/settings.md#insert_null_as_default) setting. +- [union_default_mode](../../../operations/settings/settings.md#union-default-mode) setting. [Original article](https://clickhouse.com/docs/en/sql-reference/statements/select/union/) diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md index 18b019dd017..21c0010498a 100644 --- a/docs/en/sql-reference/statements/show.md +++ b/docs/en/sql-reference/statements/show.md @@ -6,13 +6,22 @@ sidebar_label: SHOW # SHOW Statements -## SHOW CREATE TABLE +N.B. 
`SHOW CREATE (TABLE|DATABASE|USER)` hides secrets unless +the [`display_secrets_in_show_and_select` server setting](../../operations/server-configuration-parameters/settings#display_secrets_in_show_and_select) +is turned on, +the [`format_display_secrets_in_show_and_select` format setting](../../operations/settings/formats#format_display_secrets_in_show_and_select) +is turned on, and the user has the +[`displaySecretsInShowAndSelect`](grant.md#grant-display-secrets) privilege. + +## SHOW CREATE TABLE | DICTIONARY | VIEW | DATABASE ``` sql -SHOW CREATE [TEMPORARY] [TABLE|DICTIONARY|VIEW] [db.]table|view [INTO OUTFILE filename] [FORMAT format] +SHOW [CREATE] [TEMPORARY] TABLE|DICTIONARY|VIEW|DATABASE [db.]table|view [INTO OUTFILE filename] [FORMAT format] ``` -Returns a single `String`-type ‘statement’ column, which contains a single value – the `CREATE` query used for creating the specified object. +Returns a single column of type String containing the CREATE query used for creating the specified object. + +`SHOW TABLE t` and `SHOW DATABASE db` have the same meaning as `SHOW CREATE TABLE|DATABASE t|db`, but `SHOW t` and `SHOW db` are not supported. Note that if you use this statement to get the `CREATE` query of system tables, you will get a *fake* query, which only declares the table structure but cannot be used to create a table. @@ -21,16 +30,16 @@ Note that if you use this statement to get `CREATE` query of system tables, you Prints a list of all databases. ```sql -SHOW DATABASES [LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE filename] [FORMAT format] +SHOW DATABASES [[NOT] LIKE | ILIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE filename] [FORMAT format] ``` This statement is identical to the query: ```sql -SELECT name FROM system.databases [WHERE name LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE filename] [FORMAT format] +SELECT name FROM system.databases [WHERE name [NOT] LIKE | ILIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE filename] [FORMAT format] ``` -### Examples +**Examples** Getting database names containing the symbol sequence 'de' in their names: @@ -92,32 +101,16 @@ Result: └────────────────────────────────┘ ``` -### See Also +**See also** -- [CREATE DATABASE](https://clickhouse.com/docs/en/sql-reference/statements/create/database/#query-language-create-database) - -## SHOW PROCESSLIST - -``` sql -SHOW PROCESSLIST [INTO OUTFILE filename] [FORMAT format] -``` - -Outputs the content of the [system.processes](../../operations/system-tables/processes.md#system_tables-processes) table, that contains a list of queries that is being processed at the moment, excepting `SHOW PROCESSLIST` queries. - -The `SELECT * FROM system.processes` query returns data about all the current queries. - -Tip (execute in the console): - -``` bash -$ watch -n1 "clickhouse-client --query='SHOW PROCESSLIST'" -``` +- [CREATE DATABASE](https://clickhouse.com/docs/en/sql-reference/statements/create/database/#query-language-create-database) ## SHOW TABLES Displays a list of tables. ```sql -SHOW [TEMPORARY] TABLES [{FROM | IN} <db>] [LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>] +SHOW [FULL] [TEMPORARY] TABLES [{FROM | IN} <db>] [[NOT] LIKE | ILIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>] ``` If the `FROM` clause is not specified, the query returns the list of tables from the current database.
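A hedged example of the `[NOT] LIKE` form documented above (the pattern is arbitrary), listing a few `system` tables whose names do not match it:

```sql
SHOW TABLES FROM system NOT LIKE '%log%' LIMIT 5
```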
@@ -125,10 +118,10 @@ If the `FROM` clause is not specified, the query returns the list of tables from This statement is identical to the query: ```sql -SELECT name FROM system.tables [WHERE name LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>] +SELECT name FROM system.tables [WHERE name [NOT] LIKE | ILIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>] ``` -### Examples +**Examples** Getting table names containing the symbol sequence 'user' in their names: @@ -191,14 +184,62 @@ Result: └────────────────────────────────┘ ``` -### See Also +**See also** -- [Create Tables](https://clickhouse.com/docs/en/getting-started/tutorial/#create-tables) -- [SHOW CREATE TABLE](https://clickhouse.com/docs/en/sql-reference/statements/show/#show-create-table) +- [Create Tables](https://clickhouse.com/docs/en/getting-started/tutorial/#create-tables) +- [SHOW CREATE TABLE](https://clickhouse.com/docs/en/sql-reference/statements/show/#show-create-table) + +## SHOW COLUMNS + +Displays a list of columns. + +```sql +SHOW [EXTENDED] [FULL] COLUMNS {FROM | IN} <table>
[{FROM | IN} <db>] [{[NOT] {LIKE | ILIKE} '<pattern>' | WHERE <expr>}] [LIMIT <N>] [INTO +OUTFILE <filename>] [FORMAT <format>] +``` + +The database and table name can be specified in abbreviated form as `<db>.<table>
`, i.e. `FROM tab FROM db` and `FROM db.tab` are +equivalent. If no database is specified, the query returns the list of columns from the current database. + +The optional keyword `EXTENDED` currently has no effect; it only exists for MySQL compatibility. + +The optional keyword `FULL` causes the output to include the collation, comment, and privilege columns. + +`SHOW COLUMNS` produces a result table with the following structure: +- field - The name of the column (String) +- type - The column data type (String) +- null - If the column data type is Nullable (UInt8) +- key - `PRI` if the column is part of the primary key, `SOR` if the column is part of the sorting key, empty otherwise (String) +- default - Default expression of the column if it is of type `ALIAS`, `DEFAULT`, or `MATERIALIZED`, otherwise `NULL`. (Nullable(String)) +- extra - Additional information, currently unused (String) +- collation - (only if `FULL` keyword was specified) Collation of the column, always `NULL` because ClickHouse has no per-column collations (Nullable(String)) +- comment - (only if `FULL` keyword was specified) Comment on the column (String) +- privilege - (only if `FULL` keyword was specified) The privilege you have on this column, currently not available (String) + +**Examples** + +Getting information about all columns in the table 'orders' whose names start with 'delivery_': + +```sql +SHOW COLUMNS FROM 'orders' LIKE 'delivery_%' +``` + +Result: + +``` text +┌─field───────────┬─type─────┬─null─┬─key─────┬─default─┬─extra─┐ +│ delivery_date │ DateTime │ 0 │ PRI SOR │ ᴺᵁᴸᴸ │ │ +│ delivery_status │ Bool │ 0 │ │ ᴺᵁᴸᴸ │ │ +└─────────────────┴──────────┴──────┴─────────┴─────────┴───────┘ +``` + +**See also** + +- [system.columns](https://clickhouse.com/docs/en/operations/system-tables/columns) ## SHOW DICTIONARIES -Displays a list of [Dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md). +Displays a list of [Dictionaries](../../sql-reference/dictionaries/index.md). ``` sql SHOW DICTIONARIES [FROM <db>] [LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>] @@ -212,7 +253,7 @@ You can get the same results as the `SHOW DICTIONARIES` query in the following w SELECT name FROM system.dictionaries WHERE database = <db> [AND name LIKE <pattern>] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>] ``` -**Example** +**Examples** The following query selects the first two rows from the list of dictionaries in the `db` database, whose names contain `reg`. @@ -227,11 +268,82 @@ SHOW DICTIONARIES FROM db LIKE '%reg%' LIMIT 2 └──────────────┘ ``` +## SHOW INDEX + +Displays a list of primary and data skipping indexes of a table. + +```sql +SHOW [EXTENDED] {INDEX | INDEXES | KEYS } {FROM | IN} <table>
[{FROM | IN} <db>] [WHERE <expr>] [INTO OUTFILE <filename>] [FORMAT <format>] +``` + +The database and table name can be specified in abbreviated form as `<db>.<table>
`, i.e. `FROM tab FROM db` and `FROM db.tab` are +equivalent. If no database is specified, the query assumes the current database as database. + +The optional keyword `EXTENDED` currently has no effect, it only exists for MySQL compatibility. + +`SHOW INDEX` produces a result table with the following structure: +- table - The name of the table (String) +- non_unique - 0 if the index can contain duplicates, 1 otherwise (UInt8) +- key_name - The name of the index, `PRIMARY` if the index is a primary key index (String) +- seq_in_index - Currently unused +- column_name - Currently unused +- collation - The sorting of the column in the index, `A` if ascending, `D` if descending, `NULL` if unsorted (Nullable(String)) +- cardinality - Currently unused +- sub_part - Currently unused +- packed - Currently unused +- null - Currently unused +- index_type - The index type, e.g. `primary`, `minmax`, `bloom_filter` etc. (String) +- comment - Currently unused +- index_comment - Currently unused +- visible - If the index is visible to the optimizer, always `YES` (String) +- expression - The index expression (String) + +**Examples** + +Getting information about all indexes in table 'tbl' + +```sql +SHOW INDEX FROM 'tbl' +``` + +Result: + +``` text +┌─table─┬─non_unique─┬─key_name─┬─seq_in_index─┬─column_name─┬─collation─┬─cardinality─┬─sub_part─┬─packed─┬─null─┬─index_type───┬─comment─┬─index_comment─┬─visible─┬─expression─┐ +│ tbl │ 0 │ blf_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ bloom_filter │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ d, b │ +│ tbl │ 0 │ mm1_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ minmax │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ a, c, d │ +│ tbl │ 0 │ mm2_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ minmax │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ c, d, e │ +│ tbl │ 0 │ PRIMARY │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ A │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ primary │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ c, a │ +│ tbl │ 0 │ set_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ set │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ e │ +└───────┴────────────┴──────────┴──────────────┴─────────────┴───────────┴─────────────┴──────────┴────────┴──────┴──────────────┴─────────┴───────────────┴─────────┴────────────┘ +``` + +**See also** + +- [system.tables](../../operations/system-tables/tables.md) +- [system.data_skipping_indices](../../operations/system-tables/data_skipping_indices.md) + +## SHOW PROCESSLIST + +``` sql +SHOW PROCESSLIST [INTO OUTFILE filename] [FORMAT format] +``` + +Outputs the content of the [system.processes](../../operations/system-tables/processes.md#system_tables-processes) table, that contains a list of queries that is being processed at the moment, excepting `SHOW PROCESSLIST` queries. + +The `SELECT * FROM system.processes` query returns data about all the current queries. + +Tip (execute in the console): + +``` bash +$ watch -n1 "clickhouse-client --query='SHOW PROCESSLIST'" +``` + ## SHOW GRANTS Shows privileges for a user. -### Syntax +**Syntax** ``` sql SHOW GRANTS [FOR user1 [, user2 ...]] @@ -243,9 +355,7 @@ If user is not specified, the query returns privileges for the current user. Shows parameters that were used at a [user creation](../../sql-reference/statements/create/user.md). -`SHOW CREATE USER` does not output user passwords. - -### Syntax +**Syntax** ``` sql SHOW CREATE USER [name1 [, name2 ...] | CURRENT_USER] @@ -255,7 +365,7 @@ SHOW CREATE USER [name1 [, name2 ...] | CURRENT_USER] Shows parameters that were used at a [role creation](../../sql-reference/statements/create/role.md). 
-### Syntax +**Syntax** ``` sql SHOW CREATE ROLE name1 [, name2 ...] @@ -265,7 +375,7 @@ SHOW CREATE ROLE name1 [, name2 ...] Shows parameters that were used at a [row policy creation](../../sql-reference/statements/create/row-policy.md). -### Syntax +**Syntax** ``` sql SHOW CREATE [ROW] POLICY name ON [database1.]table1 [, [database2.]table2 ...] @@ -275,7 +385,7 @@ SHOW CREATE [ROW] POLICY name ON [database1.]table1 [, [database2.]table2 ...] Shows parameters that were used at a [quota creation](../../sql-reference/statements/create/quota.md). -### Syntax +**Syntax** ``` sql SHOW CREATE QUOTA [name1 [, name2 ...] | CURRENT] @@ -285,7 +395,7 @@ SHOW CREATE QUOTA [name1 [, name2 ...] | CURRENT] Shows parameters that were used at a [settings profile creation](../../sql-reference/statements/create/settings-profile.md). -### Syntax +**Syntax** ``` sql SHOW CREATE [SETTINGS] PROFILE name1 [, name2 ...] @@ -293,9 +403,9 @@ SHOW CREATE [SETTINGS] PROFILE name1 [, name2 ...] ## SHOW USERS -Returns a list of [user account](../../operations/access-rights.md#user-account-management) names. To view user accounts parameters, see the system table [system.users](../../operations/system-tables/users.md#system_tables-users). +Returns a list of [user account](../../guides/sre/user-management/index.md#user-account-management) names. To view user account parameters, see the system table [system.users](../../operations/system-tables/users.md#system_tables-users). -### Syntax +**Syntax** ``` sql SHOW USERS @@ -303,18 +413,18 @@ SHOW USERS ## SHOW ROLES -Returns a list of [roles](../../operations/access-rights.md#role-management). To view another parameters, see system tables [system.roles](../../operations/system-tables/roles.md#system_tables-roles) and [system.role_grants](../../operations/system-tables/role-grants.md#system_tables-role_grants). +Returns a list of [roles](../../guides/sre/user-management/index.md#role-management). To view other parameters, see the system tables [system.roles](../../operations/system-tables/roles.md#system_tables-roles) and [system.role_grants](../../operations/system-tables/role-grants.md#system_tables-role_grants). -### Syntax +**Syntax** ``` sql SHOW [CURRENT|ENABLED] ROLES ``` ## SHOW PROFILES -Returns a list of [setting profiles](../../operations/access-rights.md#settings-profiles-management). To view user accounts parameters, see the system table [settings_profiles](../../operations/system-tables/settings_profiles.md#system_tables-settings_profiles). +Returns a list of [setting profiles](../../guides/sre/user-management/index.md#settings-profiles-management). To view user account parameters, see the system table [settings_profiles](../../operations/system-tables/settings_profiles.md#system_tables-settings_profiles). -### Syntax +**Syntax** ``` sql SHOW [SETTINGS] PROFILES @@ -322,9 +432,9 @@ SHOW [SETTINGS] PROFILES ## SHOW POLICIES -Returns a list of [row policies](../../operations/access-rights.md#row-policy-management) for the specified table. To view user accounts parameters, see the system table [system.row_policies](../../operations/system-tables/row_policies.md#system_tables-row_policies). +Returns a list of [row policies](../../guides/sre/user-management/index.md#row-policy-management) for the specified table. To view user account parameters, see the system table [system.row_policies](../../operations/system-tables/row_policies.md#system_tables-row_policies).
-### Syntax +**Syntax** ``` sql SHOW [ROW] POLICIES [ON [db.]table] @@ -332,9 +442,9 @@ SHOW [ROW] POLICIES [ON [db.]table] ## SHOW QUOTAS -Returns a list of [quotas](../../operations/access-rights.md#quotas-management). To view quotas parameters, see the system table [system.quotas](../../operations/system-tables/quotas.md#system_tables-quotas). +Returns a list of [quotas](../../guides/sre/user-management/index.md#quotas-management). To view quota parameters, see the system table [system.quotas](../../operations/system-tables/quotas.md#system_tables-quotas). -### Syntax +**Syntax** ``` sql SHOW QUOTAS @@ -344,16 +454,16 @@ SHOW QUOTAS Returns [quota](../../operations/quotas.md) consumption for all users or for the current user. To view other parameters, see the system tables [system.quotas_usage](../../operations/system-tables/quotas_usage.md#system_tables-quotas_usage) and [system.quota_usage](../../operations/system-tables/quota_usage.md#system_tables-quota_usage). -### Syntax +**Syntax** ``` sql SHOW [CURRENT] QUOTA ``` ## SHOW ACCESS -Shows all [users](../../operations/access-rights.md#user-account-management), [roles](../../operations/access-rights.md#role-management), [profiles](../../operations/access-rights.md#settings-profiles-management), etc. and all their [grants](../../sql-reference/statements/grant.md#grant-privileges). +Shows all [users](../../guides/sre/user-management/index.md#user-account-management), [roles](../../guides/sre/user-management/index.md#role-management), [profiles](../../guides/sre/user-management/index.md#settings-profiles-management), etc. and all their [grants](../../sql-reference/statements/grant.md#grant-privileges). -### Syntax +**Syntax** ``` sql SHOW ACCESS @@ -366,13 +476,14 @@ Returns a list of clusters. All available clusters are listed in the [system.clu The `SHOW CLUSTER name` query displays the contents of the system.clusters table for this cluster. ::: -### Syntax +**Syntax** ``` sql SHOW CLUSTER '<name>' -SHOW CLUSTERS [LIKE|NOT LIKE '<pattern>'] [LIMIT <N>] +SHOW CLUSTERS [[NOT] LIKE|ILIKE '<pattern>'] [LIMIT <N>] ``` + +**Examples** Query: @@ -509,7 +620,7 @@ Result: **See Also** -- [system.settings](../../operations/system-tables/settings.md) table +- [system.settings](../../operations/system-tables/settings.md) table ## SHOW ENGINES @@ -521,4 +632,4 @@ Outputs the content of the [system.table_engines](../../operations/system-tables **See Also** -- [system.table_engines](../../operations/system-tables/table_engines.md) table \ No newline at end of file +- [system.table_engines](../../operations/system-tables/table_engines.md) table diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index f9f55acfcec..c5596b7ba5f 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -8,7 +8,7 @@ sidebar_label: SYSTEM ## RELOAD EMBEDDED DICTIONARIES -Reload all [Internal dictionaries](../../sql-reference/dictionaries/internal-dicts.md). +Reload all [Internal dictionaries](../../sql-reference/dictionaries/index.md). By default, internal dictionaries are disabled. Always returns `Ok.` regardless of the result of the internal dictionary update. @@ -76,7 +76,7 @@ Resets the mark cache.
## DROP REPLICA -Dead replicas can be dropped using following syntax: +Dead replicas of `ReplicatedMergeTree` tables can be dropped using the following syntax: ``` sql SYSTEM DROP REPLICA 'replica_name' FROM TABLE database.table; @@ -85,13 +85,25 @@ SYSTEM DROP REPLICA 'replica_name'; SYSTEM DROP REPLICA 'replica_name' FROM ZKPATH '/path/to/table/in/zk'; ``` -Queries will remove the replica path in ZooKeeper. It is useful when the replica is dead and its metadata cannot be removed from ZooKeeper by `DROP TABLE` because there is no such table anymore. It will only drop the inactive/stale replica, and it cannot drop local replica, please use `DROP TABLE` for that. `DROP REPLICA` does not drop any tables and does not remove any data or metadata from disk. +Queries will remove the `ReplicatedMergeTree` replica path in ZooKeeper. It is useful when the replica is dead and its metadata cannot be removed from ZooKeeper by `DROP TABLE` because there is no such table anymore. It will only drop the inactive/stale replica, and it cannot drop the local replica; use `DROP TABLE` for that. `DROP REPLICA` does not drop any tables and does not remove any data or metadata from disk. The first one removes metadata of `'replica_name'` replica of `database.table` table. The second one does the same for all replicated tables in the database. The third one does the same for all replicated tables on the local server. The fourth one is useful to remove metadata of a dead replica when all other replicas of a table were dropped. It requires the table path to be specified explicitly. It must be the same path as was passed to the first argument of the `ReplicatedMergeTree` engine on table creation. +## DROP DATABASE REPLICA + +Dead replicas of `Replicated` databases can be dropped using the following syntax: + +``` sql +SYSTEM DROP DATABASE REPLICA 'replica_name' [FROM SHARD 'shard_name'] FROM DATABASE database; +SYSTEM DROP DATABASE REPLICA 'replica_name' [FROM SHARD 'shard_name']; +SYSTEM DROP DATABASE REPLICA 'replica_name' [FROM SHARD 'shard_name'] FROM ZKPATH '/path/to/table/in/zk'; +``` + +Similar to `SYSTEM DROP REPLICA`, but removes the `Replicated` database replica path from ZooKeeper when there's no database to run `DROP DATABASE`. Please note that it does not remove `ReplicatedMergeTree` replicas (so you may need `SYSTEM DROP REPLICA` as well). Shard and replica names are the names that were specified in the `Replicated` engine arguments when creating the database. Also, these names can be obtained from the `database_shard_name` and `database_replica_name` columns in `system.clusters`. If the `FROM SHARD` clause is missing, then `replica_name` must be a full replica name in `shard_name|replica_name` format. + ## DROP UNCOMPRESSED CACHE Reset the uncompressed data cache. @@ -114,11 +126,11 @@ This will also create system tables even if message queue is empty. ## RELOAD CONFIG -Reloads ClickHouse configuration. Used when configuration is stored in ZooKeeper. +Reloads ClickHouse configuration. Used when configuration is stored in ZooKeeper. Note that `SYSTEM RELOAD CONFIG` does not reload `USER` configuration stored in ZooKeeper; it only reloads `USER` configuration that is stored in `users.xml`. To reload all `USER` configuration, use `SYSTEM RELOAD USERS`. ## RELOAD USERS -Reloads all access storages, including: users.xml, local disk access storage, replicated (in ZooKeeper) access storage. Note that `SYSTEM RELOAD CONFIG` will only reload users.xml access storage.
+Reloads all access storages, including: users.xml, local disk access storage, replicated (in ZooKeeper) access storage. ## SHUTDOWN @@ -224,6 +236,14 @@ Clears freezed backup with the specified name from all the disks. See more about SYSTEM UNFREEZE WITH NAME <backup_name> ``` +### WAIT LOADING PARTS + +Wait until all asynchronously loading data parts of a table (outdated data parts) are loaded. + +``` sql +SYSTEM WAIT LOADING PARTS [db.]merge_tree_family_table_name +``` + ## Managing ReplicatedMergeTree Tables ClickHouse can manage background replication related processes in [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md#table_engines-replication) tables. @@ -280,13 +300,17 @@ SYSTEM START REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name] ### SYNC REPLICA -Wait until a `ReplicatedMergeTree` table will be synced with other replicas in a cluster. Will run until `receive_timeout` if fetches currently disabled for the table. +Wait until a `ReplicatedMergeTree` table is synced with other replicas in a cluster, but for no more than `receive_timeout` seconds. ``` sql -SYSTEM SYNC REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_table_name +SYSTEM SYNC REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT | PULL] ``` -After running this statement the `[db.]replicated_merge_tree_family_table_name` fetches commands from the common replicated log into its own replication queue, and then the query waits till the replica processes all of the fetched commands. +After running this statement, the `[db.]replicated_merge_tree_family_table_name` fetches commands from the common replicated log into its own replication queue, and then the query waits until the replica processes all of the fetched commands. The following modifiers are supported: + + - If a `STRICT` modifier was specified, then the query waits for the replication queue to become empty. The `STRICT` version may never succeed if new entries constantly appear in the replication queue. + - If a `LIGHTWEIGHT` modifier was specified, then the query waits only for `GET_PART`, `ATTACH_PART`, `DROP_RANGE`, `REPLACE_RANGE` and `DROP_PART` entries to be processed. + - If a `PULL` modifier was specified, then the query pulls new replication queue entries from ZooKeeper, but does not wait for anything to be processed. ### RESTART REPLICA @@ -312,7 +336,7 @@ One may execute query after: Replica attaches locally found parts and sends info about them to Zookeeper. Parts present on a replica before metadata loss are not re-fetched from other ones if not being outdated (so replica restoration does not mean re-downloading all data over the network). -:::warning +:::note Parts in all states are moved to `detached/` folder. Parts active before data loss (committed) are attached. ::: @@ -369,7 +393,7 @@ SYSTEM DROP FILESYSTEM CACHE It's too heavy and has potential for misuse. ::: -Will do sync syscall. +Performs the `sync` syscall. ```sql SYSTEM SYNC FILE CACHE diff --git a/docs/en/sql-reference/statements/undrop.md b/docs/en/sql-reference/statements/undrop.md new file mode 100644 index 00000000000..40ac1ab4f99 --- /dev/null +++ b/docs/en/sql-reference/statements/undrop.md @@ -0,0 +1,99 @@ +--- +slug: /en/sql-reference/statements/undrop +sidebar_label: UNDROP +--- + +# UNDROP TABLE + +Cancels the dropping of the table.
+ +Beginning with ClickHouse version 23.3 it is possible to UNDROP a table in an Atomic database +within `database_atomic_delay_before_drop_table_sec` (8 minutes by default) of issuing the DROP TABLE statement. Dropped tables are listed in +a system table called `system.dropped_tables`. + +If you have a materialized view without a `TO` clause associated with the dropped table, then you will also have to UNDROP the inner table of that view. + +:::note +UNDROP TABLE is experimental. To use it add this setting: +```sql +set allow_experimental_undrop_table_query = 1; +``` +::: + +:::tip +Also see [DROP TABLE](/docs/en/sql-reference/statements/drop.md) +::: + +Syntax: + +``` sql +UNDROP TABLE [db.]name [UUID ''] [ON CLUSTER cluster] +``` + +**Example** + +``` sql +set allow_experimental_undrop_table_query = 1; +``` + +```sql +CREATE TABLE undropMe +( + `id` UInt8 +) +ENGINE = MergeTree +ORDER BY id +``` + +```sql +DROP TABLE undropMe +``` +```sql +SELECT * +FROM system.dropped_tables +FORMAT Vertical +``` +```response +Row 1: +────── +index: 0 +database: default +table: undropMe +uuid: aa696a1a-1d70-4e60-a841-4c80827706cc +engine: MergeTree +metadata_dropped_path: /var/lib/clickhouse/metadata_dropped/default.undropMe.aa696a1a-1d70-4e60-a841-4c80827706cc.sql +table_dropped_time: 2023-04-05 14:12:12 + +1 row in set. Elapsed: 0.001 sec. +``` +```sql +UNDROP TABLE undropMe +``` +```response +Ok. +``` +```sql +SELECT * +FROM system.dropped_tables +FORMAT Vertical +``` +```response +Ok. + +0 rows in set. Elapsed: 0.001 sec. +``` +```sql +DESCRIBE TABLE undropMe +FORMAT Vertical +``` +```response +Row 1: +────── +name: id +type: UInt8 +default_type: +default_expression: +comment: +codec_expression: +ttl_expression: +``` diff --git a/docs/en/sql-reference/statements/watch.md b/docs/en/sql-reference/statements/watch.md index 90d19e6be0e..5230479cbd2 100644 --- a/docs/en/sql-reference/statements/watch.md +++ b/docs/en/sql-reference/statements/watch.md @@ -6,7 +6,7 @@ sidebar_label: WATCH # WATCH Statement (Experimental) -:::warning +:::note This is an experimental feature that may change in backwards-incompatible ways in the future releases. Enable live views and `WATCH` query using `set allow_experimental_live_view = 1`. ::: @@ -107,4 +107,4 @@ The `FORMAT` clause works the same way as for the [SELECT](../../sql-reference/s :::note The [JSONEachRowWithProgress](../../interfaces/formats.md#jsoneachrowwithprogress) format should be used when watching [LIVE VIEW](./create/view.md#live-view) tables over the HTTP interface. The progress messages will be added to the output to keep the long-lived HTTP connection alive until the query result changes. The interval between progress messages is controlled using the [live_view_heartbeat_interval](./create/view.md#live-view-settings) setting. -::: \ No newline at end of file +::: diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md index 63c5042f9e8..f5651c2dcb6 100644 --- a/docs/en/sql-reference/syntax.md +++ b/docs/en/sql-reference/syntax.md @@ -14,7 +14,7 @@ The `INSERT` query uses both parsers: INSERT INTO t VALUES (1, 'Hello, world'), (2, 'abc'), (3, 'def') ``` -The `INSERT INTO t VALUES` fragment is parsed by the full parser, and the data `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` is parsed by the fast stream parser. You can also turn on the full parser for the data by using the [input_format_values_interpret_expressions](../operations/settings/settings-formats.md#settings-input_format_values_interpret_expressions) setting. 
When `input_format_values_interpret_expressions = 1`, ClickHouse first tries to parse values with the fast stream parser. If it fails, ClickHouse tries to use the full parser for the data, treating it like an SQL [expression](#syntax-expressions). +The `INSERT INTO t VALUES` fragment is parsed by the full parser, and the data `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` is parsed by the fast stream parser. You can also turn on the full parser for the data by using the [input_format_values_interpret_expressions](../operations/settings/settings-formats.md#input_format_values_interpret_expressions) setting. When `input_format_values_interpret_expressions = 1`, ClickHouse first tries to parse values with the fast stream parser. If it fails, ClickHouse tries to use the full parser for the data, treating it like an SQL [expression](#expressions). Data can have any format. When a query is received, the server calculates no more than [max_query_size](../operations/settings/settings.md#settings-max_query_size) bytes of the request in RAM (by default, 1 MB), and the rest is stream parsed. This allows avoiding issues with large `INSERT` queries. @@ -31,30 +31,30 @@ There may be any number of space symbols between syntactical constructions (incl ClickHouse supports both SQL-style and C-style comments: -- SQL-style comments start with `--`, `#!` or `# ` and continue to the end of the line, a space after `--` and `#!` can be omitted. -- C-style are from `/*` to `*/`and can be multiline, spaces are not required either. +- SQL-style comments start with `--`, `#!` or `# ` and continue to the end of the line; a space after `--` and `#!` can be omitted. +- C-style comments span from `/*` to `*/` and can be multiline; spaces are not required either. ## Keywords Keywords are case-insensitive when they correspond to: -- SQL standard. For example, `SELECT`, `select` and `SeLeCt` are all valid. -- Implementation in some popular DBMS (MySQL or Postgres). For example, `DateTime` is the same as `datetime`. +- SQL standard. For example, `SELECT`, `select` and `SeLeCt` are all valid. +- Implementation in some popular DBMS (MySQL or Postgres). For example, `DateTime` is the same as `datetime`. You can check whether a data type name is case-sensitive in the [system.data_type_families](../operations/system-tables/data_type_families.md#system_tables-data_type_families) table. In contrast to standard SQL, all other keywords (including function names) are **case-sensitive**. -Keywords are not reserved; they are treated as such only in the corresponding context. If you use [identifiers](#syntax-identifiers) with the same name as the keywords, enclose them into double-quotes or backticks. For example, the query `SELECT "FROM" FROM table_name` is valid if the table `table_name` has column with the name `"FROM"`. +Keywords are not reserved; they are treated as such only in the corresponding context. If you use [identifiers](#identifiers) with the same name as the keywords, enclose them in double quotes or backticks. For example, the query `SELECT "FROM" FROM table_name` is valid if the table `table_name` has a column with the name `"FROM"`. ## Identifiers Identifiers are: -- Cluster, database, table, partition, and column names. -- Functions. -- Data types. -- [Expression aliases](#syntax-expression_aliases). +- Cluster, database, table, partition, and column names. +- Functions. +- Data types. +- [Expression aliases](#expression_aliases). Identifiers can be quoted or non-quoted. The latter is preferred.
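A brief hedged sketch (the table and column names are illustrative) of quoted identifiers, which admit names that would otherwise be invalid or collide with keywords:

```sql
CREATE TABLE test (`user id` UInt64, "order" String) ENGINE = Memory;
SELECT `user id`, "order" FROM test;
```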
@@ -70,10 +70,10 @@ There are numeric, string, compound, and `NULL` literals. Numeric literal tries to be parsed: -- First, as a 64-bit signed number, using the [strtoull](https://en.cppreference.com/w/cpp/string/byte/strtoul) function. -- If unsuccessful, as a 64-bit unsigned number, using the [strtoll](https://en.cppreference.com/w/cpp/string/byte/strtol) function. -- If unsuccessful, as a floating-point number using the [strtod](https://en.cppreference.com/w/cpp/string/byte/strtof) function. -- Otherwise, it returns an error. +- First, as a 64-bit signed number, using the [strtoull](https://en.cppreference.com/w/cpp/string/byte/strtoul) function. +- If unsuccessful, as a 64-bit unsigned number, using the [strtoll](https://en.cppreference.com/w/cpp/string/byte/strtol) function. +- If unsuccessful, as a floating-point number using the [strtod](https://en.cppreference.com/w/cpp/string/byte/strtof) function. +- Otherwise, it returns an error. Literal value has the smallest type that the value fits in. For example, 1 is parsed as `UInt8`, but 256 is parsed as `UInt16`. For more information, see [Data types](../sql-reference/data-types/index.md). @@ -86,8 +86,8 @@ Examples: `1`, `10_000_000`, `0xffff_ffff`, `18446744073709551615`, `0xDEADBEEF` String literals must be enclosed in single quotes, double quotes are not supported. Escaping works either -- using a preceding single quote where the single-quote character `'` (and only this character) can be escaped as `''`, or -- using a preceding backslash with the following supported escape sequences: `\\`, `\'`, `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`. The backslash loses its special meaning, i.e. will be interpreted literally, if it precedes characters different than the listed ones. +- using a preceding single quote where the single-quote character `'` (and only this character) can be escaped as `''`, or +- using a preceding backslash with the following supported escape sequences: `\\`, `\'`, `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`. The backslash loses its special meaning, i.e. will be interpreted literally, if it precedes characters different than the listed ones. In string literals, you need to escape at least `'` and `\` using escape codes `\'` (or: `''`) and `\\`. @@ -108,7 +108,7 @@ Depending on the data format (input or output), `NULL` may have a different repr There are many nuances to processing `NULL`. For example, if at least one of the arguments of a comparison operation is `NULL`, the result of this operation is also `NULL`. The same is true for multiplication, addition, and other operations. For more information, read the documentation for each operation. -In queries, you can check `NULL` using the [IS NULL](../sql-reference/operators/index.md#operator-is-null) and [IS NOT NULL](../sql-reference/operators/index.md) operators and the related functions `isNull` and `isNotNull`. +In queries, you can check `NULL` using the [IS NULL](../sql-reference/operators/index.md#is-null) and [IS NOT NULL](../sql-reference/operators/index.md#is-not-null) operators and the related functions `isNull` and `isNotNull`. 
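A small sketch of the `NULL` checks just described:

```sql
-- Returns 1, 0, 1, 0: x is NULL by construction.
SELECT x IS NULL, x IS NOT NULL, isNull(x), isNotNull(x)
FROM (SELECT CAST(NULL, 'Nullable(UInt8)') AS x);
```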
### Heredoc @@ -149,7 +149,7 @@ For example, the following SQL defines parameters named `a`, `b`, `c` and `d` - SET param_a = 13; SET param_b = 'str'; SET param_c = '2022-08-04 18:30:53'; -SET param_d = {'10': [11, 12], '13': [14, 15]}'; +SET param_d = {'10': [11, 12], '13': [14, 15]}; SELECT {a: UInt32}, @@ -166,7 +166,7 @@ Result: If you are using `clickhouse-client`, the parameters are specified as `--param_name=value`. For example, the following parameter has the name `message` and it is retrieved as a `String`: -```sql +```bash clickhouse-client --param_message='hello' --query="SELECT {message: String}" ``` @@ -190,7 +190,7 @@ Query parameters are not general text substitutions which can be used in arbitra ## Functions Function calls are written like an identifier with a list of arguments (possibly empty) in round brackets. In contrast to standard SQL, the brackets are required, even for an empty argument list. Example: `now()`. -There are regular and aggregate functions (see the section “Aggregate functions”). Some aggregate functions can contain two lists of arguments in brackets. Example: `quantile (0.9) (x)`. These aggregate functions are called “parametric” functions, and the arguments in the first list are called “parameters”. The syntax of aggregate functions without parameters is the same as for regular functions. +There are regular and aggregate functions (see the section [Aggregate functions](/docs/en/sql-reference/aggregate-functions/index.md)). Some aggregate functions can contain two lists of arguments in brackets. Example: `quantile (0.9) (x)`. These aggregate functions are called “parametric” functions, and the arguments in the first list are called “parameters”. The syntax of aggregate functions without parameters is the same as for regular functions. ## Operators @@ -199,7 +199,7 @@ For example, the expression `1 + 2 * 3 + 4` is transformed to `plus(plus(1, mult ## Data Types and Database Table Engines -Data types and table engines in the `CREATE` query are written the same way as identifiers or functions. In other words, they may or may not contain an argument list in brackets. For more information, see the sections “Data types,” “Table engines,” and “CREATE”. +Data types and table engines in the `CREATE` query are written the same way as identifiers or functions. In other words, they may or may not contain an argument list in brackets. For more information, see the sections [Data types](/docs/en/sql-reference/data-types/index.md), [Table engines](/docs/en/engines/table-engines/index.md), and [CREATE](/docs/en/sql-reference/statements/create/index.md). ## Expression Aliases @@ -209,19 +209,19 @@ An alias is a user-defined name for expression in a query. expr AS alias ``` -- `AS` — The keyword for defining aliases. You can define the alias for a table name or a column name in a `SELECT` clause without using the `AS` keyword. +- `AS` — The keyword for defining aliases. You can define the alias for a table name or a column name in a `SELECT` clause without using the `AS` keyword. - For example, `SELECT table_name_alias.column_name FROM table_name table_name_alias`. + For example, `SELECT table_name_alias.column_name FROM table_name table_name_alias`. - In the [CAST](./functions/type-conversion-functions.md#type_conversion_function-cast) function, the `AS` keyword has another meaning. See the description of the function. + In the [CAST](./functions/type-conversion-functions.md#castx-t) function, the `AS` keyword has another meaning. See the description of the function. 
-- `expr` — Any expression supported by ClickHouse. +- `expr` — Any expression supported by ClickHouse. - For example, `SELECT column_name * 2 AS double FROM some_table`. + For example, `SELECT column_name * 2 AS double FROM some_table`. -- `alias` — Name for `expr`. Aliases should comply with the [identifiers](#syntax-identifiers) syntax. +- `alias` — Name for `expr`. Aliases should comply with the [identifiers](#identifiers) syntax. - For example, `SELECT "table t".column_name FROM table_name AS "table t"`. + For example, `SELECT "table t".column_name FROM table_name AS "table t"`. ### Notes on Usage @@ -254,11 +254,11 @@ Received exception from server (version 18.14.17): Code: 184. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: Aggregate function sum(b) is found inside another aggregate function in query. ``` -In this example, we declared table `t` with column `b`. Then, when selecting data, we defined the `sum(b) AS b` alias. As aliases are global, ClickHouse substituted the literal `b` in the expression `argMax(a, b)` with the expression `sum(b)`. This substitution caused the exception. You can change this default behavior by setting [prefer_column_name_to_alias](../operations/settings/settings.md#prefer_column_name_to_alias) to `1`. +In this example, we declared table `t` with column `b`. Then, when selecting data, we defined the `sum(b) AS b` alias. As aliases are global, ClickHouse substituted the literal `b` in the expression `argMax(a, b)` with the expression `sum(b)`. This substitution caused the exception. You can change this default behavior by setting [prefer_column_name_to_alias](../operations/settings/settings.md#prefer-column-name-to-alias) to `1`. ## Asterisk -In a `SELECT` query, an asterisk can replace the expression. For more information, see the section “SELECT”. +In a `SELECT` query, an asterisk can replace the expression. For more information, see the section [SELECT](/docs/en/sql-reference/statements/select/index.md#asterisk). ## Expressions diff --git a/docs/en/sql-reference/table-functions/cluster.md b/docs/en/sql-reference/table-functions/cluster.md index 4904553c39a..904c678750c 100644 --- a/docs/en/sql-reference/table-functions/cluster.md +++ b/docs/en/sql-reference/table-functions/cluster.md @@ -25,7 +25,7 @@ clusterAllReplicas('cluster_name', db, table[, sharding_key]) - `cluster_name` – Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. - `db.table` or `db`, `table` - Name of a database and a table. -- `sharding_key` - A sharding key. Optional. Needs to be specified if the cluster has more than one shard. +- `sharding_key` - A sharding key. Optional. Needs to be specified if the cluster has more than one shard. **Returned value** @@ -47,13 +47,13 @@ Using the `cluster` and `clusterAllReplicas` table functions are less efficient The `cluster` and `clusterAllReplicas` table functions can be useful in the following cases: -- Accessing a specific cluster for data comparison, debugging, and testing. -- Queries to various ClickHouse clusters and replicas for research purposes. -- Infrequent distributed requests that are made manually. +- Accessing a specific cluster for data comparison, debugging, and testing. +- Queries to various ClickHouse clusters and replicas for research purposes. +- Infrequent distributed requests that are made manually. Connection settings like `host`, `port`, `user`, `password`, `compression`, `secure` are taken from the `<remote_servers>` config section.
See details in [Distributed engine](../../engines/table-engines/special/distributed.md).

**See Also**

-- [skip_unavailable_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards)
-- [load_balancing](../../operations/settings/settings.md#settings-load_balancing)
+- [skip_unavailable_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards)
+- [load_balancing](../../operations/settings/settings.md#settings-load_balancing)

diff --git a/docs/en/sql-reference/table-functions/dictionary.md b/docs/en/sql-reference/table-functions/dictionary.md
index 8a8cba8ff24..73d5039a64b 100644
--- a/docs/en/sql-reference/table-functions/dictionary.md
+++ b/docs/en/sql-reference/table-functions/dictionary.md
@@ -1,11 +1,11 @@
---
slug: /en/sql-reference/table-functions/dictionary
sidebar_position: 54
-sidebar_label: dictionary function
+sidebar_label: dictionary
title: dictionary
---

-Displays the [dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) data as a ClickHouse table. Works the same way as [Dictionary](../../engines/table-engines/special/dictionary.md) engine.
+Displays the [dictionary](../../sql-reference/dictionaries/index.md) data as a ClickHouse table. Works the same way as [Dictionary](../../engines/table-engines/special/dictionary.md) engine.

**Syntax**

@@ -15,7 +15,7 @@ dictionary('dict')

**Arguments**

-- `dict` — A dictionary name. [String](../../sql-reference/data-types/string.md).
+- `dict` — A dictionary name. [String](../../sql-reference/data-types/string.md).

**Returned value**

@@ -56,4 +56,4 @@ Result:

**See Also**

-- [Dictionary engine](../../engines/table-engines/special/dictionary.md#dictionary)
+- [Dictionary engine](../../engines/table-engines/special/dictionary.md#dictionary)

diff --git a/docs/en/sql-reference/table-functions/executable.md b/docs/en/sql-reference/table-functions/executable.md
index 635188763cf..c6aba61aedb 100644
--- a/docs/en/sql-reference/table-functions/executable.md
+++ b/docs/en/sql-reference/table-functions/executable.md
@@ -20,7 +20,7 @@ A key advantage between ordinary UDF functions and the `executable` table functi

The `executable` table function requires three parameters and accepts an optional list of input queries:

```sql
-executable(script_name, format, structure, [input_query...])
+executable(script_name, format, structure, [input_query...] [,SETTINGS ...])
```

- `script_name`: the file name of the script, saved in the `user_scripts` folder (the default folder of the `user_scripts_path` setting)

@@ -83,9 +83,18 @@ The response looks like:
└────┴────────────┘
```

+## Settings
+
+- `send_chunk_header` - controls whether to send a row count before sending a chunk of data to process. Default value is `false`.
+- `pool_size` — Size of pool. If 0 is specified as `pool_size`, then there are no pool size restrictions. Default value is `16`.
+- `max_command_execution_time` — Maximum executable script command execution time for processing a block of data. Specified in seconds. Default value is 10.
+- `command_termination_timeout` — The executable script should contain a main read-write loop. After the table function is destroyed, the pipe is closed, and the executable file will have `command_termination_timeout` seconds to shut down before ClickHouse sends a SIGTERM signal to the child process. Specified in seconds. Default value is 10.
+- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value is 10000.
+- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value is 10000.
+
## Passing Query Results to a Script

-Be sure to check out the example in the `Executable` table engine on [how to pass query results to a script](../../engines/table-engines/special/executable#passing-query-results-to-a-script). Here is how you execute the same script in that example using the `executable` table function:
+Be sure to check out the example in the `Executable` table engine on [how to pass query results to a script](../../engines/table-engines/special/executable.md#passing-query-results-to-a-script). Here is how you execute the same script in that example using the `executable` table function:

```sql
SELECT * FROM executable(
@@ -94,4 +103,4 @@ SELECT * FROM executable(
    'id UInt64, sentiment Float32',
    (SELECT id, comment FROM hackernews WHERE id > 0 AND comment != '' LIMIT 20)
);
-```
\ No newline at end of file
+```

diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md
index d2ef66dde73..28c2dc9f1f3 100644
--- a/docs/en/sql-reference/table-functions/file.md
+++ b/docs/en/sql-reference/table-functions/file.md
@@ -6,27 +6,73 @@ sidebar_label: file

# file

-Creates a table from a file. This table function is similar to [url](../../sql-reference/table-functions/url.md) and [hdfs](../../sql-reference/table-functions/hdfs.md) ones.
+Provides a table-like interface to SELECT from and INSERT to files. This table function is similar to the [s3](/docs/en/sql-reference/table-functions/s3.md) table function. Use file() when working with local files, and s3() when working with buckets in S3, GCS, or MinIO.

-`file` function can be used in `SELECT` and `INSERT` queries on data in [File](../../engines/table-engines/special/file.md) tables.
+The `file` function can be used in `SELECT` and `INSERT` queries to read from or write to files.

**Syntax**

``` sql
-file(path [,format] [,structure])
+file(path [,format] [,structure] [,compression])
```

**Parameters**

-- `path` — The relative path to the file from [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings.
-- `format` — The [format](../../interfaces/formats.md#formats) of the file.
-- `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`.
+- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). The path to the file supports the following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings.
+- `format` — The [format](/docs/en/interfaces/formats.md#formats) of the file.
+- `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`.
+- `compression` — The existing compression type when used in a `SELECT` query, or the desired compression type when used in an `INSERT` query. The supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`.

**Returned value**

A table with the specified structure for reading or writing data in the specified file.
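For example, a minimal sketch of the `compression` parameter (assuming a file `test.csv.gz` exists under `user_files_path`):

```sql
-- 'gz' names the existing compression type of the file being read.
SELECT *
FROM file('test.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gz')
LIMIT 2;
```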
-**Examples**
+## File Write Examples
+
+### Write to a TSV file
+
+```sql
+INSERT INTO TABLE FUNCTION
+file('test.tsv', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')
+VALUES (1, 2, 3), (3, 2, 1), (1, 3, 2)
+```
+
+As a result, the data is written into the file `test.tsv`:
+
+```bash
+# cat /var/lib/clickhouse/user_files/test.tsv
+1 2 3
+3 2 1
+1 3 2
+```
+
+### Partitioned Write to multiple TSV files
+
+If you specify a `PARTITION BY` expression when inserting data into a file() function, a separate file is created for each partition value. Splitting the data into separate files helps improve read efficiency.
+
+```sql
+INSERT INTO TABLE FUNCTION
+file('test_{_partition_id}.tsv', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')
+PARTITION BY column3
+VALUES (1, 2, 3), (3, 2, 1), (1, 3, 2)
+```
+
+As a result, the data is written into three files: `test_1.tsv`, `test_2.tsv`, and `test_3.tsv`.
+
+```bash
+# cat /var/lib/clickhouse/user_files/test_1.tsv
+3 2 1
+
+# cat /var/lib/clickhouse/user_files/test_2.tsv
+1 3 2
+
+# cat /var/lib/clickhouse/user_files/test_3.tsv
+1 2 3
+```
+
+## File Read Examples
+
+### SELECT from a CSV file

Setting `user_files_path` and the contents of the file `test.csv`:

@@ -43,7 +89,9 @@ $ cat /var/lib/clickhouse/user_files/test.csv

Getting data from a table in `test.csv` and selecting the first two rows from it:

``` sql
-SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 2;
+SELECT * FROM
+file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32')
+LIMIT 2;
```

``` text
@@ -53,17 +101,24 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U
└─────────┴─────────┴─────────┘
```

-Getting the first 10 lines of a table that contains 3 columns of [UInt32](../../sql-reference/data-types/int-uint.md) type from a CSV file:
+Getting the first 10 lines of a table that contains 3 columns of [UInt32](/docs/en/sql-reference/data-types/int-uint.md) type from a CSV file:

``` sql
-SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 10;
+SELECT * FROM
+file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32')
+LIMIT 10;
```

-Inserting data from a file into a table:
+### Inserting data from a file into a table

``` sql
-INSERT INTO FUNCTION file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') VALUES (1, 2, 3), (3, 2, 1);
-SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32');
+INSERT INTO FUNCTION
+file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32')
+VALUES (1, 2, 3), (3, 2, 1);
+```
+```sql
+SELECT * FROM
+file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32');
```

``` text
@@ -77,11 +132,11 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U

## Globs in Path

Multiple path components can have globs. To be processed, a file must exist and match the whole path pattern (not only a suffix or prefix).

-- `*` — Substitutes any number of any characters except `/` including empty string.
-- `?` — Substitutes any single character.
-- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
-- `{N..M}` — Substitutes any number in range from N to M including both borders.
-- `**` - Fetches all files inside the folder recursively.
+- `*` — Substitutes any number of any characters except `/` including empty string.
+- `?` — Substitutes any single character.
+- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
+- `{N..M}` — Substitutes any number in range from N to M including both borders.
+- `**` - Fetches all files inside the folder recursively.

Constructions with `{}` are similar to the [remote](remote.md) table function.

@@ -89,12 +144,12 @@ Constructions with `{}` are similar to the [remote](remote.md) table function.

Suppose we have several files with the following relative paths:

-- 'some_dir/some_file_1'
-- 'some_dir/some_file_2'
-- 'some_dir/some_file_3'
-- 'another_dir/some_file_1'
-- 'another_dir/some_file_2'
-- 'another_dir/some_file_3'
+- 'some_dir/some_file_1'
+- 'some_dir/some_file_2'
+- 'some_dir/some_file_3'
+- 'another_dir/some_file_1'
+- 'another_dir/some_file_2'
+- 'another_dir/some_file_3'

Query the number of rows in these files:

@@ -108,7 +163,7 @@ Query the number of rows in all files of these two directories:

SELECT count(*) FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32');
```

-:::warning
+:::note
If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
:::

@@ -138,9 +193,9 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3

## Virtual Columns

-- `_path` — Path to the file.
-- `_file` — Name of the file.
+- `_path` — Path to the file.
+- `_file` — Name of the file.

**See Also**

-- [Virtual columns](../../engines/table-engines/index.md#table_engines-virtual_columns)
+- [Virtual columns](/docs/en/engines/table-engines/index.md#table_engines-virtual_columns)

diff --git a/docs/en/sql-reference/table-functions/format.md b/docs/en/sql-reference/table-functions/format.md
index 811eae12942..2813eef5bcf 100644
--- a/docs/en/sql-reference/table-functions/format.md
+++ b/docs/en/sql-reference/table-functions/format.md
@@ -16,9 +16,9 @@ format(format_name, [structure], data)

**Parameters**

-- `format_name` — The [format](../../interfaces/formats.md#formats) of the data.
-- `structure` - Structure of the table. Optional. Format 'column1_name column1_type, column2_name column2_type, ...'.
-- `data` — String literal or constant expression that returns a string containing data in specified format
+- `format_name` — The [format](../../interfaces/formats.md#formats) of the data.
+- `structure` - Structure of the table. Optional. Format 'column1_name column1_type, column2_name column2_type, ...'.
+- `data` — String literal or constant expression that returns a string containing data in the specified format

**Returned value**

@@ -95,4 +95,4 @@ $$)

**See Also**

-- [Formats](../../interfaces/formats.md)
+- [Formats](../../interfaces/formats.md)

diff --git a/docs/en/sql-reference/table-functions/gcs.md b/docs/en/sql-reference/table-functions/gcs.md
new file mode 100644
index 00000000000..bfa7f36fa48
--- /dev/null
+++ b/docs/en/sql-reference/table-functions/gcs.md
@@ -0,0 +1,184 @@
+---
+slug: /en/sql-reference/table-functions/gcs
+sidebar_position: 45
+sidebar_label: gcs
+keywords: [gcs, bucket]
+---
+
+# gcs Table Function
+
+Provides a table-like interface to select/insert files in [Google Cloud Storage](https://cloud.google.com/storage/).
+
+**Syntax**
+
+``` sql
+gcs(path [,hmac_key, hmac_secret] [,format] [,structure] [,compression])
+```
+
+:::tip GCS
+The GCS Table Function integrates with Google Cloud Storage by using the GCS XML API and HMAC keys.
See the [Google interoperability docs]( https://cloud.google.com/storage/docs/interoperability) for more details about the endpoint and HMAC.
+
+:::
+
+**Arguments**
+
+- `path` — Bucket URL with path to file. Supports the following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings.
+
+  :::note GCS
+  The GCS path is in this format as the endpoint for the Google XML API is different than the JSON API:
+  ```
+  https://storage.googleapis.com/<bucket>/<folder>/<filename(s)>
+  ```
+  and not ~~https://storage.cloud.google.com~~.
+  :::
+
+- `format` — The [format](../../interfaces/formats.md#formats) of the file.
+- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
+- `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension.
+
+**Returned value**
+
+A table with the specified structure for reading or writing data in the specified file.
+
+**Examples**
+
+Selecting the first two rows from the table from GCS file `https://storage.googleapis.com/my-test-bucket-768/data.csv`:
+
+``` sql
+SELECT *
+FROM gcs('https://storage.googleapis.com/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32')
+LIMIT 2;
+```
+
+``` text
+┌─column1─┬─column2─┬─column3─┐
+│ 1 │ 2 │ 3 │
+│ 3 │ 2 │ 1 │
+└─────────┴─────────┴─────────┘
+```
+
+The same, but from a file with `gzip` compression:
+
+``` sql
+SELECT *
+FROM gcs('https://storage.googleapis.com/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip')
+LIMIT 2;
+```
+
+``` text
+┌─column1─┬─column2─┬─column3─┐
+│ 1 │ 2 │ 3 │
+│ 3 │ 2 │ 1 │
+└─────────┴─────────┴─────────┘
+```
+
+## Usage
+
+Suppose that we have several files with the following URIs on GCS:
+
+- 'https://storage.googleapis.com/my-test-bucket-768/some_prefix/some_file_1.csv'
+- 'https://storage.googleapis.com/my-test-bucket-768/some_prefix/some_file_2.csv'
+- 'https://storage.googleapis.com/my-test-bucket-768/some_prefix/some_file_3.csv'
+- 'https://storage.googleapis.com/my-test-bucket-768/some_prefix/some_file_4.csv'
+- 'https://storage.googleapis.com/my-test-bucket-768/another_prefix/some_file_1.csv'
+- 'https://storage.googleapis.com/my-test-bucket-768/another_prefix/some_file_2.csv'
+- 'https://storage.googleapis.com/my-test-bucket-768/another_prefix/some_file_3.csv'
+- 'https://storage.googleapis.com/my-test-bucket-768/another_prefix/some_file_4.csv'
+
+Count the number of rows in files ending with numbers from 1 to 3:
+
+``` sql
+SELECT count(*)
+FROM gcs('https://storage.googleapis.com/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32')
+```
+
+``` text
+┌─count()─┐
+│ 18 │
+└─────────┘
+```
+
+Count the total number of rows in all files in these two directories:
+
+``` sql
+SELECT count(*)
+FROM gcs('https://storage.googleapis.com/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32')
+```
+
+``` text
+┌─count()─┐
+│ 24 │
+└─────────┘
+```
+
+:::warning
+If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
+:::
+
+Count the total number of rows in files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
+
+``` sql
+SELECT count(*)
+FROM gcs('https://storage.googleapis.com/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32');
+```
+
+``` text
+┌─count()─┐
+│ 12 │
+└─────────┘
+```
+
+Insert data into file `test-data.csv.gz`:
+
+``` sql
+INSERT INTO FUNCTION gcs('https://storage.googleapis.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip')
+VALUES ('test-data', 1), ('test-data-2', 2);
+```
+
+Insert data into file `test-data.csv.gz` from existing table:
+
+``` sql
+INSERT INTO FUNCTION gcs('https://storage.googleapis.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip')
+SELECT name, value FROM existing_table;
+```
+
+The glob `**` can be used for recursive directory traversal. Consider the example below; it fetches all files from the `my-test-bucket-768` directory recursively:
+
+``` sql
+SELECT * FROM gcs('https://storage.googleapis.com/my-test-bucket-768/**', 'CSV', 'name String, value UInt32', 'gzip');
+```
+
+The query below gets data from all `test-data.csv.gz` files in any folder inside the `my-test-bucket` directory recursively:
+
+``` sql
+SELECT * FROM gcs('https://storage.googleapis.com/my-test-bucket-768/**/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip');
+```
+
+## Partitioned Write
+
+If you specify a `PARTITION BY` expression when inserting data into a `GCS` table, a separate file is created for each partition value. Splitting the data into separate files helps improve read efficiency.
+
+**Examples**
+
+1. Using partition ID in a key creates separate files:
+
+```sql
+INSERT INTO TABLE FUNCTION
+    gcs('https://storage.googleapis.com/my_bucket/file_{_partition_id}.csv', 'CSV', 'a String, b UInt32, c UInt32')
+    PARTITION BY a VALUES ('x', 2, 3), ('x', 4, 5), ('y', 11, 12), ('y', 13, 14), ('z', 21, 22), ('z', 23, 24);
+```
+As a result, the data is written into three files: `file_x.csv`, `file_y.csv`, and `file_z.csv`.
+
+2. Using partition ID in a bucket name creates files in different buckets:
+
+```sql
+INSERT INTO TABLE FUNCTION
+    gcs('https://storage.googleapis.com/my_bucket_{_partition_id}/file.csv', 'CSV', 'a UInt32, b UInt32, c UInt32')
+    PARTITION BY a VALUES (1, 2, 3), (1, 4, 5), (10, 11, 12), (10, 13, 14), (20, 21, 22), (20, 23, 24);
+```
+As a result, the data is written into three files in different buckets: `my_bucket_1/file.csv`, `my_bucket_10/file.csv`, and `my_bucket_20/file.csv`.
+
+**See Also**
+
+- [S3 table function](s3.md)
+- [S3 engine](../../engines/table-engines/integrations/s3.md)

diff --git a/docs/en/sql-reference/table-functions/generate.md b/docs/en/sql-reference/table-functions/generate.md
index b53ccdd42b5..bfc114daa72 100644
--- a/docs/en/sql-reference/table-functions/generate.md
+++ b/docs/en/sql-reference/table-functions/generate.md
@@ -16,11 +16,11 @@ generateRandom('name TypeName[, name TypeName]...', [, 'random_seed'[, 'max_stri

**Arguments**

-- `name` — Name of corresponding column.
-- `TypeName` — Type of corresponding column.
-- `max_array_length` — Maximum elements for all generated arrays or maps. Defaults to `10`.
-- `max_string_length` — Maximum string length for all generated strings. Defaults to `10`.
-- `random_seed` — Specify random seed manually to produce stable results. If NULL — seed is randomly generated.
+- `name` — Name of corresponding column.
+- `TypeName` — Type of corresponding column.
+- `max_array_length` — Maximum elements for all generated arrays or maps. Defaults to `10`.
+- `max_string_length` — Maximum string length for all generated strings. Defaults to `10`.
+- `random_seed` — Specify random seed manually to produce stable results. If NULL — seed is randomly generated.

**Returned Value**

diff --git a/docs/en/sql-reference/table-functions/hdfs.md b/docs/en/sql-reference/table-functions/hdfs.md
index 97a253a5356..6ba24211131 100644
--- a/docs/en/sql-reference/table-functions/hdfs.md
+++ b/docs/en/sql-reference/table-functions/hdfs.md
@@ -14,9 +14,9 @@ hdfs(URI, format, structure)

**Input parameters**

-- `URI` — The relative URI to the file in HDFS. Path to file support following globs in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, \``'abc', 'def'` — strings.
-- `format` — The [format](../../interfaces/formats.md#formats) of the file.
-- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
+- `URI` — The relative URI to the file in HDFS. The path to the file supports the following globs in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings.
+- `format` — The [format](../../interfaces/formats.md#formats) of the file.
+- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.

**Returned value**

@@ -43,10 +43,10 @@ LIMIT 2

Multiple path components can have globs. To be processed, a file must exist and match the whole path pattern (not only a suffix or prefix).

-- `*` — Substitutes any number of any characters except `/` including empty string.
-- `?` — Substitutes any single character.
-- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
-- `{N..M}` — Substitutes any number in range from N to M including both borders.
+- `*` — Substitutes any number of any characters except `/` including empty string.
+- `?` — Substitutes any single character.
+- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
+- `{N..M}` — Substitutes any number in range from N to M including both borders.

Constructions with `{}` are similar to the [remote table function](../../sql-reference/table-functions/remote.md).

@@ -54,12 +54,12 @@ Constructions with `{}` are similar to the [remote table function](../../sql-ref

1. Suppose that we have several files with the following URIs on HDFS:

-- ‘hdfs://hdfs1:9000/some_dir/some_file_1’
-- ‘hdfs://hdfs1:9000/some_dir/some_file_2’
-- ‘hdfs://hdfs1:9000/some_dir/some_file_3’
-- ‘hdfs://hdfs1:9000/another_dir/some_file_1’
-- ‘hdfs://hdfs1:9000/another_dir/some_file_2’
-- ‘hdfs://hdfs1:9000/another_dir/some_file_3’
+- ‘hdfs://hdfs1:9000/some_dir/some_file_1’
+- ‘hdfs://hdfs1:9000/some_dir/some_file_2’
+- ‘hdfs://hdfs1:9000/some_dir/some_file_3’
+- ‘hdfs://hdfs1:9000/another_dir/some_file_1’
+- ‘hdfs://hdfs1:9000/another_dir/some_file_2’
+- ‘hdfs://hdfs1:9000/another_dir/some_file_3’

2. Query the number of rows in these files:

@@ -79,7 +79,7 @@ SELECT count(*)
FROM hdfs('hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV', 'name String, value UInt32')
```

-:::warning
+:::note
If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
::: @@ -94,9 +94,9 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin ## Virtual Columns -- `_path` — Path to the file. -- `_file` — Name of the file. +- `_path` — Path to the file. +- `_file` — Name of the file. **See Also** -- [Virtual columns](../../engines/table-engines/index.md#table_engines-virtual_columns) +- [Virtual columns](../../engines/table-engines/index.md#table_engines-virtual_columns) diff --git a/docs/en/sql-reference/table-functions/hdfsCluster.md b/docs/en/sql-reference/table-functions/hdfsCluster.md index 231c552610f..afd1fd28a5a 100644 --- a/docs/en/sql-reference/table-functions/hdfsCluster.md +++ b/docs/en/sql-reference/table-functions/hdfsCluster.md @@ -6,7 +6,7 @@ sidebar_label: hdfsCluster # hdfsCluster Table Function -Allows processing files from HDFS in parallel from many nodes in a specified cluster. On initiator it creates a connection to all nodes in the cluster, discloses asterics in HDFS file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished. +Allows processing files from HDFS in parallel from many nodes in a specified cluster. On initiator it creates a connection to all nodes in the cluster, discloses asterisks in HDFS file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished. **Syntax** @@ -16,10 +16,10 @@ hdfsCluster(cluster_name, URI, format, structure) **Arguments** -- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. -- `URI` — URI to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). -- `format` — The [format](../../interfaces/formats.md#formats) of the file. -- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. +- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. +- `URI` — URI to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +- `format` — The [format](../../interfaces/formats.md#formats) of the file. +- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. **Returned value** @@ -29,12 +29,12 @@ A table with the specified structure for reading data in the specified file. 1. 
Suppose that we have a ClickHouse cluster named `cluster_simple`, and several files with the following URIs on HDFS:

-- ‘hdfs://hdfs1:9000/some_dir/some_file_1’
-- ‘hdfs://hdfs1:9000/some_dir/some_file_2’
-- ‘hdfs://hdfs1:9000/some_dir/some_file_3’
-- ‘hdfs://hdfs1:9000/another_dir/some_file_1’
-- ‘hdfs://hdfs1:9000/another_dir/some_file_2’
-- ‘hdfs://hdfs1:9000/another_dir/some_file_3’
+- ‘hdfs://hdfs1:9000/some_dir/some_file_1’
+- ‘hdfs://hdfs1:9000/some_dir/some_file_2’
+- ‘hdfs://hdfs1:9000/some_dir/some_file_3’
+- ‘hdfs://hdfs1:9000/another_dir/some_file_1’
+- ‘hdfs://hdfs1:9000/another_dir/some_file_2’
+- ‘hdfs://hdfs1:9000/another_dir/some_file_3’

2. Query the number of rows in these files:

@@ -50,11 +50,11 @@ SELECT count(*)
FROM hdfsCluster('cluster_simple', 'hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV', 'name String, value UInt32')
```

-:::warning
+:::note
If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
:::

**See Also**

-- [HDFS engine](../../engines/table-engines/integrations/hdfs.md)
-- [HDFS table function](../../sql-reference/table-functions/hdfs.md)
+- [HDFS engine](../../engines/table-engines/integrations/hdfs.md)
+- [HDFS table function](../../sql-reference/table-functions/hdfs.md)

diff --git a/docs/en/sql-reference/table-functions/iceberg.md b/docs/en/sql-reference/table-functions/iceberg.md
index fda4d274005..713b0f9bbf5 100644
--- a/docs/en/sql-reference/table-functions/iceberg.md
+++ b/docs/en/sql-reference/table-functions/iceberg.md
@@ -32,7 +32,13 @@ A table with the specified structure for reading data in the specified Iceberg t
SELECT * FROM iceberg('http://test.s3.amazonaws.com/clickhouse-bucket/test_table', 'test', 'test')
```

-Using named collections:
+:::important
+ClickHouse currently supports reading v1 of the Iceberg format (v2 support is coming soon!) via the `iceberg` table function and the `Iceberg` table engine.
+:::
+
+## Defining a named collection
+
+Here is an example of configuring a named collection for storing the URL and credentials:

```xml

diff --git a/docs/en/sql-reference/table-functions/index.md b/docs/en/sql-reference/table-functions/index.md
index b49c2f8da20..b16295db36a 100644
--- a/docs/en/sql-reference/table-functions/index.md
+++ b/docs/en/sql-reference/table-functions/index.md
@@ -10,16 +10,16 @@ Table functions are methods for constructing tables.

You can use table functions in:

-- [FROM](../../sql-reference/statements/select/from.md) clause of the `SELECT` query.
+- [FROM](../../sql-reference/statements/select/from.md) clause of the `SELECT` query.

  The method for creating a temporary table that is available only in the current query. The table is deleted when the query finishes.

-- [CREATE TABLE AS table_function()](../../sql-reference/statements/create/table.md) query.
+- [CREATE TABLE AS table_function()](../../sql-reference/statements/create/table.md) query.

  It's one of the methods of creating a table.

-- [INSERT INTO TABLE FUNCTION](../../sql-reference/statements/insert-into.md#inserting-into-table-function) query.
+- [INSERT INTO TABLE FUNCTION](../../sql-reference/statements/insert-into.md#inserting-into-table-function) query.

-:::warning
+:::note
You can’t use table functions if the [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) setting is disabled.
:::

diff --git a/docs/en/sql-reference/table-functions/input.md b/docs/en/sql-reference/table-functions/input.md
index b07bc1bb431..6aa1cab00c1 100644
--- a/docs/en/sql-reference/table-functions/input.md
+++ b/docs/en/sql-reference/table-functions/input.md
@@ -24,7 +24,7 @@ with all transferred data is not created.

**Examples**

-- Let the `test` table has the following structure `(a String, b String)`
+- Let the `test` table have the following structure `(a String, b String)`
  and data in `data.csv` has a different structure `(col1 String, col2 Date, col3 Int32)`. The query to insert data from `data.csv` into the `test` table with simultaneous conversion looks like this:

@@ -34,7 +34,7 @@ with all transferred data is not created.

$ cat data.csv | clickhouse-client --query="INSERT INTO test SELECT lower(col1), col3 * col3 FROM input('col1 String, col2 Date, col3 Int32') FORMAT CSV";
```

-- If `data.csv` contains data of the same structure `test_structure` as the table `test` then these two queries are equal:
+- If `data.csv` contains data of the same structure `test_structure` as the table `test`, then these two queries are equivalent:

diff --git a/docs/en/sql-reference/table-functions/merge.md b/docs/en/sql-reference/table-functions/merge.md
index ce3cdded3f2..ba0d19b804e 100644
--- a/docs/en/sql-reference/table-functions/merge.md
+++ b/docs/en/sql-reference/table-functions/merge.md
@@ -24,4 +24,4 @@ merge('db_name', 'tables_regexp')

**See Also**

-- [Merge](../../engines/table-engines/special/merge.md) table engine
+- [Merge](../../engines/table-engines/special/merge.md) table engine

diff --git a/docs/en/sql-reference/table-functions/mongodb.md b/docs/en/sql-reference/table-functions/mongodb.md
index dd063ae1796..042225dd1f0 100644
--- a/docs/en/sql-reference/table-functions/mongodb.md
+++ b/docs/en/sql-reference/table-functions/mongodb.md
@@ -16,19 +16,19 @@ mongodb(host:port, database, collection, user, password, structure [, options])

**Arguments**

-- `host:port` — MongoDB server address.
+- `host:port` — MongoDB server address.

-- `database` — Remote database name.
+- `database` — Remote database name.

-- `collection` — Remote collection name.
+- `collection` — Remote collection name.

-- `user` — MongoDB user.
+- `user` — MongoDB user.

-- `password` — User password.
+- `password` — User password.

-- `structure` - The schema for the ClickHouse table returned from this function.
+- `structure` - The schema for the ClickHouse table returned from this function.

-- `options` - MongoDB connection string options (optional parameter).
+- `options` - MongoDB connection string options (optional parameter).

**Returned Value**

@@ -70,5 +70,5 @@ SELECT * FROM mongodb(

**See Also**

-- [The `MongoDB` table engine](../../engines/table-engines/integrations/mongodb.md)
-- [Using MongoDB as a dictionary source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources/#mongodb)
+- [The `MongoDB` table engine](/docs/en/engines/table-engines/integrations/mongodb.md)
+- [Using MongoDB as a dictionary source](/docs/en/sql-reference/dictionaries/index.md#mongodb)

diff --git a/docs/en/sql-reference/table-functions/mysql.md b/docs/en/sql-reference/table-functions/mysql.md
index b995319c645..8d7656365f5 100644
--- a/docs/en/sql-reference/table-functions/mysql.md
+++ b/docs/en/sql-reference/table-functions/mysql.md
@@ -16,21 +16,21 @@ mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_

**Arguments**

-- `host:port` — MySQL server address.
+- `host:port` — MySQL server address.

-- `database` — Remote database name.
+- `database` — Remote database name.

-- `table` — Remote table name.
+- `table` — Remote table name.

-- `user` — MySQL user.
+- `user` — MySQL user.

-- `password` — User password.
+- `password` — User password.

-- `replace_query` — Flag that converts `INSERT INTO` queries to `REPLACE INTO`. Possible values:
+- `replace_query` — Flag that converts `INSERT INTO` queries to `REPLACE INTO`. Possible values:
    - `0` - The query is executed as `INSERT INTO`.
    - `1` - The query is executed as `REPLACE INTO`.

-- `on_duplicate_clause` — The `ON DUPLICATE KEY on_duplicate_clause` expression that is added to the `INSERT` query. Can be specified only with `replace_query = 0` (if you simultaneously pass `replace_query = 1` and `on_duplicate_clause`, ClickHouse generates an exception).
+- `on_duplicate_clause` — The `ON DUPLICATE KEY on_duplicate_clause` expression that is added to the `INSERT` query. Can be specified only with `replace_query = 0` (if you simultaneously pass `replace_query = 1` and `on_duplicate_clause`, ClickHouse generates an exception).

    Example: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1;`

@@ -56,7 +56,7 @@ SELECT name FROM mysql(`mysql1:3306|mysql2:3306|mysql3:3306`, 'mysql_database',

A table object with the same columns as the original MySQL table.

-:::note
+:::note
In the `INSERT` query to distinguish table function `mysql(...)` from table name with column names list, you must use keywords `FUNCTION` or `TABLE FUNCTION`. See examples below.
:::

@@ -109,5 +109,5 @@ SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123');

**See Also**

-- [The ‘MySQL’ table engine](../../engines/table-engines/integrations/mysql.md)
-- [Using MySQL as a dictionary source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-mysql)
+- [The ‘MySQL’ table engine](../../engines/table-engines/integrations/mysql.md)
+- [Using MySQL as a dictionary source](../../sql-reference/dictionaries/index.md#mysql)

diff --git a/docs/en/sql-reference/table-functions/null.md b/docs/en/sql-reference/table-functions/null.md
index 04d7f08f259..d27295f1916 100644
--- a/docs/en/sql-reference/table-functions/null.md
+++ b/docs/en/sql-reference/table-functions/null.md
@@ -15,7 +15,7 @@ null('structure')

**Parameter**

-- `structure` — A list of columns and column types. [String](../../sql-reference/data-types/string.md).
+- `structure` — A list of columns and column types. [String](../../sql-reference/data-types/string.md).

**Returned value**

@@ -38,4 +38,4 @@ DROP TABLE IF EXISTS t;

See also:

-- [Null table engine](../../engines/table-engines/special/null.md)
+- [Null table engine](../../engines/table-engines/special/null.md)

diff --git a/docs/en/sql-reference/table-functions/odbc.md b/docs/en/sql-reference/table-functions/odbc.md
index 7e13424bc8a..781ebacc680 100644
--- a/docs/en/sql-reference/table-functions/odbc.md
+++ b/docs/en/sql-reference/table-functions/odbc.md
@@ -14,9 +14,9 @@ odbc(connection_settings, external_database, external_table)

Parameters:

-- `connection_settings` — Name of the section with connection settings in the `odbc.ini` file.
-- `external_database` — Name of a database in an external DBMS.
-- `external_table` — Name of a table in the `external_database`.
+- `connection_settings` — Name of the section with connection settings in the `odbc.ini` file.
+- `external_database` — Name of a database in an external DBMS.
+- `external_table` — Name of a table in the `external_database`.

To safely implement ODBC connections, ClickHouse uses a separate program `clickhouse-odbc-bridge`. If the ODBC driver is loaded directly from `clickhouse-server`, driver problems can crash the ClickHouse server. ClickHouse automatically starts `clickhouse-odbc-bridge` when it is required. The ODBC bridge program is installed from the same package as the `clickhouse-server`.

@@ -101,5 +101,5 @@ SELECT * FROM odbc('DSN=mysqlconn', 'test', 'test')

## See Also

-- [ODBC dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-odbc)
-- [ODBC table engine](../../engines/table-engines/integrations/odbc.md).
+- [ODBC dictionaries](../../sql-reference/dictionaries/index.md#odbc)
+- [ODBC table engine](../../engines/table-engines/integrations/odbc.md).

diff --git a/docs/en/sql-reference/table-functions/postgresql.md b/docs/en/sql-reference/table-functions/postgresql.md
index 87fc6ecb234..3e147fb8417 100644
--- a/docs/en/sql-reference/table-functions/postgresql.md
+++ b/docs/en/sql-reference/table-functions/postgresql.md
@@ -16,18 +16,18 @@ postgresql('host:port', 'database', 'table', 'user', 'password'[, `schema`])

**Arguments**

-- `host:port` — PostgreSQL server address.
-- `database` — Remote database name.
-- `table` — Remote table name.
-- `user` — PostgreSQL user.
-- `password` — User password.
-- `schema` — Non-default table schema. Optional.
+- `host:port` — PostgreSQL server address.
+- `database` — Remote database name.
+- `table` — Remote table name.
+- `user` — PostgreSQL user.
+- `password` — User password.
+- `schema` — Non-default table schema. Optional.

**Returned Value**

A table object with the same columns as the original PostgreSQL table.

-:::note
+:::note
In the `INSERT` query to distinguish table function `postgresql(...)` from table name with column names list you must use keywords `FUNCTION` or `TABLE FUNCTION`. See examples below.
:::

@@ -43,7 +43,7 @@ All joins, aggregations, sorting, `IN [ array ]` conditions and the `LIMIT` samp

PostgreSQL Array types convert into ClickHouse arrays.

-:::note
+:::note
Be careful, in PostgreSQL an array data type column like Integer[] may contain arrays of different dimensions in different rows, but in ClickHouse it is only allowed to have multidimensional arrays of the same dimension in all rows.
:::

@@ -129,8 +129,10 @@ CREATE TABLE pg_table_schema_with_dots (a UInt32)

**See Also**

-- [The PostgreSQL table engine](../../engines/table-engines/integrations/postgresql.md)
-- [Using PostgreSQL as a dictionary source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql)
+- [The PostgreSQL table engine](../../engines/table-engines/integrations/postgresql.md)
+- [Using PostgreSQL as a dictionary source](../../sql-reference/dictionaries/index.md#postgresql)

## Related content

+
- Blog: [ClickHouse and PostgreSQL - a match made in data heaven - part 1](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres)
+- Blog: [ClickHouse and PostgreSQL - a Match Made in Data Heaven - part 2](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres-part-2)

diff --git a/docs/en/sql-reference/table-functions/remote.md b/docs/en/sql-reference/table-functions/remote.md
index ccaf9565144..bf0abd49fc6 100644
--- a/docs/en/sql-reference/table-functions/remote.md
+++ b/docs/en/sql-reference/table-functions/remote.md
@@ -53,7 +53,7 @@ The `remote` table function can be useful in the following cases:

- Infrequent distributed requests that are made manually.
- Distributed requests where the set of servers is re-defined each time.

-### Adresses
+### Addresses

``` text
example01-01-1

diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md
index d7199717798..a9ddc286ec5 100644
--- a/docs/en/sql-reference/table-functions/s3.md
+++ b/docs/en/sql-reference/table-functions/s3.md
@@ -12,7 +12,7 @@ Provides a table-like interface to select/insert files in [Amazon S3](https://aw

**Syntax**

``` sql
-s3(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression])
+s3(path [, NOSIGN | aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression])
```

:::tip GCS
@@ -23,7 +23,7 @@ For GCS, substitute your HMAC key and HMAC secret where you see `aws_access_key_

**Arguments**

-- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
+- `path` — Bucket URL with path to file. Supports the following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path).

  :::note GCS
  The GCS path is in this format as the endpoint for the Google XML API is different than the JSON API:
  ```
  https://storage.googleapis.com/<bucket>/<folder>/<filename(s)>
  ```
  and not ~~https://storage.cloud.google.com~~.
  :::

-- `format` — The [format](../../interfaces/formats.md#formats) of the file.
-- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
-- `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension.
+- `NOSIGN` - If this keyword is provided in place of credentials, none of the requests will be signed.
+- `format` — The [format](../../interfaces/formats.md#formats) of the file.
+- `structure` — Structure of the table.
Format `'column1_name column1_type, column2_name column2_type, ...'`.
+- `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension.

**Returned value**

@@ -43,48 +44,54 @@ A table with the specified structure for reading or writing data in the specifie

**Examples**

-Selecting the first two rows from the table from S3 file `https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/data.csv`:
+Selecting the first 5 rows from the table from S3 file `https://datasets-documentation.s3.eu-west-3.amazonaws.com/aapl_stock.csv`:

``` sql
SELECT *
-FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32')
-LIMIT 2;
+FROM s3(
+   'https://datasets-documentation.s3.eu-west-3.amazonaws.com/aapl_stock.csv',
+   'CSVWithNames'
+)
+LIMIT 5;
```

-``` text
-┌─column1─┬─column2─┬─column3─┐
-│ 1 │ 2 │ 3 │
-│ 3 │ 2 │ 1 │
-└─────────┴─────────┴─────────┘
+```response
+┌───────Date─┬────Open─┬────High─┬─────Low─┬───Close─┬───Volume─┬─OpenInt─┐
+│ 1984-09-07 │ 0.42388 │ 0.42902 │ 0.41874 │ 0.42388 │ 23220030 │ 0 │
+│ 1984-09-10 │ 0.42388 │ 0.42516 │ 0.41366 │ 0.42134 │ 18022532 │ 0 │
+│ 1984-09-11 │ 0.42516 │ 0.43668 │ 0.42516 │ 0.42902 │ 42498199 │ 0 │
+│ 1984-09-12 │ 0.42902 │ 0.43157 │ 0.41618 │ 0.41618 │ 37125801 │ 0 │
+│ 1984-09-13 │ 0.43927 │ 0.44052 │ 0.43927 │ 0.43927 │ 57822062 │ 0 │
+└────────────┴─────────┴─────────┴─────────┴─────────┴──────────┴─────────┘
```

-The similar but from file with `gzip` compression:
+:::note
+ClickHouse uses filename extensions to determine the format of the data. For example, we could have run the previous command without `CSVWithNames`:

``` sql
SELECT *
-FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip')
-LIMIT 2;
+FROM s3(
+   'https://datasets-documentation.s3.eu-west-3.amazonaws.com/aapl_stock.csv'
+)
+LIMIT 5;
```

-``` text
-┌─column1─┬─column2─┬─column3─┐
-│ 1 │ 2 │ 3 │
-│ 3 │ 2 │ 1 │
-└─────────┴─────────┴─────────┘
-```

+ClickHouse can also determine the compression of the file. For example, if the file was compressed with a `.csv.gz` extension, ClickHouse would decompress the file automatically.
+:::
+
## Usage

Suppose that we have several files with the following URIs on S3:

-- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_1.csv'
-- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_2.csv'
-- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_3.csv'
-- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_4.csv'
-- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_1.csv'
-- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_2.csv'
-- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_3.csv'
-- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_4.csv'
+- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_1.csv'
+- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_2.csv'
+- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_3.csv'
+- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_4.csv'
+- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_1.csv'
+- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_2.csv'
+- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_3.csv'
+- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_4.csv'

Count the number of rows in files ending with numbers from 1 to 3:

@@ -112,7 +119,7 @@ FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/
└─────────┘
```

-:::warning
+:::tip
If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
:::

@@ -179,6 +186,22 @@ INSERT INTO TABLE FUNCTION
```
As a result, the data is written into three files in different buckets: `my_bucket_1/file.csv`, `my_bucket_10/file.csv`, and `my_bucket_20/file.csv`.

+## Accessing public buckets
+
+ClickHouse tries to fetch credentials from many different types of sources.
+Sometimes, this can cause problems when accessing buckets that are public, with the client returning a `403` error code.
+This issue can be avoided by using the `NOSIGN` keyword, which forces the client to ignore the credentials and not sign the requests.
+
+``` sql
+SELECT *
+FROM s3(
+   'https://datasets-documentation.s3.eu-west-3.amazonaws.com/aapl_stock.csv',
+   NOSIGN,
+   'CSVWithNames'
+)
+LIMIT 5;
+```
+
**See Also**

-- [S3 engine](../../engines/table-engines/integrations/s3.md)
+- [S3 engine](../../engines/table-engines/integrations/s3.md)

diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md
index f420a69596c..a1d9b9cdad4 100644
--- a/docs/en/sql-reference/table-functions/s3Cluster.md
+++ b/docs/en/sql-reference/table-functions/s3Cluster.md
@@ -5,7 +5,7 @@ sidebar_label: s3Cluster
title: "s3Cluster Table Function"
---

-Allows processing files from [Amazon S3](https://aws.amazon.com/s3/) in parallel from many nodes in a specified cluster.
On initiator it creates a connection to all nodes in the cluster, discloses asterics in S3 file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished. +Allows processing files from [Amazon S3](https://aws.amazon.com/s3/) in parallel from many nodes in a specified cluster. On initiator it creates a connection to all nodes in the cluster, discloses asterisks in S3 file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished. **Syntax** @@ -15,11 +15,11 @@ s3Cluster(cluster_name, source, [,access_key_id, secret_access_key] [,format] [, **Arguments** -- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. -- `source` — URL to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). -- `access_key_id` and `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. -- `format` — The [format](../../interfaces/formats.md#formats) of the file. -- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. +- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. +- `source` — URL to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +- `access_key_id` and `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. +- `format` — The [format](../../interfaces/formats.md#formats) of the file. +- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. **Returned value** @@ -42,11 +42,11 @@ SELECT * FROM s3Cluster( Count the total amount of rows in all files in the cluster `cluster_simple`: -:::warning +:::tip If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. ::: **See Also** -- [S3 engine](../../engines/table-engines/integrations/s3.md) -- [s3 table function](../../sql-reference/table-functions/s3.md) +- [S3 engine](../../engines/table-engines/integrations/s3.md) +- [s3 table function](../../sql-reference/table-functions/s3.md) diff --git a/docs/en/sql-reference/table-functions/sqlite.md b/docs/en/sql-reference/table-functions/sqlite.md index 1895f32421e..344fab4fad2 100644 --- a/docs/en/sql-reference/table-functions/sqlite.md +++ b/docs/en/sql-reference/table-functions/sqlite.md @@ -15,12 +15,12 @@ Allows to perform queries on a data stored in an [SQLite](../../engines/database **Arguments** -- `db_path` — Path to a file with an SQLite database. [String](../../sql-reference/data-types/string.md). -- `table_name` — Name of a table in the SQLite database. [String](../../sql-reference/data-types/string.md). +- `db_path` — Path to a file with an SQLite database. [String](../../sql-reference/data-types/string.md). 
+- `table_name` — Name of a table in the SQLite database. [String](../../sql-reference/data-types/string.md).

**Returned value**

-- A table object with the same columns as in the original `SQLite` table.
+- A table object with the same columns as in the original `SQLite` table.

**Example**

@@ -42,4 +42,4 @@ Result:

**See Also**

-- [SQLite](../../engines/table-engines/integrations/sqlite.md) table engine
+- [SQLite](../../engines/table-engines/integrations/sqlite.md) table engine

diff --git a/docs/en/sql-reference/table-functions/url.md b/docs/en/sql-reference/table-functions/url.md
index 014dc3ae853..f157a850a12 100644
--- a/docs/en/sql-reference/table-functions/url.md
+++ b/docs/en/sql-reference/table-functions/url.md
@@ -13,7 +13,7 @@ sidebar_label: url

**Syntax**

``` sql
-url(URL [,format] [,structure])
+url(URL [,format] [,structure] [,headers])
```

**Parameters**

@@ -21,6 +21,7 @@ url(URL [,format] [,structure])

- `URL` — HTTP or HTTPS server address, which can accept `GET` or `POST` requests (for `SELECT` or `INSERT` queries correspondingly). Type: [String](../../sql-reference/data-types/string.md).
- `format` — [Format](../../interfaces/formats.md#formats) of the data. Type: [String](../../sql-reference/data-types/string.md).
- `structure` — Table structure in `'UserID UInt64, Name String'` format. Determines column names and types. Type: [String](../../sql-reference/data-types/string.md).
+- `headers` - Headers in `'headers('key1'='value1', 'key2'='value2')'` format. You can set headers for the HTTP call.

**Returned value**

@@ -31,7 +32,7 @@ A table with the specified format and structure and with data from the defined `

Getting the first 3 lines of a table that contains columns of `String` and [UInt32](../../sql-reference/data-types/int-uint.md) type from an HTTP server that responds in [CSV](../../interfaces/formats.md#csv) format.

``` sql
-SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32') LIMIT 3;
+SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32', headers('Accept'='text/csv; charset=utf-8')) LIMIT 3;
```

Inserting data from a `URL` into a table:

@@ -46,3 +47,12 @@ SELECT * FROM test_table;

Patterns in curly brackets `{ }` are used to generate a set of shards or to specify failover addresses. For supported pattern types and examples, see the description of the [remote](remote.md#globs-in-addresses) function.
Character `|` inside patterns is used to specify failover addresses. They are iterated in the same order as listed in the pattern. The number of generated addresses is limited by [glob_expansion_max_elements](../../operations/settings/settings.md#glob_expansion_max_elements) setting.
+
+## Virtual Columns
+
+- `_path` — Path to the `URL`.
+- `_file` — Resource name of the `URL`.
+
+**See Also**
+
+- [Virtual columns](/docs/en/engines/table-engines/index.md#table_engines-virtual_columns)

diff --git a/docs/en/sql-reference/table-functions/urlCluster.md b/docs/en/sql-reference/table-functions/urlCluster.md
new file mode 100644
index 00000000000..8f19632c433
--- /dev/null
+++ b/docs/en/sql-reference/table-functions/urlCluster.md
@@ -0,0 +1,62 @@
+---
+slug: /en/sql-reference/table-functions/urlCluster
+sidebar_position: 55
+sidebar_label: urlCluster
+---
+
+# urlCluster Table Function
+
+Allows processing files from URL in parallel from many nodes in a specified cluster. On initiator it creates a connection to all nodes in the cluster, discloses asterisks in the URL file path, and dispatches each file dynamically.
On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished.
+
+**Syntax**
+
+``` sql
+urlCluster(cluster_name, URL, format, structure)
+```
+
+**Arguments**
+
+- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
+- `URL` — HTTP or HTTPS server address, which can accept `GET` requests. Type: [String](../../sql-reference/data-types/string.md).
+- `format` — [Format](../../interfaces/formats.md#formats) of the data. Type: [String](../../sql-reference/data-types/string.md).
+- `structure` — Table structure in `'UserID UInt64, Name String'` format. Determines column names and types. Type: [String](../../sql-reference/data-types/string.md).
+
+**Returned value**
+
+A table with the specified format and structure and with data from the defined `URL`.
+
+**Examples**
+
+Getting the first 3 lines of a table that contains columns of `String` and [UInt32](../../sql-reference/data-types/int-uint.md) type from an HTTP server that responds in [CSV](../../interfaces/formats.md#csv) format.
+
+1. Create a basic HTTP server using the standard Python 3 tools and start it:
+
+```python
+from http.server import BaseHTTPRequestHandler, HTTPServer
+
+class CSVHTTPServer(BaseHTTPRequestHandler):
+    def do_GET(self):
+        self.send_response(200)
+        self.send_header('Content-type', 'text/csv')
+        self.end_headers()
+
+        self.wfile.write(bytes('Hello,1\nWorld,2\n', "utf-8"))
+
+if __name__ == "__main__":
+    server_address = ('127.0.0.1', 12345)
+    HTTPServer(server_address, CSVHTTPServer).serve_forever()
+```
+
+``` sql
+SELECT * FROM urlCluster('cluster_simple','http://127.0.0.1:12345', CSV, 'column1 String, column2 UInt32')
+```
+
+## Globs in URL
+
+Patterns in curly brackets `{ }` are used to generate a set of shards or to specify failover addresses. For supported pattern types and examples, see the description of the [remote](remote.md#globs-in-addresses) function.
+Character `|` inside patterns is used to specify failover addresses. They are iterated in the same order as listed in the pattern. The number of generated addresses is limited by [glob_expansion_max_elements](../../operations/settings/settings.md#glob_expansion_max_elements) setting.
+
+**See Also**
+
+- [URL engine](../../engines/table-engines/special/url.md)
+- [URL table function](../../sql-reference/table-functions/url.md)
\ No newline at end of file

diff --git a/docs/en/sql-reference/table-functions/view.md b/docs/en/sql-reference/table-functions/view.md
index 074481cc522..2c21fe9ff4b 100644
--- a/docs/en/sql-reference/table-functions/view.md
+++ b/docs/en/sql-reference/table-functions/view.md
@@ -15,11 +15,11 @@ view(subquery)

**Arguments**

-- `subquery` — `SELECT` query.
+- `subquery` — `SELECT` query.

**Returned value**

-- A table.
+- A table.
+
+**See Also**
+
+- [URL table engine](../../engines/table-engines/special/url.md)
+- [URL table function](../../sql-reference/table-functions/url.md)
\ No newline at end of file
diff --git a/docs/en/sql-reference/table-functions/view.md b/docs/en/sql-reference/table-functions/view.md
index 074481cc522..2c21fe9ff4b 100644
--- a/docs/en/sql-reference/table-functions/view.md
+++ b/docs/en/sql-reference/table-functions/view.md
@@ -15,11 +15,11 @@ view(subquery)

**Arguments**

-- `subquery` — `SELECT` query.
+- `subquery` — `SELECT` query.

**Returned value**

-- A table.
+- A table.

**Example**

@@ -63,4 +63,4 @@ SELECT * FROM cluster(`cluster_name`, view(SELECT a, b, c FROM table_name));

**See Also**

-- [View Table Engine](https://clickhouse.com/docs/en/engines/table-engines/special/view/)
+- [View Table Engine](https://clickhouse.com/docs/en/engines/table-engines/special/view/)
diff --git a/docs/en/sql-reference/transactions.md b/docs/en/sql-reference/transactions.md
new file mode 100644
index 00000000000..68fbfe0b22a
--- /dev/null
+++ b/docs/en/sql-reference/transactions.md
@@ -0,0 +1,260 @@
+---
+slug: /en/guides/developer/transactional
+---
+# Transactional (ACID) support
+
+INSERT into one partition* in one table* of the MergeTree* family, up to max_insert_block_size rows*, is transactional (ACID):
+- Atomic: an INSERT succeeds or is rejected as a whole: if a confirmation is sent to the client, all rows were inserted; if an error is sent to the client, no rows were inserted.
+- Consistent: if no table constraints are violated, then all rows in an INSERT are inserted and the INSERT succeeds; if constraints are violated, then no rows are inserted.
+- Isolated: concurrent clients observe a consistent snapshot of the table: the state of the table is either as before the INSERT or as after the successful INSERT; no partial state is seen.
+- Durable: a successful INSERT is written to the filesystem before answering the client, on a single replica or multiple replicas (controlled by the `insert_quorum` setting), and ClickHouse can ask the OS to sync the filesystem data on the storage media (controlled by the `fsync_after_insert` setting).
+* If the table has many partitions and an INSERT covers many of them, then insertion into every partition is transactional on its own;
+* INSERT into multiple tables with one statement is possible if materialized views are involved;
+* INSERT into a Distributed table is not transactional as a whole, while insertion into every shard is transactional;
+* another example: an insert into Buffer tables is neither atomic, nor isolated, nor consistent, nor durable;
+* atomicity is ensured even if `async_insert` is enabled, but it can be turned off by the wait_for_async_insert setting;
+* max_insert_block_size is 1 000 000 by default and can be adjusted as needed;
+* if the client did not receive an answer from the server, it does not know whether the transaction succeeded, and it can repeat the transaction using exactly-once insertion properties;
+* ClickHouse uses MVCC with snapshot isolation internally;
+* all ACID properties are valid even in the case of a server kill / crash;
+* in a typical setup, either insert_quorum into different AZs or fsync should be enabled to ensure durable inserts;
+* "consistency" in ACID terms does not cover the semantics of distributed systems (see https://jepsen.io/consistency), which is controlled by different settings (select_sequential_consistency);
+* this explanation does not cover the new transactions feature that allows full-featured transactions over multiple tables, materialized views, multiple SELECTs, etc.
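+
+To make the per-block atomicity above concrete, here is a minimal sketch (the table name `t` is hypothetical): a single INSERT that stays within max_insert_block_size forms one block, which becomes visible all at once or not at all.
+
+``` sql
+CREATE TABLE t (n UInt64) ENGINE = MergeTree ORDER BY n;
+
+-- All 1000 rows arrive in one block: concurrent readers either see
+-- none of them or all of them, never a partially inserted block.
+INSERT INTO t SELECT number FROM numbers(1000);
+```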
+
+## Transactions, Commit, and Rollback
+
+In addition to the functionality described at the top of this document, ClickHouse has experimental support for transactions, commits, and rollback functionality.
+
+### Requirements
+
+- Deploy ClickHouse Keeper or ZooKeeper to track transactions
+- Atomic DB only (the default)
+- Non-Replicated MergeTree table engine only
+- Enable experimental transaction support by adding this setting in `config.d/transactions.xml`:
+  ```xml
+  <clickhouse>
+    <allow_experimental_transactions>1</allow_experimental_transactions>
+  </clickhouse>
+  ```
+
+### Notes
+- This is an experimental feature, and changes should be expected.
+- If an exception occurs during a transaction, you cannot commit the transaction. This includes all exceptions, including `UNKNOWN_FUNCTION` exceptions caused by typos.
+- Nested transactions are not supported; finish the current transaction and start a new one instead.
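+
+As a small illustration of the note about exceptions, the following sketch (using the `throwIf` function purely to force an error) leaves the session in a state where only `ROLLBACK` is possible:
+
+``` sql
+BEGIN TRANSACTION;
+SELECT throwIf(1, 'boom');  -- any exception poisons the transaction
+COMMIT;                     -- would now fail; ROLLBACK instead
+```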
+
+### Configuration
+
+These examples use a single-node ClickHouse server with ClickHouse Keeper enabled.
+
+#### Enable experimental transaction support
+
+```xml title=/etc/clickhouse-server/config.d/transactions.xml
+<clickhouse>
+    <allow_experimental_transactions>1</allow_experimental_transactions>
+</clickhouse>
+```
+
+#### Basic configuration for a single ClickHouse server node with ClickHouse Keeper enabled
+
+:::note
+See the [deployment](docs/en/deployment-guides/terminology.md) documentation for details on deploying ClickHouse server and a proper quorum of ClickHouse Keeper nodes. The configuration shown here is for experimental purposes.
+:::
+
+```xml title=/etc/clickhouse-server/config.d/config.xml
+<clickhouse>
+    <logger>
+        <level>debug</level>
+        <log>/var/log/clickhouse-server/clickhouse-server.log</log>
+        <errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
+        <size>1000M</size>
+        <count>3</count>
+    </logger>
+    <display_name>node 1</display_name>
+    <listen_host>0.0.0.0</listen_host>
+    <http_port>8123</http_port>
+    <tcp_port>9000</tcp_port>
+    <zookeeper>
+        <node>
+            <host>clickhouse-01</host>
+            <port>9181</port>
+        </node>
+    </zookeeper>
+    <keeper_server>
+        <tcp_port>9181</tcp_port>
+        <server_id>1</server_id>
+        <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
+        <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
+        <coordination_settings>
+            <operation_timeout_ms>10000</operation_timeout_ms>
+            <session_timeout_ms>30000</session_timeout_ms>
+            <raft_logs_level>information</raft_logs_level>
+        </coordination_settings>
+        <raft_configuration>
+            <server>
+                <id>1</id>
+                <hostname>clickhouse-keeper-01</hostname>
+                <port>9234</port>
+            </server>
+        </raft_configuration>
+    </keeper_server>
+</clickhouse>
+```
+
+### Example
+
+#### Verify that experimental transactions are enabled
+
+Issue a `BEGIN TRANSACTION` followed by a `ROLLBACK` to verify that experimental transactions are enabled, and that ClickHouse Keeper is enabled as it is used to track transactions.
+
+```sql
+BEGIN TRANSACTION
+```
+```response
+Ok.
+```
+
+:::tip
+If you see the following error, then check your configuration file to make sure that `allow_experimental_transactions` is set to `1` (or any value other than `0` or `false`).
+```
+Code: 48. DB::Exception: Received from localhost:9000.
+DB::Exception: Transactions are not supported.
+(NOT_IMPLEMENTED)
+```
+
+You can also check ClickHouse Keeper by issuing
+```
+echo ruok | nc localhost 9181
+```
+ClickHouse Keeper should respond with `imok`.
+:::
+
+```sql
+ROLLBACK
+```
+```response
+Ok.
+```
+
+#### Create a table for testing
+
+:::tip
+Creation of tables is not transactional. Run this DDL query outside of a transaction.
+:::
+
+```sql
+CREATE TABLE mergetree_table
+(
+    `n` Int64
+)
+ENGINE = MergeTree
+ORDER BY n
+```
+```response
+Ok.
+```
+
+#### Begin a transaction and insert a row
+
+```sql
+BEGIN TRANSACTION
+```
+```response
+Ok.
+```
+
+```sql
+INSERT INTO mergetree_table FORMAT Values (10)
+```
+```response
+Ok.
+```
+
+```sql
+SELECT *
+FROM mergetree_table
+```
+```response
+┌──n─┐
+│ 10 │
+└────┘
+```
+:::note
+You can query the table from within a transaction and see that the row was inserted even though it has not yet been committed.
+:::
+
+#### Roll back the transaction, and query the table again
+
+Verify that the transaction is rolled back:
+```sql
+ROLLBACK
+```
+```response
+Ok.
+```
+```sql
+SELECT *
+FROM mergetree_table
+```
+```response
+Ok.
+
+0 rows in set. Elapsed: 0.002 sec.
+```
+
+#### Complete a transaction and query the table again
+
+```sql
+BEGIN TRANSACTION
+```
+```response
+Ok.
+```
+
+```sql
+INSERT INTO mergetree_table FORMAT Values (42)
+```
+```response
+Ok.
+```
+
+```sql
+COMMIT
+```
+```response
+Ok. Elapsed: 0.002 sec.
+```
+
+```sql
+SELECT *
+FROM mergetree_table
+```
+```response
+┌──n─┐
+│ 42 │
+└────┘
+```
+
+### Transactions introspection
+
+You can inspect transactions by querying the `system.transactions` table, but note that you cannot query that
+table from a session that is in a transaction. Open a second `clickhouse client` session to query that table.
+
+```sql
+SELECT *
+FROM system.transactions
+FORMAT Vertical
+```
+```response
+Row 1:
+──────
+tid:         (33,61,'51e60bce-6b82-4732-9e1d-b40705ae9ab8')
+tid_hash:    11240433987908122467
+elapsed:     210.017820947
+is_readonly: 1
+state:       RUNNING
+```
+
+## More Details
+
+See this [meta issue](https://github.com/ClickHouse/ClickHouse/issues/48794) to find much more extensive tests and to keep up to date with the progress.
+
diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md
index 59d49830852..7ee2102c14d 100644
--- a/docs/en/sql-reference/window-functions/index.md
+++ b/docs/en/sql-reference/window-functions/index.md
@@ -80,7 +80,7 @@ WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]
- `PARTITION BY` - defines how to break a resultset into groups.
- `ORDER BY` - defines how to order rows inside the group during calculation of aggregate_function.
- `ROWS or RANGE` - defines bounds of a frame, aggregate_function is calculated within a frame.
-- `WINDOW` - allows to reuse a window definition with multiple exressions.
+- `WINDOW` - allows reusing a window definition with multiple expressions.

### Functions

@@ -140,8 +140,8 @@ ORDER BY
│        1 │     1 │     1 │ [1,2,3]      │ <┐
│        1 │     2 │     2 │ [1,2,3]      │  │ 1st group
│        1 │     3 │     3 │ [1,2,3]      │ <┘
-│        2 │     0 │     0 │ [0]          │ <- 2-nd group
-│        3 │     0 │     0 │ [0]          │ <- 3-d group
+│        2 │     0 │     0 │ [0]          │ <- 2nd group
+│        3 │     0 │     0 │ [0]          │ <- 3rd group
└──────────┴───────┴───────┴──────────────┘
```

diff --git a/docs/get-clickhouse-docs.sh b/docs/get-clickhouse-docs.sh
new file mode 100755
index 00000000000..92ba8058dcc
--- /dev/null
+++ b/docs/get-clickhouse-docs.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+set -e
+# The script to clone or update the user-guides documentation repo
+# https://github.com/ClickHouse/clickhouse-docs
+
+WORKDIR=$(dirname "$0")
+WORKDIR=$(readlink -f "${WORKDIR}")
+cd "$WORKDIR"
+
+UPDATE_PERIOD_HOURS=${UPDATE_PERIOD_HOURS:=24}
+
+if [ -d "clickhouse-docs" ]; then
+    git -C clickhouse-docs pull
+else
+    if [ -n "$1" ]; then
+        url_type="$1"
+    else
+        read -rp "Enter the URL type (ssh | https): " url_type
+    fi
+    case "$url_type" in
+    ssh)
+        git_url=git@github.com:ClickHouse/clickhouse-docs.git
+        ;;
+    https)
+        git_url=https://github.com/ClickHouse/clickhouse-docs.git
+        ;;
+    *)
+        echo "URL type must be 'ssh' or 'https'"
+        exit 1
+        ;;
+    esac
+
+    if [ -n "$2" ]; then
+        set_git_hook="$2"
+    elif [ -z "$1" ]; then
+        read -rp "Would you like to set up a git hook for automatic update? (y|n): " set_git_hook
+    fi
(y|n): " set_git_hook + fi + + git clone "$git_url" "clickhouse-docs" + + if [ "$set_git_hook" = "y" ]; then + hook_command="$(pwd)/pull-clickhouse-docs-hook.sh $UPDATE_PERIOD_HOURS ||:" + hook_file=$(realpath "$(pwd)/../.git/hooks/post-checkout") + if grep -Faq "pull-clickhouse-docs-hook.sh" "$hook_file" 2>/dev/null; then + echo "Looks like the update hook already exists, will not add another one" + else + echo "Appending '$hook_command' to $hook_file" + echo "$hook_command" >> "$hook_file" + chmod u+x "$hook_file" # Just in case it did not exist before append + fi + elif [ ! "$set_git_hook" = "n" ]; then + echo "Expected 'y' or 'n', got '$set_git_hook', will not setup git hook" + fi +fi diff --git a/docs/pull-clickhouse-docs-hook.sh b/docs/pull-clickhouse-docs-hook.sh new file mode 100755 index 00000000000..bd93a1d3997 --- /dev/null +++ b/docs/pull-clickhouse-docs-hook.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +set -e +# The script to update user-guides documentation repo +# https://github.com/ClickHouse/clickhouse-docs + +WORKDIR=$(dirname "$0") +WORKDIR=$(readlink -f "${WORKDIR}") +cd "$WORKDIR" + +UPDATE_PERIOD_HOURS="${1:-24}" # By default update once per 24 hours; 0 means "always update" + +if [ ! -d "clickhouse-docs" ]; then + echo "There's no clickhouse-docs/ dir, run get-clickhouse-docs.sh first to clone the repo" + exit 1 +fi + +# Do not update it too often +LAST_FETCH_TS=$(stat -c %Y clickhouse-docs/.git/FETCH_HEAD 2>/dev/null || echo 0) +CURRENT_TS=$(date +%s) +HOURS_SINCE_LAST_FETCH=$(( (CURRENT_TS - LAST_FETCH_TS) / 60 / 60 )) + +if [ "$HOURS_SINCE_LAST_FETCH" -lt "$UPDATE_PERIOD_HOURS" ]; then + exit 0; +fi + +echo "Updating clickhouse-docs..." +git -C clickhouse-docs pull diff --git a/docs/ru/development/developer-instruction.md b/docs/ru/development/developer-instruction.md index 80472178ae2..7294bc2ae87 100644 --- a/docs/ru/development/developer-instruction.md +++ b/docs/ru/development/developer-instruction.md @@ -41,9 +41,15 @@ ClickHouse не работает и не собирается на 32-битны Выполните в терминале: - git clone git@github.com:your_github_username/ClickHouse.git --recursive + git clone --shallow-submodules git@github.com:your_github_username/ClickHouse.git cd ClickHouse +Или (если вы хотите использовать sparse checkout для submodules): + + git clone git@github.com:your_github_username/ClickHouse.git + cd ClickHouse + ./contrib/update-submodules.sh + Замените слово `your_github_username` в команде для git на имя вашего аккаунта на GitHub. Эта команда создаст директорию ClickHouse, содержащую рабочую копию проекта. diff --git a/docs/ru/engines/database-engines/materialized-mysql.md b/docs/ru/engines/database-engines/materialized-mysql.md index c214e08dce1..df56b7a0bd6 100644 --- a/docs/ru/engines/database-engines/materialized-mysql.md +++ b/docs/ru/engines/database-engines/materialized-mysql.md @@ -97,7 +97,7 @@ CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', ### DDL-запросы {#ddl-queries} -DDL-запросы в MySQL конвертируются в соответствующие DDL-запросы в ClickHouse ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create/index.md), [DROP](../../sql-reference/statements/drop), [RENAME](../../sql-reference/statements/rename.md)). Если ClickHouse не может конвертировать какой-либо DDL-запрос, он его игнорирует. 
+DDL-запросы в MySQL конвертируются в соответствующие DDL-запросы в ClickHouse ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create/index.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). Если ClickHouse не может конвертировать какой-либо DDL-запрос, он его игнорирует. ### Репликация данных {#data-replication} diff --git a/docs/ru/engines/table-engines/integrations/kafka.md b/docs/ru/engines/table-engines/integrations/kafka.md index 58e03ba30cc..832486c038a 100644 --- a/docs/ru/engines/table-engines/integrations/kafka.md +++ b/docs/ru/engines/table-engines/integrations/kafka.md @@ -211,4 +211,4 @@ ClickHouse может поддерживать учетные данные Kerbe **Смотрите также** - [Виртуальные столбцы](index.md#table_engines-virtual_columns) -- [background_message_broker_schedule_pool_size](../../../operations/settings/settings.md#background_message_broker_schedule_pool_size) +- [background_message_broker_schedule_pool_size](../../../operations/server-configuration-parameters/settings.md#background_message_broker_schedule_pool_size) diff --git a/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md index dac490468d0..e3b4238a200 100644 --- a/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md @@ -89,7 +89,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] └─────────────────────┴───────────┴──────────┴──────┘ ``` -Первая строка отменяет предыдущее состояние объекта (пользователя). Она должен повторять все поля из ключа сортировки для отменённого состояния за исключением `Sign`. +Первая строка отменяет предыдущее состояние объекта (пользователя). Она должна повторять все поля из ключа сортировки для отменённого состояния за исключением `Sign`. Вторая строка содержит текущее состояние. diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md index 7269cc023e4..812b0c0a2d4 100644 --- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md @@ -89,7 +89,7 @@ ORDER BY expr - `min_merge_bytes_to_use_direct_io` — минимальный объём данных при слиянии, необходимый для прямого (небуферизованного) чтения/записи (direct I/O) на диск. При слиянии частей данных ClickHouse вычисляет общий объём хранения всех данных, подлежащих слиянию. Если общий объём хранения всех данных для чтения превышает `min_bytes_to_use_direct_io` байт, тогда ClickHouse использует флаг `O_DIRECT` при чтении данных с диска. Если `min_merge_bytes_to_use_direct_io = 0`, тогда прямой ввод-вывод отключен. Значение по умолчанию: `10 * 1024 * 1024 * 1024` байтов. - `merge_with_ttl_timeout` — минимальное время в секундах перед повторным слиянием для удаления данных с истекшим TTL. По умолчанию: `14400` секунд (4 часа). - `merge_with_recompression_ttl_timeout` — минимальное время в секундах перед повторным слиянием для повторного сжатия данных с истекшим TTL. По умолчанию: `14400` секунд (4 часа). - - `try_fetch_recompressed_part_timeout` — время ожидания (в секундах) перед началом слияния с повторным сжатием. В течение этого времени ClickHouse пытается извлечь сжатую часть из реплики, которая назначила это слияние. Значение по умолчанию: `7200` секунд (2 часа). 
+ - `try_fetch_recompressed_part_timeout` — время ожидания (в секундах) перед началом слияния с повторным сжатием. В течение этого времени ClickHouse пытается извлечь сжатую часть из реплики, которая назначила это слияние. Значение по умолчанию: `7200` секунд (2 часа). - `write_final_mark` — включает или отключает запись последней засечки индекса в конце куска данных, указывающей за последний байт. По умолчанию — 1. Не отключайте её. - `merge_max_block_size` — максимальное количество строк в блоке для операций слияния. Значение по умолчанию: 8192. - `storage_policy` — политика хранения данных. Смотрите [Хранение данных таблицы на нескольких блочных устройствах](#table_engine-mergetree-multiple-volumes). @@ -337,7 +337,7 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 Поддерживаемые типы данных: `Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`. - Фильтром могут пользоваться функции: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions), [notIn](../../../sql-reference/functions/in-functions), [has](../../../sql-reference/functions/array-functions#hasarr-elem), [hasAny](../../../sql-reference/functions/array-functions#hasany), [hasAll](../../../sql-reference/functions/array-functions#hasall). + Фильтром могут пользоваться функции: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md#hasarr-elem), [hasAny](../../../sql-reference/functions/array-functions.md#hasany), [hasAll](../../../sql-reference/functions/array-functions.md#hasall). 
**Примеры** @@ -361,14 +361,14 @@ INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARIT | [startsWith](../../../sql-reference/functions/string-functions.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | | [endsWith](../../../sql-reference/functions/string-functions.md#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ | | [multiSearchAny](../../../sql-reference/functions/string-search-functions.md#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ | -| [in](../../../sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [notIn](../../../sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [in](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notIn](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | | [less (\<)](../../../sql-reference/functions/comparison-functions.md#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ | | [greater (\>)](../../../sql-reference/functions/comparison-functions.md#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ | | [lessOrEquals (\<=)](../../../sql-reference/functions/comparison-functions.md#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | | [greaterOrEquals (\>=)](../../../sql-reference/functions/comparison-functions.md#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [empty](../../../sql-reference/functions/array-functions#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [notEmpty](../../../sql-reference/functions/array-functions#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [empty](../../../sql-reference/functions/array-functions.md#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [notEmpty](../../../sql-reference/functions/array-functions.md#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | | hasToken | ✗ | ✗ | ✗ | ✔ | ✗ | Функции с постоянным агрументом, который меньше, чем размер ngram не могут использовать индекс `ngrambf_v1` для оптимизации запроса. @@ -391,12 +391,12 @@ INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARIT ## Проекции {#projections} Проекции похожи на [материализованные представления](../../../sql-reference/statements/create/view.md#materialized), но определяются на уровне кусков данных. Это обеспечивает гарантии согласованности данных наряду с автоматическим использованием в запросах. -Проекции — это экспериментальная возможность. Чтобы включить поддержку проекций, установите настройку [allow_experimental_projection_optimization](../../../operations/settings/settings.md#allow-experimental-projection-optimization) в значение `1`. См. также настройку [force_optimize_projection ](../../../operations/settings/settings.md#force-optimize-projection). +Проекции — это экспериментальная возможность. Чтобы включить поддержку проекций, установите настройку [optimize_use_projections](../../../operations/settings/settings.md#allow-experimental-projection-optimization) в значение `1`. См. также настройку [force_optimize_projection ](../../../operations/settings/settings.md#optimize_use_projections). Проекции не поддерживаются для запросов `SELECT` с модификатором [FINAL](../../../sql-reference/statements/select/from.md#select-from-final). ### Запрос проекции {#projection-query} -Запрос проекции — это то, что определяет проекцию. Такой запрос неявно выбирает данные из родительской таблицы. +Запрос проекции — это то, что определяет проекцию. Такой запрос неявно выбирает данные из родительской таблицы. 
**Синтаксис** ```sql @@ -406,9 +406,9 @@ SELECT [GROUP BY] [ORDER BY] Проекции можно изменить или удалить с помощью запроса [ALTER](../../../sql-reference/statements/alter/projection.md). ### Хранение проекции {#projection-storage} -Проекции хранятся в каталоге куска данных. Это похоже на хранение индексов, но используется подкаталог, в котором хранится анонимный кусок таблицы `MergeTree`. Таблица создается запросом определения проекции. -Если присутствует секция `GROUP BY`, то используется движок [AggregatingMergeTree](aggregatingmergetree.md), а все агрегатные функции преобразуются в `AggregateFunction`. -Если присутствует секция `ORDER BY`, таблица `MergeTree` использует ее в качестве выражения для первичного ключа. +Проекции хранятся в каталоге куска данных. Это похоже на хранение индексов, но используется подкаталог, в котором хранится анонимный кусок таблицы `MergeTree`. Таблица создается запросом определения проекции. +Если присутствует секция `GROUP BY`, то используется движок [AggregatingMergeTree](aggregatingmergetree.md), а все агрегатные функции преобразуются в `AggregateFunction`. +Если присутствует секция `ORDER BY`, таблица `MergeTree` использует ее в качестве выражения для первичного ключа. Во время процесса слияния кусок данных проекции объединяется с помощью процедуры слияния хранилища. Контрольная сумма куска данных родительской таблицы включает кусок данных проекции. Другие процедуры аналогичны индексам пропуска данных. ### Анализ запросов {#projection-query-analysis} @@ -499,7 +499,7 @@ TTL expr За каждым `TTL` выражением может следовать тип действия, которое выполняется после достижения времени, соответствующего результату `TTL` выражения: - `DELETE` - удалить данные (действие по умолчанию); -- `RECOMPRESS codec_name` - повторно сжать данные с помощью кодека `codec_name`; +- `RECOMPRESS codec_name` - повторно сжать данные с помощью кодека `codec_name`; - `TO DISK 'aaa'` - переместить данные на диск `aaa`; - `TO VOLUME 'bbb'` - переместить данные на том `bbb`; - `GROUP BY` - агрегировать данные. @@ -584,7 +584,7 @@ TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y); Данные с истекшим `TTL` удаляются, когда ClickHouse мёржит куски данных. -Когда ClickHouse видит, что некоторые данные устарели, он выполняет внеплановые мёржи. Для управление частотой подобных мёржей, можно задать настройку `merge_with_ttl_timeout`. Если её значение слишком низкое, придется выполнять много внеплановых мёржей, которые могут начать потреблять значительную долю ресурсов сервера. +Когда ClickHouse видит, что некоторые данные устарели, он выполняет внеплановые мёржи. Для управления частотой подобных мёржей, можно задать настройку `merge_with_ttl_timeout`. Если её значение слишком низкое, придется выполнять много внеплановых мёржей, которые могут начать потреблять значительную долю ресурсов сервера. Если вы выполните запрос `SELECT` между слияниями вы можете получить устаревшие данные. Чтобы избежать этого используйте запрос [OPTIMIZE](../../../engines/table-engines/mergetree-family/mergetree.md#misc_operations-optimize) перед `SELECT`. @@ -679,7 +679,7 @@ TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y); - `policy_name_N` — название политики. Названия политик должны быть уникальны. - `volume_name_N` — название тома. Названия томов должны быть уникальны. - `disk` — диск, находящийся внутри тома. -- `max_data_part_size_bytes` — максимальный размер куска данных, который может находится на любом из дисков этого тома. 
Если в результате слияния размер куска ожидается больше, чем max_data_part_size_bytes, то этот кусок будет записан в следующий том. В основном эта функция позволяет хранить новые / мелкие куски на горячем (SSD) томе и перемещать их на холодный (HDD) том, когда они достигают большого размера. Не используйте этот параметр, если политика имеет только один том. +- `max_data_part_size_bytes` — максимальный размер куска данных, который может находиться на любом из дисков этого тома. Если в результате слияния размер куска ожидается больше, чем max_data_part_size_bytes, то этот кусок будет записан в следующий том. В основном эта функция позволяет хранить новые / мелкие куски на горячем (SSD) томе и перемещать их на холодный (HDD) том, когда они достигают большого размера. Не используйте этот параметр, если политика имеет только один том. - `move_factor` — доля доступного свободного места на томе, если места становится меньше, то данные начнут перемещение на следующий том, если он есть (по умолчанию 0.1). Для перемещения куски сортируются по размеру от большего к меньшему (по убыванию) и выбираются куски, совокупный размер которых достаточен для соблюдения условия `move_factor`, если совокупный размер всех партов недостаточен, будут перемещены все парты. - `prefer_not_to_merge` — Отключает слияние кусков данных, хранящихся на данном томе. Если данная настройка включена, то слияние данных, хранящихся на данном томе, не допускается. Это позволяет контролировать работу ClickHouse с медленными дисками. @@ -730,7 +730,7 @@ TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y); В приведенном примере, политика `hdd_in_order` реализует прицип [round-robin](https://ru.wikipedia.org/wiki/Round-robin_(%D0%B0%D0%BB%D0%B3%D0%BE%D1%80%D0%B8%D1%82%D0%BC)). Так как в политике есть всего один том (`single`), то все записи производятся на его диски по круговому циклу. Такая политика может быть полезна при наличии в системе нескольких похожих дисков, но при этом не сконфигурирован RAID. Учтите, что каждый отдельный диск ненадёжен и чтобы не потерять важные данные это необходимо скомпенсировать за счет хранения данных в трёх копиях. -Если система содержит диски различных типов, то может пригодиться политика `moving_from_ssd_to_hdd`. В томе `hot` находится один SSD-диск (`fast_ssd`), а также задается ограничение на максимальный размер куска, который может храниться на этом томе (1GB). Все куски такой таблицы больше 1GB будут записываться сразу на том `cold`, в котором содержится один HDD-диск `disk1`. Также, при заполнении диска `fast_ssd` более чем на 80% данные будут переносится на диск `disk1` фоновым процессом. +Если система содержит диски различных типов, то может пригодиться политика `moving_from_ssd_to_hdd`. В томе `hot` находится один SSD-диск (`fast_ssd`), а также задается ограничение на максимальный размер куска, который может храниться на этом томе (1GB). Все куски такой таблицы больше 1GB будут записываться сразу на том `cold`, в котором содержится один HDD-диск `disk1`. Также при заполнении диска `fast_ssd` более чем на 80% данные будут переноситься на диск `disk1` фоновым процессом. Порядок томов в политиках хранения важен, при достижении условий на переполнение тома данные переносятся на следующий. Порядок дисков в томах так же важен, данные пишутся по очереди на каждый из них. @@ -752,7 +752,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' Изменить политику хранения после создания таблицы можно при помощи запроса [ALTER TABLE ... MODIFY SETTING]. 
При этом необходимо учесть, что новая политика должна содержать все тома и диски предыдущей политики с теми же именами. -Количество потоков для фоновых перемещений кусков между дисками можно изменить с помощью настройки [background_move_pool_size](../../../operations/settings/settings.md#background_move_pool_size) +Количество потоков для фоновых перемещений кусков между дисками можно изменить с помощью настройки [background_move_pool_size](../../../operations/server-configuration-parameters/settings.md#background_move_pool_size) ### Особенности работы {#details} diff --git a/docs/ru/engines/table-engines/mergetree-family/replication.md b/docs/ru/engines/table-engines/mergetree-family/replication.md index 22cb2196ef1..2b4d89dbe0a 100644 --- a/docs/ru/engines/table-engines/mergetree-family/replication.md +++ b/docs/ru/engines/table-engines/mergetree-family/replication.md @@ -64,9 +64,9 @@ ClickHouse хранит метаинформацию о репликах в [Apa Для очень больших кластеров, можно использовать разные кластеры ZooKeeper для разных шардов. Впрочем, на кластере Яндекс.Метрики (примерно 300 серверов) такой необходимости не возникает. -Репликация асинхронная, мульти-мастер. Запросы `INSERT` и `ALTER` можно направлять на любой доступный сервер. Данные вставятся на сервер, где выполнен запрос, а затем скопируются на остальные серверы. В связи с асинхронностью, только что вставленные данные появляются на остальных репликах с небольшой задержкой. Если часть реплик недоступна, данные на них запишутся тогда, когда они станут доступны. Если реплика доступна, то задержка составляет столько времени, сколько требуется для передачи блока сжатых данных по сети. Количество потоков для выполнения фоновых задач можно задать с помощью настройки [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size). +Репликация асинхронная, мульти-мастер. Запросы `INSERT` и `ALTER` можно направлять на любой доступный сервер. Данные вставятся на сервер, где выполнен запрос, а затем скопируются на остальные серверы. В связи с асинхронностью, только что вставленные данные появляются на остальных репликах с небольшой задержкой. Если часть реплик недоступна, данные на них запишутся тогда, когда они станут доступны. Если реплика доступна, то задержка составляет столько времени, сколько требуется для передачи блока сжатых данных по сети. Количество потоков для выполнения фоновых задач можно задать с помощью настройки [background_schedule_pool_size](../../../operations/server-configuration-parameters/settings.md#background_schedule_pool_size). -Движок `ReplicatedMergeTree` использует отдельный пул потоков для скачивания кусков данных. Размер пула ограничен настройкой [background_fetches_pool_size](../../../operations/settings/settings.md#background_fetches_pool_size), которую можно указать при перезапуске сервера. +Движок `ReplicatedMergeTree` использует отдельный пул потоков для скачивания кусков данных. Размер пула ограничен настройкой [background_fetches_pool_size](../../../operations/server-configuration-parameters/settings.md#background_fetches_pool_size), которую можно указать при перезапуске сервера. По умолчанию, запрос INSERT ждёт подтверждения записи только от одной реплики. Если данные были успешно записаны только на одну реплику, и сервер с этой репликой перестал существовать, то записанные данные будут потеряны. Вы можете включить подтверждение записи от нескольких реплик, используя настройку `insert_quorum`. 
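+
+Например, потребовать подтверждения записи от двух реплик можно так (минимальный набросок; имя таблицы условное):
+
+``` sql
+-- запрос INSERT вернёт управление только после записи на две реплики
+INSERT INTO replicated_table SETTINGS insert_quorum = 2 VALUES (1);
+```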
@@ -251,8 +251,8 @@ $ sudo -u clickhouse touch /var/lib/clickhouse/flags/force_restore_data

**Смотрите также**

-- [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size)
-- [background_fetches_pool_size](../../../operations/settings/settings.md#background_fetches_pool_size)
+- [background_schedule_pool_size](../../../operations/server-configuration-parameters/settings.md#background_schedule_pool_size)
+- [background_fetches_pool_size](../../../operations/server-configuration-parameters/settings.md#background_fetches_pool_size)
- [execute_merges_on_single_replica_time_threshold](../../../operations/settings/settings.md#execute-merges-on-single-replica-time-threshold)
- [max_replicated_fetches_network_bandwidth](../../../operations/settings/merge-tree-settings.md#max_replicated_fetches_network_bandwidth)
- [max_replicated_sends_network_bandwidth](../../../operations/settings/merge-tree-settings.md#max_replicated_sends_network_bandwidth)
diff --git a/docs/ru/engines/table-engines/special/buffer.md b/docs/ru/engines/table-engines/special/buffer.md
index 4987dafc11f..574d9273088 100644
--- a/docs/ru/engines/table-engines/special/buffer.md
+++ b/docs/ru/engines/table-engines/special/buffer.md
@@ -66,4 +66,4 @@ CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 16, 10

Таблицы типа Buffer используются в тех случаях, когда от большого количества серверов поступает слишком много INSERT-ов в единицу времени, и нет возможности заранее самостоятельно буферизовать данные перед вставкой, в результате чего, INSERT-ы не успевают выполняться.

-Заметим, что даже для таблиц типа Buffer не имеет смысла вставлять данные по одной строке, так как таким образом будет достигнута скорость всего лишь в несколько тысяч строк в секунду, тогда как при вставке более крупными блоками, достижимо более миллиона строк в секунду (смотрите раздел [«Производительность»](../../../introduction/performance/).
+Заметим, что даже для таблиц типа Buffer не имеет смысла вставлять данные по одной строке, так как таким образом будет достигнута скорость всего лишь в несколько тысяч строк в секунду, тогда как при вставке более крупными блоками достижимо более миллиона строк в секунду (смотрите раздел [«Производительность»](../../../introduction/performance.md)).
diff --git a/docs/ru/faq/operations/multi-region-replication.md b/docs/ru/faq/operations/multi-region-replication.md
index bfe3231c247..eb53a69e7f6 100644
--- a/docs/ru/faq/operations/multi-region-replication.md
+++ b/docs/ru/faq/operations/multi-region-replication.md
@@ -10,4 +10,4 @@ The short answer is "yes". However, we recommend keeping latency between all reg

Configuration-wise there's no difference compared to single-region replication, simply use hosts that are located in different locations for replicas.

-For more information, see [full article on data replication](../../engines/table-engines/mergetree-family/replication/).
+For more information, see the [full article on data replication](../../engines/table-engines/mergetree-family/replication.md).
diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md
index 02d8c62669d..f041db43d0f 100644
--- a/docs/ru/getting-started/install.md
+++ b/docs/ru/getting-started/install.md
@@ -77,15 +77,37 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password.
Команда ClickHouse в Яндексе рекомендует использовать официальные предкомпилированные `rpm` пакеты для CentOS, RedHat и всех остальных дистрибутивов Linux, основанных на rpm.

+#### Установка официального репозитория
+
Сначала нужно подключить официальный репозиторий:

``` bash
sudo yum install -y yum-utils
sudo yum-config-manager --add-repo https://packages.clickhouse.com/rpm/clickhouse.repo
-sudo yum install -y clickhouse-server clickhouse-client
+```

-sudo /etc/init.d/clickhouse-server start
-clickhouse-client # or "clickhouse-client --password" if you set up a password.
+Для систем с пакетным менеджером `zypper` (openSUSE, SLES):
+
+``` bash
+sudo zypper addrepo -r https://packages.clickhouse.com/rpm/clickhouse.repo -g
+sudo zypper --gpg-auto-import-keys refresh clickhouse-stable
+```
+
+Далее любая команда `yum install` может быть заменена на `zypper install`. Чтобы указать желаемую версию, необходимо добавить `-$VERSION` к имени пакета, например `clickhouse-client-22.2.2.22`.
+
+#### Установка сервера и клиента
+
+``` bash
+sudo yum install -y clickhouse-server clickhouse-client
+```
+
+#### Запуск сервера
+
+``` bash
+sudo systemctl enable clickhouse-server
+sudo systemctl start clickhouse-server
+sudo systemctl status clickhouse-server
+clickhouse-client # или "clickhouse-client --password" если установлен пароль
+```
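+
+Убедиться, что сервер запущен и отвечает на запросы, можно простым запросом из `clickhouse-client` (минимальный пример):
+
+``` sql
+SELECT version();
+```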
diff --git a/docs/ru/getting-started/tutorial.md b/docs/ru/getting-started/tutorial.md
index 803da2952fd..60a7463f70f 100644
--- a/docs/ru/getting-started/tutorial.md
+++ b/docs/ru/getting-started/tutorial.md
@@ -477,7 +477,7 @@ clickhouse-client --query "INSERT INTO tutorial.hits_v1 FORMAT TSV" --max_insert
clickhouse-client --query "INSERT INTO tutorial.visits_v1 FORMAT TSV" --max_insert_block_size=100000 < visits_v1.tsv
```

-ClickHouse has a lot of [settings to tune](../operations/settings/) and one way to specify them in console client is via arguments, as we can see with `--max_insert_block_size`. The easiest way to figure out what settings are available, what do they mean and what the defaults are is to query the `system.settings` table:
+ClickHouse has a lot of [settings to tune](../operations/settings/index.md) and one way to specify them in the console client is via arguments, as we can see with `--max_insert_block_size`. The easiest way to figure out what settings are available, what they mean, and what the defaults are is to query the `system.settings` table:

``` sql
SELECT name, value, changed, description
diff --git a/docs/ru/interfaces/cli.md b/docs/ru/interfaces/cli.md
index 8a4ec083242..4c22eae0207 100644
--- a/docs/ru/interfaces/cli.md
+++ b/docs/ru/interfaces/cli.md
@@ -132,7 +132,7 @@ $ clickhouse-client --param_tbl="numbers" --param_db="system" --param_col="numbe
- `--queries-file` - путь к файлу с запросами для выполнения. Необходимо указать только одну из опций: `query` или `queries-file`.
- `--database, -d` — выбрать текущую БД. Без указания значение берется из настроек сервера (по умолчанию — БД ‘default’).
- `--multiline, -m` — если указано — разрешить многострочные запросы, не отправлять запрос по нажатию Enter.
-- `--multiquery, -n` — если указано — разрешить выполнять несколько запросов, разделённых точкой с запятой.
+- `--multiquery, -n` — если указано, то после опции `--query` могут быть перечислены несколько запросов, разделённых точкой с запятой. Для удобства можно также опустить `--query` и передавать запросы непосредственно после `--multiquery`.
- `--format, -f` — использовать указанный формат по умолчанию для вывода результата.
- `--vertical, -E` — если указано, использовать по умолчанию формат [Vertical](../interfaces/formats.md#vertical) для вывода результата. То же самое, что `–format=Vertical`. В этом формате каждое значение выводится на отдельной строке, что удобно для отображения широких таблиц.
- `--time, -t` — если указано, в неинтерактивном режиме вывести время выполнения запроса в поток ‘stderr’.
diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md
index 59c77d082cf..bef5c223281 100644
--- a/docs/ru/interfaces/formats.md
+++ b/docs/ru/interfaces/formats.md
@@ -974,7 +974,7 @@ Array представлены как длина в формате varint (unsig
столбцы из входных данных будут сопоставлены со столбцами таблицы по их именам, столбцы с неизвестными именами будут пропущены, если включен параметр [input_format_skip_unknown_fields](../operations/settings/settings.md#input_format_skip_unknown_fields).
В противном случае первая строка будет пропущена.
:::
-
+
## RowBinaryWithNamesAndTypes {#rowbinarywithnamesandtypes}

То же самое что [RowBinary](#rowbinary), но добавляется заголовок:

@@ -1326,7 +1326,7 @@ ClickHouse поддерживает настраиваемую точность

Неподдерживаемые типы данных Parquet: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
-Типы данных столбцов в ClickHouse могут отличаться от типов данных соответствующих полей файла в формате Parquet. При вставке данных ClickHouse интерпретирует типы данных в соответствии с таблицей выше, а затем [приводит](../sql-reference/functions/type-conversion-functions/#type_conversion_function-cast) данные к тому типу, который установлен для столбца таблицы. +Типы данных столбцов в ClickHouse могут отличаться от типов данных соответствующих полей файла в формате Parquet. При вставке данных ClickHouse интерпретирует типы данных в соответствии с таблицей выше, а затем [приводит](../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) данные к тому типу, который установлен для столбца таблицы. ### Вставка и выборка данных {#inserting-and-selecting-data} @@ -1386,7 +1386,7 @@ ClickHouse поддерживает настраиваемую точность Неподдерживаемые типы данных Arrow: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`. -Типы данных столбцов в ClickHouse могут отличаться от типов данных соответствующих полей файла в формате Arrow. При вставке данных ClickHouse интерпретирует типы данных в соответствии с таблицей выше, а затем [приводит](../sql-reference/functions/type-conversion-functions/#type_conversion_function-cast) данные к тому типу, который установлен для столбца таблицы. +Типы данных столбцов в ClickHouse могут отличаться от типов данных соответствующих полей файла в формате Arrow. При вставке данных ClickHouse интерпретирует типы данных в соответствии с таблицей выше, а затем [приводит](../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) данные к тому типу, который установлен для столбца таблицы. ### Вставка данных {#inserting-data-arrow} @@ -1444,7 +1444,7 @@ ClickHouse поддерживает настраиваемую точность Неподдерживаемые типы данных ORC: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`. -Типы данных столбцов в таблицах ClickHouse могут отличаться от типов данных для соответствующих полей ORC. При вставке данных ClickHouse интерпретирует типы данных ORC согласно таблице соответствия, а затем [приводит](../sql-reference/functions/type-conversion-functions/#type_conversion_function-cast) данные к типу, установленному для столбца таблицы ClickHouse. +Типы данных столбцов в таблицах ClickHouse могут отличаться от типов данных для соответствующих полей ORC. При вставке данных ClickHouse интерпретирует типы данных ORC согласно таблице соответствия, а затем [приводит](../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) данные к типу, установленному для столбца таблицы ClickHouse. ### Вставка данных {#inserting-data-2} diff --git a/docs/ru/interfaces/http.md b/docs/ru/interfaces/http.md index 62e97e3f61d..b8c5ee77f0c 100644 --- a/docs/ru/interfaces/http.md +++ b/docs/ru/interfaces/http.md @@ -243,7 +243,7 @@ $ echo 'SELECT 1' | curl -H 'X-ClickHouse-User: user' -H 'X-ClickHouse-Key: pass Если пользователь не задан,то используется `default`. Если пароль не задан, то используется пустой пароль. Также в параметрах URL вы можете указать любые настройки, которые будут использованы для обработки одного запроса, или целые профили настроек. Пример:http://localhost:8123/?profile=web&max_rows_to_read=1000000000&query=SELECT+1 -Подробнее смотрите в разделе [Настройки](../operations/settings/). +Подробнее смотрите в разделе [Настройки](../operations/settings/index.md). ``` bash $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:8123/?' 
--data-binary @- diff --git a/docs/ru/interfaces/third-party/client-libraries.md b/docs/ru/interfaces/third-party/client-libraries.md index 6c63fb4c730..a4659e9ac4e 100644 --- a/docs/ru/interfaces/third-party/client-libraries.md +++ b/docs/ru/interfaces/third-party/client-libraries.md @@ -8,6 +8,7 @@ sidebar_label: "Клиентские библиотеки от сторонни :::danger "Disclaimer" Яндекс не поддерживает перечисленные ниже библиотеки и не проводит тщательного тестирования для проверки их качества. +::: - Python: - [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm) @@ -24,6 +25,7 @@ sidebar_label: "Клиентские библиотеки от сторонни - [SeasClick C++ client](https://github.com/SeasX/SeasClick) - [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel) - [kolya7k ClickHouse PHP extension](https://github.com//kolya7k/clickhouse-php) + - [hyvor/clickhouse-php](https://github.com/hyvor/clickhouse-php) - Go - [clickhouse](https://github.com/kshvakov/clickhouse/) - [go-clickhouse](https://github.com/roistat/go-clickhouse) diff --git a/docs/ru/interfaces/third-party/gui.md b/docs/ru/interfaces/third-party/gui.md index c34b799347b..34d2f0e371a 100644 --- a/docs/ru/interfaces/third-party/gui.md +++ b/docs/ru/interfaces/third-party/gui.md @@ -177,19 +177,20 @@ sidebar_label: "Визуальные интерфейсы от сторонни ### Yandex DataLens {#yandex-datalens} -[Yandex DataLens](https://cloud.yandex.ru/services/datalens) — cервис визуализации и анализа данных. +[Yandex DataLens](https://datalens.yandex.ru) — cервис визуализации и анализа данных. Основные возможности: - Широкий выбор инструментов визуализации, от простых столбчатых диаграмм до сложных дашбордов. - Возможность опубликовать дашборды на широкую аудиторию. - Поддержка множества источников данных, включая ClickHouse. -- Хранение материализованных данных в кластере ClickHouse DataLens. -Для небольших проектов DataLens [доступен бесплатно](https://cloud.yandex.ru/docs/datalens/pricing), в том числе и для коммерческого использования. +DataLens [доступен бесплатно](https://cloud.yandex.ru/docs/datalens/pricing), в том числе и для коммерческого использования. +- [Знакомство с DataLens](https://youtu.be/57ngi_6BINE). +- [Чат сообщества DataLens](https://t.me/YandexDataLens) - [Документация DataLens](https://cloud.yandex.ru/docs/datalens/). -- [Пособие по визуализации данных из ClickHouse](https://cloud.yandex.ru/docs/solutions/datalens/data-from-ch-visualization). +- [Сценарий по визуализации данных из ClickHouse](https://cloud.yandex.ru/docs/solutions/datalens/data-from-ch-visualization). ### Holistics Software {#holistics-software} diff --git a/docs/ru/operations/clickhouse-keeper.md b/docs/ru/operations/clickhouse-keeper.md index 67be83e13b2..3a931529b32 100644 --- a/docs/ru/operations/clickhouse-keeper.md +++ b/docs/ru/operations/clickhouse-keeper.md @@ -325,21 +325,21 @@ clickhouse-keeper-converter --zookeeper-logs-dir /var/lib/zookeeper/version-2 -- Например, для кластера из 3 нод, алгоритм кворума продолжает работать при отказе не более чем одной ноды. Конфигурация кластера может быть изменена динамически с некоторыми ограничениями. -Переконфигурация также использует Raft, поэтому для добавление новой ноды кластера или исключения старой ноды из него требуется достижения кворума в рамках текущей конфигурации кластера. +Переконфигурация также использует Raft, поэтому для добавления новой ноды кластера или исключения старой ноды требуется достижение кворума в рамках текущей конфигурации кластера. 
Если в вашем кластере произошел отказ большего числа нод, чем допускает Raft для вашей текущей конфигурации, и у вас нет возможности восстановить их работоспособность, Raft перестанет работать и не позволит изменить конфигурацию стандартным механизмом.

-Тем не менее ClickHousr Keeper имеет возможность запуститься в режиме восстановления, который позволяет переконфигурировать класте используя только одну ноду кластера.
+Тем не менее ClickHouse Keeper имеет возможность запуститься в режиме восстановления, который позволяет переконфигурировать кластер, используя только одну ноду кластера.
Этот механизм может использоваться только как крайняя мера, когда вы не можете восстановить существующие ноды кластера или запустить новый сервер с тем же идентификатором.

Важно:
- Удостоверьтесь, что отказавшие ноды не смогут в дальнейшем подключиться к кластеру.
-- Не запускайте новые ноды, пока не завешите процедуру ниже.
+- Не запускайте новые ноды, пока не завершите процедуру ниже.

После того, как выполнили действия выше, выполните следующие шаги.
-1. Выберете одну ноду Keeper, которая станет новым лидером. Учтите, что данные которые с этой ноды будут испольщзованы всем кластером, поэтому рекомендуется выбрать ноду с наиболее актуальным состоянием.
+1. Выберите одну ноду Keeper, которая станет новым лидером. Учтите, что данные с этой ноды будут использованы всем кластером, поэтому рекомендуется выбрать ноду с наиболее актуальным состоянием.
2. Перед дальнейшими действиями сделайте резервную копию данных из директорий `log_storage_path` и `snapshot_storage_path`.
3. Измените настройки на всех нодах кластера, которые вы собираетесь использовать.
-4. Отправьте команду `rcvr` на ноду, которую вы выбрали или остановите ее и запустите заново с аргументом `--force-recovery`. Это переведет ноду в режим восстановления.
+4. Отправьте команду `rcvr` на ноду, которую вы выбрали, или остановите ее и запустите заново с аргументом `--force-recovery`. Это переведет ноду в режим восстановления.
5. Запускайте остальные ноды кластера по одной и проверяйте, что команда `mntr` возвращает `follower` в выводе состояния `zk_server_state` перед тем, как запустить следующую ноду.
-6. Пока нода работает в режиме восстановления, лидер будет возвращать ошибку на запрос `mntr` пока кворум не будет достигнут с помощью новых нод. Любые запросы от клиентов и постедователей будут возвращать ошибку.
+6. Пока нода работает в режиме восстановления, лидер будет возвращать ошибку на запрос `mntr`, пока кворум не будет достигнут с помощью новых нод. Любые запросы от клиентов и последователей будут возвращать ошибку.
7. После достижения кворума лидер перейдет в нормальный режим работы и станет обрабатывать все запросы через Raft. Удостоверьтесь, что запрос `mntr` возвращает `leader` в выводе состояния `zk_server_state`.

diff --git a/docs/ru/operations/opentelemetry.md b/docs/ru/operations/opentelemetry.md
index b6c5e89bcc6..4e127e9e0f0 100644
--- a/docs/ru/operations/opentelemetry.md
+++ b/docs/ru/operations/opentelemetry.md
@@ -10,6 +10,7 @@ ClickHouse поддерживает [OpenTelemetry](https://opentelemetry.io/)

:::danger "Предупреждение"
Поддержка стандарта экспериментальная и будет со временем меняться.
+:::

## Обеспечение поддержки контекста трассировки в ClickHouse

diff --git a/docs/ru/operations/optimizing-performance/sampling-query-profiler.md b/docs/ru/operations/optimizing-performance/sampling-query-profiler.md
index c77f6a1f290..3d5ec993fdf 100644
--- a/docs/ru/operations/optimizing-performance/sampling-query-profiler.md
+++ b/docs/ru/operations/optimizing-performance/sampling-query-profiler.md
@@ -30,7 +30,7 @@ To analyze the `trace_log` system table:

- Use the `addressToLine`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md) to get function names and their positions in ClickHouse code.

To get a profile for some query, you need to aggregate data from the `trace_log` table. You can aggregate data by individual functions or by the whole stack traces.

-If you need to visualize `trace_log` info, try [flamegraph](../../interfaces/third-party/gui/#clickhouse-flamegraph) and [speedscope](https://github.com/laplab/clickhouse-speedscope).
+If you need to visualize `trace_log` info, try [flamegraph](../../interfaces/third-party/gui.md#clickhouse-flamegraph) and [speedscope](https://github.com/laplab/clickhouse-speedscope).

## Example {#example}

diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md
index bffa3c39a60..787153d4d19 100644
--- a/docs/ru/operations/server-configuration-parameters/settings.md
+++ b/docs/ru/operations/server-configuration-parameters/settings.md
@@ -26,6 +26,7 @@ ClickHouse перезагружает встроенные словари с з

:::danger "Внимание"
Лучше не использовать, если вы только начали работать с ClickHouse.
+:::

Общий вид конфигурации:

- `min_part_size` - Минимальный размер части таблицы.
- `min_part_size_ratio` - Отношение размера минимальной части таблицы к полному размеру таблицы.
- `method` - Метод сжатия. Возможные значения: `lz4`, `lz4hc`, `zstd`, `deflate_qpl`.
-- `level` – Уровень сжатия. См. [Кодеки](../../sql-reference/statements/create/table/#create-query-common-purpose-codecs).
+- `level` – Уровень сжатия. См. [Кодеки](../../sql-reference/statements/create/table.md#create-query-common-purpose-codecs).

Можно сконфигурировать несколько разделов `<case>`.

@@ -151,7 +152,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part

## custom_settings_prefixes {#custom_settings_prefixes}

-Список префиксов для [пользовательских настроек](../../operations/settings/#custom_settings). Префиксы должны перечисляться через запятую.
+Список префиксов для [пользовательских настроек](../../operations/settings/index.md#custom_settings). Префиксы должны перечисляться через запятую.

**Пример**

@@ -161,7 +162,7 @@

**См. также**

-- [Пользовательские настройки](../../operations/settings#custom_settings)
+- [Пользовательские настройки](../../operations/settings/index.md#custom_settings)

## core_dump {#server_configuration_parameters-core_dump}

@@ -224,6 +225,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part

``` xml
<default_replica_path>/clickhouse/tables/{uuid}/{shard}</default_replica_path>
```
+
## default_replica_name {#default_replica_name}

Имя реплики в ZooKeeper.
@@ -915,6 +917,72 @@ ClickHouse использует потоки из глобального пула

``` xml
<max_thread_pool_size>12000</max_thread_pool_size>
```

+## background_buffer_flush_schedule_pool_size {#background_buffer_flush_schedule_pool_size}
+
+Задает количество потоков для выполнения фонового сброса данных в таблицах с движком [Buffer](../../engines/table-engines/special/buffer.md).
+
+Допустимые значения:
+
+- Положительное целое число.
+
+Значение по умолчанию: 16.
+
+## background_move_pool_size {#background_move_pool_size}
+
+Задает количество потоков для фоновых перемещений кусков между дисками. Работает для таблиц с движком [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes).
+
+Допустимые значения:
+
+- Положительное целое число.
+
+Значение по умолчанию: 8.
+
+## background_schedule_pool_size {#background_schedule_pool_size}
+
+Задает количество потоков для выполнения фоновых задач. Работает для [реплицируемых](../../engines/table-engines/mergetree-family/replication.md) таблиц, стримов в [Kafka](../../engines/table-engines/integrations/kafka.md) и обновления IP адресов у записей во внутреннем [DNS кеше](../server-configuration-parameters/settings.md#server-settings-dns-cache-update-period).
+
+Допустимые значения:
+
+- Положительное целое число.
+
+Значение по умолчанию: 128.
+
+## background_fetches_pool_size {#background_fetches_pool_size}
+
+Задает количество потоков для скачивания кусков данных для [реплицируемых](../../engines/table-engines/mergetree-family/replication.md) таблиц. Для использования в продакшене с частыми небольшими вставками или медленным кластером ZooKeeper рекомендуется использовать значение по умолчанию.
+
+Допустимые значения:
+
+- Положительное целое число.
+
+Значение по умолчанию: 8.
+
+## background_distributed_schedule_pool_size {#background_distributed_schedule_pool_size}
+
+Задает количество потоков для выполнения фоновых задач. Работает для таблиц с движком [Distributed](../../engines/table-engines/special/distributed.md).
+
+Допустимые значения:
+
+- Положительное целое число.
+
+Значение по умолчанию: 16.
+
+## background_message_broker_schedule_pool_size {#background_message_broker_schedule_pool_size}
+
+Задает количество потоков для фонового потокового вывода сообщений.
+
+Допустимые значения:
+
+- Положительное целое число.
+
+Значение по умолчанию: 16.
+
+**Смотрите также**
+
+- Движок [Kafka](../../engines/table-engines/integrations/kafka.md#kafka).
+- Движок [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md#rabbitmq-engine).
+
+
## merge_tree {#server_configuration_parameters-merge_tree}

Тонкая настройка таблиц семейства [MergeTree](../../operations/server-configuration-parameters/settings.md).

@@ -1064,6 +1132,7 @@

:::danger "Обратите внимание"
Завершающий слеш обязателен.
+:::

**Пример**

@@ -1330,6 +1399,7 @@ TCP порт для защищённого обмена данными с кли

:::danger "Обратите внимание"
Завершающий слеш обязателен.
+:::

**Пример**

diff --git a/docs/ru/operations/settings/settings-users.md b/docs/ru/operations/settings/settings-users.md
index a8d5f0ec453..50f4eb5ae6b 100644
--- a/docs/ru/operations/settings/settings-users.md
+++ b/docs/ru/operations/settings/settings-users.md
@@ -37,6 +37,10 @@ sidebar_label: "Настройки пользователей"

+        <grants>
+            <query>GRANT SELECT ON system.*</query>
+        </grants>
+

@@ -89,6 +93,27 @@

Значение по умолчанию: 0.

+### grants {#grants-user-setting}
+
+Настройка позволяет указать набор прав для заданного пользователя.
+Каждый элемент списка должен представлять собой `GRANT` запрос без указания пользователей в самом запросе. + +Пример: + +```xml +<user1> + <grants> + <query>GRANT SHOW ON *.*</query> + <query>GRANT CREATE ON *.* WITH GRANT OPTION</query> + <query>GRANT SELECT ON system.*</query> + </grants> +</user1> +``` + +Настройка не может быть выставлена одновременно с +`dictionaries`, `access_management`, `named_collection_control`, `show_named_collections_secrets` +или `allow_databases`. + ### user_name/networks {#user-namenetworks} Список сетей, из которых пользователь может подключиться к серверу ClickHouse. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 4025966ac21..fa3ea582c55 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -1122,6 +1122,7 @@ SELECT type, query FROM system.query_log WHERE log_comment = 'log_comment test' :::note "Предупреждение" Эта настройка экспертного уровня, не используйте ее, если вы только начинаете работать с ClickHouse. ::: + ## max_query_size {#settings-max_query_size} Максимальный кусок запроса, который будет считан в оперативку для разбора парсером языка SQL. @@ -2517,68 +2518,27 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1; ## background_buffer_flush_schedule_pool_size {#background_buffer_flush_schedule_pool_size} -Задает количество потоков для выполнения фонового сброса данных в таблицах с движком [Buffer](../../engines/table-engines/special/buffer.md). Настройка применяется при запуске сервера ClickHouse и не может быть изменена в пользовательском сеансе. - -Допустимые значения: - -- Положительное целое число. - -Значение по умолчанию: 16. +Параметр перенесен в [серверную конфигурацию](../../operations/server-configuration-parameters/settings.md/#background_buffer_flush_schedule_pool_size). ## background_move_pool_size {#background_move_pool_size} -Задает количество потоков для фоновых перемещений кусков между дисками. Работает для таблиц с движком [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes). Настройка применяется при запуске сервера ClickHouse и не может быть изменена в пользовательском сеансе. - -Допустимые значения: - -- Положительное целое число. - -Значение по умолчанию: 8. +Параметр перенесен в [серверную конфигурацию](../../operations/server-configuration-parameters/settings.md/#background_move_pool_size). ## background_schedule_pool_size {#background_schedule_pool_size} -Задает количество потоков для выполнения фоновых задач. Работает для [реплицируемых](../../engines/table-engines/mergetree-family/replication.md) таблиц, стримов в [Kafka](../../engines/table-engines/integrations/kafka.md) и обновления IP адресов у записей во внутреннем [DNS кеше](../server-configuration-parameters/settings.md#server-settings-dns-cache-update-period). Настройка применяется при запуске сервера ClickHouse и не может быть изменена в пользовательском сеансе. - -Допустимые значения: - -- Положительное целое число. - -Значение по умолчанию: 128. +Параметр перенесен в [серверную конфигурацию](../../operations/server-configuration-parameters/settings.md/#background_schedule_pool_size). ## background_fetches_pool_size {#background_fetches_pool_size} -Задает количество потоков для скачивания кусков данных для [реплицируемых](../../engines/table-engines/mergetree-family/replication.md) таблиц. Настройка применяется при запуске сервера ClickHouse и не может быть изменена в пользовательском сеансе.
Для использования в продакшене с частыми небольшими вставками или медленным кластером ZooKeeper рекомендуется использовать значение по умолчанию. - -Допустимые значения: - -- Положительное целое число. - -Значение по умолчанию: 8. +Параметр перенесен в [серверную конфигурацию](../../operations/server-configuration-parameters/settings.md/#background_fetches_pool_size). ## background_distributed_schedule_pool_size {#background_distributed_schedule_pool_size} -Задает количество потоков для выполнения фоновых задач. Работает для таблиц с движком [Distributed](../../engines/table-engines/special/distributed.md). Настройка применяется при запуске сервера ClickHouse и не может быть изменена в пользовательском сеансе. - -Допустимые значения: - -- Положительное целое число. - -Значение по умолчанию: 16. +Параметр перенесен в [серверную конфигурацию](../../operations/server-configuration-parameters/settings.md/#background_distributed_schedule_pool_size). ## background_message_broker_schedule_pool_size {#background_message_broker_schedule_pool_size} -Задает количество потоков для фонового потокового вывода сообщений. Настройка применяется при запуске сервера ClickHouse и не может быть изменена в пользовательском сеансе. - -Допустимые значения: - -- Положительное целое число. - -Значение по умолчанию: 16. - -**Смотрите также** - -- Движок [Kafka](../../engines/table-engines/integrations/kafka.md#kafka). -- Движок [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md#rabbitmq-engine). +Параметр перенесен в [серверную конфигурацию](../../operations/server-configuration-parameters/settings.md/#background_message_broker_schedule_pool_size). ## format_avro_schema_registry_url {#format_avro_schema_registry_url} @@ -3225,16 +3185,6 @@ SELECT * FROM test2; Значение по умолчанию: `0`. -## allow_experimental_geo_types {#allow-experimental-geo-types} - -Разрешает использование экспериментальных типов данных для работы с [географическими структурами](../../sql-reference/data-types/geo.md). - -Возможные значения: -- 0 — использование типов данных для работы с географическими структурами не поддерживается. -- 1 — использование типов данных для работы с географическими структурами поддерживается. - -Значение по умолчанию: `0`. - ## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously} Добавляет модификатор `SYNC` ко всем запросам `DROP` и `DETACH`. @@ -3388,6 +3338,7 @@ SELECT * FROM test LIMIT 10 OFFSET 100; │ 109 │ └─────┘ ``` + ## http_connection_timeout {#http_connection_timeout} Тайм-аут для HTTP-соединения (в секундах). @@ -3627,7 +3578,7 @@ SETTINGS index_granularity = 8192 │ Строка с идентификатором снэпшота, из которого будет выполняться [исходный дамп таблиц PostgreSQL](../../engines/database-engines/materialized-postgresql.md). Эта настройка должна использоваться совместно с [materialized_postgresql_replication_slot](#materialized-postgresql-replication-slot). -## allow_experimental_projection_optimization {#allow-experimental-projection-optimization} +## optimize_use_projections {#optimize_use_projections} Включает или отключает поддержку [проекций](../../engines/table-engines/mergetree-family/mergetree.md#projections) при обработке запросов `SELECT`. 
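Because `optimize_use_projections` is a rename of a formerly experimental setting, a hedged sketch may be useful here; the table and projection names below are assumptions for illustration only.

```sql
-- A table with a projection; with optimize_use_projections = 1 the aggregate
-- query below can be answered from the pre-aggregated projection parts.
CREATE TABLE visits
(
    user_id UInt64,
    url String,
    PROJECTION by_user (SELECT user_id, count() GROUP BY user_id)
)
ENGINE = MergeTree ORDER BY url;

SELECT user_id, count() FROM visits GROUP BY user_id
SETTINGS optimize_use_projections = 1;
```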
@@ -3640,7 +3591,7 @@ SETTINGS index_granularity = 8192 │ ## force_optimize_projection {#force-optimize-projection} -Включает или отключает обязательное использование [проекций](../../engines/table-engines/mergetree-family/mergetree.md#projections) в запросах `SELECT`, если поддержка проекций включена (см. настройку [allow_experimental_projection_optimization](#allow-experimental-projection-optimization)). +Включает или отключает обязательное использование [проекций](../../engines/table-engines/mergetree-family/mergetree.md#projections) в запросах `SELECT`, если поддержка проекций включена (см. настройку [optimize_use_projections](#optimize_use_projections)). Возможные значения: @@ -4084,3 +4035,32 @@ ALTER TABLE test FREEZE SETTINGS alter_partition_verbose_result = 1; Задает символ, который интерпретируется как суффикс после результирующего набора данных формата [CustomSeparated](../../interfaces/formats.md#format-customseparated). Значение по умолчанию: `''`. + +## partial_result_on_first_cancel {#partial_result_on_first_cancel} + +Если установлено значение `true` и пользователь хочет прервать запрос (например, с помощью `Ctrl+C` на клиенте), то запрос продолжает выполнение только для данных, которые уже были считаны из таблицы. После этого он вернет частичный результат запроса для той части таблицы, которая была прочитана. Чтобы полностью остановить выполнение запроса без частичного результата, пользователь должен отправить 2 запроса отмены. + +**Пример с выключенной настройкой при нажатии Ctrl+C** +```sql +SELECT sum(number) FROM numbers(10000000000) + +Cancelling query. +Ok. +Query was cancelled. + +0 rows in set. Elapsed: 1.334 sec. Processed 52.65 million rows, 421.23 MB (39.48 million rows/s., 315.85 MB/s.) +``` + +**Пример с включенной настройкой при нажатии Ctrl+C** +```sql +SELECT sum(number) FROM numbers(10000000000) SETTINGS partial_result_on_first_cancel=true + +┌──────sum(number)─┐ +│ 1355411451286266 │ +└──────────────────┘ + +1 row in set. Elapsed: 1.331 sec. Processed 52.13 million rows, 417.05 MB (39.17 million rows/s., 313.33 MB/s.) +``` + +Возможные значения: `true`, `false`. + +Значение по умолчанию: `false`. diff --git a/docs/ru/operations/storing-data.md b/docs/ru/operations/storing-data.md index 2f5c9c95ea4..56081c82bc9 100644 --- a/docs/ru/operations/storing-data.md +++ b/docs/ru/operations/storing-data.md @@ -82,7 +82,7 @@ sidebar_label: "Хранение данных на внешних дисках" - `type` — `encrypted`. Иначе зашифрованный диск создан не будет. - `disk` — тип диска для хранения данных. -- `key` — ключ для шифрования и расшифровки. Тип: [Uint64](../sql-reference/data-types/int-uint.md). Вы можете использовать параметр `key_hex` для шифрования в шестнадцатеричной форме. +- `key` — ключ для шифрования и расшифровки. Тип: [UInt64](../sql-reference/data-types/int-uint.md). Вы можете использовать параметр `key_hex` для шифрования в шестнадцатеричной форме. Вы можете указать несколько ключей, используя атрибут `id` (смотрите пример выше). Необязательные параметры: diff --git a/docs/ru/operations/system-tables/information_schema.md b/docs/ru/operations/system-tables/information_schema.md index 6a9b8134dad..691fec19039 100644 --- a/docs/ru/operations/system-tables/information_schema.md +++ b/docs/ru/operations/system-tables/information_schema.md @@ -178,7 +178,7 @@ table_type: BASE TABLE - `view_definition` ([String](../../sql-reference/data-types/string.md)) — `SELECT` запрос для представления.
- `check_option` ([String](../../sql-reference/data-types/string.md)) — `NONE`, нет проверки. - `is_updatable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, представление не обновляется. -- `is_insertable_into` ([Enum8](../../sql-reference/data-types/enum.md)) — показывает является ли представление [материализованным](../../sql-reference/statements/create/view/#materialized). Возможные значения: +- `is_insertable_into` ([Enum8](../../sql-reference/data-types/enum.md)) — показывает, является ли представление [материализованным](../../sql-reference/statements/create/view.md#materialized). Возможные значения: - `NO` — создано обычное представление. - `YES` — создано материализованное представление. - `is_trigger_updatable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, триггер не обновляется. diff --git a/docs/ru/operations/system-tables/replicated_fetches.md b/docs/ru/operations/system-tables/replicated_fetches.md index 0b91a02cf14..c13f058aae1 100644 --- a/docs/ru/operations/system-tables/replicated_fetches.md +++ b/docs/ru/operations/system-tables/replicated_fetches.md @@ -68,4 +68,4 @@ thread_id: 54 **Смотрите также** -- [Управление таблицами ReplicatedMergeTree](../../sql-reference/statements/system/#query-language-system-replicated) +- [Управление таблицами ReplicatedMergeTree](../../sql-reference/statements/system.md#query-language-system-replicated) diff --git a/docs/ru/operations/utilities/clickhouse-benchmark.md b/docs/ru/operations/utilities/clickhouse-benchmark.md index d3185f4fcb0..73de78d1c15 100644 --- a/docs/ru/operations/utilities/clickhouse-benchmark.md +++ b/docs/ru/operations/utilities/clickhouse-benchmark.md @@ -60,7 +60,7 @@ clickhouse-benchmark [keys] < queries_file; - `--stage=WORD` — стадия обработки запроса на сервере. ClickHouse останавливает обработку запроса и возвращает ответ `clickhouse-benchmark` на заданной стадии. Возможные значения: `complete`, `fetch_columns`, `with_mergeable_state`. Значение по умолчанию: `complete`. - `--help` — показывает справку. -Если нужно применить [настройки](../../operations/settings/) для запросов, их можно передать как ключ `--<имя настройки>= SETTING_VALUE`. Например, `--max_memory_usage=1048576`. +Если нужно применить [настройки](../../operations/settings/index.md) для запросов, их можно передать как ключ `--<имя настройки>= SETTING_VALUE`. Например, `--max_memory_usage=1048576`. ## Вывод {#clickhouse-benchmark-output} diff --git a/docs/ru/sql-reference/aggregate-functions/reference/deltasumtimestamp.md b/docs/ru/sql-reference/aggregate-functions/reference/deltasumtimestamp.md index 7be933d67d7..50434419651 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/deltasumtimestamp.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/deltasumtimestamp.md @@ -7,7 +7,7 @@ sidebar_position: 141 Суммирует разницу между последовательными строками. Если разница отрицательна — она будет проигнорирована. -Эта функция предназначена в первую очередь для [материализованных представлений](../../../sql-reference/statements/create/view.md#materialized), упорядоченных по некоторому временному бакету согласно timestamp, например, по бакету `toStartOfMinute`. Поскольку строки в таком материализованном представлении будут иметь одинаковый timestamp, невозможно объединить их в "правом" порядке. Функция отслеживает `timestamp` наблюдаемых значений, поэтому возможно правильно упорядочить состояния во время слияния.
+Эта функция предназначена в первую очередь для [материализованных представлений](../../../sql-reference/statements/create/view.md#materialized), хранящих данные, упорядоченные по некоторому округленному временному интервалу, согласно timestamp, например, по бакету `toStartOfMinute`. Поскольку строки в таком материализованном представлении будут иметь одинаковый timestamp, их невозможно объединить в правильном порядке без хранения исходного, неокругленного значения timestamp. Функция `deltaSumTimestamp` отслеживает исходные `timestamp` наблюдаемых значений, поэтому значения (состояния) функции правильно вычисляются во время слияния кусков. Чтобы вычислить разницу между упорядоченными последовательными строками, вы можете использовать функцию [deltaSum](../../../sql-reference/aggregate-functions/reference/deltasum.md#agg_functions-deltasum) вместо функции `deltaSumTimestamp`. diff --git a/docs/ru/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md b/docs/ru/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md new file mode 100644 index 00000000000..2f8c6bb6760 --- /dev/null +++ b/docs/ru/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md @@ -0,0 +1,117 @@ +--- +slug: /ru/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest +sidebar_position: 300 +sidebar_label: kolmogorovSmirnovTest +--- + +# kolmogorovSmirnovTest {#kolmogorovSmirnovTest} + +Проводит статистический тест Колмогорова-Смирнова для двух независимых выборок. + +**Синтаксис** + +``` sql +kolmogorovSmirnovTest([alternative, computation_method])(sample_data, sample_index) +``` + +Значения выборок берутся из столбца `sample_data`. Если `sample_index` равно 0, то значение из этой строки принадлежит первой выборке. Во всех остальных случаях значение принадлежит второй выборке. +Выборки должны принадлежать непрерывным одномерным распределениям. + +**Аргументы** + +- `sample_data` — данные выборок. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) или [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — индексы выборок. [Integer](../../../sql-reference/data-types/int-uint.md). + +**Параметры** + +- `alternative` — альтернативная гипотеза (Необязательный параметр, по умолчанию: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md). + Пусть F(x) и G(x) - функции распределения первой и второй выборки соответственно. + - `'two-sided'` + Нулевая гипотеза состоит в том, что выборки происходят из одного и того же распределения, то есть F(x) = G(x) для любого x. + Альтернатива - выборки принадлежат разным распределениям. + - `'greater'` + Нулевая гипотеза состоит в том, что элементы первой выборки асимптотически почти наверное меньше элементов из второй выборки, + то есть функция распределения первой выборки лежит выше и соответственно левее, чем функция распределения второй выборки. + Таким образом, это означает, что F(x) >= G(x) для любого x, а альтернатива в этом случае состоит в том, что F(x) < G(x) хотя бы для одного x. + - `'less'` + Нулевая гипотеза состоит в том, что элементы первой выборки асимптотически почти наверное больше элементов из второй выборки, + то есть функция распределения первой выборки лежит ниже и соответственно правее, чем функция распределения второй выборки. + Таким образом, это означает, что F(x) <= G(x) для любого x, а альтернатива в этом случае состоит в том, что F(x) > G(x) хотя бы для одного x.
+- `computation_method` — метод, используемый для вычисления p-value. (Необязательный параметр, по умолчанию: `'auto'`.) [String](../../../sql-reference/data-types/string.md). + - `'exact'` - вычисление производится с помощью вычисления точного распределения статистики. Требует большого количества вычислительных ресурсов и расточительно для больших выборок. + - `'asymp'` (`'asymptotic'`) - используется приближенное вычисление. Для больших выборок приближенный результат и точный почти идентичны. + - `'auto'` - значение вычисляется точно (с помощью метода `'exact'`), если максимальный размер двух выборок не превышает 10'000. + +**Возвращаемые значения** + +[Кортеж](../../../sql-reference/data-types/tuple.md) с двумя элементами: + +- вычисленное значение статистики. [Float64](../../../sql-reference/data-types/float.md). +- вычисленное значение p-value. [Float64](../../../sql-reference/data-types/float.md). + + +**Пример** + +Запрос: + +``` sql +SELECT kolmogorovSmirnovTest('less', 'exact')(value, num) +FROM +( + SELECT + randNormal(0, 10) AS value, + 0 AS num + FROM numbers(10000) + UNION ALL + SELECT + randNormal(0, 10) AS value, + 1 AS num + FROM numbers(10000) +) +``` + +Результат: + +``` text +┌─kolmogorovSmirnovTest('less', 'exact')(value, num)─┐ +│ (0.009899999999999996,0.37528595205132287) │ +└────────────────────────────────────────────────────┘ +``` + +Заметки: +P-value больше чем 0.05 (при уровне значимости 5%), то есть нулевая гипотеза не отвергается. + + +Запрос: + +``` sql +SELECT kolmogorovSmirnovTest('two-sided', 'exact')(value, num) +FROM +( + SELECT + randStudentT(10) AS value, + 0 AS num + FROM numbers(100) + UNION ALL + SELECT + randNormal(0, 10) AS value, + 1 AS num + FROM numbers(100) +) +``` + +Результат: + +``` text +┌─kolmogorovSmirnovTest('two-sided', 'exact')(value, num)─┐ +│ (0.4100000000000002,6.61735760482795e-8) │ +└─────────────────────────────────────────────────────────┘ +``` + +Заметки: +P-value меньше чем 0.05 (при уровне значимости 5%), то есть нулевая гипотеза отвергается. + + +**Смотрите также** + +- [Критерий согласия Колмогорова-Смирнова](https://ru.wikipedia.org/wiki/%D0%9A%D1%80%D0%B8%D1%82%D0%B5%D1%80%D0%B8%D0%B9_%D1%81%D0%BE%D0%B3%D0%BB%D0%B0%D1%81%D0%B8%D1%8F_%D0%9A%D0%BE%D0%BB%D0%BC%D0%BE%D0%B3%D0%BE%D1%80%D0%BE%D0%B2%D0%B0) diff --git a/docs/ru/sql-reference/data-types/aggregatefunction.md b/docs/ru/sql-reference/data-types/aggregatefunction.md index 21b452acb1d..e42b467e4af 100644 --- a/docs/ru/sql-reference/data-types/aggregatefunction.md +++ b/docs/ru/sql-reference/data-types/aggregatefunction.md @@ -6,7 +6,7 @@ sidebar_label: AggregateFunction # AggregateFunction {#data-type-aggregatefunction} -Агрегатные функции могут обладать определяемым реализацией промежуточным состоянием, которое может быть сериализовано в тип данных, соответствующий AggregateFunction(…), и быть записано в таблицу обычно посредством [материализованного представления] (../../sql-reference/statements/create/view.md). Чтобы получить промежуточное состояние, обычно используются агрегатные функции с суффиксом `-State`. Чтобы в дальнейшем получить агрегированные данные необходимо использовать те же агрегатные функции с суффиксом `-Merge`. +Агрегатные функции могут обладать определяемым реализацией промежуточным состоянием, которое может быть сериализовано в тип данных, соответствующий AggregateFunction(…), и быть записано в таблицу обычно посредством [материализованного представления](../../sql-reference/statements/create/view.md).
Чтобы получить промежуточное состояние, обычно используются агрегатные функции с суффиксом `-State`. Чтобы в дальнейшем получить агрегированные данные необходимо использовать те же агрегатные функции с суффиксом `-Merge`. `AggregateFunction(name, types_of_arguments…)` — параметрический тип данных. diff --git a/docs/ru/sql-reference/data-types/datetime.md b/docs/ru/sql-reference/data-types/datetime.md index b513c51397e..e8d4a3ee9fd 100644 --- a/docs/ru/sql-reference/data-types/datetime.md +++ b/docs/ru/sql-reference/data-types/datetime.md @@ -27,9 +27,9 @@ DateTime([timezone]) Консольный клиент ClickHouse по умолчанию использует часовой пояс сервера, если для значения `DateTime` часовой пояс не был задан в явном виде при инициализации типа данных. Чтобы использовать часовой пояс клиента, запустите [clickhouse-client](../../interfaces/cli.md) с параметром `--use_client_time_zone`. -ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/#settings-date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime). +ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/index.md#settings-date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime). -При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/#settings-date_time_input_format). +При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/index.md#settings-date_time_input_format). ## Примеры {#primery} @@ -119,8 +119,8 @@ FROM dt - [Функции преобразования типов](../../sql-reference/functions/type-conversion-functions.md) - [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md) - [Функции для работы с массивами](../../sql-reference/functions/array-functions.md) -- [Настройка `date_time_input_format`](../../operations/settings/#settings-date_time_input_format) -- [Настройка `date_time_output_format`](../../operations/settings/) +- [Настройка `date_time_input_format`](../../operations/settings/index.md#settings-date_time_input_format) +- [Настройка `date_time_output_format`](../../operations/settings/index.md) - [Конфигурационный параметр сервера `timezone`](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) - [Операторы для работы с датой и временем](../../sql-reference/operators/index.md#operators-datetime) - [Тип данных `Date`](date.md) diff --git a/docs/ru/sql-reference/data-types/geo.md b/docs/ru/sql-reference/data-types/geo.md index a7c5f79b0be..52c7dd97da0 100644 --- a/docs/ru/sql-reference/data-types/geo.md +++ b/docs/ru/sql-reference/data-types/geo.md @@ -8,12 +8,8 @@ sidebar_label: Географические структуры ClickHouse поддерживает типы данных для отображения географических объектов — точек (местоположений), территорий и т.п. 
-:::danger "Предупреждение" - Сейчас использование типов данных для работы с географическими структурами является экспериментальной возможностью. Чтобы использовать эти типы данных, включите настройку `allow_experimental_geo_types = 1`. - **См. также** - [Хранение географических структур данных](https://ru.wikipedia.org/wiki/GeoJSON). -- Настройка [allow_experimental_geo_types](../../operations/settings/settings.md#allow-experimental-geo-types). ## Point {#point-data-type} @@ -24,7 +20,6 @@ ClickHouse поддерживает типы данных для отображ Запрос: ```sql -SET allow_experimental_geo_types = 1; CREATE TABLE geo_point (p Point) ENGINE = Memory(); INSERT INTO geo_point VALUES((10, 10)); SELECT p, toTypeName(p) FROM geo_point; @@ -46,7 +41,6 @@ SELECT p, toTypeName(p) FROM geo_point; Запрос: ```sql -SET allow_experimental_geo_types = 1; CREATE TABLE geo_ring (r Ring) ENGINE = Memory(); INSERT INTO geo_ring VALUES([(0, 0), (10, 0), (10, 10), (0, 10)]); SELECT r, toTypeName(r) FROM geo_ring; @@ -68,7 +62,6 @@ SELECT r, toTypeName(r) FROM geo_ring; Запись в этой таблице описывает многоугольник с одной дырой: ```sql -SET allow_experimental_geo_types = 1; CREATE TABLE geo_polygon (pg Polygon) ENGINE = Memory(); INSERT INTO geo_polygon VALUES([[(20, 20), (50, 20), (50, 50), (20, 50)], [(30, 30), (50, 50), (50, 30)]]); SELECT pg, toTypeName(pg) FROM geo_polygon; @@ -91,7 +84,6 @@ SELECT pg, toTypeName(pg) FROM geo_polygon; Запись в этой таблице описывает элемент, состоящий из двух многоугольников — первый без дыр, а второй с одной дырой: ```sql -SET allow_experimental_geo_types = 1; CREATE TABLE geo_multipolygon (mpg MultiPolygon) ENGINE = Memory(); INSERT INTO geo_multipolygon VALUES([[[(0, 0), (10, 0), (10, 10), (0, 10)]], [[(20, 20), (50, 20), (50, 50), (20, 50)],[(30, 30), (50, 50), (50, 30)]]]); SELECT mpg, toTypeName(mpg) FROM geo_multipolygon; diff --git a/docs/ru/sql-reference/data-types/special-data-types/interval.md b/docs/ru/sql-reference/data-types/special-data-types/interval.md index 856275ed8f2..109ceee7852 100644 --- a/docs/ru/sql-reference/data-types/special-data-types/interval.md +++ b/docs/ru/sql-reference/data-types/special-data-types/interval.md @@ -10,6 +10,7 @@ sidebar_label: Interval :::danger "Внимание" Нельзя использовать типы данных `Interval` для хранения данных в таблице. +::: Структура: diff --git a/docs/ru/sql-reference/data-types/tuple.md b/docs/ru/sql-reference/data-types/tuple.md index 76370d01c0d..8953134d154 100644 --- a/docs/ru/sql-reference/data-types/tuple.md +++ b/docs/ru/sql-reference/data-types/tuple.md @@ -34,7 +34,7 @@ SELECT tuple(1,'a') AS x, toTypeName(x) ## Особенности работы с типами данных {#osobennosti-raboty-s-tipami-dannykh} -При создании кортежа «на лету» ClickHouse автоматически определяет тип каждого аргументов как минимальный из типов, который может сохранить значение аргумента. Если аргумент — [NULL](../../sql-reference/data-types/tuple.md#null-literal), то тип элемента кортежа — [Nullable](nullable.md). +При создании кортежа «на лету» ClickHouse автоматически определяет тип всех аргументов как минимальный из типов, который может сохранить значение аргумента. Если аргумент — [NULL](../../sql-reference/data-types/tuple.md#null-literal), то тип элемента кортежа — [Nullable](nullable.md). 
Пример автоматического определения типа данных: diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md index 64637edc4a4..24f29d3bf53 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md @@ -61,7 +61,7 @@ LAYOUT(POLYGON(STORE_POLYGON_KEY_COLUMN 1)) - Мультиполигон. Представляет из себя массив полигонов. Каждый полигон задается двумерным массивом точек — первый элемент этого массива задает внешнюю границу полигона, последующие элементы могут задавать дырки, вырезаемые из него. -Точки могут задаваться массивом или кортежем из своих координат. В текущей реализации поддерживается только двумерные точки. +Точки могут задаваться массивом или кортежем из своих координат. В текущей реализации поддерживаются только двумерные точки. Пользователь может [загружать свои собственные данные](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) во всех поддерживаемых ClickHouse форматах. @@ -80,7 +80,7 @@ LAYOUT(POLYGON(STORE_POLYGON_KEY_COLUMN 1)) - `POLYGON`. Синоним к `POLYGON_INDEX_CELL`. Запросы к словарю осуществляются с помощью стандартных [функций](../../../sql-reference/functions/ext-dict-functions.md) для работы со внешними словарями. -Важным отличием является то, что здесь ключами будут являются точки, для которых хочется найти содержащий их полигон. +Важным отличием является то, что здесь ключами являются точки, для которых хочется найти содержащий их полигон. **Пример** diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index 8c01b8295bf..a711287ae8e 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -59,6 +59,7 @@ ClickHouse поддерживает следующие виды ключей: :::danger "Обратите внимание" Ключ не надо дополнительно описывать в атрибутах. +::: ### Числовой ключ {#ext_dict-numeric-key} diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts.md index 314fefab5eb..a262a354889 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts.md @@ -14,7 +14,7 @@ ClickHouse: - Периодически обновляет их и динамически подгружает отсутствующие значения. - Позволяет создавать внешние словари с помощью xml-файлов или [DDL-запросов](../../statements/create/dictionary.md#create-dictionary-query). -Конфигурация внешних словарей может находится в одном или нескольких xml-файлах. Путь к конфигурации указывается в параметре [dictionaries_config](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_config). +Конфигурация внешних словарей может находиться в одном или нескольких xml-файлах. Путь к конфигурации указывается в параметре [dictionaries_config](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_config). 
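As a reader aid for the `dictionaries_config` parameter fixed above, a minimal server-config sketch; the glob pattern shown is ClickHouse's commonly used default and is given here as an assumption, not prescriptively.

```xml
<!-- Any file matching the pattern (relative to the config directory) is scanned for dictionaries. -->
<dictionaries_config>*_dictionary.xml</dictionaries_config>
```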
Словари могут загружаться при старте сервера или при первом использовании, в зависимости от настройки [dictionaries_lazy_load](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_lazy_load). diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md index bb2716b2741..c43323d68fd 100644 --- a/docs/ru/sql-reference/functions/array-functions.md +++ b/docs/ru/sql-reference/functions/array-functions.md @@ -695,7 +695,11 @@ SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) as res; :::note "Примечание" Для улучшения эффективности сортировки применяется [преобразование Шварца](https://ru.wikipedia.org/wiki/%D0%9F%D1%80%D0%B5%D0%BE%D0%B1%D1%80%D0%B0%D0%B7%D0%BE%D0%B2%D0%B0%D0%BD%D0%B8%D0%B5_%D0%A8%D0%B2%D0%B0%D1%80%D1%86%D0%B0). ::: - + +## arrayPartialSort(\[func,\] limit, arr, …) {#array_functions-sort} + +То же, что и `arraySort` с дополнительным аргументом `limit`, позволяющим частичную сортировку. Возвращает массив того же размера, как и исходный, в котором элементы `[1..limit]` отсортированы в возрастающем порядке. Остальные элементы `(limit..N]` остаются в неспецифицированном порядке. + ## arrayReverseSort(\[func,\] arr, …) {#array_functions-reverse-sort} Возвращает массив `arr`, отсортированный в нисходящем порядке. Если указана функция `func`, то массив `arr` сначала сортируется в порядке, который определяется функцией `func`, а затем отсортированный массив переворачивается. Если функция `func` принимает несколько аргументов, то в функцию `arrayReverseSort` необходимо передавать несколько массивов, которые будут соответствовать аргументам функции `func`. Подробные примеры рассмотрены в конце описания функции `arrayReverseSort`. @@ -797,6 +801,10 @@ SELECT arrayReverseSort((x, y) -> -y, [4, 3, 5], [1, 2, 3]) AS res; └─────────┘ ``` +## arrayPartialReverseSort(\[func,\] limit, arr, …) {#array_functions-sort} + +То же, что и `arrayReverseSort` с дополнительным аргументом `limit`, позволяющим частичную сортировку. Возвращает массив того же размера, как и исходный, в котором элементы `[1..limit]` отсортированы в убывающем порядке. Остальные элементы `(limit..N]` остаются в неспецифицированном порядке. + ## arrayUniq(arr, …) {#array-functions-arrayuniq} Если передан один аргумент, считает количество разных элементов в массиве. diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index 8fbcaf9568b..5111441c0b9 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -235,13 +235,13 @@ SELECT toDateTime('2021-04-21 10:20:30', 'Europe/Moscow') AS Time, toTypeName(Ti ## toUnixTimestamp {#to-unix-timestamp} -Переводит дату-с-временем в число типа UInt32 -- Unix Timestamp (https://en.wikipedia.org/wiki/Unix_time). -Для аргумента String, строка конвертируется в дату и время в соответствии с часовым поясом (необязательный второй аргумент, часовой пояс сервера используется по умолчанию). +Переводит строку, дату или дату-с-временем в [Unix Timestamp](https://en.wikipedia.org/wiki/Unix_time), имеющий тип `UInt32`. +Строка может сопровождаться вторым (необязательным) аргументом, указывающим часовой пояс. 
**Синтаксис** ``` sql -toUnixTimestamp(datetime) +toUnixTimestamp(date) toUnixTimestamp(str, [timezone]) ``` @@ -256,19 +256,33 @@ toUnixTimestamp(str, [timezone]) Запрос: ``` sql -SELECT toUnixTimestamp('2017-11-05 08:07:47', 'Asia/Tokyo') AS unix_timestamp; +SELECT + '2017-11-05 08:07:47' AS dt_str, + toUnixTimestamp(dt_str) AS from_str, + toUnixTimestamp(dt_str, 'Asia/Tokyo') AS from_str_tokyo, + toUnixTimestamp(toDateTime(dt_str)) AS from_datetime, + toUnixTimestamp(toDateTime64(dt_str, 0)) AS from_datetime64, + toUnixTimestamp(toDate(dt_str)) AS from_date, + toUnixTimestamp(toDate32(dt_str)) AS from_date32 +FORMAT Vertical; ``` Результат: ``` text -┌─unix_timestamp─┐ -│ 1509836867 │ -└────────────────┘ +Row 1: +────── +dt_str: 2017-11-05 08:07:47 +from_str: 1509869267 +from_str_tokyo: 1509836867 +from_datetime: 1509869267 +from_datetime64: 1509869267 +from_date: 1509840000 +from_date32: 1509840000 ``` :::note -Тип возвращаемого значения описанными далее функциями `toStartOf*`, `toLastDayOfMonth`, `toMonday`, `timeSlot` определяется конфигурационным параметром [enable_extended_results_for_datetime_functions](../../operations/settings/settings#enable-extended-results-for-datetime-functions) имеющим по умолчанию значение `0`. +Тип возвращаемого значения описанными далее функциями `toStartOf*`, `toLastDayOfMonth`, `toMonday`, `timeSlot` определяется конфигурационным параметром [enable_extended_results_for_datetime_functions](../../operations/settings/settings.md#enable-extended-results-for-datetime-functions) имеющим по умолчанию значение `0`. Поведение для * `enable_extended_results_for_datetime_functions = 0`: Функции `toStartOf*`, `toLastDayOfMonth`, `toMonday` возвращают `Date` или `DateTime`. Функции `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` возвращают `DateTime`. Хотя эти функции могут принимать значения типа `Date32` или `DateTime64` в качестве аргумента, при обработке аргумента вне нормального диапазона значений (`1970` - `2148` для `Date` и `1970-01-01 00:00:00`-`2106-02-07 08:28:15` для `DateTime`) будет получен некорректный результат. diff --git a/docs/ru/sql-reference/functions/encryption-functions.md b/docs/ru/sql-reference/functions/encryption-functions.md index 68a32b80e5b..711d903110c 100644 --- a/docs/ru/sql-reference/functions/encryption-functions.md +++ b/docs/ru/sql-reference/functions/encryption-functions.md @@ -107,7 +107,7 @@ SELECT comment, hex(secret) FROM encryption_test WHERE comment LIKE '%gcm%'; ## aes_encrypt_mysql {#aes_encrypt_mysql} -Совместима с шифрованием myqsl, результат может быть расшифрован функцией [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt). +Совместима с шифрованием mysql, результат может быть расшифрован функцией [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt). При одинаковых входящих значениях зашифрованный текст будет совпадать с результатом, возвращаемым функцией `encrypt`. Однако если `key` или `iv` длиннее, чем должны быть, `aes_encrypt_mysql` будет работать аналогично функции `aes_encrypt` в MySQL: свернет ключ и проигнорирует лишнюю часть `iv`. 
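A short sketch of the MySQL-compatible call described in the hunk above; the mode, the 32-byte key, and the 16-byte IV are illustrative assumptions.

```sql
-- The ciphertext can be decrypted by MySQL's AES_DECRYPT with the same mode,
-- key, and IV; an over-long key would be folded, as the paragraph above notes.
SELECT hex(aes_encrypt_mysql('aes-256-ofb', 'Secret',
    '12345678910121314151617181920212', 'iviviviviviviviv')) AS ciphertext;
```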
@@ -298,7 +298,7 @@ SELECT comment, decrypt('aes-256-ofb', secret, '12345678910121314151617181920212 ## aes_decrypt_mysql {#aes_decrypt_mysql} -Совместима с шифрованием myqsl и может расшифровать данные, зашифрованные функцией [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt). +Совместима с шифрованием mysql и может расшифровать данные, зашифрованные функцией [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt). При одинаковых входящих значениях расшифрованный текст будет совпадать с результатом, возвращаемым функцией `decrypt`. Однако если `key` или `iv` длиннее, чем должны быть, `aes_decrypt_mysql` будет работать аналогично функции `aes_decrypt` в MySQL: свернет ключ и проигнорирует лишнюю часть `iv`. diff --git a/docs/ru/sql-reference/functions/introspection.md b/docs/ru/sql-reference/functions/introspection.md index 7d04dff6b72..26497ef21d3 100644 --- a/docs/ru/sql-reference/functions/introspection.md +++ b/docs/ru/sql-reference/functions/introspection.md @@ -22,7 +22,7 @@ sidebar_label: "Функции интроспекции" ClickHouse сохраняет отчеты профилировщика в [журнал трассировки](../../operations/system-tables/trace_log.md#system_tables-trace_log) в системной таблице. Убедитесь, что таблица и профилировщик настроены правильно. -## addresssToLine {#addresstoline} +## addressToLine {#addresstoline} Преобразует адрес виртуальной памяти внутри процесса сервера ClickHouse в имя файла и номер строки в исходном коде ClickHouse. diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md index f457b54ae28..de54f1b3607 100644 --- a/docs/ru/sql-reference/functions/other-functions.md +++ b/docs/ru/sql-reference/functions/other-functions.md @@ -2136,7 +2136,7 @@ countDigits(x) :::note "Примечание" Для `Decimal` значений учитывается их масштаб: вычисляется результат по базовому целочисленному типу, полученному как `(value * scale)`. Например: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. То есть вы можете проверить десятичное переполнение для `Decimal64` с помощью `countDecimal(x) > 18`. Это медленный вариант [isDecimalOverflow](#is-decimal-overflow). ::: - + **Пример** Запрос: @@ -2297,7 +2297,7 @@ enabledRoles() ## defaultRoles {#default-roles} -Возвращает имена ролей, которые задаются по умолчанию для текущего пользователя при входе в систему. Изначально это все роли, которые разрешено использовать текущему пользователю (см. [GRANT](../../sql-reference/statements/grant/#grant-select)). Список ролей по умолчанию может быть изменен с помощью выражения [SET DEFAULT ROLE](../../sql-reference/statements/set-role.md#set-default-role-statement). +Возвращает имена ролей, которые задаются по умолчанию для текущего пользователя при входе в систему. Изначально это все роли, которые разрешено использовать текущему пользователю (см. [GRANT](../../sql-reference/statements/grant.md#grant-select)). Список ролей по умолчанию может быть изменен с помощью выражения [SET DEFAULT ROLE](../../sql-reference/statements/set-role.md#set-default-role-statement). 
**Синтаксис** diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index c2beb55fee1..298b7bbc93e 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -553,6 +553,44 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut; └────────────┴───────┘ ``` +## toDecimalString + +Принимает любой численный тип первым аргументом, возвращает строковое десятичное представление числа с точностью, заданной вторым аргументом. + +**Синтаксис** + +``` sql +toDecimalString(number, scale) +``` + +**Параметры** + +- `number` — Значение любого числового типа: [Int, UInt](/docs/ru/sql-reference/data-types/int-uint.md), [Float](/docs/ru/sql-reference/data-types/float.md), [Decimal](/docs/ru/sql-reference/data-types/decimal.md), +- `scale` — Требуемое количество десятичных знаков после запятой, [UInt8](/docs/ru/sql-reference/data-types/int-uint.md). + * Значение `scale` для типов [Decimal](/docs/ru/sql-reference/data-types/decimal.md) и [Int, UInt](/docs/ru/sql-reference/data-types/int-uint.md) должно не превышать 77 (так как это наибольшее количество значимых символов для этих типов), + * Значение `scale` для типа [Float](/docs/ru/sql-reference/data-types/float.md) не должно превышать 60. + +**Возвращаемое значение** + +- Строка ([String](/docs/en/sql-reference/data-types/string.md)), представляющая собой десятичное представление входного числа с заданной длиной дробной части. + При необходимости число округляется по стандартным правилам арифметики. + +**Пример использования** + +Запрос: + +``` sql +SELECT toDecimalString(CAST('64.32', 'Float64'), 5); +``` + +Результат: + +```response +┌─toDecimalString(CAST('64.32', 'Float64'), 5)┐ +│ 64.32000 │ +└─────────────────────────────────────────────┘ +``` + ## reinterpretAsUInt(8\|16\|32\|64) {#reinterpretasuint8163264} ## reinterpretAsInt(8\|16\|32\|64) {#reinterpretasint8163264} diff --git a/docs/ru/sql-reference/operators/exists.md b/docs/ru/sql-reference/operators/exists.md index 3fc085fe021..38855abbcf3 100644 --- a/docs/ru/sql-reference/operators/exists.md +++ b/docs/ru/sql-reference/operators/exists.md @@ -8,7 +8,8 @@ slug: /ru/sql-reference/operators/exists `EXISTS` может быть использован в секции [WHERE](../../sql-reference/statements/select/where.md). :::danger "Предупреждение" - Ссылки на таблицы или столбцы основного запроса не поддерживаются в подзапросе. + Ссылки на таблицы или столбцы основного запроса не поддерживаются в подзапросе. +::: **Синтаксис** diff --git a/docs/ru/sql-reference/operators/in.md b/docs/ru/sql-reference/operators/in.md index fa679b890a7..60400fb2b31 100644 --- a/docs/ru/sql-reference/operators/in.md +++ b/docs/ru/sql-reference/operators/in.md @@ -38,9 +38,9 @@ SELECT '1' IN (SELECT 1); └──────────────────────┘ ``` -Если в качестве правой части оператора указано имя таблицы (например, `UserID IN users`), то это эквивалентно подзапросу `UserID IN (SELECT * FROM users)`. Это используется при работе с внешними данными, отправляемым вместе с запросом. Например, вместе с запросом может быть отправлено множество идентификаторов посетителей, загруженное во временную таблицу users, по которому следует выполнить фильтрацию. +Если в качестве правой части оператора указано имя таблицы (например, `UserID IN users`), то это эквивалентно подзапросу `UserID IN (SELECT * FROM users)`. 
Это используется при работе с внешними данными, отправляемыми вместе с запросом. Например, вместе с запросом может быть отправлено множество идентификаторов посетителей, загруженное во временную таблицу users, по которому следует выполнить фильтрацию. -Если в качестве правой части оператора, указано имя таблицы, имеющий движок Set (подготовленное множество, постоянно находящееся в оперативке), то множество не будет создаваться заново при каждом запросе. +Если в качестве правой части оператора, указано имя таблицы, имеющей движок Set (подготовленное множество, постоянно находящееся в оперативке), то множество не будет создаваться заново при каждом запросе. В подзапросе может быть указано более одного столбца для фильтрации кортежей. Пример: @@ -49,9 +49,9 @@ SELECT '1' IN (SELECT 1); SELECT (CounterID, UserID) IN (SELECT CounterID, UserID FROM ...) FROM ... ``` -Типы столбцов слева и справа оператора IN, должны совпадать. +Типы столбцов слева и справа оператора IN должны совпадать. -Оператор IN и подзапрос могут встречаться в любой части запроса, в том числе в агрегатных и лямбда функциях. +Оператор IN и подзапрос могут встречаться в любой части запроса, в том числе в агрегатных и лямбда-функциях. Пример: ``` sql @@ -122,7 +122,7 @@ FROM t_null Существует два варианта IN-ов с подзапросами (аналогично для JOIN-ов): обычный `IN` / `JOIN` и `GLOBAL IN` / `GLOBAL JOIN`. Они отличаются способом выполнения при распределённой обработке запроса. -:::note "Attention" +:::note "Внимание" Помните, что алгоритмы, описанные ниже, могут работать иначе в зависимости от [настройки](../../operations/settings/settings.md) `distributed_product_mode`. ::: При использовании обычного IN-а, запрос отправляется на удалённые серверы, и на каждом из них выполняются подзапросы в секциях `IN` / `JOIN`. @@ -228,7 +228,7 @@ SELECT CounterID, count() FROM distributed_table_1 WHERE UserID IN (SELECT UserI SETTINGS max_parallel_replicas=3 ``` -преобразуются на каждом сервере в +преобразуется на каждом сервере в ```sql SELECT CounterID, count() FROM local_table_1 WHERE UserID IN (SELECT UserID FROM local_table_2 WHERE CounterID < 100) diff --git a/docs/ru/sql-reference/operators/index.md b/docs/ru/sql-reference/operators/index.md index 57c426cb5ad..b5fec3cb38c 100644 --- a/docs/ru/sql-reference/operators/index.md +++ b/docs/ru/sql-reference/operators/index.md @@ -263,6 +263,7 @@ SELECT toDateTime('2014-10-26 00:00:00', 'Europe/Moscow') AS time, time + 60 * 6 │ 2014-10-26 00:00:00 │ 2014-10-26 23:00:00 │ 2014-10-27 00:00:00 │ └─────────────────────┴─────────────────────┴─────────────────────┘ ``` +::: **Смотрите также** diff --git a/docs/ru/sql-reference/statements/alter/partition.md b/docs/ru/sql-reference/statements/alter/partition.md index 95d02c062bd..90688c9ece2 100644 --- a/docs/ru/sql-reference/statements/alter/partition.md +++ b/docs/ru/sql-reference/statements/alter/partition.md @@ -102,7 +102,11 @@ ALTER TABLE table2 [ON CLUSTER cluster] ATTACH PARTITION partition_expr FROM tab ``` Копирует партицию из таблицы `table1` в таблицу `table2`. -Обратите внимание, что данные не удаляются ни из `table1`, ни из `table2`. + +Обратите внимание, что: + +- Данные не удаляются ни из `table1`, ни из `table2`. +- `table1` может быть временной таблицей. 
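A one-line sketch of the copy semantics listed just above; the partition ID `202303` is an assumption for illustration.

```sql
-- Attaches a copy of table1's partition to table2; table1 keeps its data,
-- and table1 may itself be a temporary table.
ALTER TABLE table2 ATTACH PARTITION 202303 FROM table1;
```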
Следует иметь в виду: @@ -118,7 +122,12 @@ ALTER TABLE table2 [ON CLUSTER cluster] ATTACH PARTITION partition_expr FROM tab ALTER TABLE table2 [ON CLUSTER cluster] REPLACE PARTITION partition_expr FROM table1 ``` -Копирует партицию из таблицы `table1` в таблицу `table2` с заменой существующих данных в `table2`. Данные из `table1` не удаляются. +Копирует партицию из таблицы `table1` в таблицу `table2` с заменой существующих данных в `table2`. + +Обратите внимание, что: + +- Данные из `table1` не удаляются. +- `table1` может быть временной таблицей. Следует иметь в виду: diff --git a/docs/ru/sql-reference/statements/alter/view.md b/docs/ru/sql-reference/statements/alter/view.md index 2d4823bba3a..e6f6730ff99 100644 --- a/docs/ru/sql-reference/statements/alter/view.md +++ b/docs/ru/sql-reference/statements/alter/view.md @@ -6,7 +6,7 @@ sidebar_label: VIEW # Выражение ALTER TABLE … MODIFY QUERY {#alter-modify-query} -Вы можеие изменить запрос `SELECT`, который был задан при создании [материализованного представления](../create/view.md#materialized), с помощью запроса 'ALTER TABLE … MODIFY QUERY'. Используйте его если при создании материализованного представления не использовалась секция `TO [db.]name`. Настройка `allow_experimental_alter_materialized_view_structure` должна быть включена. +Вы можете изменить запрос `SELECT`, который был задан при создании [материализованного представления](../create/view.md#materialized), с помощью запроса 'ALTER TABLE … MODIFY QUERY'. Используйте его если при создании материализованного представления не использовалась секция `TO [db.]name`. Настройка `allow_experimental_alter_materialized_view_structure` должна быть включена. Если при создании материализованного представления использовалась конструкция `TO [db.]name`, то для изменения отсоедините представление с помощью [DETACH](../detach.md), измените таблицу с помощью [ALTER TABLE](index.md), а затем снова присоедините запрос с помощью [ATTACH](../attach.md). diff --git a/docs/ru/sql-reference/statements/create/table.md b/docs/ru/sql-reference/statements/create/table.md index c29d575cac1..64eae49be6c 100644 --- a/docs/ru/sql-reference/statements/create/table.md +++ b/docs/ru/sql-reference/statements/create/table.md @@ -260,8 +260,8 @@ ENGINE = MergeTree() Кодеки шифрования: -- `CODEC('AES-128-GCM-SIV')` — Зашифровывает данные с помощью AES-128 в режиме [RFC 8452](https://tools.ietf.org/html/rfc8452) GCM-SIV. -- `CODEC('AES-256-GCM-SIV')` — Зашифровывает данные с помощью AES-256 в режиме GCM-SIV. +- `CODEC('AES-128-GCM-SIV')` — Зашифровывает данные с помощью AES-128 в режиме [RFC 8452](https://tools.ietf.org/html/rfc8452) GCM-SIV. +- `CODEC('AES-256-GCM-SIV')` — Зашифровывает данные с помощью AES-256 в режиме GCM-SIV. Эти кодеки используют фиксированный одноразовый ключ шифрования. Таким образом, это детерминированное шифрование. Оно совместимо с поддерживающими дедупликацию движками, в частности, [ReplicatedMergeTree](../../../engines/table-engines/mergetree-family/replication.md). Однако у шифрования имеется недостаток: если дважды зашифровать один и тот же блок данных, текст на выходе получится одинаковым, и злоумышленник, у которого есть доступ к диску, заметит эту эквивалентность (при этом доступа к содержимому он не получит). 
@@ -274,10 +274,10 @@ ENGINE = MergeTree() **Пример** ```sql -CREATE TABLE mytable +CREATE TABLE mytable ( x String Codec(AES_128_GCM_SIV) -) +) ENGINE = MergeTree ORDER BY x; ``` @@ -287,10 +287,10 @@ ENGINE = MergeTree ORDER BY x; **Пример** ```sql -CREATE TABLE mytable +CREATE TABLE mytable ( x String Codec(Delta, LZ4, AES_128_GCM_SIV) -) +) ENGINE = MergeTree ORDER BY x; ``` @@ -299,9 +299,9 @@ ENGINE = MergeTree ORDER BY x; ClickHouse поддерживает временные таблицы со следующими характеристиками: - Временные таблицы исчезают после завершения сессии, в том числе при обрыве соединения. -- Временная таблица использует только модуль памяти. +- Временная таблица использует движок таблиц Memory, когда движок не указан; она может использовать любой движок таблиц, за исключением движков Replicated и `KeeperMap`. - Невозможно указать базу данных для временной таблицы. Она создается вне баз данных. -- Невозможно создать временную таблицу распределнным DDL запросом на всех серверах кластера (с опцией `ON CLUSTER`): такая таблица существует только в рамках существующей сессии. +- Невозможно создать временную таблицу распределённым DDL запросом на всех серверах кластера (с опцией `ON CLUSTER`): такая таблица существует только в рамках существующей сессии. - Если временная таблица имеет то же имя, что и некоторая другая, то, при упоминании в запросе без указания БД, будет использована временная таблица. - При распределённой обработке запроса, используемые в запросе временные таблицы, передаются на удалённые серверы. @@ -313,7 +313,7 @@ CREATE TEMPORARY TABLE [IF NOT EXISTS] table_name name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], ... -) +) [ENGINE = engine] ``` В большинстве случаев, временные таблицы создаются не вручную, а при использовании внешних данных для запроса, или при распределённом `(GLOBAL) IN`. Подробнее см. соответствующие разделы @@ -344,7 +344,9 @@ REPLACE TABLE myOldTable SELECT * FROM myOldTable WHERE CounterID <12345; ### Синтаксис +```sql {CREATE [OR REPLACE]|REPLACE} TABLE [db.]table_name +``` Для данного запроса можно использовать любые варианты синтаксиса запроса `CREATE`. Запрос `REPLACE` для несуществующей таблицы вызовет ошибку. diff --git a/docs/ru/sql-reference/statements/grant.md b/docs/ru/sql-reference/statements/grant.md index 7c281634c98..9b8fafabfcc 100644 --- a/docs/ru/sql-reference/statements/grant.md +++ b/docs/ru/sql-reference/statements/grant.md @@ -37,6 +37,19 @@ GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_US `WITH ADMIN OPTION` присваивает привилегию [ADMIN OPTION](#admin-option-privilege) пользователю или роли. `WITH REPLACE OPTION` заменяет все старые роли новыми ролями для пользователя `user` или `role`, если не указано, добавляет новые роли. +## Синтаксис присвоения текущих привилегий {#grant-current-grants-syntax} + +```sql +GRANT CURRENT GRANTS{(privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*}) | ON {db.table|db.*|*.*|table|*}} TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION] [WITH REPLACE OPTION] +``` + +- `privilege` — Тип привилегии. +- `role` — Роль пользователя ClickHouse. +- `user` — Пользователь ClickHouse. + +Использование выражения `CURRENT GRANTS` позволяет присвоить все указанные и доступные для присвоения привилегии. +Если список привилегий не задан, то указанный пользователь или роль получат все доступные привилегии для `CURRENT_USER`.
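A minimal sketch of the `CURRENT GRANTS` shorthand documented above; the user name `jack` and the `db.*` scope are assumptions.

```sql
-- Grants jack every privilege the current user holds and may grant on db.*.
GRANT CURRENT GRANTS ON db.* TO jack;
```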
+ ## Использование {#grant-usage} Для использования `GRANT` пользователь должен иметь привилегию `GRANT OPTION`. Пользователь может выдавать привилегии только внутри области действий назначенных ему самому привилегий. @@ -107,7 +120,8 @@ GRANT SELECT(x,y) ON db.table TO john WITH GRANT OPTION - [CREATE](#grant-create) - `CREATE DATABASE` - `CREATE TABLE` - - `CREATE TEMPORARY TABLE` + - `CREATE ARBITRARY TEMPORARY TABLE` + - `CREATE TEMPORARY TABLE` - `CREATE VIEW` - `CREATE DICTIONARY` - `CREATE FUNCTION` @@ -314,7 +328,8 @@ GRANT INSERT(x,y) ON db.table TO john - `CREATE`. Уровень: `GROUP` - `CREATE DATABASE`. Уровень: `DATABASE` - `CREATE TABLE`. Уровень: `TABLE` - - `CREATE TEMPORARY TABLE`. Уровень: `GLOBAL` + - `CREATE ARBITRARY TEMPORARY TABLE`. Уровень: `GLOBAL` + - `CREATE TEMPORARY TABLE`. Уровень: `GLOBAL` - `CREATE VIEW`. Уровень: `VIEW` - `CREATE DICTIONARY`. Уровень: `DICTIONARY` diff --git a/docs/ru/sql-reference/statements/optimize.md b/docs/ru/sql-reference/statements/optimize.md index b70bba2d765..26993183232 100644 --- a/docs/ru/sql-reference/statements/optimize.md +++ b/docs/ru/sql-reference/statements/optimize.md @@ -10,6 +10,7 @@ sidebar_label: OPTIMIZE :::danger "Внимание" `OPTIMIZE` не устраняет причину появления ошибки `Too many parts`. +::: **Синтаксис** diff --git a/docs/ru/sql-reference/statements/select/array-join.md b/docs/ru/sql-reference/statements/select/array-join.md index 9d2dbf54a2b..6c7fcbba7cc 100644 --- a/docs/ru/sql-reference/statements/select/array-join.md +++ b/docs/ru/sql-reference/statements/select/array-join.md @@ -146,7 +146,7 @@ ARRAY JOIN arr AS a, arrayEnumerate(arr) AS num, arrayMap(x -> x + 1, arr) AS ma └───────┴─────────┴───┴─────┴────────┘ ``` -В приведенном ниже примере используется функция [arrayEnumerate](../../../sql-reference/functions/array-functions#array_functions-arrayenumerate): +В приведенном ниже примере используется функция [arrayEnumerate](../../../sql-reference/functions/array-functions.md#array_functions-arrayenumerate): ``` sql SELECT s, arr, a, num, arrayEnumerate(arr) @@ -259,7 +259,7 @@ ARRAY JOIN nest AS n; └───────┴─────┴─────┴─────────┴────────────┘ ``` -Пример использования функции [arrayEnumerate](../../../sql-reference/functions/array-functions#array_functions-arrayenumerate): +Пример использования функции [arrayEnumerate](../../../sql-reference/functions/array-functions.md#array_functions-arrayenumerate): ``` sql SELECT s, `n.x`, `n.y`, `nest.x`, `nest.y`, num diff --git a/docs/ru/sql-reference/statements/select/group-by.md b/docs/ru/sql-reference/statements/select/group-by.md index 450543cb7a2..ea4f357d33c 100644 --- a/docs/ru/sql-reference/statements/select/group-by.md +++ b/docs/ru/sql-reference/statements/select/group-by.md @@ -108,7 +108,7 @@ SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP; ## Модификатор WITH CUBE {#with-cube-modifier} -Модификатор `WITH CUBE` применятеся для расчета подытогов по всем комбинациям группировки ключевых выражений в списке `GROUP BY`. +Модификатор `WITH CUBE` применяется для расчета подытогов по всем комбинациям группировки ключевых выражений в списке `GROUP BY`. Строки с подытогами добавляются в конец результирующей таблицы. В колонках, по которым выполняется группировка, указывается значение `0` или пустая строка. 
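Mirroring the `WITH ROLLUP` example visible in this hunk's context, a hedged `WITH CUBE` sketch over the same assumed table `t`:

```sql
-- Adds subtotal rows for every subset of {year, month, day}, including the
-- grand total; grouped-out key columns show 0 or an empty string.
SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH CUBE;
```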
diff --git a/docs/ru/sql-reference/statements/select/join.md b/docs/ru/sql-reference/statements/select/join.md
index 6853a8c8c90..6be438f8c43 100644
--- a/docs/ru/sql-reference/statements/select/join.md
+++ b/docs/ru/sql-reference/statements/select/join.md
@@ -16,7 +16,7 @@ FROM
(ON )|(USING ) ...
```

-Выражения из секции `ON` и столбцы из секции `USING` называется «ключами соединения». Если не указано иное, при присоединение создаётся [Декартово произведение](https://en.wikipedia.org/wiki/Cartesian_product) из строк с совпадающими значениями ключей соединения, что может привести к получению результатов с гораздо большим количеством строк, чем исходные таблицы.
+Выражения из секции `ON` и столбцы из секции `USING` называются «ключами соединения». Если не указано иное, при соединении создаётся [декартово произведение](https://en.wikipedia.org/wiki/Cartesian_product) из строк с совпадающими значениями ключей соединения, что может привести к получению результатов с гораздо большим количеством строк, чем исходные таблицы.

## Поддерживаемые типы соединения {#select-join-types}

@@ -28,7 +28,7 @@ FROM
- `FULL OUTER JOIN`, не совпадающие строки из обеих таблиц возвращаются в дополнение к совпадающим строкам.
- `CROSS JOIN`, производит декартово произведение таблиц целиком, ключи соединения не указываются.

-Без указания типа `JOIN` подразумевается `INNER`. Ключевое слово `OUTER` можно опускать. Альтернативным синтаксисом для `CROSS JOIN` является ли указание нескольких таблиц, разделённых запятыми, в [секции FROM](from.md).
+Без указания типа `JOIN` подразумевается `INNER`. Ключевое слово `OUTER` можно опускать. Альтернативным синтаксисом для `CROSS JOIN` является указание нескольких таблиц, разделённых запятыми, в [секции FROM](from.md).

Дополнительные типы соединений, доступные в ClickHouse:

@@ -62,7 +62,7 @@ FROM

Строки объединяются только тогда, когда всё составное условие выполнено. Если оно не выполнено, то строки могут попасть в результат в зависимости от типа `JOIN`. Обратите внимание, что если то же самое условие поместить в секцию `WHERE`, то строки, для которых оно не выполняется, никогда не попадут в результат.

-Оператор `OR` внутри секции `ON` работает, используя алгоритм хеш-соединения — на каждый агрумент `OR` с ключами соединений для `JOIN` создается отдельная хеш-таблица, поэтому потребление памяти и время выполнения запроса растет линейно при увеличении количества выражений `OR` секции `ON`.
+Оператор `OR` внутри секции `ON` работает, используя алгоритм хеш-соединения — на каждый аргумент `OR` с ключами соединений для `JOIN` создается отдельная хеш-таблица, поэтому потребление памяти и время выполнения запроса растет линейно при увеличении количества выражений `OR` секции `ON`.

:::note "Примечание"
Если в условии использованы столбцы из разных таблиц, то пока поддерживается только оператор равенства (`=`).

@@ -280,7 +280,7 @@ SELECT a, b, toTypeName(a), toTypeName(b) FROM t_1 FULL JOIN t_2 USING (a, b);

Каждый раз для выполнения запроса с одинаковым `JOIN` подзапрос выполняется заново — результат не кэшируется. Этого можно избежать, используя специальный движок таблиц [Join](../../../engines/table-engines/special/join.md), представляющий собой подготовленное множество для соединения, которое всегда находится в оперативке.

-В некоторых случаях это более эффективно использовать [IN](../../operators/in.md) вместо `JOIN`.
+В некоторых случаях более эффективно использовать [IN](../../operators/in.md) вместо `JOIN`.
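+
+Например (набросок; таблицы `t1` и `t2` условные):
+
+```sql
+SELECT UserID FROM t1 WHERE UserID IN (SELECT UserID FROM t2);
+```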
Если `JOIN` необходим для соединения с таблицами измерений (dimension tables - сравнительно небольшие таблицы, которые содержат свойства измерений - например, имена для рекламных кампаний), то использование `JOIN` может быть не очень удобным из-за громоздкости синтаксиса, а также из-за того, что правая таблица читается заново при каждом запросе. Специально для таких случаев существует функциональность «Внешние словари», которую следует использовать вместо `JOIN`. Дополнительные сведения смотрите в разделе «Внешние словари». diff --git a/docs/ru/sql-reference/statements/select/order-by.md b/docs/ru/sql-reference/statements/select/order-by.md index 40efb862250..a3390501f2c 100644 --- a/docs/ru/sql-reference/statements/select/order-by.md +++ b/docs/ru/sql-reference/statements/select/order-by.md @@ -67,7 +67,7 @@ sidebar_label: ORDER BY ## Примеры с использованием сравнения {#collation-examples} -Пример с значениями типа [String](../../../sql-reference/data-types/string.md): +Пример со значениями типа [String](../../../sql-reference/data-types/string.md): Входная таблица: @@ -241,13 +241,13 @@ SELECT * FROM collate_test ORDER BY s ASC COLLATE 'en'; └───┴─────────┘ ``` -## Деталь реализации {#implementation-details} +## Детали реализации {#implementation-details} Если кроме `ORDER BY` указан также не слишком большой [LIMIT](limit.md), то расходуется меньше оперативки. Иначе расходуется количество памяти, пропорциональное количеству данных для сортировки. При распределённой обработке запроса, если отсутствует [GROUP BY](group-by.md), сортировка частично делается на удалённых серверах, а на сервере-инициаторе запроса производится слияние результатов. Таким образом, при распределённой сортировке, может сортироваться объём данных, превышающий размер памяти на одном сервере. Существует возможность выполнять сортировку во внешней памяти (с созданием временных файлов на диске), если оперативной памяти не хватает. Для этого предназначена настройка `max_bytes_before_external_sort`. Если она выставлена в 0 (по умолчанию), то внешняя сортировка выключена. Если она включена, то при достижении объёмом данных для сортировки указанного количества байт, накопленные данные будут отсортированы и сброшены во временный файл. После того, как все данные будут прочитаны, будет произведено слияние всех сортированных файлов и выдача результата. Файлы записываются в директорию `/var/lib/clickhouse/tmp/` (по умолчанию, может быть изменено с помощью параметра `tmp_path`) в конфиге. -На выполнение запроса может расходоваться больше памяти, чем `max_bytes_before_external_sort`. Поэтому, значение этой настройки должно быть существенно меньше, чем `max_memory_usage`. Для примера, если на вашем сервере 128 GB оперативки, и вам нужно выполнить один запрос, то выставите `max_memory_usage` в 100 GB, а `max_bytes_before_external_sort` в 80 GB. +На выполнение запроса может расходоваться больше памяти, чем `max_bytes_before_external_sort`. Поэтому значение этой настройки должно быть существенно меньше, чем `max_memory_usage`. Для примера, если на вашем сервере 128 GB оперативки, и вам нужно выполнить один запрос, то выставьте `max_memory_usage` в 100 GB, а `max_bytes_before_external_sort` в 80 GB. Внешняя сортировка работает существенно менее эффективно, чем сортировка в оперативке. @@ -366,9 +366,9 @@ ORDER BY └────────────┴────────────┴──────────┘ ``` -Поле `d1` не заполняется и использует значение по умолчанию. 
Поскольку у нас нет повторяющихся значений для `d2`, мы не можем правильно рассчитать последователность заполнения для `d1`.
+Поле `d1` не заполняется и использует значение по умолчанию. Поскольку у нас нет повторяющихся значений для `d2`, мы не можем правильно рассчитать последовательность заполнения для `d1`.

-Cледующий запрос (с измененым порядком в ORDER BY):
+Следующий запрос (с измененным порядком в ORDER BY):

```sql
SELECT
    toDate((number * 10) * 86400) AS d1,

diff --git a/docs/ru/sql-reference/statements/select/prewhere.md b/docs/ru/sql-reference/statements/select/prewhere.md
index e37aec814ec..092370d4b3a 100644
--- a/docs/ru/sql-reference/statements/select/prewhere.md
+++ b/docs/ru/sql-reference/statements/select/prewhere.md
@@ -13,7 +13,7 @@ Prewhere — это оптимизация для более эффективн

`PREWHERE` имеет смысл использовать, если есть условия фильтрации, которые используют меньшинство столбцов из тех, что есть в запросе, но достаточно сильно фильтруют данные. Таким образом, сокращается количество читаемых данных.

-В запросе может быть одновременно указаны и `PREWHERE`, и `WHERE`. В этом случае `PREWHERE` предшествует `WHERE`.
+В запросе могут быть одновременно указаны и `PREWHERE`, и `WHERE`. В этом случае `PREWHERE` предшествует `WHERE`.

Если значение параметра [optimize_move_to_prewhere](../../../operations/settings/settings.md#optimize_move_to_prewhere) равно 0, эвристика по автоматическому перемещению части выражений из `WHERE` к `PREWHERE` отключается.

diff --git a/docs/ru/sql-reference/statements/select/sample.md b/docs/ru/sql-reference/statements/select/sample.md
index e63479d6c27..decef52d06f 100644
--- a/docs/ru/sql-reference/statements/select/sample.md
+++ b/docs/ru/sql-reference/statements/select/sample.md
@@ -10,7 +10,7 @@ sidebar_label: SAMPLE

Сэмплирование имеет смысл, когда:

1. Точность результата не важна, например, для оценочных расчетов.
-2. Возможности аппаратной части не позволяют соответствовать строгим критериям. Например, время ответа должно быть \<100 мс. При этом точность расчета имеет более низкий приоритет.
+2. Возможности аппаратной части не позволяют соответствовать строгим критериям. Например, время ответа должно быть <100 мс. При этом точность расчета имеет более низкий приоритет.
3. Точность результата участвует в бизнес-модели сервиса. Например, пользователи с бесплатной подпиской на сервис могут получать отчеты с меньшей точностью, чем пользователи с премиум-подпиской.

:::note "Внимание"

diff --git a/docs/ru/sql-reference/statements/select/union.md b/docs/ru/sql-reference/statements/select/union.md
index 4dad63c2c7c..05d20100452 100644
--- a/docs/ru/sql-reference/statements/select/union.md
+++ b/docs/ru/sql-reference/statements/select/union.md
@@ -26,7 +26,7 @@ SELECT CounterID, 2 AS table, sum(Sign) AS c

Результирующие столбцы сопоставляются по их индексу (порядку внутри `SELECT`). Если имена столбцов не совпадают, то имена для конечного результата берутся из первого запроса.

-При объединении выполняет приведение типов. Например, если два запроса имеют одно и то же поле с не-`Nullable` и `Nullable` совместимыми типами, полученные в результате `UNION` данные будут иметь `Nullable` тип.
+При объединении выполняется приведение типов. Например, если два запроса имеют одно и то же поле с не-`Nullable` и `Nullable` совместимыми типами, полученные в результате `UNION` данные будут иметь `Nullable` тип (см. пример ниже).

Запросы, которые являются частью `UNION`, могут быть заключены в круглые скобки.
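+
+Минимальная иллюстрация приведения типов (значения условные):
+
+```sql
+SELECT 1 AS x
+UNION ALL
+SELECT NULL;
+```
+
+Результирующий столбец `x` получит тип `Nullable(UInt8)`, а его имя будет взято из первого запроса.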
[ORDER BY](order-by.md) и [LIMIT](limit.md) применяются к отдельным запросам, а не к конечному результату. Если вам нужно применить преобразование к конечному результату, вы можете разместить все объединенные с помощью `UNION` запросы в подзапрос в секции [FROM](from.md).

diff --git a/docs/ru/sql-reference/statements/select/with.md b/docs/ru/sql-reference/statements/select/with.md
index 58f380b0a85..c4121a6569a 100644
--- a/docs/ru/sql-reference/statements/select/with.md
+++ b/docs/ru/sql-reference/statements/select/with.md
@@ -5,7 +5,7 @@ sidebar_label: WITH

# Секция WITH {#with-clause}

-Clickhouse поддерживает [Общие табличные выражения](https://ru.wikipedia.org/wiki/Иерархические_и_рекурсивные_запросы_в_SQL), то есть позволяет использовать результаты выражений из секции `WITH` в остальной части `SELECT` запроса. Именованные подзапросы могут быть включены в текущий и дочерний контекст запроса в тех местах, где разрешены табличные объекты. Рекурсия предотвращается путем скрытия общего табличного выражения текущего уровня из выражения `WITH`.
+ClickHouse поддерживает [общие табличные выражения](https://ru.wikipedia.org/wiki/Иерархические_и_рекурсивные_запросы_в_SQL), то есть позволяет использовать результаты выражений из секции `WITH` в остальной части `SELECT` запроса. Именованные подзапросы могут быть включены в текущий и дочерний контекст запроса в тех местах, где разрешены табличные объекты. Рекурсия предотвращается путем скрытия общего табличного выражения текущего уровня из выражения `WITH`.

## Синтаксис

diff --git a/docs/ru/sql-reference/statements/system.md b/docs/ru/sql-reference/statements/system.md
index a7dec7abe27..22a74648eab 100644
--- a/docs/ru/sql-reference/statements/system.md
+++ b/docs/ru/sql-reference/statements/system.md
@@ -269,13 +269,17 @@ SYSTEM START REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]

### SYNC REPLICA {#query_language-system-sync-replica}

-Ждет когда таблица семейства `ReplicatedMergeTree` будет синхронизирована с другими репликами в кластере, будет работать до достижения `receive_timeout`, если синхронизация для таблицы отключена в настоящий момент времени:
+Ждет, когда таблица семейства `ReplicatedMergeTree` будет синхронизирована с другими репликами в кластере, но не более `receive_timeout` секунд:

``` sql
SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT | PULL]
```

-После выполнения этого запроса таблица `[db.]replicated_merge_tree_family_table_name` синхронизирует команды из общего реплицированного лога в свою собственную очередь репликации. Затем запрос ждет, пока реплика не обработает все синхронизированные команды.
+После выполнения этого запроса таблица `[db.]replicated_merge_tree_family_table_name` загружает команды из общего реплицированного лога в свою собственную очередь репликации. Затем запрос ждет, пока реплика не обработает все загруженные команды. Поддерживаются следующие модификаторы:
+
+ - Если указан модификатор `STRICT`, то запрос ждёт, когда очередь репликации станет пустой. Строгий вариант запроса может никогда не завершиться успешно, если в очереди репликации постоянно появляются новые записи.
+ - Если указан модификатор `LIGHTWEIGHT`, то запрос ждёт, когда будут обработаны записи `GET_PART`, `ATTACH_PART`, `DROP_RANGE`, `REPLACE_RANGE` и `DROP_PART`.
+ - Если указан модификатор `PULL`, то запрос только загружает записи очереди репликации из ZooKeeper и не ждёт выполнения чего-либо.
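+
+Например, чтобы дождаться обработки только записей о получении и удалении кусков (имя таблицы условное):
+
+``` sql
+SYSTEM SYNC REPLICA db.replicated_table LIGHTWEIGHT
+```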
### RESTART REPLICA {#query_language-system-restart-replica}

diff --git a/docs/ru/sql-reference/table-functions/url.md b/docs/ru/sql-reference/table-functions/url.md
index e5d9faeec00..c90968d92af 100644
--- a/docs/ru/sql-reference/table-functions/url.md
+++ b/docs/ru/sql-reference/table-functions/url.md
@@ -21,6 +21,7 @@ url(URL [,format] [,structure])
- `URL` — HTTP или HTTPS-адрес сервера, который может принимать запросы `GET` или `POST` (для запросов `SELECT` или `INSERT` соответственно). Тип: [String](../../sql-reference/data-types/string.md).
- `format` — [формат](../../interfaces/formats.md#formats) данных. Тип: [String](../../sql-reference/data-types/string.md).
- `structure` — структура таблицы в формате `'UserID UInt64, Name String'`. Определяет имена и типы столбцов. Тип: [String](../../sql-reference/data-types/string.md).
+- `headers` — HTTP-заголовки в формате `'headers('key1'='value1', 'key2'='value2')'`. Определяет заголовки для HTTP-вызова.

**Возвращаемое значение**

@@ -31,7 +32,7 @@ url(URL [,format] [,structure])

Получение с HTTP-сервера первых 3 строк таблицы с данными в формате [CSV](../../interfaces/formats.md#csv), содержащей столбцы типа [String](../../sql-reference/data-types/string.md) и [UInt32](../../sql-reference/data-types/int-uint.md).

``` sql
-SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32') LIMIT 3;
+SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32', headers('Accept'='text/csv; charset=utf-8')) LIMIT 3;
```

Вставка данных в таблицу:

@@ -46,3 +47,12 @@ SELECT * FROM test_table;

Шаблоны в фигурных скобках `{ }` используются, чтобы сгенерировать список шардов или указать альтернативные адреса на случай отказа. Поддерживаемые типы шаблонов и примеры смотрите в описании функции [remote](remote.md#globs-in-addresses).
Символ `|` внутри шаблонов используется, чтобы задать адреса, если предыдущие оказались недоступны. Эти адреса перебираются в том же порядке, в котором они указаны в шаблоне. Количество адресов, которые могут быть сгенерированы, ограничено настройкой [glob_expansion_max_elements](../../operations/settings/settings.md#glob_expansion_max_elements).
+
+## Виртуальные столбцы
+
+- `_path` — Путь до `URL`.
+- `_file` — Имя ресурса `URL`.
+
+**Смотрите также**
+
+- [Виртуальные столбцы](index.md#table_engines-virtual_columns)

diff --git a/docs/zh/development/build-osx.md b/docs/zh/development/build-osx.md
index 639df253dce..40b8ebe9e51 100644
--- a/docs/zh/development/build-osx.md
+++ b/docs/zh/development/build-osx.md
@@ -46,7 +46,7 @@ $ cd ..

为此,请创建以下文件:

-/资源库/LaunchDaemons/limit.maxfiles.plist:
+/Library/LaunchDaemons/limit.maxfiles.plist:

``` xml

diff --git a/docs/zh/development/continuous-integration.md b/docs/zh/development/continuous-integration.md
index a52d77a7a33..56e3e1dfd50 100644
--- a/docs/zh/development/continuous-integration.md
+++ b/docs/zh/development/continuous-integration.md
@@ -34,7 +34,7 @@ git push
## 描述信息检查 {#description-check}
检查pull请求的描述是否符合[PULL_REQUEST_TEMPLATE.md](https://github.com/ClickHouse/ClickHouse/blob/master/.github/PULL_REQUEST_TEMPLATE.md)模板.
-您必须为您的更改指定一个更改日志类别(例如,Bug修复), 并且为[CHANGELOG.md](../whats-new/changelog/)编写一条用户可读的消息用来描述更改.
+您必须为您的更改指定一个更改日志类别(例如,Bug修复), 并且为[CHANGELOG.md](../whats-new/changelog/index.md)编写一条用户可读的消息用来描述更改.

## 推送到DockerHub {#push-to-dockerhub}
生成用于构建和测试的docker映像, 然后将它们推送到DockerHub.
diff --git a/docs/zh/engines/database-engines/index.md b/docs/zh/engines/database-engines/index.md index 0b24590686e..2839f819671 100644 --- a/docs/zh/engines/database-engines/index.md +++ b/docs/zh/engines/database-engines/index.md @@ -16,7 +16,7 @@ sidebar_position: 27 - [MaterializeMySQL](../../engines/database-engines/materialized-mysql.md) -- [Lazy](../../engines/database-engines/lazy) +- [Lazy](../../engines/database-engines/lazy.md) - [Atomic](../../engines/database-engines/atomic.md) diff --git a/docs/zh/engines/database-engines/materialize-mysql.md b/docs/zh/engines/database-engines/materialize-mysql.md index 10049017c71..5d1394f9456 100644 --- a/docs/zh/engines/database-engines/materialize-mysql.md +++ b/docs/zh/engines/database-engines/materialize-mysql.md @@ -38,8 +38,8 @@ ENGINE = MaterializeMySQL('host:port', ['database' | database], 'user', 'passwor - `max_wait_time_when_mysql_unavailable` — 当MySQL不可用时重试间隔(毫秒)。负值禁止重试。默认值: `1000`. - `allows_query_when_mysql_lost` — 当mysql丢失时,允许查询物化表。默认值: `0` (`false`). ``` -CREATE DATABASE mysql ENGINE = MaterializeMySQL('localhost:3306', 'db', 'user', '***') - SETTINGS +CREATE DATABASE mysql ENGINE = MaterializeMySQL('localhost:3306', 'db', 'user', '***') + SETTINGS allows_query_when_mysql_lost=true, max_wait_time_when_mysql_unavailable=10000; ``` @@ -97,7 +97,7 @@ CREATE DATABASE mysql ENGINE = MaterializeMySQL('localhost:3306', 'db', 'user', ### DDL查询 {#ddl-queries} -MySQL DDL查询转换为相应的ClickHouse DDL查询([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create/index.md), [DROP](../../sql-reference/statements/drop), [RENAME](../../sql-reference/statements/rename.md))。如果ClickHouse无法解析某个DDL查询,则该查询将被忽略。 +MySQL DDL查询转换为相应的ClickHouse DDL查询([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md))。如果ClickHouse无法解析某个DDL查询,则该查询将被忽略。 ### Data Replication {#data-replication} @@ -148,9 +148,9 @@ mysql> SELECT * FROM test; ``` ```text -+---+------+------+ ++---+------+------+ | a | b | c | -+---+------+------+ ++---+------+------+ | 2 | 222 | Wow! | +---+------+------+ ``` @@ -177,9 +177,9 @@ SELECT * FROM mysql.test; ``` ``` text -┌─a─┬──b─┐ -│ 1 │ 11 │ -│ 2 │ 22 │ +┌─a─┬──b─┐ +│ 1 │ 11 │ +│ 2 │ 22 │ └───┴────┘ ``` @@ -190,7 +190,7 @@ SELECT * FROM mysql.test; ``` ``` text -┌─a─┬───b─┬─c────┐ -│ 2 │ 222 │ Wow! │ +┌─a─┬───b─┬─c────┐ +│ 2 │ 222 │ Wow! │ └───┴─────┴──────┘ ``` diff --git a/docs/zh/engines/database-engines/materialized-mysql.md b/docs/zh/engines/database-engines/materialized-mysql.md index c34d3a6f20d..5c735556c48 100644 --- a/docs/zh/engines/database-engines/materialized-mysql.md +++ b/docs/zh/engines/database-engines/materialized-mysql.md @@ -109,7 +109,7 @@ MySQL中的Time 类型,会被ClickHouse转换成微秒来存储 ### DDL Queries {#ddl-queries} -MySQL DDL 语句会被转换成对应的ClickHouse DDL 语句,比如: ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create/index.md), [DROP](../../sql-reference/statements/drop), [RENAME](../../sql-reference/statements/rename.md)). 如果ClickHouse 无法解析某些语句DDL 操作,则会跳过。 +MySQL DDL 语句会被转换成对应的ClickHouse DDL 语句,比如: ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). 
如果ClickHouse 无法解析某些语句DDL 操作,则会跳过。

### 数据复制 {#data-replication}

@@ -152,17 +152,17 @@ ClickHouse只有一个物理排序,由 `order by` 条件决定。要创建一

这些是你可以对MaterializedMySQL表重写的模式转换操作:

 * 修改列类型。必须与原始类型兼容,否则复制将失败。例如,可以将`UInt32`列修改为`UInt64`,不能将 `String` 列修改为 `Array(String)`。
- * 修改 [column TTL](../table-engines/mergetree-family/mergetree/#mergetree-column-ttl).
+ * 修改 [column TTL](../table-engines/mergetree-family/mergetree.md#mergetree-column-ttl).
 * 修改 [column compression codec](../../sql-reference/statements/create/table.mdx#codecs).
 * 增加 [ALIAS columns](../../sql-reference/statements/create/table.mdx#alias).
- * 增加 [skipping indexes](../table-engines/mergetree-family/mergetree/#table_engine-mergetree-data_skipping-indexes)
- * 增加 [projections](../table-engines/mergetree-family/mergetree/#projections).
+ * 增加 [skipping indexes](../table-engines/mergetree-family/mergetree.md#table_engine-mergetree-data_skipping-indexes)
+ * 增加 [projections](../table-engines/mergetree-family/mergetree.md#projections).
 请注意,当使用 `SELECT ... FINAL ` (MaterializedMySQL默认是这样做的) 时,投影优化是被禁用的,所以这里是受限的, `INDEX ... TYPE hypothesis `[在v21.12的博客文章中描述](https://clickhouse.com/blog/en/2021/clickhouse-v21.12-released/)可能在这种情况下更有用。
- * 修改 [PARTITION BY](../table-engines/mergetree-family/custom-partitioning-key/)
- * 修改 [ORDER BY](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses)
- * 修改 [PRIMARY KEY](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses)
- * 增加 [SAMPLE BY](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses)
- * 增加 [table TTL](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses)
+ * 修改 [PARTITION BY](../table-engines/mergetree-family/custom-partitioning-key.md)
+ * 修改 [ORDER BY](../table-engines/mergetree-family/mergetree.md#mergetree-query-clauses)
+ * 修改 [PRIMARY KEY](../table-engines/mergetree-family/mergetree.md#mergetree-query-clauses)
+ * 增加 [SAMPLE BY](../table-engines/mergetree-family/mergetree.md#mergetree-query-clauses)
+ * 增加 [table TTL](../table-engines/mergetree-family/mergetree.md#mergetree-query-clauses)

```sql
CREATE DATABASE db_name ENGINE = MaterializedMySQL(...)

diff --git a/docs/zh/engines/database-engines/replicated.md b/docs/zh/engines/database-engines/replicated.md
index df5872e9b19..843d7a3edd5 100644
--- a/docs/zh/engines/database-engines/replicated.md
+++ b/docs/zh/engines/database-engines/replicated.md
@@ -19,7 +19,6 @@ CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_na

- `shard_name` — 分片的名字。数据库副本按`shard_name`分组到分片中。
- `replica_name` — 副本的名字。同一分片的所有副本的副本名称必须不同。

-!!!
note "警告" 对于[ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication)表,如果没有提供参数,则使用默认参数:`/clickhouse/tables/{uuid}/{shard}`和`{replica}`。这些可以在服务器设置[default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path)和[default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name)中更改。宏`{uuid}`被展开到表的uuid, `{shard}`和`{replica}`被展开到服务器配置的值,而不是数据库引擎参数。但是在将来,可以使用Replicated数据库的`shard_name`和`replica_name`。 ## 使用方式 {#specifics-and-recommendations} @@ -52,8 +51,8 @@ CREATE TABLE r.rmt (n UInt64) ENGINE=ReplicatedMergeTree ORDER BY n; ``` ``` text -┌─────hosts────────────┬──status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐ -│ shard1|replica1 │ 0 │ │ 2 │ 0 │ +┌─────hosts────────────┬──status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐ +│ shard1|replica1 │ 0 │ │ 2 │ 0 │ │ shard1|other_replica │ 0 │ │ 1 │ 0 │ │ other_shard|r1 │ 0 │ │ 0 │ 0 │ └──────────────────────┴─────────┴───────┴─────────────────────┴──────────────────┘ @@ -62,13 +61,13 @@ CREATE TABLE r.rmt (n UInt64) ENGINE=ReplicatedMergeTree ORDER BY n; 显示系统表: ``` sql -SELECT cluster, shard_num, replica_num, host_name, host_address, port, is_local +SELECT cluster, shard_num, replica_num, host_name, host_address, port, is_local FROM system.clusters WHERE cluster='r'; ``` ``` text -┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐ -│ r │ 1 │ 1 │ node3 │ 127.0.0.1 │ 9002 │ 0 │ +┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐ +│ r │ 1 │ 1 │ node3 │ 127.0.0.1 │ 9002 │ 0 │ │ r │ 2 │ 1 │ node2 │ 127.0.0.1 │ 9001 │ 0 │ │ r │ 2 │ 2 │ node1 │ 127.0.0.1 │ 9000 │ 1 │ └─────────┴───────────┴─────────────┴───────────┴──────────────┴──────┴──────────┘ @@ -83,9 +82,9 @@ node1 :) SELECT materialize(hostName()) AS host, groupArray(n) FROM r.d GROUP BY ``` ``` text -┌─hosts─┬─groupArray(n)─┐ -│ node1 │ [1,3,5,7,9] │ -│ node2 │ [0,2,4,6,8] │ +┌─hosts─┬─groupArray(n)─┐ +│ node1 │ [1,3,5,7,9] │ +│ node2 │ [0,2,4,6,8] │ └───────┴───────────────┘ ``` @@ -98,8 +97,8 @@ node4 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','other_shard','r2'); 集群配置如下所示: ``` text -┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐ -│ r │ 1 │ 1 │ node3 │ 127.0.0.1 │ 9002 │ 0 │ +┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐ +│ r │ 1 │ 1 │ node3 │ 127.0.0.1 │ 9002 │ 0 │ │ r │ 1 │ 2 │ node4 │ 127.0.0.1 │ 9003 │ 0 │ │ r │ 2 │ 1 │ node2 │ 127.0.0.1 │ 9001 │ 0 │ │ r │ 2 │ 2 │ node1 │ 127.0.0.1 │ 9000 │ 1 │ @@ -113,8 +112,8 @@ node2 :) SELECT materialize(hostName()) AS host, groupArray(n) FROM r.d GROUP BY ``` ```text -┌─hosts─┬─groupArray(n)─┐ -│ node2 │ [1,3,5,7,9] │ -│ node4 │ [0,2,4,6,8] │ +┌─hosts─┬─groupArray(n)─┐ +│ node2 │ [1,3,5,7,9] │ +│ node4 │ [0,2,4,6,8] │ └───────┴───────────────┘ ``` diff --git a/docs/zh/engines/table-engines/integrations/kafka.md b/docs/zh/engines/table-engines/integrations/kafka.md index 5ab1e0573af..fd4e5e9c10a 100644 --- a/docs/zh/engines/table-engines/integrations/kafka.md +++ b/docs/zh/engines/table-engines/integrations/kafka.md @@ -163,4 +163,4 @@ clickhouse也支持自己使用keyfile的方式来维护kerbros的凭证。配 **另请参阅** - [虚拟列](../../../engines/table-engines/index.md#table_engines-virtual_columns) -- [后台消息代理调度池大小](../../../operations/settings/settings.md#background_message_broker_schedule_pool_size) +- 
[后台消息代理调度池大小](../../../operations/server-configuration-parameters/settings.md#background_message_broker_schedule_pool_size) diff --git a/docs/zh/engines/table-engines/mergetree-family/mergetree.md b/docs/zh/engines/table-engines/mergetree-family/mergetree.md index 40aa764e2d3..6775662d555 100644 --- a/docs/zh/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/mergetree.md @@ -1,6 +1,6 @@ ---- -slug: /zh/engines/table-engines/mergetree-family/mergetree ---- +--- +slug: /zh/engines/table-engines/mergetree-family/mergetree +--- # MergeTree {#table_engines-mergetree} Clickhouse 中最强大的表引擎当属 `MergeTree` (合并树)引擎及该系列(`*MergeTree`)中的其他引擎。 @@ -25,8 +25,9 @@ Clickhouse 中最强大的表引擎当属 `MergeTree` (合并树)引擎及 需要的话,您可以给表设置一个采样方法。 -!!! note "注意" - [合并](../special/merge.md#merge) 引擎并不属于 `*MergeTree` 系列。 +:::info +[合并](../special/merge.md#merge) 引擎并不属于 `*MergeTree` 系列。 +::: ## 建表 {#table_engine-mergetree-creating-a-table} @@ -191,7 +192,7 @@ ClickHouse 会为每个数据片段创建一个索引文件来存储这些标记 ClickHouse 不要求主键唯一,所以您可以插入多条具有相同主键的行。 -您可以在`PRIMARY KEY`与`ORDER BY`条件中使用`可为空的`类型的表达式,但强烈建议不要这么做。为了启用这项功能,请打开[allow_nullable_key](../../../operations/settings/#allow-nullable-key),[NULLS_LAST](../../../sql-reference/statements/select/order-by.md/#sorting-of-special-values)规则也适用于`ORDER BY`条件中有NULL值的情况下。 +您可以在`PRIMARY KEY`与`ORDER BY`条件中使用`可为空的`类型的表达式,但强烈建议不要这么做。为了启用这项功能,请打开[allow_nullable_key](../../../operations/settings/index.md#allow-nullable-key),[NULLS_LAST](../../../sql-reference/statements/select/order-by.md#sorting-of-special-values)规则也适用于`ORDER BY`条件中有NULL值的情况下。 ### 主键的选择 {#zhu-jian-de-xuan-ze} @@ -329,7 +330,7 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 支持的数据类型:`Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`, `Array`, `LowCardinality`, `Nullable`。 - 以下函数会用到这个索引: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions), [notIn](../../../sql-reference/functions/in-functions), [has](../../../sql-reference/functions/array-functions) + 以下函数会用到这个索引: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md) ``` sql INDEX sample_index (u64 * length(s)) TYPE minmax GRANULARITY 4 @@ -352,19 +353,19 @@ WHERE 子句中的条件可以包含对某列数据进行运算的函数表达 | [startsWith](../../../sql-reference/functions/string-functions.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | | [endsWith](../../../sql-reference/functions/string-functions.md#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ | | [multiSearchAny](../../../sql-reference/functions/string-search-functions.md#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ | -| [in](../../../sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [notIn](../../../sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [in](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notIn](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | | [less (\<)](../../../sql-reference/functions/comparison-functions.md#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ | | [greater (\>)](../../../sql-reference/functions/comparison-functions.md#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ | | 
[lessOrEquals (\<=)](../../../sql-reference/functions/comparison-functions.md#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | | [greaterOrEquals (\>=)](../../../sql-reference/functions/comparison-functions.md#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [empty](../../../sql-reference/functions/array-functions#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [notEmpty](../../../sql-reference/functions/array-functions#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [empty](../../../sql-reference/functions/array-functions.md#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [notEmpty](../../../sql-reference/functions/array-functions.md#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | | hasToken | ✗ | ✗ | ✗ | ✔ | ✗ | 常量参数小于 ngram 大小的函数不能使用 `ngrambf_v1` 进行查询优化。 -!!! note "注意" +:::note 布隆过滤器可能会包含不符合条件的匹配,所以 `ngrambf_v1`, `tokenbf_v1` 和 `bloom_filter` 索引不能用于结果返回为假的函数,例如: - 可以用来优化的场景 @@ -379,6 +380,7 @@ WHERE 子句中的条件可以包含对某列数据进行运算的函数表达 - `NOT s = 1` - `s != 1` - `NOT startsWith(s, 'test')` +::: ## 并发数据访问 {#concurrent-data-access} @@ -687,7 +689,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' `default` 存储策略意味着只使用一个卷,这个卷只包含一个在 `` 中定义的磁盘。您可以使用[ALTER TABLE ... MODIFY SETTING]来修改存储策略,新的存储策略应该包含所有以前的磁盘和卷,并使用相同的名称。 -可以通过 [background_move_pool_size](../../../operations/settings/settings.md#background_move_pool_size) 设置调整执行后台任务的线程数。 +可以通过 [background_move_pool_size](../../../operations/server-configuration-parameters/settings.md#background_move_pool_size) 设置调整执行后台任务的线程数。 ### 详细说明 {#details} diff --git a/docs/zh/engines/table-engines/mergetree-family/replication.md b/docs/zh/engines/table-engines/mergetree-family/replication.md index 791ea448212..04d5e7d467c 100644 --- a/docs/zh/engines/table-engines/mergetree-family/replication.md +++ b/docs/zh/engines/table-engines/mergetree-family/replication.md @@ -98,7 +98,7 @@ CREATE TABLE table_name ( ... 
) ENGINE = ReplicatedMergeTree('zookeeper_name_con 对于非常大的集群,你可以把不同的 ZooKeeper 集群用于不同的分片。然而,即使 Yandex.Metrica 集群(大约300台服务器)也证明还不需要这么做。 -复制是多主异步。 `INSERT` 语句(以及 `ALTER` )可以发给任意可用的服务器。数据会先插入到执行该语句的服务器上,然后被复制到其他服务器。由于它是异步的,在其他副本上最近插入的数据会有一些延迟。如果部分副本不可用,则数据在其可用时再写入。副本可用的情况下,则延迟时长是通过网络传输压缩数据块所需的时间。为复制表执行后台任务的线程数量,可以通过 [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size) 进行设置。 +复制是多主异步。 `INSERT` 语句(以及 `ALTER` )可以发给任意可用的服务器。数据会先插入到执行该语句的服务器上,然后被复制到其他服务器。由于它是异步的,在其他副本上最近插入的数据会有一些延迟。如果部分副本不可用,则数据在其可用时再写入。副本可用的情况下,则延迟时长是通过网络传输压缩数据块所需的时间。为复制表执行后台任务的线程数量,可以通过 [background_schedule_pool_size](../../../operations/server-configuration-parameters/settings.md#background_schedule_pool_size) 进行设置。 `ReplicatedMergeTree` 引擎采用一个独立的线程池进行复制拉取。线程池的大小通过 [background_fetches_pool_size](../../../operations/settings/settings.md#background_fetches_pool_size) 进行限定,它可以在重启服务器时进行调整。 @@ -282,8 +282,8 @@ sudo -u clickhouse touch /var/lib/clickhouse/flags/force_restore_data **参考** -- [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size) -- [background_fetches_pool_size](../../../operations/settings/settings.md#background_fetches_pool_size) +- [background_schedule_pool_size](../../../operations/server-configuration-parameters/settings.md#background_schedule_pool_size) +- [background_fetches_pool_size](../../../operations/server-configuration-parameters/settings.md#background_fetches_pool_size) - [execute_merges_on_single_replica_time_threshold](../../../operations/settings/settings.md#execute-merges-on-single-replica-time-threshold) - [max_replicated_fetches_network_bandwidth](../../../operations/settings/merge-tree-settings.mdx#max_replicated_fetches_network_bandwidth) - [max_replicated_sends_network_bandwidth](../../../operations/settings/merge-tree-settings.mdx#max_replicated_sends_network_bandwidth) diff --git a/docs/zh/engines/table-engines/special/distributed.md b/docs/zh/engines/table-engines/special/distributed.md index c832e9e19ba..3c8a7a833d0 100644 --- a/docs/zh/engines/table-engines/special/distributed.md +++ b/docs/zh/engines/table-engines/special/distributed.md @@ -45,7 +45,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] AS [db2.]name2 - [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) 设置 - [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) 查看示例 - + **分布式设置** - `fsync_after_insert` - 对异步插入到分布式的文件数据执行`fsync`。确保操作系统将所有插入的数据刷新到启动节点**磁盘上的一个文件**中。 @@ -66,19 +66,20 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] AS [db2.]name2 - `monitor_max_sleep_time_ms` - 等同于 [distributed_directory_monitor_max_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) -!!! 
note "备注"
+:::note
+**稳定性设置** (`fsync_...`):

- **稳定性设置** (`fsync_...`):

+- 只影响异步插入(例如:`insert_distributed_sync=false`), 当数据首先存储在启动节点磁盘上,然后再异步发送到shard。
+- 可能会显著降低`insert`的性能
+- 影响将存储在分布式表文件夹中的数据写入 **接受您插入的节点** 。如果你需要保证写入数据到底层的MergeTree表中,请参阅 `system.merge_tree_settings` 中的持久性设置(`...fsync...`)

- - 只影响异步插入(例如:`insert_distributed_sync=false`), 当数据首先存储在启动节点磁盘上,然后再异步发送到shard。
- — 可能会显著降低`insert`的性能
- - 影响将存储在分布式表文件夹中的数据写入 **接受您插入的节点** 。如果你需要保证写入数据到底层的MergeTree表中,请参阅 `system.merge_tree_settings` 中的持久性设置(`...fsync...`)

+**插入限制设置** (`..._insert`) 请见:

- **插入限制设置** (`..._insert`) 请见:

+- [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) 设置
+- [prefer_localhost_replica](../../../operations/settings/settings.md#settings-prefer-localhost-replica) 设置
+- `bytes_to_throw_insert` 在 `bytes_to_delay_insert` 之前处理,所以你不应该设置它的值小于 `bytes_to_delay_insert`
+:::

- - [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) 设置
- - [prefer_localhost_replica](../../../operations/settings/settings.md#settings-prefer-localhost-replica) 设置
- - `bytes_to_throw_insert` 在 `bytes_to_delay_insert` 之前处理,所以你不应该设置它的值小于 `bytes_to_delay_insert`

**示例**

``` sql
@@ -214,7 +215,7 @@ SELECT 查询会被发送到所有分片,并且无论数据在分片中如何

## 读取数据 {#distributed-reading-data}

-
+

当查询一个`Distributed`表时,`SELECT`查询被发送到所有的分片,不管数据是如何分布在分片上的(它们可以完全随机分布)。当您添加一个新分片时,您不必将旧数据传输到它。相反,您可以使用更重的权重向其写入新数据——数据的分布会稍微不均匀,但查询将正确有效地工作。

当启用`max_parallel_replicas`选项时,查询处理将在单个分片中的所有副本之间并行化。更多信息,请参见[max_parallel_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas)。

@@ -225,8 +226,9 @@ SELECT 查询会被发送到所有分片,并且无论数据在分片中如何

- `_shard_num` — 表`system.clusters` 中的 `shard_num` 值 . 数据类型: [UInt32](../../../sql-reference/data-types/int-uint.md).

-!!! note "备注"
-    因为 [remote](../../../sql-reference/table-functions/remote.md) 和 [cluster](../../../sql-reference/table-functions/cluster.mdx) 表方法内部创建了分布式表, `_shard_num` 对他们都有效.
+:::note
+因为 [remote](../../../sql-reference/table-functions/remote.md) 和 [cluster](../../../sql-reference/table-functions/cluster.mdx) 表方法内部创建了分布式表, `_shard_num` 对他们都有效.
+:::

**详见**
- [虚拟列](../../../engines/table-engines/index.md#table_engines-virtual_columns) 描述

diff --git a/docs/zh/faq/general.md b/docs/zh/faq/general.md
index 5a95b9aad07..530be7f08d8 100644
--- a/docs/zh/faq/general.md
+++ b/docs/zh/faq/general.md
@@ -1,5 +1,5 @@
---
-slug: /zh/faq/general
+slug: /zh/faq/general/overview
---

# 常见问题 {#chang-jian-wen-ti}

diff --git a/docs/zh/faq/general/columnar-database.md b/docs/zh/faq/general/columnar-database.md
index 57541aec69b..004da067900 100644
--- a/docs/zh/faq/general/columnar-database.md
+++ b/docs/zh/faq/general/columnar-database.md
@@ -7,20 +7,20 @@ sidebar_position: 101

# 什么是列存储数据库?
{#what-is-a-columnar-database} -列存储数据库独立存储每个列的数据。这只允许从磁盘读取任何给定查询中使用的列的数据。其代价是,影响整行的操作会按比例变得更昂贵。列存储数据库的同义词是面向列的数据库管理系统。ClickHouse就是这样一个典型的例子。 +列存储数据库独立存储每个列的数据。这只允许从磁盘读取任何给定查询中使用的列的数据。其代价是,影响整行的操作会按比例变得更昂贵。列存储数据库的同义词是面向列的数据库管理系统。ClickHouse 就是这样一个典型的例子。 列存储数据库的主要优点是: - 查询只使用许多列其中的少数列。 -— 聚合对大量数据的查询。 -— 按列压缩。 +- 聚合对大量数据的查询。 +- 按列压缩。 下面是构建报表时传统的面向行系统和柱状数据库之间的区别: **传统行存储** -!(传统行存储)(https://clickhouse.com/docs/en/images/row-oriented.gif) +![传统行存储](https://clickhouse.com/docs/assets/images/row-oriented-3e6fd5aa48e3075202d242b4799da8fa.gif) **列存储** -!(列存储)(https://clickhouse.com/docs/en/images/column-oriented.gif) +![列存储](https://clickhouse.com/docs/assets/images/column-oriented-d082e49b7743d4ded32c7952bfdb028f.gif) -列存储数据库是分析应用程序的首选,因为它允许在一个表中有许多列以防万一,但不会在读取查询执行时为未使用的列付出代价。面向列的数据库是为大数据处理而设计的,因为和数据仓库一样,它们通常使用分布式的低成本硬件集群来提高吞吐量。ClickHouse结合了[分布式](../../engines/table-engines/special/distributed.md)和[复制式](../../engines/table-engines/mergetree-family/replication.md)两类表。 \ No newline at end of file +列存储数据库是分析应用程序的首选,因为它允许在一个表中有许多列以防万一,但不会在读取查询执行时为未使用的列付出代价。面向列的数据库是为大数据处理而设计的,因为和数据仓库一样,它们通常使用分布式的低成本硬件集群来提高吞吐量。ClickHouse 结合了[分布式](../../engines/table-engines/special/distributed.md)和[复制式](../../engines/table-engines/mergetree-family/replication.md)两类表。 diff --git a/docs/zh/faq/general/index.md b/docs/zh/faq/general/index.md index 8b0b42cede2..9693e7ffc82 100644 --- a/docs/zh/faq/general/index.md +++ b/docs/zh/faq/general/index.md @@ -21,8 +21,7 @@ sidebar_label: General - [我如何为 ClickHouse贡献代码?](../../faq/general/how-do-i-contribute-code-to-clickhouse.md) - !!! info "没找到您需要的内容?" - 请查阅 [其他 F.A.Q. 类别](../../faq/) 或者从左侧导航栏浏览其他文档 - + 请查阅 [其他 F.A.Q. 类别](../../faq/index.md) 或者从左侧导航栏浏览其他文档 + {## [原始文档](https://clickhouse.com/docs/en/faq/general/) ##} diff --git a/docs/zh/faq/general/why-clickhouse-is-so-fast.md b/docs/zh/faq/general/why-clickhouse-is-so-fast.md index a30b56adb9a..1962b8b90c2 100644 --- a/docs/zh/faq/general/why-clickhouse-is-so-fast.md +++ b/docs/zh/faq/general/why-clickhouse-is-so-fast.md @@ -9,7 +9,7 @@ sidebar_position: 8 It was designed to be fast. Query execution performance has always been a top priority during the development process, but other important characteristics like user-friendliness, scalability, and security were also considered so ClickHouse could become a real production system. -ClickHouse was initially built as a prototype to do just a single task well: to filter and aggregate data as fast as possible. That’s what needs to be done to build a typical analytical report and that’s what a typical [GROUP BY](../../sql-reference/statements/select/group-by/) query does. ClickHouse team has made several high-level decisions that combined made achieving this task possible: +ClickHouse was initially built as a prototype to do just a single task well: to filter and aggregate data as fast as possible. That’s what needs to be done to build a typical analytical report and that’s what a typical [GROUP BY](../../sql-reference/statements/select/group-by.md) query does. ClickHouse team has made several high-level decisions that combined made achieving this task possible: Column-oriented storage : Source data often contain hundreds or even thousands of columns, while a report can use just a few of them. The system needs to avoid reading unnecessary columns, or most expensive disk read operations would be wasted. 
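+
+For example (illustrative only, assuming a wide `hits` table), a typical report query reads just two columns no matter how many the table has:
+
+```sql
+SELECT RegionID, count() FROM hits GROUP BY RegionID;
+```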
diff --git a/docs/zh/faq/integration/index.md b/docs/zh/faq/integration/index.md
index 6678956a0b3..3a3f97761f3 100644
--- a/docs/zh/faq/integration/index.md
+++ b/docs/zh/faq/integration/index.md
@@ -1,5 +1,5 @@
---
-slug: /zh/faq/integration/ 
+slug: /zh/faq/integration/
title: 关于集成ClickHouse和其他系统的问题
toc_hidden_folder: true
sidebar_position: 4
@@ -17,6 +17,6 @@ sidebar_label: Integration

!!! info "没看到你要找的东西吗?"
-    查看[其他faq类别](../../faq/)或浏览左边栏中的主要文档文章。
+    查看[其他faq类别](../../faq/index.md)或浏览左边栏中的主要文档文章。

{## [原文](https://clickhouse.com/docs/en/faq/integration/) ##}
\ No newline at end of file

diff --git a/docs/zh/faq/integration/json-import.md b/docs/zh/faq/integration/json-import.md
index 861abacc1e1..2d5c687316d 100644
--- a/docs/zh/faq/integration/json-import.md
+++ b/docs/zh/faq/integration/json-import.md
@@ -7,29 +7,29 @@ sidebar_position: 11

# How to Import JSON Into ClickHouse? {#how-to-import-json-into-clickhouse}

-ClickHouse supports a wide range of [data formats for input and output](../../interfaces/formats/). There are multiple JSON variations among them, but the most commonly used for data ingestion is [JSONEachRow](../../interfaces/formats/#jsoneachrow). It expects one JSON object per row, each object separated by a newline.
+ClickHouse supports a wide range of [data formats for input and output](../../interfaces/formats.md). There are multiple JSON variations among them, but the most commonly used for data ingestion is [JSONEachRow](../../interfaces/formats.md#jsoneachrow). It expects one JSON object per row, each object separated by a newline.

## Examples {#examples}

-Using [HTTP interface](../../interfaces/http/):
+Using [HTTP interface](../../interfaces/http.md):

``` bash
$ echo '{"foo":"bar"}' | curl 'http://localhost:8123/?query=INSERT%20INTO%20test%20FORMAT%20JSONEachRow' --data-binary @-
```

-Using [CLI interface](../../interfaces/cli/):
+Using [CLI interface](../../interfaces/cli.md):

``` bash
$ echo '{"foo":"bar"}' | clickhouse-client --query="INSERT INTO test FORMAT JSONEachRow"
```

-Instead of inserting data manually, you might consider to use one of [client libraries](../../interfaces/) instead.
+Instead of inserting data manually, you might consider using one of the [client libraries](../../interfaces/index.md).

## Useful Settings {#useful-settings}

- `input_format_skip_unknown_fields` allows inserting JSON even if there are additional fields not present in the table schema (by discarding them).
-- `input_format_import_nested_json` allows to insert nested JSON objects into columns of [Nested](../../sql-reference/data-types/nested-data-structures/nested/) type.
+- `input_format_import_nested_json` allows inserting nested JSON objects into columns of [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) type.

-:::note 
+:::note
Settings are specified as `GET` parameters for the HTTP interface or as additional command-line arguments prefixed with `--` for the `CLI` interface.
:::
\ No newline at end of file

diff --git a/docs/zh/faq/operations/delete-old-data.md b/docs/zh/faq/operations/delete-old-data.md
index b2229058cad..24181116bab 100644
--- a/docs/zh/faq/operations/delete-old-data.md
+++ b/docs/zh/faq/operations/delete-old-data.md
@@ -19,7 +19,7 @@ The key advantage of this approach is that it does not need any external system

TTL can also be used to move data not only to [/dev/null](https://en.wikipedia.org/wiki/Null_device), but also between different storage systems, like from SSD to HDD.
:::

-More details on [configuring TTL](../../engines/table-engines/mergetree-family/mergetree/#table_engine-mergetree-ttl).
+More details on [configuring TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).

## ALTER DELETE {#alter-delete}

@@ -41,4 +41,4 @@ More details on [manipulating partitions](../../sql-reference/statements/alter/p

It’s rather radical to drop all data from a table, but in some cases it might be exactly what you need.

-More details on [table truncation](../../sql-reference/statements/truncate/).
+More details on [table truncation](../../sql-reference/statements/truncate.md).
\ No newline at end of file

diff --git a/docs/zh/faq/operations/index.md b/docs/zh/faq/operations/index.md
index 071cc872e4e..153eda6199a 100644
--- a/docs/zh/faq/operations/index.md
+++ b/docs/zh/faq/operations/index.md
@@ -1,5 +1,5 @@
---
-slug: /zh/faq/operations/ 
+slug: /zh/faq/operations/
title: 关于操作ClickHouse服务器和集群的问题
toc_hidden_folder: true
sidebar_position: 3
@@ -13,9 +13,9 @@ sidebar_label: Operations

- [如果想在生产环境部署,需要用哪个版本的 ClickHouse 呢?](../../faq/operations/production.md)
- [是否可能从 ClickHouse 数据表中删除所有旧的数据记录?](../../faq/operations/delete-old-data.md)
- [ClickHouse支持多区域复制吗?](../../faq/operations/multi-region-replication.md)
-
+
!!! info "没看到你要找的东西吗?"
-    查看[其他faq类别](../../faq/)或浏览左边栏中的主要文档文章。
+    查看[其他faq类别](../../faq/index.md)或浏览左边栏中的主要文档文章。

{## [原文](https://clickhouse.com/docs/en/faq/production/) ##}

diff --git a/docs/zh/faq/operations/production.md b/docs/zh/faq/operations/production.md
index cc5cf6b9614..90db050e8d3 100644
--- a/docs/zh/faq/operations/production.md
+++ b/docs/zh/faq/operations/production.md
@@ -67,6 +67,6 @@ For production use, there are two key options: `stable` and `lts`. Here is some

Many teams who initially thought that `lts` is the way to go often switch to `stable` anyway because of some recent feature that’s important for their product.

-:::warning 
-One more thing to keep in mind when upgrading ClickHouse: we’re always keeping eye on compatibility across releases, but sometimes it’s not reasonable to keep and some minor details might change. So make sure you check the [changelog](../../whats-new/changelog/) before upgrading to see if there are any notes about backward-incompatible changes.
+:::warning
+One more thing to keep in mind when upgrading ClickHouse: we’re always keeping an eye on compatibility across releases, but sometimes it’s not reasonable to keep, and some minor details might change. So make sure you check the [changelog](../../whats-new/changelog/index.md) before upgrading to see if there are any notes about backward-incompatible changes.
:::
\ No newline at end of file

diff --git a/docs/zh/faq/use-cases/index.md b/docs/zh/faq/use-cases/index.md
index 75ef26368a3..ff0d873b37f 100644
--- a/docs/zh/faq/use-cases/index.md
+++ b/docs/zh/faq/use-cases/index.md
@@ -14,6 +14,6 @@ sidebar_label: 使用案例

- [我能把 ClickHouse 当做Key-value 键值存储来使用吗?](../../faq/use-cases/key-value.md)

!!! info "没找到您所需要的内容?"
- 请查看[其他常见问题类别](../../faq/)或浏览左侧边栏中的主要文档文章。 + 请查看[其他常见问题类别](../../faq/index.md)或浏览左侧边栏中的主要文档文章。 {## [原始文档](https://clickhouse.com/docs/en/faq/use-cases/) ##} diff --git a/docs/zh/getting-started/example-datasets/recipes.mdx b/docs/zh/getting-started/example-datasets/recipes.mdx index b7ed92962c5..b7f8fe8eafd 100644 --- a/docs/zh/getting-started/example-datasets/recipes.mdx +++ b/docs/zh/getting-started/example-datasets/recipes.mdx @@ -1,5 +1,5 @@ ---- -slug: /zh/getting-started/example-datasets/recipes +--- +slug: /zh/getting-started/example-datasets/recipes sidebar_label: 食谱数据集 title: "食谱数据集" --- @@ -8,8 +8,8 @@ RecipeNLG 数据集可在 [此处](https://recipenlg.cs.put.poznan.pl/dataset) ## 下载并解压数据集 -1. 进入下载页面[https://recipenlg.cs.put.poznan.pl/dataset](https://recipenlg.cs.put.poznan.pl/dataset)。 -2. 接受条款和条件并下载 zip 文件。 +1. 进入下载页面[https://recipenlg.cs.put.poznan.pl/dataset](https://recipenlg.cs.put.poznan.pl/dataset)。 +2. 接受条款和条件并下载 zip 文件。 3. 使用 `unzip` 解压 zip 文件,得到 `full_dataset.csv` 文件。 ## 创建表 @@ -49,13 +49,13 @@ clickhouse-client --query " 这是一个展示如何解析自定义 CSV,这其中涉及了许多调整。 -说明: -- 数据集为 CSV 格式,但在插入时需要一些预处理;使用表函数 [input](../../sql-reference/table-functions/input.md) 进行预处理; -- CSV 文件的结构在表函数 `input` 的参数中指定; -- 字段 `num`(行号)是不需要的 - 可以忽略并从文件中进行解析; -- 使用 `FORMAT CSVWithNames`,因为标题不包含第一个字段的名称,因此 CSV 中的标题将被忽略(通过命令行参数 `--input_format_with_names_use_header 0`); -- 文件仅使用双引号将 CSV 字符串括起来;一些字符串没有用双引号括起来,单引号也不能被解析为括起来的字符串 - 所以添加`--format_csv_allow_single_quote 0`参数接受文件中的单引号; -- 由于某些 CSV 的字符串的开头包含 `\M/` 因此无法被解析; CSV 中唯一可能以反斜杠开头的值是 `\N`,这个值被解析为 SQL NULL。通过添加`--input_format_allow_errors_num 10`参数,允许在导入过程中跳过 10 个格式错误; +说明: +- 数据集为 CSV 格式,但在插入时需要一些预处理;使用表函数 [input](../../sql-reference/table-functions/input.md) 进行预处理; +- CSV 文件的结构在表函数 `input` 的参数中指定; +- 字段 `num`(行号)是不需要的 - 可以忽略并从文件中进行解析; +- 使用 `FORMAT CSVWithNames`,因为标题不包含第一个字段的名称,因此 CSV 中的标题将被忽略(通过命令行参数 `--input_format_with_names_use_header 0`); +- 文件仅使用双引号将 CSV 字符串括起来;一些字符串没有用双引号括起来,单引号也不能被解析为括起来的字符串 - 所以添加`--format_csv_allow_single_quote 0`参数接受文件中的单引号; +- 由于某些 CSV 的字符串的开头包含 `\M/` 因此无法被解析; CSV 中唯一可能以反斜杠开头的值是 `\N`,这个值被解析为 SQL NULL。通过添加`--input_format_allow_errors_num 10`参数,允许在导入过程中跳过 10 个格式错误; - 在数据集中的 Ingredients、directions 和 NER 字段为数组;但这些数组并没有以一般形式表示:这些字段作为 JSON 序列化为字符串,然后放入 CSV 中 - 在导入是将它们解析为字符串,然后使用 [JSONExtract](../../sql-reference/functions/json-functions.md ) 函数将其转换为数组。 ## 验证插入的数据 @@ -80,7 +80,7 @@ SELECT count() FROM recipes; ### 按配方数量排列的顶级组件: -在此示例中,我们学习如何使用 [arrayJoin](../../sql-reference/functions/array-join/) 函数将数组扩展为行的集合。 +在此示例中,我们学习如何使用 [arrayJoin](../../sql-reference/functions/array-join.md) 函数将数组扩展为行的集合。 请求: @@ -185,7 +185,7 @@ LIMIT 10 10 rows in set. Elapsed: 0.215 sec. Processed 2.23 million rows, 1.48 GB (10.35 million rows/s., 6.86 GB/s.) ``` -在此示例中,我们使用 [has](../../sql-reference/functions/array-functions/#hasarr-elem) 函数来按过滤数组类型元素并按 directions 的数量进行排序。 +在此示例中,我们使用 [has](../../sql-reference/functions/array-functions.md#hasarr-elem) 函数来按过滤数组类型元素并按 directions 的数量进行排序。 有一个婚礼蛋糕需要整个126个步骤来制作!显示 directions: diff --git a/docs/zh/getting-started/install.md b/docs/zh/getting-started/install.md index f622c51c0c4..e65cfea62cd 100644 --- a/docs/zh/getting-started/install.md +++ b/docs/zh/getting-started/install.md @@ -84,6 +84,17 @@ sudo /etc/init.d/clickhouse-server start clickhouse-client # or "clickhouse-client --password" if you set up a password. 
``` +For systems with `zypper` package manager (openSUSE, SLES): + +``` bash +sudo zypper addrepo -r https://packages.clickhouse.com/rpm/clickhouse.repo -g +sudo zypper --gpg-auto-import-keys refresh clickhouse-stable +sudo zypper install -y clickhouse-server clickhouse-client + +sudo /etc/init.d/clickhouse-server start +clickhouse-client # or "clickhouse-client --password" if you set up a password. +``` +
Deprecated Method for installing rpm-packages diff --git a/docs/zh/getting-started/tutorial.md b/docs/zh/getting-started/tutorial.md index bef3ecee35f..989cf5f57d8 100644 --- a/docs/zh/getting-started/tutorial.md +++ b/docs/zh/getting-started/tutorial.md @@ -617,8 +617,9 @@ INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1; 启用本机复制[Zookeeper](http://zookeeper.apache.org/)是必需的。 ClickHouse负责所有副本的数据一致性,并在失败后自动运行恢复过程。建议将ZooKeeper集群部署在单独的服务器上(其中没有其他进程,包括运行的ClickHouse)。 -!!! note "注意" - ZooKeeper不是一个严格的要求:在某些简单的情况下,您可以通过将数据写入应用程序代码中的所有副本来复制数据。 这种方法是**不**建议的,在这种情况下,ClickHouse将无法保证所有副本上的数据一致性。 因此需要由您的应用来保证这一点。 +:::note +ZooKeeper不是一个严格的要求:在某些简单的情况下,您可以通过将数据写入应用程序代码中的所有副本来复制数据。 这种方法是**不**建议的,在这种情况下,ClickHouse将无法保证所有副本上的数据一致性。 因此需要由您的应用来保证这一点。 +::: ZooKeeper位置在配置文件中指定: diff --git a/docs/zh/guides/improving-query-performance/skipping-indexes.md b/docs/zh/guides/improving-query-performance/skipping-indexes.md index 2f9ce09d77f..f9f43e46927 100644 --- a/docs/zh/guides/improving-query-performance/skipping-indexes.md +++ b/docs/zh/guides/improving-query-performance/skipping-indexes.md @@ -1,6 +1,6 @@ --- slug: /zh/guides/improving-query-performance/skipping-indexes -sidebar_label: Data Skipping Indexes +sidebar_label: 跳数索引 sidebar_position: 2 --- @@ -89,7 +89,7 @@ SELECT * FROM skip_table WHERE my_value IN (125, 700) 下图是更直观的展示,这就是如何读取和选择my_value为125的4096行,以及如何跳过以下行而不从磁盘读取: -![Simple Skip](../../../en/guides/improving-query-performance/images/simple_skip.svg) +![Simple Skip](../../../en/guides/best-practices/images/simple_skip.svg) 通过在执行查询时启用跟踪,用户可以看到关于跳数索引使用情况的详细信息。在clickhouse-client中设置send_logs_level: @@ -126,7 +126,7 @@ Bloom filter是一种数据结构,它允许对集合成员进行高效的是 * 基本的**bloom_filter**接受一个可选参数,该参数表示在0到1之间允许的“假阳性”率(如果未指定,则使用.025)。 * 更专业的**tokenbf_v1**。需要三个参数,用来优化布隆过滤器:(1)过滤器的大小字节(大过滤器有更少的假阳性,有更高的存储成本),(2)哈希函数的个数(更多的散列函数可以减少假阳性)。(3)布隆过滤器哈希函数的种子。有关这些参数如何影响布隆过滤器功能的更多细节,请参阅 [这里](https://hur.st/bloomfilter/) 。此索引仅适用于String、FixedString和Map类型的数据。输入表达式被分割为由非字母数字字符分隔的字符序列。例如,列值`This is a candidate for a "full text" search`将被分割为`This` `is` `a` `candidate` `for` `full` `text` `search`。它用于LIKE、EQUALS、in、hasToken()和类似的长字符串中单词和其他值的搜索。例如,一种可能的用途是在非结构的应用程序日志行列中搜索少量的类名或行号。 - + * 更专业的**ngrambf_v1**。该索引的功能与tokenbf_v1相同。在Bloom filter设置之前需要一个额外的参数,即要索引的ngram的大小。一个ngram是长度为n的任何字符串,比如如果n是4,`A short string`会被分割为`A sh`` sho`, `shor`, `hort`, `ort s`, `or st`, `r str`, ` stri`, `trin`, `ring`。这个索引对于文本搜索也很有用,特别是没有单词间断的语言,比如中文。 ### 跳数索引函数 @@ -150,7 +150,7 @@ Bloom filter是一种数据结构,它允许对集合成员进行高效的是 考虑以下数据分布: -![Bad Skip!](../../../en/guides/improving-query-performance/images/bad_skip_1.svg) +![Bad Skip!](../../../en/guides/best-practices/images/bad_skip_1.svg) 假设主键/顺序是时间戳,并且在visitor_id上有一个索引。考虑下面的查询: diff --git a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md index e773a02fbc3..eedc913cf82 100644 --- a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md +++ b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md @@ -1,6 +1,6 @@ --- -slug: /zh/guides/improving-query-performance/sparse-primary-indexes -sidebar_label: Sparse Primary Indexes +slug: /zh/guides/best-practices +sidebar_label: 主键稀疏索引 sidebar_position: 20 --- @@ -19,21 +19,21 @@ sidebar_position: 20 :::note 这篇文章主要关注稀疏索引。 -如果想了解二级跳数索引,请查看[教程](./skipping-indexes.md). +如果想了解二级跳数索引,请查看[教程](./skipping-indexes.md). 
::: -## 数据集 +## 数据集 在本文中,我们将使用一个匿名的web流量数据集。 -- 我们将使用样本数据集中的887万行(事件)的子集。 +- 我们将使用样本数据集中的887万行(事件)的子集。 - 未压缩的数据大小为887万个事件和大约700mb。当存储在ClickHouse时,压缩为200mb。 - 在我们的子集中,每行包含三列,表示在特定时间(EventTime列)单击URL (URL列)的互联网用户(UserID列)。 通过这三个列,我们已经可以制定一些典型的web分析查询,如: - + - 某个用户点击次数最多的前10个url是什么? - 点击某个URL次数最多的前10名用户是谁? - 用户点击特定URL的最频繁时间(比如一周中的几天)是什么? @@ -44,7 +44,7 @@ sidebar_position: 20 ## 全表扫描 -为了了解在没有主键的情况下如何对数据集执行查询,我们通过执行以下SQL DDL语句(使用MergeTree表引擎)创建了一个表: +为了了解在没有主键的情况下如何对数据集执行查询,我们通过执行以下SQL DDL语句(使用MergeTree表引擎)创建了一个表: ```sql CREATE TABLE hits_NoPrimaryKey @@ -70,11 +70,11 @@ FROM url('https://datasets.clickhouse.com/hits/tsv/hits_v1.tsv.xz') WHERE URL != ''; ``` 结果: -```response +```response Ok. 0 rows in set. Elapsed: 145.993 sec. Processed 8.87 million rows, 18.40 GB (60.78 thousand rows/s., 126.06 MB/s.) -``` +``` ClickHouse客户端输出了执行结果,插入了887万行数据。 @@ -102,7 +102,7 @@ ORDER BY Count DESC LIMIT 10; ``` 结果: -```response +```response ┌─URL────────────────────────────┬─Count─┐ │ http://auto.ru/chatay-barana.. │ 170 │ │ http://auto.ru/chatay-id=371...│ 52 │ @@ -117,10 +117,10 @@ LIMIT 10; └────────────────────────────────┴───────┘ 10 rows in set. Elapsed: 0.022 sec. -// highlight-next-line -Processed 8.87 million rows, +// highlight-next-line +Processed 8.87 million rows, 70.45 MB (398.53 million rows/s., 3.17 GB/s.) -``` +``` ClickHouse客户端输出表明,ClickHouse执行了一个完整的表扫描!我们的表的887万行中的每一行都被加载到ClickHouse中,这不是可扩展的。 @@ -131,7 +131,7 @@ ClickHouse客户端输出表明,ClickHouse执行了一个完整的表扫描! ## 包含主键的表 -创建一个包含联合主键UserID和URL列的表: +创建一个包含联合主键UserID和URL列的表: ```sql CREATE TABLE hits_UserID_URL @@ -141,7 +141,7 @@ CREATE TABLE hits_UserID_URL `EventTime` DateTime ) ENGINE = MergeTree -// highlight-next-line +// highlight-next-line PRIMARY KEY (UserID, URL) ORDER BY (UserID, URL, EventTime) SETTINGS index_granularity = 8192, index_granularity_bytes = 0; @@ -149,10 +149,10 @@ SETTINGS index_granularity = 8192, index_granularity_bytes = 0; [//]: # (
)
- + DDL详情 -

+

为了简化本文后面的讨论,并使图和结果可重现,使用DDL语句有如下说明:

    @@ -164,7 +164,7 @@ SETTINGS index_granularity = 8192, index_granularity_bytes = 0;
  • index_granularity: 显式设置为其默认值8192。这意味着对于每一组8192行,主索引将有一个索引条目,例如,如果表包含16384行,那么索引将有两个索引条目。

-  • index_granularity_bytes: 设置为0表示禁止自适应索引粒度。自适应索引粒度意味着ClickHouse自动为一组n行创建一个索引条目 
+  • index_granularity_bytes: 设置为0表示禁止自适应索引粒度。自适应索引粒度意味着ClickHouse自动为一组n行创建一个索引条目
    • 如果n小于8192,但n行的合并行数据大小大于或等于10MB (index_granularity_bytes的默认值)或
    • n达到8192
    • @@ -190,7 +190,7 @@ FROM url('https://datasets.clickhouse.com/hits/tsv/hits_v1.tsv.xz') WHERE URL != ''; ``` 结果: -```response +```response 0 rows in set. Elapsed: 149.432 sec. Processed 8.87 million rows, 18.40 GB (59.38 thousand rows/s., 123.16 MB/s.) ``` @@ -219,7 +219,7 @@ FROM system.parts WHERE (table = 'hits_UserID_URL') AND (active = 1) FORMAT Vertical; ``` - + 结果: ```response @@ -237,7 +237,7 @@ bytes_on_disk: 207.07 MiB ``` 客户端输出表明: - + - 表数据以wide format存储在一个特定目录,每个列有一个数据文件和mark文件。 - 表有887万行数据。 - 未压缩的数据有733.28 MB。 @@ -278,8 +278,8 @@ bytes_on_disk: 207.07 MiB ## 数据按照主键排序存储在磁盘上 -上面创建的表有: -- 联合主键 (UserID, URL) +上面创建的表有: +- 联合主键 (UserID, URL) - 联合排序键 (UserID, URL, EventTime)。 :::note @@ -293,7 +293,7 @@ bytes_on_disk: 207.07 MiB ::: -插入的行按照主键列(以及排序键的附加EventTime列)的字典序(从小到大)存储在磁盘上。 +插入的行按照主键列(以及排序键的附加EventTime列)的字典序(从小到大)存储在磁盘上。 :::note ClickHouse允许插入具有相同主键列的多行数据。在这种情况下(参见下图中的第1行和第2行),最终的顺序是由指定的排序键决定的,这里是EventTime列的值。 @@ -307,7 +307,7 @@ ClickHouse允许插入具有相同主键列的多行数据。在这种情况下( - 然后是URL, - 最后是EventTime: - + UserID.bin,URL.bin,和EventTime.bin是UserIDURL,和EventTime列的数据文件。
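The per-column `.bin` data files described above can also be sized from SQL. A minimal sketch, assuming the `hits_UserID_URL` table from earlier:

```sql
-- On-disk footprint per column of the active part(s); each row listed
-- here corresponds to one <column>.bin data file plus its mark file.
SELECT
    column,
    formatReadableSize(column_data_compressed_bytes)   AS compressed,
    formatReadableSize(column_data_uncompressed_bytes) AS uncompressed
FROM system.parts_columns
WHERE (table = 'hits_UserID_URL') AND active
ORDER BY column ASC;
```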
      @@ -331,13 +331,19 @@ UserID.bin,URL.bin,和EventTime.bin是UserID 下图显示了如何将表中的887万行(列值)组织成1083个颗粒,这是表的DDL语句包含设置index_granularity(设置为默认值8192)的结果。 - + 第一个(根据磁盘上的物理顺序)8192行(它们的列值)在逻辑上属于颗粒0,然后下一个8192行(它们的列值)属于颗粒1,以此类推。 :::note - 最后一个颗粒(1082颗粒)是少于8192行的。 +- 我们在本指南开头的“DDL 语句详细信息”中提到,我们禁用了自适应索引粒度(为了简化本指南中的讨论,并使图表和结果可重现)。 + + 因此,示例表中所有颗粒(除了最后一个)都具有相同大小。 + +- 对于具有自适应索引粒度的表(默认情况下索引粒度是自适应的),某些粒度的大小可以小于 8192 行,具体取决于行数据大小。 + - 我们将主键列(UserID, URL)中的一些列值标记为橙色。 这些橙色标记的列值是每个颗粒中每个主键列的最小值。这里的例外是最后一个颗粒(上图中的颗粒1082),最后一个颗粒我们标记的是最大的值。 @@ -355,21 +361,21 @@ UserID.bin,URL.bin,和EventTime.bin是UserID 下面的图显示了索引存储了每个颗粒的最小主键列值(在上面的图中用橙色标记的值)。 例如: -- 第一个索引条目(下图中的“mark 0”)存储上图中颗粒0的主键列的最小值, -- 第二个索引条目(下图中的“mark 1”)存储上图中颗粒1的主键列的最小值,以此类推。 +- 第一个索引条目(下图中的“mark 0”)存储上图中颗粒0的主键列的最小值, +- 第二个索引条目(下图中的“mark 1”)存储上图中颗粒1的主键列的最小值,以此类推。 - + -在我们的表中,索引总共有1083个条目,887万行数据和1083个颗粒: +在我们的表中,索引总共有1083个条目,887万行数据和1083个颗粒: - + :::note - 最后一个索引条目(上图中的“mark 1082”)存储了上图中颗粒1082的主键列的最大值。 - 索引条目(索引标记)不是基于表中的特定行,而是基于颗粒。例如,对于上图中的索引条目‘mark 0’,在我们的表中没有UserID为240.923且URL为“goal://metry=10000467796a411…”的行,相反,对于该表,有一个颗粒0,在该颗粒中,最小UserID值是240.923,最小URL值是“goal://metry=10000467796a411…”,这两个值来自不同的行。 -- 主索引文件完全加载到主内存中。如果文件大于可用的空闲内存空间,则ClickHouse将发生错误。 +- 主索引文件完全加载到主内存中。如果文件大于可用的空闲内存空间,则ClickHouse将发生错误。 ::: @@ -377,11 +383,11 @@ UserID.bin,URL.bin,和EventTime.bin是UserID - UserID index marks:
      主索引中存储的UserID值按升序排序。
      上图中的‘mark 1’指示颗粒1中所有表行的UserID值,以及随后所有颗粒中的UserID值,都保证大于或等于4.073.710。 - + [正如我们稍后将看到的](#query-on-userid-fast), 当查询对主键的第一列进行过滤时,此全局有序使ClickHouse能够对第一个键列的索引标记使用二分查找算法。 -- URL index marks:
      - 主键列UserIDURL有相同的基数,这意味着第一列之后的所有主键列的索引标记通常只表示每个颗粒的数据范围。
      +- URL index marks:
      + 主键列UserIDURL有相同的基数,这意味着第一列之后的所有主键列的索引标记通常只表示每个颗粒的数据范围。
      例如,‘mark 0’中的URL列所有的值都大于等于goal://metry=10000467796a411..., 然后颗粒1中的URL并不是如此,这是因为‘mark 1‘与‘mark 0‘具有不同的UserID列值。 稍后我们将更详细地讨论这对查询执行性能的影响。 @@ -401,7 +407,7 @@ GROUP BY URL ORDER BY Count DESC LIMIT 10; ``` - + 结果: @@ -420,24 +426,24 @@ LIMIT 10; └────────────────────────────────┴───────┘ 10 rows in set. Elapsed: 0.005 sec. -// highlight-next-line -Processed 8.19 thousand rows, +// highlight-next-line +Processed 8.19 thousand rows, 740.18 KB (1.53 million rows/s., 138.59 MB/s.) ``` -ClickHouse客户端的输出显示,没有进行全表扫描,只有8.19万行流到ClickHouse。 +ClickHouse客户端的输出显示,没有进行全表扫描,只有8.19千行流到ClickHouse。 如果trace logging打开了,那ClickHouse服务端日志会显示ClickHouse正在对1083个UserID索引标记执行二分查找以便识别可能包含UserID列值为749927693的行的颗粒。这需要19个步骤,平均时间复杂度为O(log2 n): ```response ...Executor): Key condition: (column 0 in [749927693, 749927693]) -// highlight-next-line +// highlight-next-line ...Executor): Running binary search on index range for part all_1_9_2 (1083 marks) ...Executor): Found (LEFT) boundary mark: 176 ...Executor): Found (RIGHT) boundary mark: 177 ...Executor): Found continuous range in 19 steps ...Executor): Selected 1/1 parts by partition key, 1 parts by primary key, -// highlight-next-line +// highlight-next-line 1/1083 marks by primary key, 1 marks to read from 1 ranges ...Reading ...approx. 8192 rows starting from 1441792 ``` @@ -446,12 +452,12 @@ ClickHouse客户端的输出显示,没有进行全表扫描,只有8.19万行 我们可以在上面的跟踪日志中看到,1083个现有标记中有一个满足查询。
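The trace lines quoted above come from server-side logging. A sketch of how to stream them into `clickhouse-client` for one session (the query is the same one as above; the exact log wording varies by ClickHouse version):

```sql
-- Forward server logs at 'trace' level to the client for this session,
-- then re-run the query to see the index-analysis messages.
SET send_logs_level = 'trace';

SELECT URL, count(URL) AS Count
FROM hits_UserID_URL
WHERE UserID = 749927693
GROUP BY URL
ORDER BY Count DESC
LIMIT 10;
```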
-<summary>
+<summary>
Trace Log详情
-</summary>
+</summary>
      -Mark 176 was identified (the 'found left boundary mark' is inclusive, the 'found right boundary mark' is exclusive), and therefore all 8192 rows from granule 176 (which starts at row 1.441.792 - we will see that later on in this article) are then streamed into ClickHouse in order to find the actual rows with a UserID column value of 749927693. +Mark 176 was identified (the 'found left boundary mark' is inclusive, the 'found right boundary mark' is exclusive), and therefore all 8192 rows from granule 176 (which starts at row 1.441.792 - we will see that later on in this article) are then streamed into ClickHouse in order to find the actual rows with a UserID column value of 749927693.

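The starting row number quoted above follows directly from the mark number and the index granularity; a one-line check:

```sql
-- Granule 176 begins at mark_number * index_granularity rows:
SELECT 176 * 8192 AS first_row_of_granule_176; -- returns 1441792
```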
      @@ -465,7 +471,7 @@ GROUP BY URL ORDER BY Count DESC LIMIT 10; ``` - + 结果如下: ```response @@ -507,23 +513,23 @@ LIMIT 10; 在**第二阶段(数据读取中)**, ClickHouse定位所选的颗粒,以便将它们的所有行流到ClickHouse引擎中,以便找到实际匹配查询的行。 -我们将在下一节更详细地讨论第二阶段。 +我们将在下一节更详细地讨论第二阶段。 ## 标记文件用来定位颗粒 -下图描述了上表主索引文件的一部分。 +下图描述了上表主索引文件的一部分。 - + 如上所述,通过对索引的1083个UserID标记进行二分搜索,确定了第176个标记。因此,它对应的颗粒176可能包含UserID列值为749.927.693的行。
-<summary>
+<summary>
颗粒选择的具体过程
-</summary>
+</summary>
      上图显示,标记176是第一个UserID值小于749.927.693的索引条目,并且下一个标记(标记177)的颗粒177的最小UserID值大于该值的索引条目。因此,只有标记176对应的颗粒176可能包含UserID列值为749.927.693的行。

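A hedged alternative to reading server logs (available in recent ClickHouse releases) is to ask the query planner which granules survive the primary-index analysis:

```sql
-- The PrimaryKey block of the output reports the selected parts and
-- granules for this predicate, e.g. "Granules: 1/1083".
EXPLAIN indexes = 1
SELECT URL, count(URL) AS Count
FROM hits_UserID_URL
WHERE UserID = 749927693
GROUP BY URL
ORDER BY Count DESC
LIMIT 10;
```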
      @@ -537,7 +543,7 @@ LIMIT 10; 下图显示了三个标记文件UserID.mrk、URL.mrk、EventTime.mrk,为表的UserID、URL和EventTime列存储颗粒的物理位置。 - + 我们已经讨论了主索引是一个扁平的未压缩数组文件(primary.idx),其中包含从0开始编号的索引标记。 @@ -545,9 +551,9 @@ LIMIT 10; 一旦ClickHouse确定并选择了可能包含查询所需的匹配行的颗粒的索引标记,就可以在标记文件数组中查找,以获得颗粒的物理位置。 -每个特定列的标记文件条目以偏移量的形式存储两个位置: +每个特定列的标记文件条目以偏移量的形式存储两个位置: -- 第一个偏移量(上图中的'block_offset')是在包含所选颗粒的压缩版本的压缩列数据文件中定位块。这个压缩块可能包含几个压缩的颗粒。所定位的压缩文件块在读取时被解压到内存中。 +- 第一个偏移量(上图中的'block_offset')是在包含所选颗粒的压缩版本的压缩列数据文件中定位块。这个压缩块可能包含几个压缩的颗粒。所定位的压缩文件块在读取时被解压到内存中。 - 标记文件的第二个偏移量(上图中的“granule_offset”)提供了颗粒在解压数据块中的位置。 @@ -576,7 +582,7 @@ LIMIT 10; 下面的图表和文本说明了我们的查询示例,ClickHouse如何在UserID.bin数据文件中定位176颗粒。 - + 我们在本文前面讨论过,ClickHouse选择了主索引标记176,因此176颗粒可能包含查询所需的匹配行。 @@ -624,7 +630,7 @@ LIMIT 10; ``` 结果是: -```response +```response ┌─────UserID─┬─Count─┐ │ 2459550954 │ 3741 │ │ 1084649151 │ 2484 │ @@ -639,26 +645,26 @@ LIMIT 10; └────────────┴───────┘ 10 rows in set. Elapsed: 0.086 sec. -// highlight-next-line -Processed 8.81 million rows, +// highlight-next-line +Processed 8.81 million rows, 799.69 MB (102.11 million rows/s., 9.27 GB/s.) -``` +``` 客户端输出表明,尽管URL列是联合主键的一部分,ClickHouse几乎执行了一一次全表扫描!ClickHouse从表的887万行中读取881万行。 如果启用了trace日志,那么ClickHouse服务日志文件显示,ClickHouse在1083个URL索引标记上使用了通用的排除搜索,以便识别那些可能包含URL列值为"http://public_search"的行。 -```response -...Executor): Key condition: (column 1 in ['http://public_search', +```response +...Executor): Key condition: (column 1 in ['http://public_search', 'http://public_search']) -// highlight-next-line -...Executor): Used generic exclusion search over index for part all_1_9_2 +// highlight-next-line +...Executor): Used generic exclusion search over index for part all_1_9_2 with 1537 steps ...Executor): Selected 1/1 parts by partition key, 1 parts by primary key, -// highlight-next-line +// highlight-next-line 1076/1083 marks by primary key, 1076 marks to read from 5 ranges ...Executor): Reading approx. 8814592 rows with 10 streams -``` -我们可以在上面的跟踪日志示例中看到,1083个颗粒中有1076个(通过标记)被选中,因为可能包含具有匹配URL值的行。 +``` +我们可以在上面的跟踪日志示例中看到,1083个颗粒中有1076个(通过标记)被选中,因为可能包含具有匹配URL值的行。 这将导致881万行被读取到ClickHouse引擎中(通过使用10个流并行地读取),以便识别实际包含URL值"http://public_search"的行。 @@ -671,15 +677,15 @@ Processed 8.81 million rows, 为了说明,我们给出通用的排除搜索算法的工作原理:
-<summary>
-通用排除搜索算法 
+<summary>
+通用排除搜索算法
-</summary>
+</summary>
      -下面将演示当通过第一个列之后的任何列选择颗粒时,当前一个键列具有或高或低的基数时,ClickHouse通用排除搜索算法 是如何工作的。 +下面将演示当通过第一个列之后的任何列选择颗粒时,当前一个键列具有或高或低的基数时,ClickHouse通用排除搜索算法 是如何工作的。 作为这两种情况的例子,我们将假设: - 搜索URL值为"W3"的行。 @@ -693,7 +699,7 @@ Processed 8.81 million rows, 假设UserID具有较低的基数。在这种情况下,相同的UserID值很可能分布在多个表行和颗粒上,从而分布在索引标记上。对于具有相同UserID的索引标记,索引标记的URL值按升序排序(因为表行首先按UserID排序,然后按URL排序)。这使得有效的过滤如下所述: - + 在上图中,我们的抽象样本数据的颗粒选择过程有三种不同的场景: @@ -704,13 +710,13 @@ Processed 8.81 million rows, 3. 可以排除URL值大于W3的索引标记2和3,因为主索引的索引标记存储了每个颗粒的最小键列值,因此颗粒2和3不可能包含URL值W3。 - + **前缀主键高基数** 当UserID具有较高的基数时,相同的UserID值不太可能分布在多个表行和颗粒上。这意味着索引标记的URL值不是单调递增的: - + 正如在上面的图表中所看到的,所有URL值小于W3的标记都被选中,以便将其关联的颗粒的行加载到ClickHouse引擎中。 @@ -736,16 +742,16 @@ Processed 8.81 million rows, 在我们的示例数据集中,两个键列(UserID、URL)都具有类似的高基数,并且,如前所述,当URL列的前一个键列具有较高基数时,通用排除搜索算法不是很有效。 :::note 看下跳数索引 -因为UserID和URL具有较高的基数,[根据URL过滤数据](#query-on-url)不是特别有效,对URL列创建[二级跳数索引](./skipping-indexes.md)同样也不会有太多改善。 +因为UserID和URL具有较高的基数,[根据URL过滤数据](#query-on-url)不是特别有效,对URL列创建[二级跳数索引](./skipping-indexes.md)同样也不会有太多改善。 -例如,这两个语句在我们的表的URL列上创建并填充一个minmax跳数索引。 +例如,这两个语句在我们的表的URL列上创建并填充一个minmax跳数索引。 ```sql ALTER TABLE hits_UserID_URL ADD INDEX url_skipping_index URL TYPE minmax GRANULARITY 4; ALTER TABLE hits_UserID_URL MATERIALIZE INDEX url_skipping_index; ``` ClickHouse现在创建了一个额外的索引来存储—每组4个连续的颗粒(注意上面ALTER TABLE语句中的GRANULARITY 4子句)—最小和最大的URL值: - + 第一个索引条目(上图中的mark 0)存储属于表的前4个颗粒的行的最小和最大URL值。 @@ -786,15 +792,15 @@ ClickHouse现在创建了一个额外的索引来存储—每组4个连续的颗 当创建有不同主键的第二个表时,查询必须显式地发送给最适合查询的表版本,并且必须显式地插入新数据到两个表中,以保持表的同步: - + 在物化视图中,额外的表被隐藏,数据自动在两个表之间保持同步: - + projection方式是最透明的选项,因为除了自动保持隐藏的附加表与数据变化同步外,ClickHouse还会自动选择最有效的表版本进行查询: - + 下面我们使用真实的例子详细讨论下这三种方式。 @@ -813,7 +819,7 @@ CREATE TABLE hits_URL_UserID `EventTime` DateTime ) ENGINE = MergeTree -// highlight-next-line +// highlight-next-line PRIMARY KEY (URL, UserID) ORDER BY (URL, UserID, EventTime) SETTINGS index_granularity = 8192, index_granularity_bytes = 0; @@ -822,10 +828,10 @@ SETTINGS index_granularity = 8192, index_granularity_bytes = 0; 写入887万行源表数据: ```sql -INSERT INTO hits_URL_UserID +INSERT INTO hits_URL_UserID SELECT * from hits_UserID_URL; ``` - + 结果: ```response @@ -841,10 +847,10 @@ OPTIMIZE TABLE hits_URL_UserID FINAL; 因为我们切换了主键中列的顺序,插入的行现在以不同的字典顺序存储在磁盘上(与我们的原始表相比),因此该表的1083个颗粒也包含了与以前不同的值: - + 主键索引如下: - + 现在计算最频繁点击URL"http://public_search"的前10名用户,这时候的查询速度是明显加快的: ```sql @@ -856,7 +862,7 @@ GROUP BY UserID ORDER BY Count DESC LIMIT 10; ``` - + 结果: @@ -875,8 +881,8 @@ LIMIT 10; └────────────┴───────┘ 10 rows in set. Elapsed: 0.017 sec. -// highlight-next-line -Processed 319.49 thousand rows, +// highlight-next-line +Processed 319.49 thousand rows, 11.38 MB (18.41 million rows/s., 655.75 MB/s.) ``` @@ -887,15 +893,15 @@ Processed 319.49 thousand rows, 将URL作为主索引的第一列,ClickHouse现在对索引标记运行二分搜索。ClickHouse服务器日志文件中对应的跟踪日志: ```response -...Executor): Key condition: (column 0 in ['http://public_search', +...Executor): Key condition: (column 0 in ['http://public_search', 'http://public_search']) -// highlight-next-line +// highlight-next-line ...Executor): Running binary search on index range for part all_1_9_2 (1083 marks) ...Executor): Found (LEFT) boundary mark: 644 ...Executor): Found (RIGHT) boundary mark: 683 ...Executor): Found continuous range in 19 steps ...Executor): Selected 1/1 parts by partition key, 1 parts by primary key, -// highlight-next-line +// highlight-next-line 39/1083 marks by primary key, 39 marks to read from 1 ranges ...Executor): Reading approx. 319488 rows with 2 streams ``` @@ -907,10 +913,10 @@ ClickHouse只选择了39个索引标记,而不是使用通用排除搜索时 点击下面了解详情:

-<summary>
+<summary>
对UserID的查询过滤性能较差
-</summary>
+</summary>

      ```sql SELECT URL, count(URL) AS Count @@ -920,7 +926,7 @@ GROUP BY URL ORDER BY Count DESC LIMIT 10; ``` - + 结果 ```response @@ -938,8 +944,8 @@ LIMIT 10; └────────────────────────────────┴───────┘ 10 rows in set. Elapsed: 0.024 sec. -// highlight-next-line -Processed 8.02 million rows, +// highlight-next-line +Processed 8.02 million rows, 73.04 MB (340.26 million rows/s., 3.10 GB/s.) ``` @@ -947,10 +953,10 @@ Processed 8.02 million rows, ```response ...Executor): Key condition: (column 1 in [749927693, 749927693]) // highlight-next-line -...Executor): Used generic exclusion search over index for part all_1_9_2 +...Executor): Used generic exclusion search over index for part all_1_9_2 with 1453 steps ...Executor): Selected 1/1 parts by partition key, 1 parts by primary key, -// highlight-next-line +// highlight-next-line 980/1083 marks by primary key, 980 marks to read from 23 ranges ...Executor): Reading approx. 8028160 rows with 10 streams ``` @@ -960,7 +966,7 @@ Processed 8.02 million rows, 现在我们有了两张表。优化了对UserID和URL的查询过滤,分别: - + @@ -981,7 +987,7 @@ ORDER BY (URL, UserID, EventTime) POPULATE AS SELECT * FROM hits_UserID_URL; ``` - + 结果: ```response @@ -993,20 +999,20 @@ Ok. :::note - 我们在视图的主键中切换键列的顺序(与原始表相比) - 物化视图由一个隐藏表支持,该表的行顺序和主索引基于给定的主键定义 -- 我们使用POPULATE关键字,以便用源表hits_UserID_URL中的所有887万行立即导入新的物化视图 +- 我们使用POPULATE关键字,以便用源表hits_UserID_URL中的所有887万行立即导入新的物化视图 - 如果在源表hits_UserID_URL中插入了新行,那么这些行也会自动插入到隐藏表中 - 实际上,隐式创建的隐藏表的行顺序和主索引与我们上面显式创建的辅助表相同: - + ClickHouse将隐藏表的列数据文件(.bin)、标记文件(.mrk2)和主索引(primary.idx)存储在ClickHouse服务器的数据目录的一个特殊文件夹中: - + ::: @@ -1021,7 +1027,7 @@ GROUP BY UserID ORDER BY Count DESC LIMIT 10; ``` - + 结果: ```response @@ -1039,8 +1045,8 @@ LIMIT 10; └────────────┴───────┘ 10 rows in set. Elapsed: 0.026 sec. -// highlight-next-line -Processed 335.87 thousand rows, +// highlight-next-line +Processed 335.87 thousand rows, 13.54 MB (12.91 million rows/s., 520.38 MB/s.) ``` @@ -1049,13 +1055,13 @@ Processed 335.87 thousand rows, ClickHouse服务器日志文件中相应的跟踪日志确认了ClickHouse正在对索引标记运行二分搜索: ```response -...Executor): Key condition: (column 0 in ['http://public_search', +...Executor): Key condition: (column 0 in ['http://public_search', 'http://public_search']) // highlight-next-line ...Executor): Running binary search on index range ... ... ...Executor): Selected 4/4 parts by partition key, 4 parts by primary key, -// highlight-next-line +// highlight-next-line 41/1083 marks by primary key, 41 marks to read from 4 ranges ...Executor): Reading approx. 335872 rows with 4 streams ``` @@ -1068,7 +1074,7 @@ ClickHouse服务器日志文件中相应的跟踪日志确认了ClickHouse正在 Projections目前是一个实验性的功能,因此我们需要告诉ClickHouse: ```sql -SET allow_experimental_projection_optimization = 1; +SET optimize_use_projections = 1; ``` @@ -1095,11 +1101,11 @@ ALTER TABLE hits_UserID_URL - 查询总是(从语法上)针对源表hits_UserID_URL,但是如果隐藏表的行顺序和主索引允许更有效地执行查询,那么将使用该隐藏表 - 实际上,隐式创建的隐藏表的行顺序和主索引与我们显式创建的辅助表相同: - + ClickHouse将隐藏表的列数据文件(.bin)、标记文件(.mrk2)和主索引(primary.idx)存储在一个特殊的文件夹中(在下面的截图中用橙色标记),紧挨着源表的数据文件、标记文件和主索引文件: - + ::: 由投影创建的隐藏表(以及它的主索引)现在可以(隐式地)用于显著加快URL列上查询过滤的执行。注意,查询在语法上针对投影的源表。 @@ -1113,7 +1119,7 @@ GROUP BY UserID ORDER BY Count DESC LIMIT 10; ``` - + 结果: ```response @@ -1130,8 +1136,8 @@ LIMIT 10; │ 765730816 │ 536 │ └────────────┴───────┘ -10 rows in set. Elapsed: 0.029 sec. -// highlight-next-line +10 rows in set. Elapsed: 0.029 sec. +// highlight-next-line Processed 319.49 thousand rows, 1 1.38 MB (11.05 million rows/s., 393.58 MB/s.) 
``` @@ -1142,16 +1148,16 @@ ClickHouse服务器日志文件中跟踪日志确认了ClickHouse正在对索引 ```response -...Executor): Key condition: (column 0 in ['http://public_search', +...Executor): Key condition: (column 0 in ['http://public_search', 'http://public_search']) -// highlight-next-line +// highlight-next-line ...Executor): Running binary search on index range for part prj_url_userid (1083 marks) ...Executor): ... // highlight-next-line ...Executor): Choose complete Normal projection prj_url_userid ...Executor): projection required columns: URL, UserID ...Executor): Selected 1/1 parts by partition key, 1 parts by primary key, -// highlight-next-line +// highlight-next-line 39/1083 marks by primary key, 39 marks to read from 1 ranges ...Executor): Reading approx. 319488 rows with 2 streams ``` diff --git a/docs/zh/interfaces/formats.md b/docs/zh/interfaces/formats.md index 852b327366b..fd3cf743818 100644 --- a/docs/zh/interfaces/formats.md +++ b/docs/zh/interfaces/formats.md @@ -685,8 +685,9 @@ CREATE TABLE IF NOT EXISTS example_table - 如果`input_format_defaults_for_omitted_fields = 0`, 那么`x`和`a`的默认值等于`0`(作为`UInt32`数据类型的默认值)。 - 如果`input_format_defaults_for_omitted_fields = 1`, 那么`x`的默认值为`0`,但`a`的默认值为`x * 2`。 -!!! note "注意" +:::warning 当使用`input_format_defaults_for_omitted_fields = 1`插入数据时,与使用`input_format_defaults_for_omitted_fields = 0`相比,ClickHouse消耗更多的计算资源。 +::: ### Selecting Data {#selecting-data} @@ -708,8 +709,9 @@ CREATE TABLE IF NOT EXISTS example_table 与[JSON](#json)格式不同,没有替换无效的UTF-8序列。值以与`JSON`相同的方式转义。 -!!! note "提示" +:::info 字符串中可以输出任意一组字节。如果您确信表中的数据可以被格式化为JSON而不会丢失任何信息,那么就使用`JSONEachRow`格式。 +::: ### Nested Structures {#jsoneachrow-nested} @@ -1216,9 +1218,9 @@ SET format_avro_schema_registry_url = 'http://schema-registry'; SELECT * FROM topic1_stream; ``` -!!! note "警告" - 设置 `format_avro_schema_registry_url` 需要写入配置文件`users.xml`以在Clickhouse重启后,该设置仍为您的设定值。您也可以在使用Kafka引擎的时候指定该设置。 - +:::warning +设置 `format_avro_schema_registry_url` 需要写入配置文件`users.xml`以在Clickhouse重启后,该设置仍为您的设定值。您也可以在使用Kafka引擎的时候指定该设置。 +::: ## Parquet {#data-format-parquet} diff --git a/docs/zh/interfaces/http.md b/docs/zh/interfaces/http.md index 11754ed3e2f..c7a0f355a92 100644 --- a/docs/zh/interfaces/http.md +++ b/docs/zh/interfaces/http.md @@ -96,7 +96,7 @@ ECT 1 , expected One of: SHOW TABLES, SHOW DATABASES, SELECT, INSERT, CREATE, ATTACH, RENAME, DROP, DETACH, USE, SET, OPTIMIZE., e.what() = DB::Exception ``` -默认情况下,返回的数据是`TabSeparated`格式的,更多信息,见[Formats](../interfaces/formats/)部分。 +默认情况下,返回的数据是`TabSeparated`格式的,更多信息,见[Formats](../interfaces/formats.md)部分。 您可以使用查询的FORMAT子句来设置其他格式。 @@ -188,8 +188,9 @@ $ curl -vsS "http://localhost:8123/?enable_http_compression=1" -d 'SELECT number $ echo "SELECT 1" | gzip -c | curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/' ``` -!!! note "警告" - 一些HTTP客户端可能会在默认情况下从服务器解压数据(使用`gzip`和`deflate`),即使您未正确地使用了压缩设置,您也可能会得到解压数据。 +:::warning +一些HTTP客户端可能会在默认情况下从服务器解压数据(使用`gzip`和`deflate`),即使您未正确地使用了压缩设置,您也可能会得到解压数据。 +::: 您可以使用`database`URL参数或`X-ClickHouse-Database`头来指定默认数据库。 @@ -447,8 +448,9 @@ $ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost: max_final_threads 2 ``` -!!! 
note "警告" - 在一个`predefined_query_handler`中,只支持insert类型的一个`查询`。 +:::warning +在一个`predefined_query_handler`中,只支持insert类型的一个`查询`。 +::: ### 动态查询 {#dynamic_query_handler} diff --git a/docs/zh/interfaces/third-party/client-libraries.md b/docs/zh/interfaces/third-party/client-libraries.md index 1d46c19ce8b..4ee74f1d2c3 100644 --- a/docs/zh/interfaces/third-party/client-libraries.md +++ b/docs/zh/interfaces/third-party/client-libraries.md @@ -24,6 +24,7 @@ Yandex**没有**维护下面列出的库,也没有做过任何广泛的测试 - [SeasClick C++ client](https://github.com/SeasX/SeasClick) - [one-ck](https://github.com/lizhichao/one-ck) - [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel) + - [hyvor/clickhouse-php](https://github.com/hyvor/clickhouse-php) - Go - [clickhouse](https://github.com/kshvakov/clickhouse/) - [go-clickhouse](https://github.com/roistat/go-clickhouse) diff --git a/docs/zh/interfaces/third-party/index.md b/docs/zh/interfaces/third-party/index.md index 0c6b194d04a..64c0b30a9c0 100644 --- a/docs/zh/interfaces/third-party/index.md +++ b/docs/zh/interfaces/third-party/index.md @@ -13,5 +13,6 @@ sidebar_position: 24 - [GUI](../../interfaces/third-party/gui.md) - [Proxies](../../interfaces/third-party/proxy.md) -!!! note "注意" +:::note 支持通用API的通用工具[ODBC](../../interfaces/odbc.md)或[JDBC](../../interfaces/jdbc.md),通常也适用于ClickHouse,但这里没有列出,因为它们实在太多了。 +::: diff --git a/docs/zh/operations/access-rights.md b/docs/zh/operations/access-rights.md index 713cf991f47..8d640ef32c0 100644 --- a/docs/zh/operations/access-rights.md +++ b/docs/zh/operations/access-rights.md @@ -24,9 +24,9 @@ ClickHouse权限实体包括: 我们建议你使用SQL工作流的方式。当然配置的方式也可以同时起作用, 所以如果你正在用服务端配置的方式来管理权限和账户,你可以平滑的切换到SQL驱动的工作流方式。 -!!! note "警告" - 你无法同时使用两个配置的方式来管理同一个权限实体。 - +:::warning +你无法同时使用两个配置的方式来管理同一个权限实体。 +::: ## 用法 {#access-control-usage} diff --git a/docs/zh/operations/backup.md b/docs/zh/operations/backup.md index e0c5174a9a4..6d491f9c2f7 100644 --- a/docs/zh/operations/backup.md +++ b/docs/zh/operations/backup.md @@ -12,8 +12,9 @@ sidebar_label: "\u6570\u636E\u5907\u4EFD" 不同公司有不同的可用资源和业务需求,因此不存在一个通用的解决方案可以应对各种情况下的ClickHouse备份和恢复。 适用于 1GB 数据的方案可能并不适用于几十 PB 数据的情况。 有多种具备各自优缺点的可能方法,将在下面对其进行讨论。最好使用几种方法而不是仅仅使用一种方法来弥补它们的各种缺点。。 -!!! note "注" - 需要注意的是,如果您备份了某些内容并且从未尝试过还原它,那么当您实际需要它时可能无法正常恢复(或者至少需要的时间比业务能够容忍的时间更长)。 因此,无论您选择哪种备份方法,请确保自动还原过程,并定期在备用ClickHouse群集上演练。 +:::note +需要注意的是,如果您备份了某些内容并且从未尝试过还原它,那么当您实际需要它时可能无法正常恢复(或者至少需要的时间比业务能够容忍的时间更长)。 因此,无论您选择哪种备份方法,请确保自动还原过程,并定期在备用ClickHouse群集上演练。 +::: ## 将源数据复制到其它地方 {#duplicating-source-data-somewhere-else} diff --git a/docs/zh/operations/optimizing-performance/sampling-query-profiler.md b/docs/zh/operations/optimizing-performance/sampling-query-profiler.md index 4206274ec0d..5d31ab9b245 100644 --- a/docs/zh/operations/optimizing-performance/sampling-query-profiler.md +++ b/docs/zh/operations/optimizing-performance/sampling-query-profiler.md @@ -32,7 +32,7 @@ ClickHouse运行允许分析查询执行的采样探查器。 使用探查器, - 使用 `addressToLine`, `addressToSymbol` 和 `demangle` [内省功能](../../sql-reference/functions/introspection.md) 获取函数名称及其在ClickHouse代码中的位置。 要获取某些查询的配置文件,您需要从以下内容汇总数据 `trace_log` 桌子 您可以通过单个函数或整个堆栈跟踪聚合数据。 -如果你需要想象 `trace_log` 信息,尝试 [flamegraph](../../interfaces/third-party/gui/#clickhouse-flamegraph) 和 [测速镜](https://github.com/laplab/clickhouse-speedscope). +如果你需要想象 `trace_log` 信息,尝试 [flamegraph](../../interfaces/third-party/gui.md#clickhouse-flamegraph) 和 [测速镜](https://github.com/laplab/clickhouse-speedscope). 
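A minimal sketch of such an aggregation over `trace_log` (the `query_id` literal is a placeholder to substitute with the id of the profiled query; introspection functions must be enabled first):

```sql
SET allow_introspection_functions = 1;

-- Top call stacks by sample count for one profiled query.
SELECT
    count() AS samples,
    arrayStringConcat(
        arrayMap(x -> demangle(addressToSymbol(x)), trace),
        '\n') AS stack
FROM system.trace_log
WHERE query_id = '<query_id>' -- placeholder
GROUP BY trace
ORDER BY samples DESC
LIMIT 5;
```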
## 示例 {#example} diff --git a/docs/zh/operations/server-configuration-parameters/settings.md b/docs/zh/operations/server-configuration-parameters/settings.md index 89ae411b9e0..52142eda2e8 100644 --- a/docs/zh/operations/server-configuration-parameters/settings.md +++ b/docs/zh/operations/server-configuration-parameters/settings.md @@ -528,8 +528,9 @@ SSL客户端/服务器配置。 包含数据的目录的路径。 -!!! note "注" - 尾部斜杠是强制性的。 +:::note +尾部斜杠是强制性的。 +::: **示例** @@ -714,8 +715,9 @@ TCP端口,用于与客户端进行安全通信。 使用它与 [OpenSSL](#serv 用于处理大型查询的临时数据的路径。 -!!! note "注" - 尾部斜杠是强制性的。 +:::note +尾部斜杠是强制性的。 +::: **示例** @@ -728,11 +730,12 @@ TCP端口,用于与客户端进行安全通信。 使用它与 [OpenSSL](#serv 从政策 [`storage_configuration`](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) 存储临时文件。 如果没有设置 [`tmp_path`](#server-settings-tmp_path) 被使用,否则被忽略。 -!!! note "注" - - `move_factor` 被忽略 +:::note +- `move_factor` 被忽略 - `keep_free_space_bytes` 被忽略 - `max_data_part_size_bytes` 被忽略 -您必须在该政策中只有一个卷 +::: ## uncompressed_cache_size {#server-settings-uncompressed_cache_size} @@ -775,7 +778,7 @@ TCP端口,用于与客户端进行安全通信。 使用它与 [OpenSSL](#serv ## zookeeper {#server-settings_zookeeper} -包含允许ClickHouse与 [zookpeer](http://zookeeper.apache.org/) 集群。 +包含允许ClickHouse与 [zookeeper](http://zookeeper.apache.org/) 集群。 ClickHouse使用ZooKeeper存储复制表副本的元数据。 如果未使用复制的表,则可以省略此部分参数。 diff --git a/docs/zh/operations/settings/settings-profiles.md b/docs/zh/operations/settings/settings-profiles.md index 1ad394950bf..5051276607f 100644 --- a/docs/zh/operations/settings/settings-profiles.md +++ b/docs/zh/operations/settings/settings-profiles.md @@ -8,8 +8,9 @@ sidebar_label: "\u8BBE\u7F6E\u914D\u7F6E" 设置配置是设置的集合,并按照相同的名称进行分组。 -!!! note "信息" - ClickHouse 还支持用 [SQL驱动的工作流](../../operations/access-rights.md#access-control) 管理设置配置。我们建议使用它。 +:::info +ClickHouse 还支持用 [SQL驱动的工作流](../../operations/access-rights.md#access-control) 管理设置配置。我们建议使用它。 +::: 设置配置可以任意命名。你可以为不同的用户指定相同的设置配置。您可以在设置配置中写入的最重要的内容是 `readonly=1`,这将确保只读访问。 diff --git a/docs/zh/operations/settings/settings-users.md b/docs/zh/operations/settings/settings-users.md index de4aa27df69..d7fe5bad3c3 100644 --- a/docs/zh/operations/settings/settings-users.md +++ b/docs/zh/operations/settings/settings-users.md @@ -10,8 +10,9 @@ sidebar_label: "\u7528\u6237\u8BBE\u7F6E" `user.xml` 中的 `users` 配置段包含了用户配置 -!!! note "提示" - ClickHouse还支持 [SQL驱动的工作流](../access-rights.md#access-control) 用于管理用户。 我们建议使用它。 +:::note +ClickHouse还支持 [SQL驱动的工作流](/docs/en/operations/access-rights#access-control) 用于管理用户。 我们建议使用它。 +::: `users` 配置段的结构: @@ -78,7 +79,7 @@ sidebar_label: "\u7528\u6237\u8BBE\u7F6E" ### access_management {#access_management-user-setting} -此设置可为用户启用或禁用 SQL-driven [访问控制和帐户管理](../access-rights.md#access-control) 。 +此设置可为用户启用或禁用 SQL-driven [访问控制和帐户管理](/docs/en/operations/access-rights#access-control) 。 可能的值: diff --git a/docs/zh/operations/settings/settings.md b/docs/zh/operations/settings/settings.md index 4107a499463..457b208602f 100644 --- a/docs/zh/operations/settings/settings.md +++ b/docs/zh/operations/settings/settings.md @@ -266,8 +266,9 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), ( 执行时 `INSERT` 查询时,将省略的输入列值替换为相应列的默认值。 此选项仅适用于 [JSONEachRow](../../interfaces/formats.md#jsoneachrow), [CSV](../../interfaces/formats.md#csv) 和 [TabSeparated](../../interfaces/formats.md#tabseparated) 格式。 -!!! 
note "注" - 启用此选项后,扩展表元数据将从服务器发送到客户端。 它会消耗服务器上的额外计算资源,并可能降低性能。 +:::note +启用此选项后,扩展表元数据将从服务器发送到客户端。 它会消耗服务器上的额外计算资源,并可能降低性能。 +::: 可能的值: diff --git a/docs/zh/operations/system-tables/data_type_families.md b/docs/zh/operations/system-tables/data_type_families.md index 18e9455476d..f0e3a9ef896 100644 --- a/docs/zh/operations/system-tables/data_type_families.md +++ b/docs/zh/operations/system-tables/data_type_families.md @@ -3,7 +3,7 @@ slug: /zh/operations/system-tables/data_type_families --- # system.data_type_families {#system_tables-data_type_families} -包含有关受支持的[数据类型](../../sql-reference/data-types/)的信息. +包含有关受支持的[数据类型](../../sql-reference/data-types/index.md)的信息. 列字段包括: diff --git a/docs/zh/operations/system-tables/parts.md b/docs/zh/operations/system-tables/parts.md index 0bd728f543f..0ebac3944ff 100644 --- a/docs/zh/operations/system-tables/parts.md +++ b/docs/zh/operations/system-tables/parts.md @@ -99,8 +99,9 @@ slug: /zh/operations/system-tables/parts - `move_ttl_info.expression` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — 表达式的数组。 每个表达式定义一个 [TTL MOVE 规则](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl). - !!! note "警告" - 保留 `move_ttl_info.expression` 数组主要是为了向后兼容,现在检查 `TTL MOVE` 规则最简单的方法是使用 `move_ttl_info.min` 和 `move_ttl_info.max` 字段。 +:::warning +保留 `move_ttl_info.expression` 数组主要是为了向后兼容,现在检查 `TTL MOVE` 规则最简单的方法是使用 `move_ttl_info.min` 和 `move_ttl_info.max` 字段。 +::: - `move_ttl_info.min` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — 日期值和时间值的数组。数组中的每个元素都描述了一个 [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) 的最小键值。 diff --git a/docs/zh/operations/system-tables/query_log.md b/docs/zh/operations/system-tables/query_log.md index 93e5771d4b5..7149282dfcc 100644 --- a/docs/zh/operations/system-tables/query_log.md +++ b/docs/zh/operations/system-tables/query_log.md @@ -8,8 +8,9 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 包含已执行查询的相关信息,例如:开始时间、处理持续时间、错误消息。 -!!! note "注" - 此表不包含以下内容的摄取数据 `INSERT` 查询。 +:::note +此表不包含以下内容的摄取数据 `INSERT` 查询。 +::: 您可以更改query_log的设置,在服务器配置的 [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) 部分。 diff --git a/docs/zh/operations/system-tables/replicated_fetches.md b/docs/zh/operations/system-tables/replicated_fetches.md index 7fd517c72ab..c6c37759755 100644 --- a/docs/zh/operations/system-tables/replicated_fetches.md +++ b/docs/zh/operations/system-tables/replicated_fetches.md @@ -68,4 +68,4 @@ thread_id: 54 **另请参阅** -- [管理 ReplicatedMergeTree 表](../../sql-reference/statements/system/#query-language-system-replicated) +- [管理 ReplicatedMergeTree 表](../../sql-reference/statements/system.md#query-language-system-replicated) diff --git a/docs/zh/sql-reference/aggregate-functions/reference/corr.md b/docs/zh/sql-reference/aggregate-functions/reference/corr.md index 01a89e428ab..48b5bf904f5 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/corr.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/corr.md @@ -12,5 +12,6 @@ sidebar_position: 107 计算Pearson相关系数: `Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)^2) * Σ((y - y̅)^2))`。 -!!! 
note "注" - 该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `corrStable` 函数。 它的工作速度较慢,但提供较低的计算错误。 +:::note +该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `corrStable` 函数。 它的工作速度较慢,但提供较低的计算错误。 +::: diff --git a/docs/zh/sql-reference/aggregate-functions/reference/covarpop.md b/docs/zh/sql-reference/aggregate-functions/reference/covarpop.md index 93bfee15684..e98270dc896 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/covarpop.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/covarpop.md @@ -12,5 +12,6 @@ covarPop(x, y) 计算 `Σ((x - x̅)(y - y̅)) / n` 的值。 -!!! note "注" - 该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `covarPopStable` 函数。 它的工作速度较慢,但提供了较低的计算错误。 +:::note +该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `covarPopStable` 函数。 它的工作速度较慢,但提供了较低的计算错误。 +::: diff --git a/docs/zh/sql-reference/aggregate-functions/reference/covarsamp.md b/docs/zh/sql-reference/aggregate-functions/reference/covarsamp.md index 7c8565211b1..1b8dfc4a60f 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/covarsamp.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/covarsamp.md @@ -14,5 +14,6 @@ covarSamp(x, y) 返回Float64。 当 `n <= 1`, 返回 +∞。 -!!! note "注" - 该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `covarSampStable` 函数。 它的工作速度较慢,但提供较低的计算错误。 +:::note +该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `covarSampStable` 函数。 它的工作速度较慢,但提供较低的计算错误。 +::: diff --git a/docs/zh/sql-reference/aggregate-functions/reference/grouparrayinsertat.md b/docs/zh/sql-reference/aggregate-functions/reference/grouparrayinsertat.md index 8431b5a1110..f0672d4fe45 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/grouparrayinsertat.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/grouparrayinsertat.md @@ -20,7 +20,7 @@ groupArrayInsertAt(default_x, size)(x, pos); **参数** -- `x` — 要插入的值。生成所[支持的数据类型](../../../sql-reference/data-types/index.md)(数据)的[表达式](../../../sql-reference/syntax#syntax-expressions)。 +- `x` — 要插入的值。生成所[支持的数据类型](../../../sql-reference/data-types/index.md)(数据)的[表达式](../../../sql-reference/syntax.md#syntax-expressions)。 - `pos` — 指定元素 `x` 将被插入的位置。 数组中的索引编号从零开始。 [UInt32](../../../sql-reference/data-types/int-uint.md#uint-ranges). - `default_x` — 在空位置替换的默认值。可选参数。生成 `x` 数据类型 (数据) 的[表达式](../../../sql-reference/syntax.md#syntax-expressions)。 如果 `default_x` 未定义,则 [默认值](../../../sql-reference/statements/create.md#create-default-values) 被使用。 - `size`— 结果数组的长度。可选参数。如果使用该参数,必须指定默认值 `default_x` 。 [UInt32](../../../sql-reference/data-types/int-uint.md#uint-ranges)。 diff --git a/docs/zh/sql-reference/aggregate-functions/reference/quantiletiming.md b/docs/zh/sql-reference/aggregate-functions/reference/quantiletiming.md index 5e14ce6a11c..af7ad77c717 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/quantiletiming.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/quantiletiming.md @@ -37,8 +37,9 @@ quantileTiming(level)(expr) 否则,计算结果将四舍五入到16毫秒的最接近倍数。 -!!! note "注" - 对于计算页面加载时间分位数, 此函数比[quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile)更有效和准确。 +:::note +对于计算页面加载时间分位数, 此函数比[quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile)更有效和准确。 +::: **返回值** @@ -46,8 +47,9 @@ quantileTiming(level)(expr) 类型: `Float32`。 -!!! 
note "注" - 如果没有值传递给函数(当使用 `quantileTimingIf`), [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf)被返回。 这样做的目的是将这些案例与导致零的案例区分开来。 参见 [ORDER BY clause](../../../sql-reference/statements/select/order-by.md#select-order-by) 对于 `NaN` 值排序注意事项。 +:::note +如果没有值传递给函数(当使用 `quantileTimingIf`), [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf)被返回。 这样做的目的是将这些案例与导致零的案例区分开来。 参见 [ORDER BY clause](../../../sql-reference/statements/select/order-by.md#select-order-by) 对于 `NaN` 值排序注意事项。 +::: **示例** diff --git a/docs/zh/sql-reference/aggregate-functions/reference/quantiletimingweighted.md b/docs/zh/sql-reference/aggregate-functions/reference/quantiletimingweighted.md index 2c28583343a..b520a0f96af 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/quantiletimingweighted.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/quantiletimingweighted.md @@ -39,8 +39,9 @@ quantileTimingWeighted(level)(expr, weight) 否则,计算结果将四舍五入到16毫秒的最接近倍数。 -!!! note "注" - 对于计算页面加载时间分位数, 此函数比[quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile)更有效和准确。 +:::note +对于计算页面加载时间分位数, 此函数比[quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile)更有效和准确。 +::: **返回值** @@ -48,8 +49,9 @@ quantileTimingWeighted(level)(expr, weight) 类型: `Float32`。 -!!! note "注" - 如果没有值传递给函数(当使用 `quantileTimingIf`), [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf)被返回。 这样做的目的是将这些案例与导致零的案例区分开来。 参见 [ORDER BY clause](../../../sql-reference/statements/select/order-by.md#select-order-by) 对于 `NaN` 值排序注意事项。 +:::note +如果没有值传递给函数(当使用 `quantileTimingIf`), [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf)被返回。 这样做的目的是将这些案例与导致零的案例区分开来。 参见 [ORDER BY clause](../../../sql-reference/statements/select/order-by.md#select-order-by) 对于 `NaN` 值排序注意事项。 +::: **示例** diff --git a/docs/zh/sql-reference/aggregate-functions/reference/stddevpop.md b/docs/zh/sql-reference/aggregate-functions/reference/stddevpop.md index ea82e21e46f..a113084cdee 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/stddevpop.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/stddevpop.md @@ -7,5 +7,6 @@ sidebar_position: 30 结果等于 [varPop](../../../sql-reference/aggregate-functions/reference/varpop.md)的平方根。 -!!! note "注" - 该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `stddevPopStable` 函数。 它的工作速度较慢,但提供较低的计算错误。 +:::note +该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `stddevPopStable` 函数。 它的工作速度较慢,但提供较低的计算错误。 +::: diff --git a/docs/zh/sql-reference/aggregate-functions/reference/stddevsamp.md b/docs/zh/sql-reference/aggregate-functions/reference/stddevsamp.md index efeafb71072..d242f4e3401 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/stddevsamp.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/stddevsamp.md @@ -7,5 +7,6 @@ sidebar_position: 31 结果等于 [varSamp] (../../../sql-reference/aggregate-functions/reference/varsamp.md)的平方根。 -!!! 
note "注" - 该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `stddevSampStable` 函数。 它的工作速度较慢,但提供较低的计算错误。 +:::note +该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `stddevSampStable` 函数。 它的工作速度较慢,但提供较低的计算错误。 +::: \ No newline at end of file diff --git a/docs/zh/sql-reference/aggregate-functions/reference/uniqcombined.md b/docs/zh/sql-reference/aggregate-functions/reference/uniqcombined.md index edc790ec00f..7b4a78d662e 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/uniqcombined.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/uniqcombined.md @@ -36,8 +36,9 @@ uniqCombined(HLL_precision)(x[, ...]) - 确定性地提供结果(它不依赖于查询处理顺序)。 -!!! note "注" - 由于它对非 `String` 类型使用32位哈希,对于基数显著大于`UINT_MAX` ,结果将有非常高的误差(误差将在几百亿不同值之后迅速提高), 因此这种情况,你应该使用 [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) +:::note +由于它对非 `String` 类型使用32位哈希,对于基数显著大于`UINT_MAX` ,结果将有非常高的误差(误差将在几百亿不同值之后迅速提高), 因此这种情况,你应该使用 [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) +::: 相比于 [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) 函数, 该 `uniqCombined`: diff --git a/docs/zh/sql-reference/aggregate-functions/reference/varpop.md b/docs/zh/sql-reference/aggregate-functions/reference/varpop.md index eb17955210b..6d6b0acc615 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/varpop.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/varpop.md @@ -9,5 +9,6 @@ sidebar_position: 32 换句话说,计算一组数据的离差。 返回 `Float64`。 -!!! note "注" - 该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `varPopStable` 函数。 它的工作速度较慢,但提供较低的计算错误。 +:::note +该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `varPopStable` 函数。 它的工作速度较慢,但提供较低的计算错误。 +::: diff --git a/docs/zh/sql-reference/aggregate-functions/reference/varsamp.md b/docs/zh/sql-reference/aggregate-functions/reference/varsamp.md index 9b9d0ced92d..508e35445a7 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/varsamp.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/varsamp.md @@ -11,5 +11,6 @@ sidebar_position: 33 返回 `Float64`。 当 `n <= 1`,返回 `+∞`。 -!!! note "注" - 该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `varSampStable` 函数。 它的工作速度较慢,但提供较低的计算错误。 +:::note +该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `varSampStable` 函数。 它的工作速度较慢,但提供较低的计算错误。 +::: diff --git a/docs/zh/sql-reference/ansi.md b/docs/zh/sql-reference/ansi.md index cdccee0084f..74f13256eba 100644 --- a/docs/zh/sql-reference/ansi.md +++ b/docs/zh/sql-reference/ansi.md @@ -6,8 +6,9 @@ sidebar_label: "ANSI\u517C\u5BB9\u6027" # ClickHouse SQL方言 与ANSI SQL的兼容性{#ansi-sql-compatibility-of-clickhouse-sql-dialect} -!!! note "注" - 本文参考Annex G所著的[ISO/IEC CD 9075-2:2011](https://www.iso.org/obp/ui/#iso:std:iso-iec:9075:-2:ed-4:v1:en:sec:8)标准. +:::note +本文参考Annex G所著的[ISO/IEC CD 9075-2:2011](https://www.iso.org/obp/ui/#iso:std:iso-iec:9075:-2:ed-4:v1:en:sec:8)标准. 
+::: ## 行为差异 {#differences-in-behaviour} diff --git a/docs/zh/sql-reference/data-types/simpleaggregatefunction.md b/docs/zh/sql-reference/data-types/simpleaggregatefunction.md index b26994a775e..601cb602a78 100644 --- a/docs/zh/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/zh/sql-reference/data-types/simpleaggregatefunction.md @@ -25,10 +25,10 @@ slug: /zh/sql-reference/data-types/simpleaggregatefunction - [`argMax`](../../sql-reference/aggregate-functions/reference/argmax.md) -!!! note "注" - `SimpleAggregateFunction(func, Type)` 的值外观和存储方式于 `Type` 相同, 所以你不需要应用带有 `-Merge`/`-State` 后缀的函数。 - - `SimpleAggregateFunction` 的性能优于具有相同聚合函数的 `AggregateFunction` 。 +:::note +`SimpleAggregateFunction(func, Type)` 的值外观和存储方式于 `Type` 相同, 所以你不需要应用带有 `-Merge`/`-State` 后缀的函数。 +`SimpleAggregateFunction` 的性能优于具有相同聚合函数的 `AggregateFunction` 。 +::: **参数** diff --git a/docs/zh/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.mdx b/docs/zh/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md similarity index 59% rename from docs/zh/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.mdx rename to docs/zh/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md index 0c924feda73..fe70d29f8da 100644 --- a/docs/zh/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.mdx +++ b/docs/zh/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md @@ -5,6 +5,4 @@ sidebar_label: Polygon Dictionaries With Grids title: "Polygon dictionaries" --- -import Content from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md'; - - +View the [english Dictionaries doc page for details](../../../../en/sql-reference/dictionaries/index.md). diff --git a/docs/zh/sql-reference/functions/geo/index.mdx b/docs/zh/sql-reference/functions/geo/index.mdx deleted file mode 100644 index fcfc4bd4717..00000000000 --- a/docs/zh/sql-reference/functions/geo/index.mdx +++ /dev/null @@ -1,10 +0,0 @@ ---- -slug: /zh/sql-reference/functions/geo/ -sidebar_label: Geo -sidebar_position: 62 -title: "Geo Functions" ---- - -import Content from '@site/docs/en/sql-reference/functions/geo/index.md'; - - diff --git a/docs/zh/sql-reference/functions/string-functions.md b/docs/zh/sql-reference/functions/string-functions.md index 89d64f76074..d1914839d7c 100644 --- a/docs/zh/sql-reference/functions/string-functions.md +++ b/docs/zh/sql-reference/functions/string-functions.md @@ -168,3 +168,15 @@ SELECT format('{} {}', 'Hello', 'World') ## trimBoth(s) {#trimboths} 返回一个字符串,用于删除任一侧的空白字符。 + +## soundex(s) + +返回一个字符串的soundex值。输出类型是FixedString,示例如下: + +``` sql +select soundex('aksql'); + +┌─soundex('aksel')─┐ +│ A240 │ +└──────────────────┘ +``` diff --git a/docs/zh/sql-reference/functions/string-search-functions.md b/docs/zh/sql-reference/functions/string-search-functions.md index 756ac7c16c7..e4167127424 100644 --- a/docs/zh/sql-reference/functions/string-search-functions.md +++ b/docs/zh/sql-reference/functions/string-search-functions.md @@ -42,8 +42,9 @@ slug: /zh/sql-reference/functions/string-search-functions 对于不区分大小写的搜索或/和UTF-8格式,使用函数`multiSearchAnyCaseInsensitive,multiSearchAnyUTF8,multiSearchAnyCaseInsensitiveUTF8`。 -!!! 
note "注意" - 在所有`multiSearch*`函数中,由于实现规范,needles的数量应小于28。 +:::note +在所有`multiSearch*`函数中,由于实现规范,needles的数量应小于28。 +::: ## 匹配(大海捞针,模式) {#matchhaystack-pattern} @@ -60,8 +61,9 @@ slug: /zh/sql-reference/functions/string-search-functions 与`match`相同,但如果所有正则表达式都不匹配,则返回0;如果任何模式匹配,则返回1。它使用[超扫描](https://github.com/intel/hyperscan)库。对于在字符串中搜索子字符串的模式,最好使用«multisearchany»,因为它更高效。 -!!! note "注意" - 任何`haystack`字符串的长度必须小于232\字节,否则抛出异常。这种限制是因为hyperscan API而产生的。 +:::note +任何`haystack`字符串的长度必须小于232\字节,否则抛出异常。这种限制是因为hyperscan API而产生的。 +::: ## multiMatchAnyIndex(大海捞针,\[模式1,模式2, …, patternn\]) {#multimatchanyindexhaystack-pattern1-pattern2-patternn} @@ -75,11 +77,13 @@ slug: /zh/sql-reference/functions/string-search-functions 与`multiFuzzyMatchAny`相同,但返回匹配项的匹配能容的索引位置。 -!!! note "注意" - `multiFuzzyMatch*`函数不支持UTF-8正则表达式,由于hyperscan限制,这些表达式被按字节解析。 +:::note +`multiFuzzyMatch*`函数不支持UTF-8正则表达式,由于hyperscan限制,这些表达式被按字节解析。 +::: -!!! note "注意" - 如要关闭所有hyperscan函数的使用,请设置`SET allow_hyperscan = 0;`。 +:::note +如要关闭所有hyperscan函数的使用,请设置`SET allow_hyperscan = 0;`。 +::: ## 提取(大海捞针,图案) {#extracthaystack-pattern} @@ -119,5 +123,6 @@ slug: /zh/sql-reference/functions/string-search-functions 对于不区分大小写的搜索或/和UTF-8格式,使用函数`ngramSearchCaseInsensitive,ngramSearchUTF8,ngramSearchCaseInsensitiveUTF8`。 -!!! note "注意" - 对于UTF-8,我们使用3-gram。所有这些都不是完全公平的n-gram距离。我们使用2字节哈希来散列n-gram,然后计算这些哈希表之间的(非)对称差异 - 可能会发生冲突。对于UTF-8不区分大小写的格式,我们不使用公平的`tolower`函数 - 我们将每个Unicode字符字节的第5位(从零开始)和字节的第一位归零 - 这适用于拉丁语,主要用于所有西里尔字母。 +:::note +对于UTF-8,我们使用3-gram。所有这些都不是完全公平的n-gram距离。我们使用2字节哈希来散列n-gram,然后计算这些哈希表之间的(非)对称差异 - 可能会发生冲突。对于UTF-8不区分大小写的格式,我们不使用公平的`tolower`函数 - 我们将每个Unicode字符字节的第5位(从零开始)和字节的第一位归零 - 这适用于拉丁语,主要用于所有西里尔字母。 +::: diff --git a/docs/zh/sql-reference/statements/alter/delete.md b/docs/zh/sql-reference/statements/alter/delete.md index 85d3d3077a7..5eb77c35a93 100644 --- a/docs/zh/sql-reference/statements/alter/delete.md +++ b/docs/zh/sql-reference/statements/alter/delete.md @@ -12,8 +12,9 @@ ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr 删除匹配指定过滤表达式的数据。实现为[突变](../../../sql-reference/statements/alter/index.md#mutations). -!!! note "备注" - `ALTER TABLE`前缀使得这个语法不同于大多数其他支持SQL的系统。它的目的是表示,与OLTP数据库中的类似查询不同,这是一个不为经常使用而设计的繁重操作。 +:::note +`ALTER TABLE`前缀使得这个语法不同于大多数其他支持SQL的系统。它的目的是表示,与OLTP数据库中的类似查询不同,这是一个不为经常使用而设计的繁重操作。 +::: `filter_expr` 的类型必须是`UInt8`。该查询删除表中该表达式接受非零值的行。 diff --git a/docs/zh/sql-reference/statements/alter/index.md b/docs/zh/sql-reference/statements/alter/index.md index b0f0fc21cbe..e173837a16c 100644 --- a/docs/zh/sql-reference/statements/alter/index.md +++ b/docs/zh/sql-reference/statements/alter/index.md @@ -1,5 +1,5 @@ --- -slug: /zh/sql-reference/statements/alter/ +slug: /zh/sql-reference/statements/alter/overview sidebar_position: 35 sidebar_label: ALTER --- @@ -17,8 +17,9 @@ sidebar_label: ALTER - [CONSTRAINT](../../../sql-reference/statements/alter/constraint.md) - [TTL](../../../sql-reference/statements/alter/ttl.md) -!!! 
note "备注" - 大多数 `ALTER TABLE` 查询只支持[\*MergeTree](../../../engines/table-engines/mergetree-family/index.md)表,以及[Merge](../../../engines/table-engines/special/merge.md)和[Distributed](../../../engines/table-engines/special/distributed.md)。 +:::note +大多数 `ALTER TABLE` 查询只支持[\*MergeTree](../../../engines/table-engines/mergetree-family/index.md)表,以及[Merge](../../../engines/table-engines/special/merge.md)和[Distributed](../../../engines/table-engines/special/distributed.md)。 +::: 这些 `ALTER` 语句操作视图: diff --git a/docs/zh/sql-reference/statements/alter/order-by.md b/docs/zh/sql-reference/statements/alter/order-by.md index e70a8b59c85..e50c4e6e805 100644 --- a/docs/zh/sql-reference/statements/alter/order-by.md +++ b/docs/zh/sql-reference/statements/alter/order-by.md @@ -14,5 +14,6 @@ ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY ORDER BY new_expression 从某种意义上说,该命令是轻量级的,它只更改元数据。要保持数据部分行按排序键表达式排序的属性,您不能向排序键添加包含现有列的表达式(仅在相同的`ALTER`查询中由`ADD COLUMN`命令添加的列,没有默认的列值)。 -!!! note "备注" - 它只适用于[`MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md)表族(包括[replicated](../../../engines/table-engines/mergetree-family/replication.md)表)。 +:::note +它只适用于[`MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md)表族(包括[replicated](../../../engines/table-engines/mergetree-family/replication.md)表)。 +::: diff --git a/docs/zh/sql-reference/statements/alter/setting.md b/docs/zh/sql-reference/statements/alter/setting.md index e2d597554e7..2e1e97db331 100644 --- a/docs/zh/sql-reference/statements/alter/setting.md +++ b/docs/zh/sql-reference/statements/alter/setting.md @@ -14,8 +14,9 @@ sidebar_label: SETTING ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY|RESET SETTING ... ``` -!!! note "注意" - 这些查询只能应用于 [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) 表。 +:::note +这些查询只能应用于 [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) 表。 +::: ## 修改设置 {#alter_modify_setting} diff --git a/docs/zh/sql-reference/statements/alter/update.md b/docs/zh/sql-reference/statements/alter/update.md index 522eb0a705b..97b2b43d889 100644 --- a/docs/zh/sql-reference/statements/alter/update.md +++ b/docs/zh/sql-reference/statements/alter/update.md @@ -12,8 +12,9 @@ ALTER TABLE [db.]table UPDATE column1 = expr1 [, ...] WHERE filter_expr 操作与指定过滤表达式相匹配的数据。作为一个[变更 mutation](../../../sql-reference/statements/alter/index.md#mutations)来实现. -!!! note "Note" - `ALTER TABLE` 的前缀使这个语法与其他大多数支持SQL的系统不同。它的目的是表明,与OLTP数据库中的类似查询不同,这是一个繁重的操作,不是为频繁使用而设计。 +:::note +`ALTER TABLE` 的前缀使这个语法与其他大多数支持SQL的系统不同。它的目的是表明,与OLTP数据库中的类似查询不同,这是一个繁重的操作,不是为频繁使用而设计。 +::: `filter_expr`必须是`UInt8`类型。这个查询将指定列的值更新为行中相应表达式的值,对于这些行,`filter_expr`取值为非零。使用`CAST`操作符将数值映射到列的类型上。不支持更新用于计算主键或分区键的列。 diff --git a/docs/zh/sql-reference/statements/create/database.md b/docs/zh/sql-reference/statements/create/database.md index 2c6e53c0f06..3e5b71fb196 100644 --- a/docs/zh/sql-reference/statements/create/database.md +++ b/docs/zh/sql-reference/statements/create/database.md @@ -27,4 +27,4 @@ ClickHouse在指定集群的所有服务器上创建`db_name`数据库。 更多 ### ENGINE {#engine} -[MySQL](../../../engines/database-engines/mysql.md) 允许您从远程MySQL服务器检索数据. 默认情况下,ClickHouse使用自己的[database engine](../../../engines/database-engines/index.md). 还有一个[lazy](../../../engines/database-engines/lazy)引擎. +[MySQL](../../../engines/database-engines/mysql.md) 允许您从远程MySQL服务器检索数据. 默认情况下,ClickHouse使用自己的[database engine](../../../engines/database-engines/index.md). 还有一个[lazy](../../../engines/database-engines/lazy.md)引擎. 
diff --git a/docs/zh/sql-reference/statements/create/index.md b/docs/zh/sql-reference/statements/create/index.md deleted file mode 100644 index f63ed0a7acd..00000000000 --- a/docs/zh/sql-reference/statements/create/index.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -slug: /zh/sql-reference/statements/create/ -sidebar_label: CREATE -sidebar_position: 34 ---- - -# CREATE语法 {#create-queries} - -CREATE语法包含以下子集: - -- [DATABASE](../../../sql-reference/statements/create/database.md) diff --git a/docs/zh/sql-reference/statements/create/view.md b/docs/zh/sql-reference/statements/create/view.md index a000c69f1ef..8ce2d20a10c 100644 --- a/docs/zh/sql-reference/statements/create/view.md +++ b/docs/zh/sql-reference/statements/create/view.md @@ -63,7 +63,7 @@ ClickHouse 中的物化视图更像是插入触发器。 如果视图查询中 视图看起来与普通表相同。 例如,它们列在`SHOW TABLES`查询的结果中。 -删除视图,使用[DROP VIEW](../../../sql-reference/statements/drop#drop-view). `DROP TABLE`也适用于视图。 +删除视图,使用[DROP VIEW](../../../sql-reference/statements/drop.md#drop-view). `DROP TABLE`也适用于视图。 ## Live View (实验性) {#live-view} diff --git a/docs/zh/sql-reference/statements/exchange.md b/docs/zh/sql-reference/statements/exchange.md index e6ac1dbf1dc..47cefa0d2e6 100644 --- a/docs/zh/sql-reference/statements/exchange.md +++ b/docs/zh/sql-reference/statements/exchange.md @@ -9,8 +9,9 @@ sidebar_label: EXCHANGE 以原子方式交换两个表或字典的名称。 此任务也可以通过使用[RENAME](./rename.md)来完成,但在这种情况下操作不是原子的。 -!!! note "注意" +:::note `EXCHANGE`仅支持[Atomic](../../engines/database-engines/atomic.md)数据库引擎. +::: **语法** diff --git a/docs/zh/sql-reference/statements/grant.md b/docs/zh/sql-reference/statements/grant.md index 12ad2e0fe25..7e7cdbff350 100644 --- a/docs/zh/sql-reference/statements/grant.md +++ b/docs/zh/sql-reference/statements/grant.md @@ -55,7 +55,7 @@ GRANT SELECT(x,y) ON db.table TO john WITH GRANT OPTION 同样 `john` 有权执行 `GRANT OPTION`,因此他能给其它账号进行和自己账号权限范围相同的授权。 -可以使用`*` 号代替表或库名进行授权操作。例如, `GRANT SELECT ONdb.* TO john` 操作运行 `john`对 `db`库的所有表执行 `SELECT`查询。同样,你可以忽略库名。在这种情形下,权限将指向当前的数据库。例如, `GRANT SELECT ON* to john` 对当前数据库的所有表指定授权, `GARNT SELECT ON mytable to john`对当前数据库的 `mytable`表进行授权。 +可以使用`*` 号代替表或库名进行授权操作。例如, `GRANT SELECT ONdb.* TO john` 操作运行 `john`对 `db`库的所有表执行 `SELECT`查询。同样,你可以忽略库名。在这种情形下,权限将指向当前的数据库。例如, `GRANT SELECT ON* to john` 对当前数据库的所有表指定授权, `GRANT SELECT ON mytable to john`对当前数据库的 `mytable`表进行授权。 访问 `systen`数据库总是被允许的(因为这个数据库用来处理sql操作) 可以一次给多个账号进行多种授权操作。 `GRANT SELECT,INSERT ON *.* TO john,robin` 允许 `john`和`robin` 账号对任意数据库的任意表执行 `INSERT`和 `SELECT`操作。 diff --git a/docs/zh/sql-reference/statements/index.md b/docs/zh/sql-reference/statements/index.md index cf51dadc8f1..2fdfeb1786f 100644 --- a/docs/zh/sql-reference/statements/index.md +++ b/docs/zh/sql-reference/statements/index.md @@ -10,7 +10,7 @@ sidebar_position: 31 - [SELECT](../../sql-reference/statements/select/index.md) - [INSERT INTO](../../sql-reference/statements/insert-into.md) -- [CREATE](../../sql-reference/statements/create/index.md) +- [CREATE](../../sql-reference/statements/create.md) - [ALTER](../../sql-reference/statements/alter/index.md) - [SYSTEM](../../sql-reference/statements/system.md) - [SHOW](../../sql-reference/statements/show.md) @@ -20,7 +20,7 @@ sidebar_position: 31 - [CHECK TABLE](../../sql-reference/statements/check-table.mdx) - [DESCRIBE TABLE](../../sql-reference/statements/describe-table.mdx) - [DETACH](../../sql-reference/statements/detach.mdx) -- [DROP](../../sql-reference/statements/drop) +- [DROP](../../sql-reference/statements/drop.md) - [EXISTS](../../sql-reference/statements/exists.md) - 
[KILL](../../sql-reference/statements/kill.mdx) - [OPTIMIZE](../../sql-reference/statements/optimize.mdx) diff --git a/docs/zh/sql-reference/statements/rename.md b/docs/zh/sql-reference/statements/rename.md index c26dce306cc..156306fbd3e 100644 --- a/docs/zh/sql-reference/statements/rename.md +++ b/docs/zh/sql-reference/statements/rename.md @@ -9,8 +9,9 @@ sidebar_label: RENAME 重命名数据库、表或字典。 可以在单个查询中重命名多个实体。 请注意,具有多个实体的`RENAME`查询是非原子操作。 要以原子方式交换实体名称,请使用[EXCHANGE](./exchange.md)语法. -!!! note "注意" +:::note `RENAME`仅支持[Atomic](../../engines/database-engines/atomic.md)数据库引擎. +::: **语法** diff --git a/docs/zh/sql-reference/statements/select/array-join.md b/docs/zh/sql-reference/statements/select/array-join.md index b0352a7bb0a..4162a39f399 100644 --- a/docs/zh/sql-reference/statements/select/array-join.md +++ b/docs/zh/sql-reference/statements/select/array-join.md @@ -146,7 +146,7 @@ ARRAY JOIN arr AS a, arrayEnumerate(arr) AS num, arrayMap(x -> x + 1, arr) AS ma └───────┴─────────┴───┴─────┴────────┘ ``` -下面的例子使用 [arrayEnumerate](../../../sql-reference/functions/array-functions#array_functions-arrayenumerate) 功能: +下面的例子使用 [arrayEnumerate](../../../sql-reference/functions/array-functions.md#array_functions-arrayenumerate) 功能: ``` sql SELECT s, arr, a, num, arrayEnumerate(arr) @@ -259,7 +259,7 @@ ARRAY JOIN nest AS n; └───────┴─────┴─────┴─────────┴────────────┘ ``` -使用功能 [arrayEnumerate](../../../sql-reference/functions/array-functions#array_functions-arrayenumerate) 的例子: +使用功能 [arrayEnumerate](../../../sql-reference/functions/array-functions.md#array_functions-arrayenumerate) 的例子: ``` sql SELECT s, `n.x`, `n.y`, `nest.x`, `nest.y`, num diff --git a/docs/zh/sql-reference/statements/select/group-by.md b/docs/zh/sql-reference/statements/select/group-by.md index 31c1649bc30..86511470538 100644 --- a/docs/zh/sql-reference/statements/select/group-by.md +++ b/docs/zh/sql-reference/statements/select/group-by.md @@ -8,11 +8,12 @@ sidebar_label: GROUP BY `GROUP BY` 子句将 `SELECT` 查询结果转换为聚合模式,其工作原理如下: - `GROUP BY` 子句包含表达式列表(或单个表达式 -- 可以认为是长度为1的列表)。 这份名单充当 “grouping key”,而每个单独的表达式将被称为 “key expressions”. -- 在所有的表达式在 [SELECT](../../../sql-reference/statements/select/index.md), [HAVING](../../../sql-reference/statements/select/having),和 [ORDER BY](../../../sql-reference/statements/select/order-by.md) 子句中 **必须** 基于键表达式进行计算 **或** 上 [聚合函数](../../../sql-reference/aggregate-functions/index.md) 在非键表达式(包括纯列)上。 换句话说,从表中选择的每个列必须用于键表达式或聚合函数内,但不能同时使用。 +- 在所有的表达式在 [SELECT](../../../sql-reference/statements/select/index.md), [HAVING](../../../sql-reference/statements/select/having.md),和 [ORDER BY](../../../sql-reference/statements/select/order-by.md) 子句中 **必须** 基于键表达式进行计算 **或** 上 [聚合函数](../../../sql-reference/aggregate-functions/index.md) 在非键表达式(包括纯列)上。 换句话说,从表中选择的每个列必须用于键表达式或聚合函数内,但不能同时使用。 - 聚合结果 `SELECT` 查询将包含尽可能多的行,因为有唯一值 “grouping key” 在源表中。 通常这会显着减少行数,通常是数量级,但不一定:如果所有行数保持不变 “grouping key” 值是不同的。 -!!! 
note "注" - 还有一种额外的方法可以在表上运行聚合。 如果查询仅在聚合函数中包含表列,则 `GROUP BY` 可以省略,并且通过一个空的键集合来假定聚合。 这样的查询总是只返回一行。 +:::note +还有一种额外的方法可以在表上运行聚合。 如果查询仅在聚合函数中包含表列,则 `GROUP BY` 可以省略,并且通过一个空的键集合来假定聚合。 这样的查询总是只返回一行。 +::: ## 空处理 {#null-processing} @@ -57,7 +58,7 @@ sidebar_label: GROUP BY - 在 `Pretty*` 格式时,该行在主结果之后作为单独的表输出。 - 在其他格式中,它不可用。 -`WITH TOTALS` 可以以不同的方式运行时 [HAVING](../../../sql-reference/statements/select/having) 是存在的。 该行为取决于 `totals_mode` 设置。 +`WITH TOTALS` 可以以不同的方式运行时 [HAVING](../../../sql-reference/statements/select/having.md) 是存在的。 该行为取决于 `totals_mode` 设置。 ### 配置总和处理 {#configuring-totals-processing} diff --git a/docs/zh/sql-reference/statements/select/index.md b/docs/zh/sql-reference/statements/select/index.md index 2d4044cbd20..fdf196e198b 100644 --- a/docs/zh/sql-reference/statements/select/index.md +++ b/docs/zh/sql-reference/statements/select/index.md @@ -41,7 +41,7 @@ SELECT [DISTINCT] expr_list - [WHERE 子句](../../../sql-reference/statements/select/where.md) - [GROUP BY 子句](../../../sql-reference/statements/select/group-by.md) - [LIMIT BY 子句](../../../sql-reference/statements/select/limit-by.md) -- [HAVING 子句](../../../sql-reference/statements/select/having) +- [HAVING 子句](../../../sql-reference/statements/select/having.md) - [SELECT 子句](#select-clause) - [DISTINCT 子句](../../../sql-reference/statements/select/distinct.md) - [LIMIT 子句](../../../sql-reference/statements/select/limit.md) diff --git a/docs/zh/sql-reference/statements/select/join.md b/docs/zh/sql-reference/statements/select/join.md index 08290a02de5..a2686aa5e53 100644 --- a/docs/zh/sql-reference/statements/select/join.md +++ b/docs/zh/sql-reference/statements/select/join.md @@ -39,8 +39,9 @@ ClickHouse中提供的其他联接类型: ## 严格 {#join-settings} -!!! note "注" - 可以使用以下方式复盖默认的严格性值 [join_default_strictness](../../../operations/settings/settings.md#settings-join_default_strictness) 设置。 +:::note +可以使用以下方式复盖默认的严格性值 [join_default_strictness](../../../operations/settings/settings.md#settings-join_default_strictness) 设置。 +::: Also the behavior of ClickHouse server for `ANY JOIN` operations depends on the [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys) setting. @@ -91,8 +92,9 @@ USING (equi_column1, ... equi_columnN, asof_column) `ASOF JOIN`会从 `table_2` 中的用户事件时间戳找出和 `table_1` 中用户事件时间戳中最近的一个时间戳,来满足最接近匹配的条件。如果有得话,则相等的时间戳值是最接近的值。在此例中,`user_id` 列可用于条件匹配,`ev_time` 列可用于最接近匹配。在此例中,`event_1_1` 可以 JOIN `event_2_1`,`event_1_2` 可以JOIN `event_2_3`,但是 `event_2_2` 不能被JOIN。 -!!! note "注" - `ASOF JOIN`在 [JOIN](../../../engines/table-engines/special/join.md) 表引擎中 **不受** 支持。 +:::note +`ASOF JOIN`在 [JOIN](../../../engines/table-engines/special/join.md) 表引擎中 **不受** 支持。 +::: ## 分布式联接 {#global-join} diff --git a/docs/zh/sql-reference/statements/select/limit-by.md b/docs/zh/sql-reference/statements/select/limit-by.md index 50e3505b7fb..68b88bf8d7a 100644 --- a/docs/zh/sql-reference/statements/select/limit-by.md +++ b/docs/zh/sql-reference/statements/select/limit-by.md @@ -14,8 +14,9 @@ ClickHouse支持以下语法变体: 处理查询时,ClickHouse首先选择经由排序键排序过后的数据。排序键可以显式地使用[ORDER BY](order-by.md#select-order-by)从句指定,或隐式地使用表引擎使用的排序键(数据的顺序仅在使用[ORDER BY](order-by.md#select-order-by)时才可以保证,否则由于多线程处理,数据顺序会随机化)。然后ClickHouse执行`LIMIT n BY expressions`从句,将每一行按 `expressions` 的值进行分组,并对每一分组返回前`n`行。如果指定了`OFFSET`,那么对于每一分组,ClickHouse会跳过前`offset_value`行,接着返回前`n`行。如果`offset_value`大于某一分组的行数,ClickHouse会从分组返回0行。 -!!! 
note "注" - `LIMIT BY`与[LIMIT](../../../sql-reference/statements/select/limit.md)没有关系。它们可以在同一个查询中使用。 +:::note +`LIMIT BY`与[LIMIT](../../../sql-reference/statements/select/limit.md)没有关系。它们可以在同一个查询中使用。 +::: ## 例 {#examples} diff --git a/docs/zh/sql-reference/statements/select/sample.md b/docs/zh/sql-reference/statements/select/sample.md index f701bd3b805..0993958b029 100644 --- a/docs/zh/sql-reference/statements/select/sample.md +++ b/docs/zh/sql-reference/statements/select/sample.md @@ -15,8 +15,9 @@ sidebar_label: SAMPLE - 当您的原始数据不准确时,所以近似不会明显降低质量。 - 业务需求的目标是近似结果(为了成本效益,或者向高级用户推销确切结果)。 -!!! note "注" - 您只能使用采样中的表 [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) 族,并且只有在表创建过程中指定了采样表达式(请参阅 [MergeTree引擎](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table)). +:::note +您只能使用采样中的表 [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) 族,并且只有在表创建过程中指定了采样表达式(请参阅 [MergeTree引擎](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table)). +::: 下面列出了数据采样的功能: diff --git a/docs/zh/sql-reference/statements/select/where.md b/docs/zh/sql-reference/statements/select/where.md index fe165e8ad16..6c7183840c7 100644 --- a/docs/zh/sql-reference/statements/select/where.md +++ b/docs/zh/sql-reference/statements/select/where.md @@ -11,9 +11,10 @@ sidebar_label: WHERE 如果基础表引擎支持,`WHERE`表达式会使用索引和分区进行剪枝。 -!!! note "注" - 有一个叫做过滤优化 [prewhere](../../../sql-reference/statements/select/prewhere.md) 的东西. - +:::note +有一个叫做过滤优化 [prewhere](../../../sql-reference/statements/select/prewhere.md) 的东西. +::: + 如果需要测试一个 [NULL](../../../sql-reference/syntax.md#null-literal) 值,请使用 [IS NULL](../../operators/index.md#operator-is-null) and [IS NOT NULL](../../operators/index.md#is-not-null) 运算符或 [isNull](../../../sql-reference/functions/functions-for-nulls.md#isnull) 和 [isNotNull](../../../sql-reference/functions/functions-for-nulls.md#isnotnull) 函数。否则带有 NULL 的表达式永远不会通过。 **示例** diff --git a/docs/zh/sql-reference/statements/system.md b/docs/zh/sql-reference/statements/system.md index d8d60c28af5..8fd2dd74d26 100644 --- a/docs/zh/sql-reference/statements/system.md +++ b/docs/zh/sql-reference/statements/system.md @@ -124,10 +124,9 @@ ClickHouse可以管理 [MergeTree](../../engines/table-engines/mergetree-family/ SYSTEM STOP MERGES [[db.]merge_tree_family_table_name] ``` - -!!! note "Note" - `DETACH / ATTACH` 表操作会在后台进行表的merge操作,甚至当所有MergeTree表的合并操作已经停止的情况下。 - +:::note +`DETACH / ATTACH` 表操作会在后台进行表的merge操作,甚至当所有MergeTree表的合并操作已经停止的情况下。 +::: ### START MERGES {#query_language-system-start-merges} @@ -241,7 +240,7 @@ SYSTEM START REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name] ``` sql -SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name +SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT | PULL] ``` ### RESTART REPLICA {#query_language-system-restart-replica} diff --git a/docs/zh/sql-reference/table-functions/mysql.md b/docs/zh/sql-reference/table-functions/mysql.md index 6c9753b9b12..4efee2e616b 100644 --- a/docs/zh/sql-reference/table-functions/mysql.md +++ b/docs/zh/sql-reference/table-functions/mysql.md @@ -49,8 +49,9 @@ SELECT name FROM mysql(`mysql1:3306|mysql2:3306|mysql3:3306`, 'mysql_database', 与原始MySQL表具有相同列的表对象。 -!!! 
note "注意" - 在`INSERT`查询中为了区分`mysql(...)`与带有列名列表的表名的表函数,你必须使用关键字`FUNCTION`或`TABLE FUNCTION`。查看如下示例。 +:::note +在`INSERT`查询中为了区分`mysql(...)`与带有列名列表的表名的表函数,你必须使用关键字`FUNCTION`或`TABLE FUNCTION`。查看如下示例。 +::: ## 用法示例 {#usage-example} diff --git a/docs/zh/sql-reference/table-functions/url.md b/docs/zh/sql-reference/table-functions/url.md index d3b7665d21b..c8ca9b775b2 100644 --- a/docs/zh/sql-reference/table-functions/url.md +++ b/docs/zh/sql-reference/table-functions/url.md @@ -41,3 +41,11 @@ CREATE TABLE test_table (column1 String, column2 UInt32) ENGINE=Memory; INSERT INTO FUNCTION url('http://127.0.0.1:8123/?query=INSERT+INTO+test_table+FORMAT+CSV', 'CSV', 'column1 String, column2 UInt32') VALUES ('http interface', 42); SELECT * FROM test_table; ``` +## 虚拟列 {#virtual-columns} + +- `_path` — `URL`路径。 +- `_file` — 资源名称。 + +**另请参阅** + +- [虚拟列](https://clickhouse.com/docs/en/operations/table_engines/#table_engines-virtual_columns) diff --git a/packages/clickhouse-keeper.service b/packages/clickhouse-keeper.service index 2809074c93a..e4ec5bf4ede 100644 --- a/packages/clickhouse-keeper.service +++ b/packages/clickhouse-keeper.service @@ -14,7 +14,8 @@ User=clickhouse Group=clickhouse Restart=always RestartSec=30 -RuntimeDirectory=%p # %p is resolved to the systemd unit name +# %p is resolved to the systemd unit name +RuntimeDirectory=%p ExecStart=/usr/bin/clickhouse-keeper --config=/etc/clickhouse-keeper/keeper_config.xml --pid-file=%t/%p/%p.pid # Minus means that this file is optional. EnvironmentFile=-/etc/default/%p diff --git a/packages/clickhouse-server.service b/packages/clickhouse-server.service index 090461df988..7742d8b278a 100644 --- a/packages/clickhouse-server.service +++ b/packages/clickhouse-server.service @@ -18,7 +18,7 @@ Group=clickhouse Restart=always RestartSec=30 # Since ClickHouse is systemd aware default 1m30sec may not be enough -TimeoutStartSec=infinity +TimeoutStartSec=0 # %p is resolved to the systemd unit name RuntimeDirectory=%p ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=%t/%p/%p.pid diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 5b97daf2998..47017a94cb5 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -400,10 +400,6 @@ endif () add_custom_target (clickhouse-bundle ALL DEPENDS ${CLICKHOUSE_BUNDLE}) -if (USE_GDB_ADD_INDEX) - add_custom_command(TARGET clickhouse POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} clickhouse COMMENT "Adding .gdb-index to clickhouse" VERBATIM) -endif() - if (USE_BINARY_HASH) add_custom_command(TARGET clickhouse POST_BUILD COMMAND ./clickhouse hash-binary > hash && ${OBJCOPY_PATH} --add-section .clickhouse.hash=hash clickhouse COMMENT "Adding section '.clickhouse.hash' to clickhouse binary" VERBATIM) endif() diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index 994f9b7ac4d..466a0c194f7 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -34,6 +34,7 @@ #include #include #include +#include #include @@ -43,6 +44,12 @@ namespace fs = std::filesystem; * The tool emulates a case with fixed amount of simultaneously executing queries. 
*/ +namespace CurrentMetrics +{ + extern const Metric LocalThread; + extern const Metric LocalThreadActive; +} + namespace DB { @@ -103,7 +110,7 @@ public: settings(settings_), shared_context(Context::createShared()), global_context(Context::createGlobal(shared_context.get())), - pool(concurrency) + pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, concurrency) { const auto secure = secure_ ? Protocol::Secure::Enable : Protocol::Secure::Disable; size_t connections_cnt = std::max(ports_.size(), hosts_.size()); diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 660b8d7c00a..5870327c3b5 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -277,11 +277,11 @@ void Client::initialize(Poco::Util::Application & self) */ const char * env_user = getenv("CLICKHOUSE_USER"); // NOLINT(concurrency-mt-unsafe) - if (env_user) + if (env_user && !config().has("user")) config().setString("user", env_user); const char * env_password = getenv("CLICKHOUSE_PASSWORD"); // NOLINT(concurrency-mt-unsafe) - if (env_password) + if (env_password && !config().has("password")) config().setString("password", env_password); parseConnectionsCredentials(); @@ -862,7 +862,8 @@ bool Client::processWithFuzzing(const String & full_query) const auto * tmp_pos = text_2.c_str(); const auto ast_3 = parseQuery(tmp_pos, tmp_pos + text_2.size(), false /* allow_multi_statements */); - const auto text_3 = ast_3->formatForErrorMessage(); + const auto text_3 = ast_3 ? ast_3->formatForErrorMessage() : ""; + if (text_3 != text_2) { fmt::print(stderr, "Found error: The query formatting is broken.\n"); @@ -877,7 +878,7 @@ bool Client::processWithFuzzing(const String & full_query) fmt::print(stderr, "Text-1 (AST-1 formatted):\n'{}'\n", query_to_execute); fmt::print(stderr, "AST-2 (Text-1 parsed):\n'{}'\n", ast_2->dumpTree()); fmt::print(stderr, "Text-2 (AST-2 formatted):\n'{}'\n", text_2); - fmt::print(stderr, "AST-3 (Text-2 parsed):\n'{}'\n", ast_3->dumpTree()); + fmt::print(stderr, "AST-3 (Text-2 parsed):\n'{}'\n", ast_3 ? ast_3->dumpTree() : ""); fmt::print(stderr, "Text-3 (AST-3 formatted):\n'{}'\n", text_3); fmt::print(stderr, "Text-3 must be equal to Text-2, but it is not.\n"); @@ -1180,7 +1181,7 @@ void Client::processOptions(const OptionsDescription & options_description, void Client::processConfig() { /// Batch mode is enabled if one of the following is true: - /// - -e (--query) command line option is present. + /// - -q (--query) command line option is present. /// The value of the option is used as the text of query (or of multiple queries). /// If stdin is not a terminal, INSERT data for the first query is read from it. /// - stdin is not a terminal. In this case queries are read from it. 
@@ -1380,6 +1381,13 @@ void Client::readArguments( allow_repeated_settings = true; else if (arg == "--allow_merge_tree_settings") allow_merge_tree_settings = true; + else if (arg == "--multiquery" && (arg_num + 1) < argc && !std::string_view(argv[arg_num + 1]).starts_with('-')) + { + /// Transform the abbreviated syntax '--multiquery ' into the full syntax '--multiquery -q ' + ++arg_num; + arg = argv[arg_num]; + addMultiquery(arg, common_arguments); + } else common_arguments.emplace_back(arg); } diff --git a/programs/compressor/Compressor.cpp b/programs/compressor/Compressor.cpp index b60138b5692..cc25747702a 100644 --- a/programs/compressor/Compressor.cpp +++ b/programs/compressor/Compressor.cpp @@ -66,6 +66,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv) using namespace DB; namespace po = boost::program_options; + bool print_stacktrace = false; try { po::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth()); @@ -84,6 +85,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv) ("level", po::value(), "compression level for codecs specified via flags") ("none", "use no compression instead of LZ4") ("stat", "print block statistics of compressed data") + ("stacktrace", "print stacktrace of exception") ; po::positional_options_description positional_desc; @@ -107,6 +109,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv) bool use_deflate_qpl = options.count("deflate_qpl"); bool stat_mode = options.count("stat"); bool use_none = options.count("none"); + print_stacktrace = options.count("stacktrace"); unsigned block_size = options["block-size"].as(); std::vector codecs; if (options.count("codec")) @@ -188,11 +191,12 @@ int mainEntryClickHouseCompressor(int argc, char ** argv) /// Compression CompressedWriteBuffer to(*wb, codec, block_size); copyData(*rb, to); + to.finalize(); } } catch (...) { - std::cerr << getCurrentExceptionMessage(true) << '\n'; + std::cerr << getCurrentExceptionMessage(print_stacktrace) << '\n'; return getCurrentExceptionCode(); } diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp index bc882719a08..efe7121cace 100644 --- a/programs/copier/ClusterCopier.cpp +++ b/programs/copier/ClusterCopier.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -19,6 +20,12 @@ #include #include +namespace CurrentMetrics +{ + extern const Metric LocalThread; + extern const Metric LocalThreadActive; +} + namespace DB { @@ -192,7 +199,7 @@ void ClusterCopier::discoverTablePartitions(const ConnectionTimeouts & timeouts, { /// Fetch partitions list from a shard { - ThreadPool thread_pool(num_threads ? num_threads : 2 * getNumberOfPhysicalCPUCores()); + ThreadPool thread_pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, num_threads ? 
num_threads : 2 * getNumberOfPhysicalCPUCores()); for (const TaskShardPtr & task_shard : task_table.all_shards) thread_pool.scheduleOrThrowOnError([this, timeouts, task_shard]() @@ -1757,8 +1764,7 @@ void ClusterCopier::dropParticularPartitionPieceFromAllHelpingTables(const TaskT LOG_INFO(log, "All helping tables dropped partition {}", partition_name); } -String ClusterCopier::getRemoteCreateTable( - const DatabaseAndTableName & table, Connection & connection, const Settings & settings) +String ClusterCopier::getRemoteCreateTable(const DatabaseAndTableName & table, Connection & connection, const Settings & settings) { auto remote_context = Context::createCopy(context); remote_context->setSettings(settings); @@ -1767,7 +1773,7 @@ String ClusterCopier::getRemoteCreateTable( QueryPipelineBuilder builder; builder.init(Pipe(std::make_shared( - std::make_shared(connection, query, InterpreterShowCreateQuery::getSampleBlock(), remote_context), false, false))); + std::make_shared(connection, query, InterpreterShowCreateQuery::getSampleBlock(), remote_context), false, false, /* async_query_sending= */ false))); Block block = getBlockWithAllStreamData(std::move(builder)); return typeid_cast(*block.safeGetByPosition(0).column).getDataAt(0).toString(); } @@ -1777,8 +1783,10 @@ ASTPtr ClusterCopier::getCreateTableForPullShard(const ConnectionTimeouts & time { /// Fetch and parse (possibly) new definition auto connection_entry = task_shard.info.pool->get(timeouts, &task_cluster->settings_pull, true); - String create_query_pull_str - = getRemoteCreateTable(task_shard.task_table.table_pull, *connection_entry, task_cluster->settings_pull); + String create_query_pull_str = getRemoteCreateTable( + task_shard.task_table.table_pull, + *connection_entry, + task_cluster->settings_pull); ParserCreateQuery parser_create_query; const auto & settings = getContext()->getSettingsRef(); @@ -1867,8 +1875,8 @@ std::set ClusterCopier::getShardPartitions(const ConnectionTimeouts & ti String query; { WriteBufferFromOwnString wb; - wb << "SELECT DISTINCT " << partition_name << " AS partition FROM" - << " " << getQuotedTable(task_shard.table_read_shard) << " ORDER BY partition DESC"; + wb << "SELECT " << partition_name << " AS partition FROM " + << getQuotedTable(task_shard.table_read_shard) << " GROUP BY partition ORDER BY partition DESC"; query = wb.str(); } @@ -2025,8 +2033,8 @@ UInt64 ClusterCopier::executeQueryOnCluster( /// For unknown reason global context is passed to IStorage::read() method /// So, task_identifier is passed as constructor argument. It is more obvious. 
auto remote_query_executor = std::make_shared( - *connections.back(), query, header, getContext(), - /*throttler=*/nullptr, Scalars(), Tables(), QueryProcessingStage::Complete); + *connections.back(), query, header, getContext(), + /*throttler=*/nullptr, Scalars(), Tables(), QueryProcessingStage::Complete); try { diff --git a/programs/copier/ClusterCopierApp.cpp b/programs/copier/ClusterCopierApp.cpp index b2994b90e23..822289dd89c 100644 --- a/programs/copier/ClusterCopierApp.cpp +++ b/programs/copier/ClusterCopierApp.cpp @@ -1,4 +1,5 @@ #include "ClusterCopierApp.h" +#include #include #include #include @@ -192,6 +193,8 @@ void ClusterCopierApp::mainImpl() if (!task_file.empty()) copier->uploadTaskDescription(task_path, task_file, config().getBool("task-upload-force", false)); + zkutil::validateZooKeeperConfig(config()); + copier->init(); copier->process(ConnectionTimeouts::getTCPTimeoutsWithoutFailover(context->getSettingsRef())); diff --git a/programs/copier/Internals.h b/programs/copier/Internals.h index b3c9936cd33..48f4b0fab09 100644 --- a/programs/copier/Internals.h +++ b/programs/copier/Internals.h @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/programs/copier/TaskTable.cpp b/programs/copier/TaskTable.cpp index 451a33a1c02..d055ceb4c7b 100644 --- a/programs/copier/TaskTable.cpp +++ b/programs/copier/TaskTable.cpp @@ -4,9 +4,11 @@ #include "TaskCluster.h" #include +#include #include + namespace DB { namespace ErrorCodes diff --git a/programs/copier/ZooKeeperStaff.h b/programs/copier/ZooKeeperStaff.h index 3d4a11186e3..36dcfa50842 100644 --- a/programs/copier/ZooKeeperStaff.h +++ b/programs/copier/ZooKeeperStaff.h @@ -175,7 +175,7 @@ public: Coordination::Stat stat{}; String _some_data; auto watch_callback = - [stale = stale] (const Coordination::WatchResponse & rsp) + [my_stale = stale] (const Coordination::WatchResponse & rsp) { auto logger = &Poco::Logger::get("ClusterCopier"); if (rsp.error == Coordination::Error::ZOK) @@ -184,11 +184,11 @@ public: { case Coordination::CREATED: LOG_DEBUG(logger, "CleanStateClock change: CREATED, at {}", rsp.path); - stale->store(true); + my_stale->store(true); break; case Coordination::CHANGED: LOG_DEBUG(logger, "CleanStateClock change: CHANGED, at {}", rsp.path); - stale->store(true); + my_stale->store(true); } } }; diff --git a/programs/diagnostics/go.mod b/programs/diagnostics/go.mod index 58487fced80..34c6b0037ae 100644 --- a/programs/diagnostics/go.mod +++ b/programs/diagnostics/go.mod @@ -33,6 +33,7 @@ require ( github.com/cenkalti/backoff/v4 v4.2.0 // indirect github.com/containerd/containerd v1.6.17 // indirect github.com/davecgh/go-spew v1.1.1 // indirect + github.com/distribution/distribution v2.8.2+incompatible // indirect github.com/docker/distribution v2.8.1+incompatible // indirect github.com/docker/docker v23.0.0+incompatible // indirect github.com/docker/go-units v0.5.0 // indirect diff --git a/programs/diagnostics/go.sum b/programs/diagnostics/go.sum index 71c3cbcd2d6..a95dfb4fd2b 100644 --- a/programs/diagnostics/go.sum +++ b/programs/diagnostics/go.sum @@ -126,6 +126,8 @@ github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxG github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/distribution/distribution v2.8.2+incompatible 
h1:k9+4DKdOG+quPFZXT/mUsiQrGu9vYCp+dXpuPkuqhk8= +github.com/distribution/distribution v2.8.2+incompatible/go.mod h1:EgLm2NgWtdKgzF9NpMzUKgzmR7AMmb0VQi2B+ZzDRjc= github.com/docker/distribution v2.8.1+incompatible h1:Q50tZOPR6T/hjNsyc9g8/syEs6bk8XXApsHjKukMl68= github.com/docker/distribution v2.8.1+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= github.com/docker/docker v23.0.0+incompatible h1:L6c28tNyqZ4/ub9AZC9d5QUuunoHHfEH4/Ue+h/E5nE= diff --git a/programs/diagnostics/internal/platform/data/file_test.go b/programs/diagnostics/internal/platform/data/file_test.go index b93c4fc3350..938c34281f1 100644 --- a/programs/diagnostics/internal/platform/data/file_test.go +++ b/programs/diagnostics/internal/platform/data/file_test.go @@ -135,7 +135,7 @@ func TestConfigFileFrameCopy(t *testing.T) { sizes := map[string]int64{ "users.xml": int64(2017), "default-password.xml": int64(188), - "config.xml": int64(61260), + "config.xml": int64(61662), "server-include.xml": int64(168), "user-include.xml": int64(559), } diff --git a/programs/diagnostics/testdata/configs/xml/config.xml b/programs/diagnostics/testdata/configs/xml/config.xml index 18997855955..21a0821f89d 100644 --- a/programs/diagnostics/testdata/configs/xml/config.xml +++ b/programs/diagnostics/testdata/configs/xml/config.xml @@ -1260,8 +1260,12 @@ REPLACE_ME REPLACE_ME +

      Authorization: Bearer SOME-TOKEN
      your_base64_encoded_customer_key + REPLACE_ME + REPLACE_ME + true http://proxy1 http://proxy2 diff --git a/programs/extract-from-config/ExtractFromConfig.cpp b/programs/extract-from-config/ExtractFromConfig.cpp index 25b03550803..5305c61b730 100644 --- a/programs/extract-from-config/ExtractFromConfig.cpp +++ b/programs/extract-from-config/ExtractFromConfig.cpp @@ -89,8 +89,12 @@ static std::vector extractFromConfig( if (has_zk_includes && process_zk_includes) { DB::ConfigurationPtr bootstrap_configuration(new Poco::Util::XMLConfiguration(config_xml)); + + zkutil::validateZooKeeperConfig(*bootstrap_configuration); + zkutil::ZooKeeperPtr zookeeper = std::make_shared( - *bootstrap_configuration, "zookeeper", nullptr); + *bootstrap_configuration, bootstrap_configuration->has("zookeeper") ? "zookeeper" : "keeper", nullptr); + zkutil::ZooKeeperNodeCache zk_node_cache([&] { return zookeeper; }); config_xml = processor.processConfig(&has_zk_includes, &zk_node_cache); } diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index d568012bb26..d83e189f7ef 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include @@ -222,6 +222,8 @@ int mainEntryClickHouseInstall(int argc, char ** argv) ("pid-path", po::value()->default_value("var/run/clickhouse-server"), "directory for pid file") ("user", po::value()->default_value(DEFAULT_CLICKHOUSE_SERVER_USER), "clickhouse user to create") ("group", po::value()->default_value(DEFAULT_CLICKHOUSE_SERVER_GROUP), "clickhouse group to create") + ("noninteractive,y", "run non-interactively") + ("link", "create symlink to the binary instead of copying to binary-path") ; po::variables_map options; @@ -267,8 +269,6 @@ int mainEntryClickHouseInstall(int argc, char ** argv) /// Copy binary to the destination directory. - /// TODO An option to link instead of copy - useful for developers. - fs::path prefix = options["prefix"].as(); fs::path bin_dir = prefix / options["binary-path"].as(); @@ -281,76 +281,136 @@ int mainEntryClickHouseInstall(int argc, char ** argv) bool old_binary_exists = fs::exists(main_bin_path); bool already_installed = false; - /// Check if the binary is the same file (already installed). - if (old_binary_exists && binary_self_canonical_path == fs::canonical(main_bin_path)) + if (options.count("link")) { - already_installed = true; - fmt::print("ClickHouse binary is already located at {}\n", main_bin_path.string()); - } - /// Check if binary has the same content. - else if (old_binary_exists && binary_size == fs::file_size(main_bin_path)) - { - fmt::print("Found already existing ClickHouse binary at {} having the same size. 
Will check its contents.\n", - main_bin_path.string()); - - if (filesEqual(binary_self_path.string(), main_bin_path.string())) + if (old_binary_exists) { - already_installed = true; - fmt::print("ClickHouse binary is already located at {} and it has the same content as {}\n", - main_bin_path.string(), binary_self_canonical_path.string()); - } - } + bool is_symlink = FS::isSymlink(main_bin_path); + fs::path points_to; + if (is_symlink) + points_to = fs::weakly_canonical(FS::readSymlink(main_bin_path)); - if (already_installed) - { - if (0 != chmod(main_bin_path.string().c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH)) - throwFromErrno(fmt::format("Cannot chmod {}", main_bin_path.string()), ErrorCodes::SYSTEM_ERROR); + if (is_symlink && points_to == binary_self_canonical_path) + { + already_installed = true; + } + else + { + if (!is_symlink) + { + fmt::print("File {} already exists but it's not a symlink. Will rename to {}.\n", + main_bin_path.string(), main_bin_old_path.string()); + fs::rename(main_bin_path, main_bin_old_path); + } + else if (points_to != main_bin_path) + { + fmt::print("Symlink {} already exists but it points to {}. Will replace the old symlink to {}.\n", + main_bin_path.string(), points_to.string(), binary_self_canonical_path.string()); + fs::remove(main_bin_path); + } + } + } + + if (!already_installed) + { + if (!fs::exists(bin_dir)) + { + fmt::print("Creating binary directory {}.\n", bin_dir.string()); + fs::create_directories(bin_dir); + } + + fmt::print("Creating symlink {} to {}.\n", main_bin_path.string(), binary_self_canonical_path.string()); + fs::create_symlink(binary_self_canonical_path, main_bin_path); + + if (0 != chmod(binary_self_canonical_path.string().c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH)) + throwFromErrno(fmt::format("Cannot chmod {}", binary_self_canonical_path.string()), ErrorCodes::SYSTEM_ERROR); + } } else { - if (!fs::exists(bin_dir)) + bool is_symlink = FS::isSymlink(main_bin_path); + + if (!is_symlink) { - fmt::print("Creating binary directory {}.\n", bin_dir.string()); - fs::create_directories(bin_dir); + /// Check if the binary is the same file (already installed). + if (old_binary_exists && binary_self_canonical_path == fs::canonical(main_bin_path)) + { + already_installed = true; + fmt::print("ClickHouse binary is already located at {}\n", main_bin_path.string()); + } + /// Check if binary has the same content. + else if (old_binary_exists && binary_size == fs::file_size(main_bin_path)) + { + fmt::print("Found already existing ClickHouse binary at {} having the same size. 
Will check its contents.\n", + main_bin_path.string()); + + if (filesEqual(binary_self_path.string(), main_bin_path.string())) + { + already_installed = true; + fmt::print("ClickHouse binary is already located at {} and it has the same content as {}\n", + main_bin_path.string(), binary_self_canonical_path.string()); + } + } } - size_t available_space = fs::space(bin_dir).available; - if (available_space < binary_size) - throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space for clickhouse binary in {}, required {}, available {}.", - bin_dir.string(), ReadableSize(binary_size), ReadableSize(available_space)); - - fmt::print("Copying ClickHouse binary to {}\n", main_bin_tmp_path.string()); - - try + if (already_installed) { - ReadBufferFromFile in(binary_self_path.string()); - WriteBufferFromFile out(main_bin_tmp_path.string()); - copyData(in, out); - out.sync(); - - if (0 != fchmod(out.getFD(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH)) - throwFromErrno(fmt::format("Cannot chmod {}", main_bin_tmp_path.string()), ErrorCodes::SYSTEM_ERROR); - - out.finalize(); + if (0 != chmod(main_bin_path.string().c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH)) + throwFromErrno(fmt::format("Cannot chmod {}", main_bin_path.string()), ErrorCodes::SYSTEM_ERROR); } - catch (const Exception & e) + else { - if (e.code() == ErrorCodes::CANNOT_OPEN_FILE && geteuid() != 0) - std::cerr << "Install must be run as root: " << formatWithSudo("./clickhouse install") << '\n'; - throw; + if (!fs::exists(bin_dir)) + { + fmt::print("Creating binary directory {}.\n", bin_dir.string()); + fs::create_directories(bin_dir); + } + + size_t available_space = fs::space(bin_dir).available; + if (available_space < binary_size) + throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space for clickhouse binary in {}, required {}, available {}.", + bin_dir.string(), ReadableSize(binary_size), ReadableSize(available_space)); + + fmt::print("Copying ClickHouse binary to {}\n", main_bin_tmp_path.string()); + + try + { + String source = binary_self_path.string(); + String destination = main_bin_tmp_path.string(); + + /// Try to make a hard link first, as an optimization. + /// It is possible if the source and the destination are on the same filesystems. + if (0 != link(source.c_str(), destination.c_str())) + { + ReadBufferFromFile in(binary_self_path.string()); + WriteBufferFromFile out(main_bin_tmp_path.string()); + copyData(in, out); + out.sync(); + out.finalize(); + } + + if (0 != chmod(destination.c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH)) + throwFromErrno(fmt::format("Cannot chmod {}", main_bin_tmp_path.string()), ErrorCodes::SYSTEM_ERROR); + } + catch (const Exception & e) + { + if (e.code() == ErrorCodes::CANNOT_OPEN_FILE && geteuid() != 0) + std::cerr << "Install must be run as root: " << formatWithSudo("./clickhouse install") << '\n'; + throw; + } + + if (old_binary_exists) + { + fmt::print("{} already exists, will rename existing binary to {} and put the new binary in place\n", + main_bin_path.string(), main_bin_old_path.string()); + + /// There is file exchange operation in Linux but it's not portable. 
+ fs::rename(main_bin_path, main_bin_old_path); + } + + fmt::print("Renaming {} to {}.\n", main_bin_tmp_path.string(), main_bin_path.string()); + fs::rename(main_bin_tmp_path, main_bin_path); } - - if (old_binary_exists) - { - fmt::print("{} already exists, will rename existing binary to {} and put the new binary in place\n", - main_bin_path.string(), main_bin_old_path.string()); - - /// There is file exchange operation in Linux but it's not portable. - fs::rename(main_bin_path, main_bin_old_path); - } - - fmt::print("Renaming {} to {}.\n", main_bin_tmp_path.string(), main_bin_path.string()); - fs::rename(main_bin_tmp_path, main_bin_path); } /// Create symlinks. @@ -384,7 +444,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv) if (is_symlink) points_to = fs::weakly_canonical(FS::readSymlink(symlink_path)); - if (is_symlink && points_to == main_bin_path) + if (is_symlink && (points_to == main_bin_path || (options.count("link") && points_to == binary_self_canonical_path))) { need_to_create = false; } @@ -709,7 +769,9 @@ int mainEntryClickHouseInstall(int argc, char ** argv) /// dpkg or apt installers can ask for non-interactive work explicitly. const char * debian_frontend_var = getenv("DEBIAN_FRONTEND"); // NOLINT(concurrency-mt-unsafe) - bool noninteractive = debian_frontend_var && debian_frontend_var == std::string_view("noninteractive"); + bool noninteractive = (debian_frontend_var && debian_frontend_var == std::string_view("noninteractive")) + || options.count("noninteractive"); + bool is_interactive = !noninteractive && stdin_is_a_tty && stdout_is_a_tty; diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 9b01e6920a4..1f1138f49eb 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -69,6 +69,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ProtocolServerAdapter.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusRequestHandler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusMetricsWriter.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/waitServersToFinish.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPRequestHandlerFactoryMain.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/ReadHeaders.cpp @@ -114,7 +115,7 @@ if (BUILD_STANDALONE_KEEPER) clickhouse_add_executable(clickhouse-keeper ${CLICKHOUSE_KEEPER_STANDALONE_SOURCES}) # Remove some redundant dependencies - target_compile_definitions (clickhouse-keeper PRIVATE -DKEEPER_STANDALONE_BUILD) + target_compile_definitions (clickhouse-keeper PRIVATE -DCLICKHOUSE_PROGRAM_STANDALONE_BUILD) target_compile_definitions (clickhouse-keeper PUBLIC -DWITHOUT_TEXT_LOG) target_include_directories(clickhouse-keeper PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../src") # uses includes from src directory @@ -128,6 +129,7 @@ if (BUILD_STANDALONE_KEEPER) ch_contrib::lz4 ch_contrib::zstd ch_contrib::cityhash + ch_contrib::jemalloc common ch_contrib::double_conversion ch_contrib::dragonbox_to_chars pcg_random diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index ed3297ed7cb..7633465c5e5 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -11,13 +11,15 @@ #include #include #include +#include +#include +#include #include #include #include #include #include #include -#include #include #include #include @@ -58,7 +60,7 @@ int mainEntryClickHouseKeeper(int argc, char ** argv) } } -#ifdef KEEPER_STANDALONE_BUILD +#ifdef 
CLICKHOUSE_PROGRAM_STANDALONE_BUILD // Weak symbols don't work correctly on Darwin // so we have a stub implementation to avoid linker errors @@ -76,92 +78,9 @@ namespace ErrorCodes extern const int NO_ELEMENTS_IN_CONFIG; extern const int SUPPORT_IS_DISABLED; extern const int NETWORK_ERROR; - extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA; - extern const int FAILED_TO_GETPWUID; extern const int LOGICAL_ERROR; } -namespace -{ - -size_t waitServersToFinish(std::vector & servers, size_t seconds_to_wait) -{ - const size_t sleep_max_ms = 1000 * seconds_to_wait; - const size_t sleep_one_ms = 100; - size_t sleep_current_ms = 0; - size_t current_connections = 0; - for (;;) - { - current_connections = 0; - - for (auto & server : servers) - { - server.stop(); - current_connections += server.currentConnections(); - } - - if (!current_connections) - break; - - sleep_current_ms += sleep_one_ms; - if (sleep_current_ms < sleep_max_ms) - std::this_thread::sleep_for(std::chrono::milliseconds(sleep_one_ms)); - else - break; - } - return current_connections; -} - -Poco::Net::SocketAddress makeSocketAddress(const std::string & host, UInt16 port, Poco::Logger * log) -{ - Poco::Net::SocketAddress socket_address; - try - { - socket_address = Poco::Net::SocketAddress(host, port); - } - catch (const Poco::Net::DNSException & e) - { - const auto code = e.code(); - if (code == EAI_FAMILY -#if defined(EAI_ADDRFAMILY) - || code == EAI_ADDRFAMILY -#endif - ) - { - LOG_ERROR(log, "Cannot resolve listen_host ({}), error {}: {}. " - "If it is an IPv6 address and your host has disabled IPv6, then consider to " - "specify IPv4 address to listen in element of configuration " - "file. Example: 0.0.0.0", - host, e.code(), e.message()); - } - - throw; - } - return socket_address; -} - -std::string getUserName(uid_t user_id) -{ - /// Try to convert user id into user name. - auto buffer_size = sysconf(_SC_GETPW_R_SIZE_MAX); - if (buffer_size <= 0) - buffer_size = 1024; - std::string buffer; - buffer.reserve(buffer_size); - - struct passwd passwd_entry; - struct passwd * result = nullptr; - const auto error = getpwuid_r(user_id, &passwd_entry, buffer.data(), buffer_size, &result); - - if (error) - throwFromErrno("Failed to find user name for " + toString(user_id), ErrorCodes::FAILED_TO_GETPWUID, error); - else if (result) - return result->pw_name; - return toString(user_id); -} - -} - Poco::Net::SocketAddress Keeper::socketBindListen(Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port, [[maybe_unused]] bool secure) const { auto address = makeSocketAddress(host, port, &logger()); @@ -315,12 +234,12 @@ struct Keeper::KeeperHTTPContext : public IHTTPContext Poco::Timespan getReceiveTimeout() const override { - return context->getConfigRef().getUInt64("keeper_server.http_receive_timeout", DEFAULT_HTTP_READ_BUFFER_TIMEOUT); + return {context->getConfigRef().getInt64("keeper_server.http_receive_timeout", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0}; } Poco::Timespan getSendTimeout() const override { - return context->getConfigRef().getUInt64("keeper_server.http_send_timeout", DEFAULT_HTTP_READ_BUFFER_TIMEOUT); + return {context->getConfigRef().getInt64("keeper_server.http_send_timeout", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0}; } TinyContextPtr context; @@ -365,24 +284,7 @@ try std::filesystem::create_directories(path); /// Check that the process user id matches the owner of the data. 
- const auto effective_user_id = geteuid(); - struct stat statbuf; - if (stat(path.c_str(), &statbuf) == 0 && effective_user_id != statbuf.st_uid) - { - const auto effective_user = getUserName(effective_user_id); - const auto data_owner = getUserName(statbuf.st_uid); - std::string message = "Effective user of the process (" + effective_user + - ") does not match the owner of the data (" + data_owner + ")."; - if (effective_user_id == 0) - { - message += " Run under 'sudo -u " + data_owner + "'."; - throw Exception::createDeprecated(message, ErrorCodes::MISMATCHING_USERS_FOR_PROCESS_AND_DATA); - } - else - { - LOG_WARNING(log, fmt::runtime(message)); - } - } + assertProcessUserMatchesDataOwner(path, [&](const std::string & message){ LOG_WARNING(log, fmt::runtime(message)); }); DB::ServerUUID::load(path + "/uuid", log); @@ -445,6 +347,9 @@ try return tiny_context->getConfigRef(); }; + auto tcp_receive_timeout = config().getInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC); + auto tcp_send_timeout = config().getInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC); + for (const auto & listen_host : listen_hosts) { /// TCP Keeper @@ -453,8 +358,8 @@ try { Poco::Net::ServerSocket socket; auto address = socketBindListen(socket, listen_host, port); - socket.setReceiveTimeout(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC)); - socket.setSendTimeout(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC)); + socket.setReceiveTimeout(Poco::Timespan{tcp_receive_timeout, 0}); + socket.setSendTimeout(Poco::Timespan{tcp_send_timeout, 0}); servers->emplace_back( listen_host, port_name, @@ -462,8 +367,7 @@ try std::make_unique( new KeeperTCPHandlerFactory( config_getter, tiny_context->getKeeperDispatcher(), - config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), - config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), false), server_pool, socket)); + tcp_receive_timeout, tcp_send_timeout, false), server_pool, socket)); }); const char * secure_port_name = "keeper_server.tcp_port_secure"; @@ -472,8 +376,8 @@ try #if USE_SSL Poco::Net::SecureServerSocket socket; auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC)); - socket.setSendTimeout(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC)); + socket.setReceiveTimeout(Poco::Timespan{tcp_receive_timeout, 0}); + socket.setSendTimeout(Poco::Timespan{tcp_send_timeout, 0}); servers->emplace_back( listen_host, secure_port_name, @@ -481,8 +385,7 @@ try std::make_unique( new KeeperTCPHandlerFactory( config_getter, tiny_context->getKeeperDispatcher(), - config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), - config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), true), server_pool, socket)); + tcp_receive_timeout, tcp_send_timeout, true), server_pool, socket)); #else UNUSED(port); throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); @@ -490,26 +393,26 @@ try }); const auto & config = config_getter(); + auto http_context = httpContext(); Poco::Timespan 
keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0); Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; - http_params->setTimeout(DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC); + http_params->setTimeout(http_context->getReceiveTimeout()); http_params->setKeepAliveTimeout(keep_alive_timeout); /// Prometheus (if defined and not setup yet with http_port) port_name = "prometheus.port"; - createServer(listen_host, port_name, listen_try, [&](UInt16 port) + createServer(listen_host, port_name, listen_try, [&, my_http_context = std::move(http_context)](UInt16 port) mutable { Poco::Net::ServerSocket socket; auto address = socketBindListen(socket, listen_host, port); - auto http_context = httpContext(); - socket.setReceiveTimeout(http_context->getReceiveTimeout()); - socket.setSendTimeout(http_context->getSendTimeout()); + socket.setReceiveTimeout(my_http_context->getReceiveTimeout()); + socket.setSendTimeout(my_http_context->getSendTimeout()); servers->emplace_back( listen_host, port_name, "Prometheus: http://" + address.toString(), std::make_unique( - std::move(http_context), createPrometheusMainHandlerFactory(*this, config_getter(), async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); + std::move(my_http_context), createPrometheusMainHandlerFactory(*this, config_getter(), async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); }); } diff --git a/programs/library-bridge/LibraryBridgeHandlers.cpp b/programs/library-bridge/LibraryBridgeHandlers.cpp index ab81472be88..9642dd7ee63 100644 --- a/programs/library-bridge/LibraryBridgeHandlers.cpp +++ b/programs/library-bridge/LibraryBridgeHandlers.cpp @@ -158,6 +158,8 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ if (cloned) { writeStringBinary("1", out); + out.finalize(); + return; } else { diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 8e092bdf8e4..e026f87279a 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -26,12 +26,13 @@ #include #include #include +#include #include #include #include #include #include -#include +#include #include #include #include @@ -133,6 +134,11 @@ void LocalServer::initialize(Poco::Util::Application & self) config().getUInt("max_io_thread_pool_size", 100), config().getUInt("max_io_thread_pool_free_size", 0), config().getUInt("io_thread_pool_queue_size", 10000)); + + OutdatedPartsLoadingThreadPool::initialize( + config().getUInt("max_outdated_parts_loading_thread_pool_size", 16), + 0, // We don't need any threads one all the parts will be loaded + config().getUInt("outdated_part_loading_thread_pool_queue_size", 10000)); } @@ -600,13 +606,13 @@ void LocalServer::processConfig() String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", ""); size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", 0); if (uncompressed_cache_size) - global_context->setUncompressedCache(uncompressed_cache_size, uncompressed_cache_policy); + global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size); /// Size of cache for marks (index of MergeTree family of tables). 
String mark_cache_policy = config().getString("mark_cache_policy", ""); size_t mark_cache_size = config().getUInt64("mark_cache_size", 5368709120); if (mark_cache_size) - global_context->setMarkCache(mark_cache_size, mark_cache_policy); + global_context->setMarkCache(mark_cache_policy, mark_cache_size); /// Size of cache for uncompressed blocks of MergeTree indices. Zero means disabled. size_t index_uncompressed_cache_size = config().getUInt64("index_uncompressed_cache_size", 0); @@ -812,8 +818,16 @@ void LocalServer::readArguments(int argc, char ** argv, Arguments & common_argum { for (int arg_num = 1; arg_num < argc; ++arg_num) { - const char * arg = argv[arg_num]; - common_arguments.emplace_back(arg); + std::string_view arg = argv[arg_num]; + if (arg == "--multiquery" && (arg_num + 1) < argc && !std::string_view(argv[arg_num + 1]).starts_with('-')) + { + /// Transform the abbreviated syntax '--multiquery ' into the full syntax '--multiquery -q ' + ++arg_num; + arg = argv[arg_num]; + addMultiquery(arg, common_arguments); + } + else + common_arguments.emplace_back(arg); } } diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index 274ad29a174..add16ec5205 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include diff --git a/programs/obfuscator/README.md b/programs/obfuscator/README.md new file mode 100644 index 00000000000..371af6344ef --- /dev/null +++ b/programs/obfuscator/README.md @@ -0,0 +1,354 @@ +## clickhouse-obfuscator — a tool for dataset anonymization + +### Installation And Usage + +``` +curl https://clickhouse.com/ | sh +./clickhouse obfuscator --help +``` + +### Example + +``` +./clickhouse obfuscator --seed 123 --input-format TSV --output-format TSV \ + --structure 'CounterID UInt32, URLDomain String, URL String, SearchPhrase String, Title String' \ + < source.tsv > result.tsv +``` + + +### A long, long time ago... + +ClickHouse users already know that its biggest advantage is its high-speed processing of analytical queries. But claims like this need to be confirmed with reliable performance testing. That's what we want to talk about today. + +![benchmarks.png](https://clickhouse.com/uploads/benchmarks_24f1904cc9.png) + +We started running tests in 2013, long before ClickHouse was available as open source. Back then, our main concern was data processing speed for a web analytics product. We started storing this data, which we would later store in ClickHouse, in January 2009. Part of the data had been written to a database starting in 2012, and part was converted from OLAPServer and Metrage (data structures previously used by the solution). For testing, we took the first subset at random from data for 1 billion pageviews. Our web analytics platform didn't have any queries at that point, so we came up with queries that interested us, using all the possible ways to filter, aggregate, and sort the data. + +ClickHouse performance was compared with similar systems like Vertica and MonetDB. To avoid bias, testing was performed by an employee who hadn't participated in ClickHouse development, and special cases in the code were not optimized until all the results were obtained. We used the same approach to get a data set for functional testing. + +After ClickHouse was released as open source in 2016, people began questioning these tests. 
+ +## Shortcomings of tests on private data + +Our performance tests: + +- Couldn't be reproduced independently because they used private data that can't be published. Some of the functional tests are not available to external users for the same reason. +- Needed further development. The set of tests needed to be substantially expanded in order to isolate performance changes in individual parts of the system. +- Didn't run on a per-commit basis or for individual pull requests. External developers couldn't check their code for performance regressions. + +We could solve these problems by throwing out the old tests and writing new ones based on open data, like [flight data for the USA](https://clickhouse.com/docs/en/getting-started/example-datasets/ontime/) and [taxi rides in New York](https://clickhouse.com/docs/en/getting-started/example-datasets/nyc-taxi). Or we could use benchmarks like TPC-H, TPC-DS, and [Star Schema Benchmark](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema). The disadvantage is that this data was very different from web analytics data, and we would rather keep the test queries. + +### Why it's important to use real data + +Performance should only be tested on real data from a production environment. Let's look at some examples. + +### Example 1 + +Let's say you fill a database with evenly distributed pseudorandom numbers. Data compression isn't going to work in this case, although data compression is essential to analytical databases. There is no silver bullet solution to the challenge of choosing the right compression algorithm and the right way to integrate it into the system since data compression requires a compromise between the speed of compression and decompression and the potential compression efficiency. But systems that can't compress data are guaranteed losers. If your tests use evenly distributed pseudorandom numbers, this factor is ignored, and the results will be distorted. + +Bottom line: Test data must have a realistic compression ratio. + +### Example 2 + +Let's say we are interested in the execution speed of this SQL query: + +```sql +SELECT RegionID, uniq(UserID) AS visitors + FROM test.hits +GROUP BY RegionID +ORDER BY visitors DESC +LIMIT 10 +``` + +This was a typical query for a web analytics product. What affects the processing speed? + +- How `GROUP BY` is executed. +- Which data structure is used for calculating the `uniq` aggregate function. +- How many different RegionIDs there are and how much RAM each state of the `uniq` function requires. + +But another important factor is that the amount of data is distributed unevenly between regions. (It probably follows a power law. I put the distribution on a log-log graph, but I can't say for sure.) If this is the case, the states of the `uniq` aggregate function with fewer values must use very little memory. When there are a lot of different aggregation keys, every single byte counts. How can we get generated data that has all these properties? The obvious solution is to use real data. + +Many DBMSs implement the HyperLogLog data structure for an approximation of COUNT(DISTINCT), but none of them work very well because this data structure uses a fixed amount of memory. ClickHouse has a function that uses [a combination of three different data structures](https://clickhouse.com/docs/en/sql-reference/aggregate-functions/reference/uniqcombined), depending on the size of the data set.
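To make the idea of combining data structures concrete, here is a toy sketch of an adaptive distinct counter. It is an illustration, not ClickHouse's actual `uniqCombined` implementation: the class name, the stage limits, and the HyperLogLog parameters are all invented, but the staging principle is the one that matters for memory usage:

```c++
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <unordered_set>
#include <vector>

/// Toy adaptive distinct counter: exact while small, fixed-memory sketch when large.
class AdaptiveDistinctCounter
{
    static constexpr size_t flat_limit = 16;    /// stage 1: tiny array, linear scan
    static constexpr size_t set_limit = 65536;  /// stage 2: exact hash set
    static constexpr int precision = 12;        /// stage 3: 2^12 one-byte HLL registers

    std::vector<uint64_t> flat;
    std::unordered_set<uint64_t> set;
    std::vector<uint8_t> registers;             /// empty until stage 3 is reached

    static uint64_t mix(uint64_t x)             /// splitmix64 finalizer; a trivial hash would break HLL
    {
        x += 0x9e3779b97f4a7c15ULL;
        x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9ULL;
        x = (x ^ (x >> 27)) * 0x94d049bb133111ebULL;
        return x ^ (x >> 31);
    }

    void hllInsert(uint64_t h)
    {
        size_t bucket = h >> (64 - precision);  /// top bits select a register
        uint64_t rest = h << precision;
        int rank = rest ? __builtin_clzll(rest) + 1 : 64 - precision + 1;
        registers[bucket] = std::max<uint8_t>(registers[bucket], static_cast<uint8_t>(rank));
    }

public:
    void insert(uint64_t key)
    {
        if (!registers.empty())
            return hllInsert(mix(key));
        if (set.empty() && flat.size() < flat_limit)    /// scanning 16 values is cheaper than hashing
        {
            if (std::find(flat.begin(), flat.end(), key) == flat.end())
                flat.push_back(key);
            return;
        }
        if (set.empty())
            set.insert(flat.begin(), flat.end());       /// upgrade: stage 1 -> stage 2
        set.insert(key);
        if (set.size() > set_limit)                     /// upgrade: stage 2 -> stage 3
        {
            registers.assign(size_t(1) << precision, 0);
            for (uint64_t v : set)
                hllInsert(mix(v));
            set.clear();
        }
    }

    double count() const
    {
        if (registers.empty())
            return set.empty() ? double(flat.size()) : double(set.size());
        double m = double(registers.size());
        double sum = 0.0;
        for (uint8_t r : registers)
            sum += std::pow(2.0, -double(r));
        return 0.7213 / (1.0 + 1.079 / m) * m * m / sum;  /// raw HyperLogLog estimate
    }
};
```

A grouping key seen only a handful of times never leaves the exact stages, so its state costs tens of bytes; only genuinely heavy keys pay for the fixed 4 KiB register array. This is exactly the property the power-law argument above relies on, and it is invisible when benchmarking on uniformly distributed synthetic keys.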
+ +Bottom line: Test data must represent distribution properties of the real data well enough, meaning cardinality (number of distinct values per column) and cross-column cardinality (number of different values counted across several different columns). + +### Example 3 + +Instead of testing the performance of the ClickHouse DBMS, let's take something simpler, like hash tables. For hash tables, it's essential to choose the right hash function. This is not as important for `std::unordered_map`, because it's a hash table based on chaining, and a prime number is used as the array size. The standard library implementation in GCC and Clang uses a trivial hash function as the default hash function for numeric types. However, `std::unordered_map` is not the best choice when we are looking for maximum speed. With an open-addressing hash table, we can't just use a standard hash function. Choosing the right hash function becomes the deciding factor. + +It's easy to find hash table performance tests using random data that don't take the hash functions used into account. Many hash function tests also focus on the calculation speed and certain quality criteria, even though they ignore the data structures used. But the fact is that hash tables and HyperLogLog require different hash function quality criteria. + +![alexey_chat.png](https://clickhouse.com/uploads/alexey_chat_3f8db88301.png) + +## Challenge + +Our goal was to obtain data for testing performance that had the same structure as our web analytics data with all the properties that are important for benchmarks, but in such a way that there remain no traces of real website users in this data. In other words, the data must be anonymized and still preserve its: + +* Compression ratio. +* Cardinality (the number of distinct values). +* Mutual cardinality between several different columns. +* Properties of probability distributions that can be used for data modeling (for example, if we believe that regions are distributed according to a power law, then the exponent — the distribution parameter — should be approximately the same for artificial data and for real data). + +How can we get a similar compression ratio for the data? If LZ4 is used, substrings in binary data must be repeated at approximately the same distance, and the repetitions must be approximately the same length. For ZSTD, entropy per byte must also coincide. + +The ultimate goal was to create a publicly available tool that anyone can use to anonymize their data sets for publication. This would allow us to debug and test performance on other people's data similar to our production data. We would also like the generated data to be interesting. + +However, these are very loosely-defined requirements, and we aren't planning to write up a formal problem statement or specification for this task. + +## Possible solutions + +I don't want to make it sound like this problem was particularly important. It was never actually included in planning, and no one had intentions to work on it. I hoped that an idea would come up someday, and suddenly I would be in a good mood and be able to put everything else off until later. + +### Explicit probabilistic models + +- We want to preserve the continuity of time series data. This means that for some types of data, we need to model the difference between neighboring values rather than the value itself. +- To model "joint cardinality" of columns, we would also have to explicitly reflect dependencies between columns. 
For instance, there are usually very few IP addresses per user ID, so to generate an IP address, we would have to use a hash value of the user ID as a seed and add a small amount of other pseudorandom data. +- We weren't sure how to express the dependency that the same user frequently visits URLs with matching domains at approximately the same time. + +All this can be written in a C++ "script" with the distributions and dependencies hard coded. However, Markov models are obtained from a combination of statistics with smoothing and adding noise. I started writing a script like this, but after writing explicit models for ten columns, it became unbearably boring — and the "hits" table in the web analytics product had more than 100 columns way back in 2012. + +```c++ +EventTime.day(std::discrete_distribution<>({ + 0, 0, 13, 30, 0, 14, 42, 5, 6, 31, 17, 0, 0, 0, 0, 23, 10, ...})(random)); +EventTime.hour(std::discrete_distribution<>({ + 13, 7, 4, 3, 2, 3, 4, 6, 10, 16, 20, 23, 24, 23, 18, 19, 19, ...})(random)); +EventTime.minute(std::uniform_int_distribution(0, 59)(random)); +EventTime.second(std::uniform_int_distribution(0, 59)(random)); + +UInt64 UserID = hash(4, powerLaw(5000, 1.1)); +UserID = UserID / 10000000000ULL * 10000000000ULL + + static_cast<UInt64>(EventTime) + UserID % 1000000; + +random_with_seed.seed(powerLaw(5000, 1.1)); +auto get_random_with_seed = [&]{ return random_with_seed(); }; +``` + +Advantages: + +- Conceptual simplicity. + +Disadvantages: + +- A large amount of work is required. +- The solution only applies to one type of data. + +And I preferred a more general solution that can be used for obfuscating any dataset. + +In any case, this solution could be improved. Instead of manually selecting models, we could implement a catalog of models and choose the best among them (best fit plus some form of regularization). Or maybe we could use Markov models for all types of fields, not just for text. Dependencies between data could also be extracted automatically. This would require calculating the [relative entropy](https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence) (the relative amount of information) between columns. A simpler alternative is to calculate relative cardinalities for each pair of columns (something like "how many different values of A are there on average for a fixed value B"). For instance, this will make it clear that `URLDomain` fully depends on the `URL`, and not vice versa; a quick sketch of this measurement is shown below. + +But I also rejected this idea because there are too many factors to consider, and it would take too long to write. + +### Neural networks + +As I've already mentioned, this task wasn't high on the priority list — no one was even thinking about trying to solve it. But as luck would have it, our colleague Ivan Puzirevsky was teaching at the Higher School of Economics. He asked me if I had any interesting problems that would work as suitable thesis topics for his students. When I offered him this one, he assured me it had potential. So I handed this challenge off to a nice guy "off the street" Sharif (he did have to sign an NDA to access the data, though). + +I shared all my ideas with him but emphasized that there were no restrictions on how the problem could be solved, and a good option would be to try approaches that I know nothing about, like using LSTM to generate a text dump of data. This seemed promising after coming across the article [The Unreasonable Effectiveness of Recurrent Neural Networks](http://karpathy.github.io/2015/05/21/rnn-effectiveness/).
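As an aside before the experiment itself: the relative-cardinality idea above is cheap to prototype. A minimal sketch, assuming both columns are pre-hashed to 64-bit values; `relativeCardinality` is a hypothetical helper for illustration, not code from the tool:

```c++
#include <cstdint>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

/// Average number of distinct values of B per distinct value of A.
/// Close to 1.0 for (A = URL, B = URLDomain), much larger the other way around,
/// which reveals that URLDomain is a function of URL and not vice versa.
double relativeCardinality(const std::vector<std::pair<uint64_t, uint64_t>> & rows)
{
    std::unordered_map<uint64_t, std::unordered_set<uint64_t>> b_per_a;
    for (const auto & [a, b] : rows)
        b_per_a[a].insert(b);

    if (b_per_a.empty())
        return 0.0;

    size_t total = 0;
    for (const auto & kv : b_per_a)
        total += kv.second.size();
    return double(total) / double(b_per_a.size());
}
```

Running it for both orderings of a column pair immediately shows which way the dependency points.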
+ +The first challenge is that we need to generate structured data, not just text. But it wasn't clear whether a recurrent neural network could generate data with the desired structure. There are two ways to solve this. The first solution is to use separate models for generating the structure and the "filler", and only use the neural network for generating values. But this approach was postponed and then never completed. The second solution is to simply generate a TSV dump as text. Experience has shown that some of the rows in the text won't match the structure, but these rows can be thrown out when loading the data. + +The second challenge is that the recurrent neural network generates a sequence of data, and thus dependencies in data must follow in the order of the sequence. But in our data, the order of columns can potentially be in reverse to dependencies between them. We didn't do anything to resolve this problem. + +As summer approached, we had the first working Python script that generated data. The data quality seemed decent at first glance: + +![python_script.jpg](https://clickhouse.com/uploads/python_script_810d491dfb.jpg) + +However, we did run into some difficulties: + +1. The size of the model was about a gigabyte. We tried to create a model for data that was several gigabytes in size (for a start). The fact that the resulting model is so large raised concerns. Would it be possible to extract the real data that it was trained on? Unlikely. But I don't know much about machine learning and neural networks, and I haven't read this developer's Python code, so how can I be sure? There were several articles published at the time about how to compress neural networks without loss of quality, but it wasn't implemented. On the one hand, this doesn't seem to be a serious problem since we can opt out of publishing the model and just publish the generated data. On the other hand, if overfitting occurs, the generated data may contain some part of the source data. + +2. On a machine with a single CPU, the data generation speed is approximately 100 rows per second. Our goal was to generate at least a billion rows. Calculations showed that this wouldn't be completed before the date of the thesis defense. It didn't make sense to use additional hardware because the goal was to make a data generation tool that anyone could use. + +Sharif tried to analyze the quality of data by comparing statistics. Among other things, he calculated the frequency of different characters occurring in the source data and in the generated data. The result was stunning: the most frequent characters were Ð and Ñ. + +Don't worry about Sharif, though. He successfully defended his thesis, and we happily forgot about the whole thing. + +### Mutation of compressed data + +Let's assume that the problem statement has been reduced to a single point: we need to generate data that has the same compression ratio as the source data, and the data must decompress at the same speed. How can we achieve this? We need to edit compressed data bytes directly! This allows us to change the data without changing the size of the compressed data, plus everything will work fast. I wanted to try out this idea right away, despite the fact that the problem it solves is different from what we started with. But that's how it always is. + +So how do we edit a compressed file? Let's say we are only interested in LZ4. 
LZ4 compressed data is composed of sequences, which in turn are runs of non-compressed bytes (literals) followed by a match copy:

1. Literals (copy the following N bytes as is).
2. Matches with a minimum repeat length of 4 (repeat N bytes that occurred in the file at a distance of M).

Source data:

`Hello world Hello.`

Compressed data (arbitrary example):

`literals 12 "Hello world " match 5 12.`

In the compressed file, we leave "match" as-is and change the byte values in "literals". As a result, after decompressing, we get a file in which all repeating sequences at least 4 bytes long are also repeated at the same distance, but they consist of a different set of bytes (basically, the modified file doesn't contain a single byte that was taken from the source file).

But how do we change the bytes? The answer isn't obvious because, in addition to the column types, the data also has its own internal, implicit structure that we would like to preserve. For example, text data is often stored in UTF-8 encoding, and we want the generated data to also be valid UTF-8. I developed a simple heuristic that involves meeting several criteria (a sketch in code follows the list):

- Null bytes and ASCII control characters are kept as-is.
- Some punctuation characters remain as-is.
- ASCII is converted to ASCII, and for everything else, the most significant bit is preserved (or an explicit set of "if" statements is written for different UTF-8 lengths). Within one byte class, a new value is picked uniformly at random.
- Fragments like `https://` are preserved; otherwise, it looks a bit silly.
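A minimal sketch of that heuristic, assuming only a few byte classes and a crude treatment of UTF-8 continuation bytes (the real tool is more careful with multi-byte sequences, and preserving fragments like `https://` would happen a level above this function):

```c++
#include <cctype>
#include <cstdint>
#include <random>

/// Replace one literal byte with a random byte of the same class, so the
/// implicit structure (control characters, punctuation, letter/digit
/// texture, UTF-8 byte roles) is preserved.
uint8_t mutateLiteralByte(uint8_t byte, std::mt19937 & rng)
{
    auto pick = [&](int lo, int hi)
    {
        return static_cast<uint8_t>(std::uniform_int_distribution<int>(lo, hi)(rng));
    };

    if (byte < 32 || byte == 127)
        return byte;                 /// null bytes and ASCII control characters: keep
    if (byte < 0x80 && std::ispunct(byte))
        return byte;                 /// punctuation: keep
    if (byte < 0x80 && std::isdigit(byte))
        return pick('0', '9');       /// digit -> random digit
    if (byte < 0x80 && std::isupper(byte))
        return pick('A', 'Z');       /// uppercase -> random uppercase
    if (byte < 0x80 && std::islower(byte))
        return pick('a', 'z');       /// lowercase -> random lowercase
    if (byte >= 0x80 && byte < 0xC0)
        return static_cast<uint8_t>(0x80 | pick(0, 0x3F)); /// UTF-8 continuation: keep 10xxxxxx
    return byte;                     /// everything else (space, UTF-8 leading bytes): keep
}
```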
The only caveat to this approach is that the data model is the source data itself, which means it cannot be published. The model is only fit for generating amounts of data no larger than the source. By contrast, the previous approaches provide models that allow generating data of arbitrary size.

```
http://ljc.she/kdoqdqwpgafe/klwlpm&qw=962788775I0E7bs7OXeAyAx
http://ljc.she/kdoqdqwdffhant.am/wcpoyodjit/cbytjgeoocvdtclac
http://ljc.she/kdoqdqwpgafe/klwlpm&qw=962788775I0E7bs7OXe
http://ljc.she/kdoqdqwdffhant.am/wcpoyodjit/cbytjgeoocvdtclac
http://ljc.she/kdoqdqwdbknvj.s/hmqhpsavon.yf#aortxqdvjja
http://ljc.she/kdoqdqw-bknvj.s/hmqhpsavon.yf#aortxqdvjja
http://ljc.she/kdoqdqwpdtu-Unu-Rjanjna-bbcohu_qxht
http://ljc.she/kdoqdqw-bknvj.s/hmqhpsavon.yf#aortxqdvjja
http://ljc.she/kdoqdqwpdtu-Unu-Rjanjna-bbcohu_qxht
http://ljc.she/kdoqdqw-bknvj.s/hmqhpsavon.yf#aortxqdvjja
http://ljc.she/kdoqdqwpdtu-Unu-Rjanjna-bbcohu-702130
```

The results were positive, and the data was interesting, but something wasn't quite right. The URLs kept the same structure, but in some of them, it was too easy to recognize the original terms, such as "avito" (a popular marketplace in Russia), so I created a heuristic that swapped some of the bytes around.

There were other concerns as well. For example, sensitive information could possibly reside in a FixedString column in binary representation and potentially consist of ASCII control characters and punctuation, which I decided to preserve. However, I didn't take data types into consideration.

Another problem is that if a column stores data in the "length, value" format (this is how String columns are stored), how do I ensure that the length remains correct after the mutation? When I tried to fix this, I immediately lost interest.

### Random permutations

Unfortunately, the problem wasn't solved. We performed a few experiments, and it just got worse. The only thing left was to sit around doing nothing and surf the web randomly since the magic was gone. Luckily, I came across a page that [explained the algorithm](http://fabiensanglard.net/fizzlefade/index.php) for rendering the death of the main character in the game Wolfenstein 3D.

wolfenstein.gif
The animation is really well done — the screen fills up with blood. The article explains that this is actually a pseudorandom permutation. A random permutation of a set of elements is a randomly picked bijective (one-to-one) transformation of the set: a mapping where each derived element corresponds to exactly one original element (and vice versa). In other words, it is a way to randomly iterate through all the elements of a data set. And that is exactly the process shown in the picture: each pixel is filled in random order, without any repetition. If we were to just choose a random pixel at each step, it would take a long time to get to the last one.

The game uses a very simple algorithm for pseudorandom permutation called a linear-feedback shift register ([LFSR](https://en.wikipedia.org/wiki/Linear-feedback_shift_register)). Similar to pseudorandom number generators, random permutations, or rather their families, can be cryptographically strong when parametrized by a key. This is exactly what we needed for our data transformation. However, the details were trickier. For example, cryptographically strong encryption of N bytes to N bytes with a pre-determined key and initialization vector seems like it would work for a pseudorandom permutation of a set of N-byte strings. Indeed, this is a one-to-one transformation, and it appears to be random. But if we use the same transformation for all of our data, the result may be susceptible to cryptanalysis because the same initialization vector and key value are used multiple times. This is similar to the [Electronic Codebook](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#ECB) mode of operation for a block cipher.

For example, three multiplications and two xorshift operations are used for the [murmurhash](https://github.com/ClickHouse/ClickHouse/blob/master/dbms/src/Common/HashTable/Hash.h#L18) finalizer. This operation is a pseudorandom permutation. However, I should point out that hash functions don't have to be one-to-one (even hashes of N bits to N bits).
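To see why a finalizer of this shape is a permutation, note that each step is individually invertible: applying an xor-shift by 32 or more bits a second time undoes it, and multiplication by an odd constant has a modular inverse. A minimal sketch, using the widely known fmix64 constants rather than ClickHouse's exact finalizer:

```c++
#include <cassert>
#include <cstdint>
#include <iostream>

/// Murmur-style finalizer: alternating xor-shifts and odd multiplications.
uint64_t fmix64(uint64_t x)
{
    x ^= x >> 33;
    x *= 0xff51afd7ed558ccdULL;
    x ^= x >> 33;
    x *= 0xc4ceb9fe1a85ec53ULL;
    x ^= x >> 33;
    return x;
}

/// Inverse of an odd multiplier modulo 2^64 via Newton's iteration.
uint64_t inverseOdd(uint64_t a)
{
    uint64_t x = a;             /// correct to the 3 low bits for any odd a
    for (int i = 0; i < 5; ++i)
        x *= 2 - a * x;         /// each step doubles the number of correct bits
    return x;
}

uint64_t unfmix64(uint64_t x)
{
    x ^= x >> 33;               /// shift >= 32, so one more xor-shift undoes it
    x *= inverseOdd(0xc4ceb9fe1a85ec53ULL);
    x ^= x >> 33;
    x *= inverseOdd(0xff51afd7ed558ccdULL);
    x ^= x >> 33;
    return x;
}

int main()
{
    uint64_t v = 123456789;
    assert(unfmix64(fmix64(v)) == v);   /// one-to-one: the round trip restores the value
    std::cout << fmix64(v) << '\n';
}
```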
Or here's another interesting [example from elementary number theory](https://preshing.com/20121224/how-to-generate-a-sequence-of-unique-random-integers/) from Jeff Preshing's website.

How can we use pseudorandom permutations to solve our problem? We can use them to transform all numeric fields, which lets us preserve the cardinalities and mutual cardinalities of all combinations of fields. In other words, `COUNT(DISTINCT)` will return the same value as before the transformation and, furthermore, with any `GROUP BY`.

It is worth noting that preserving all cardinalities somewhat contradicts our goal of data anonymization. Let's say someone knows that the source data for site sessions contains a user who visited sites from 10 different countries, and they want to find that user in the transformed data. The transformed data also shows that the user visited sites from 10 different countries, which makes it easy to narrow down the search. However, even if they find out what the user was transformed into, it won't be very useful; all of the other data has also been transformed, so they won't be able to figure out what sites the user visited or anything else. But these rules can be applied in a chain. For example, suppose someone knows that the most frequently occurring website in our data is Google, with Yahoo in second place. In that case, they can use the ranking to determine which transformed site identifiers actually mean Yahoo and Google. There's nothing surprising about this since we are working with an informal problem statement, and we are trying to find a balance between the anonymization of data (hiding information) and preserving data properties (disclosure of information). For information about how to approach the data anonymization issue more reliably, read this [article](https://medium.com/georgian-impact-blog/a-brief-introduction-to-differential-privacy-eacf8722283b).

In addition to keeping the original cardinality of values, I also wanted to keep the order of magnitude of the values. What I mean is that if the source data contained numbers under 10, then I want the transformed numbers to also be small. How can we achieve this?

For example, we can divide the set of possible values into size classes and perform permutations within each class separately (maintaining the size classes). The easiest way to do this is to take the nearest power of two, or equivalently the position of the most significant bit of the number, as the size class (these are the same thing). The numbers 0 and 1 will always remain as is. The numbers 2 and 3 will sometimes remain as is (with a probability of 1/2) and will sometimes be swapped (with a probability of 1/2). The set of numbers 1024..2047 will be mapped to one of 1024! (factorial) variants, and so on. For signed numbers, we will keep the sign.

It's also doubtful whether we need a one-to-one function. We can probably just use a cryptographically strong hash function. The transformation won't be one-to-one, but the cardinality will be close to the same.

However, we need a cryptographically strong random permutation so that when we define a key and derive a permutation with that key, restoring the original data from the rearranged data without knowing the key would be difficult.

There is one problem: in addition to knowing nothing about neural networks and machine learning, I am also quite ignorant when it comes to cryptography. That leaves just my courage. I was still reading random web pages and found a link on [Hacker News](https://news.ycombinator.com/item?id=15122540) to a discussion on Fabien Sanglard's page. It had a link to a [blog post](http://antirez.com/news/113) by Redis developer Salvatore Sanfilippo that talked about a wonderful generic way of getting random permutations, known as a [Feistel network](https://en.wikipedia.org/wiki/Feistel_cipher).

The Feistel network is iterative, consisting of rounds. Each round is a remarkable transformation that allows you to get a one-to-one function from any function. Let's look at how it works.

1. The argument's bits are divided into two halves:
```
arg: xxxxyyyy
arg_l: xxxx
arg_r: yyyy
```
2. The right half replaces the left. In its place, we put the result of XOR of the initial value of the left half with the result of the function applied to the initial value of the right half, like this:
```
res: yyyyzzzz
res_l = yyyy = arg_r
res_r = zzzz = arg_l ^ F(arg_r)
```

There is also a claim that if we use a cryptographically strong pseudorandom function for F and apply a Feistel round at least four times, we'll get a cryptographically strong pseudorandom permutation.

This is like a miracle: we take a function that produces random garbage based on data, insert it into the Feistel network, and we now have a function that produces random garbage based on data, but is invertible!

The Feistel network is at the heart of several data encryption algorithms.
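A minimal sketch of a four-round Feistel permutation over 64-bit values, using a murmur-style mix as the keyed round function F. This only illustrates the construction; in a tool like the one described here, such a permutation would be applied within each size class, and the round function would differ:

```c++
#include <cstdint>
#include <iostream>

/// Keyed round function: any function works; the Feistel structure
/// makes the overall transformation one-to-one regardless.
uint64_t F(uint32_t half, uint64_t key, int round)
{
    uint64_t x = half ^ key ^ static_cast<uint64_t>(round);
    x ^= x >> 33;
    x *= 0xff51afd7ed558ccdULL;
    x ^= x >> 33;
    return x;
}

uint64_t feistelEncrypt(uint64_t v, uint64_t key)
{
    uint32_t l = static_cast<uint32_t>(v >> 32);
    uint32_t r = static_cast<uint32_t>(v);
    for (int round = 0; round < 4; ++round)
    {
        uint32_t new_r = l ^ static_cast<uint32_t>(F(r, key, round));
        l = r;          /// the right half replaces the left
        r = new_r;      /// left ^ F(right) becomes the new right
    }
    return (static_cast<uint64_t>(l) << 32) | r;
}

/// The inverse simply runs the rounds backwards with the same F.
uint64_t feistelDecrypt(uint64_t v, uint64_t key)
{
    uint32_t l = static_cast<uint32_t>(v >> 32);
    uint32_t r = static_cast<uint32_t>(v);
    for (int round = 3; round >= 0; --round)
    {
        uint32_t old_r = l;
        uint32_t old_l = r ^ static_cast<uint32_t>(F(old_r, key, round));
        l = old_l;
        r = old_r;
    }
    return (static_cast<uint64_t>(l) << 32) | r;
}

int main()
{
    uint64_t key = 0xDEADBEEF;
    uint64_t v = 42;
    uint64_t enc = feistelEncrypt(v, key);
    std::cout << enc << ' ' << (feistelDecrypt(enc, key) == v) << '\n';  /// ... 1
}
```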
What we're going to do is something like encryption, only it's really bad. There are two reasons for this:

1. We are encrypting individual values independently and in the same way, similar to the Electronic Codebook mode of operation.
2. We are storing information about the order of magnitude (the nearest power of two) and the sign of the value, which means that some values do not change at all.

This way, we can obfuscate numeric fields while preserving the properties we need. For example, after using LZ4, the compression ratio should remain approximately the same because the duplicate values in the source data will be repeated in the converted data and at the same distances from each other.

### Markov models

Text models are used for data compression, predictive input, speech recognition, and random string generation. A text model is a probability distribution of all possible strings. Let's say we have an imaginary probability distribution of the texts of all the books that humanity could ever write. To generate a string, we just take a random value with this distribution and return the resulting string (a random book that humanity could write). But how do we find out the probability distribution of all possible strings?

First, this would require too much information. There are 256^10 possible strings that are 10 bytes in length, and it would take quite a lot of memory to explicitly write a table with the probability of each string. Second, we don't have enough statistics to accurately assess the distribution.

This is why we use a probability distribution obtained from rough statistics as the text model. For example, we could calculate the probability of each letter occurring in the text and then generate strings by selecting each next letter with the same probability. This primitive model works, but the strings are still very unnatural.

To improve the model slightly, we could also make use of the conditional probability of a letter's occurrence if it is preceded by N specific letters. N is a pre-set constant. Let's say N = 5, and we are calculating the probability of the letter "e" occurring after the letters "compr". This text model is called an order-N Markov model.

```
P(cata | cat) = 0.8
P(catb | cat) = 0.05
P(catc | cat) = 0.1
...
```

Let's look at how Markov models work on [Hay Kranen's website](https://projects.haykranen.nl/markov/demo/). Unlike LSTM neural networks, the models only have enough memory for a small context of fixed length N, so they generate funny, nonsensical texts. Markov models are also used in primitive methods for generating spam, and the generated texts can be easily distinguished from real ones by counting statistics that don't fit the model. There is one advantage: Markov models work much faster than neural networks, which is exactly what we need.

Example for Title (our examples are in Turkish because of the data used):
```
Hyunday Butter'dan anket shluha — Politika head manşetleri | STALKER BOXER Çiftede book — Yanudistkarışmanlı Mı Kanal | League el Digitalika Haberler Haberleri — Haberlerisi — Hotels with Centry'ler Neden babah.com
```
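For illustration, here is roughly how such a model can be trained and sampled. This is a minimal sketch of an order-N byte-level model with no smoothing and no fallback to lower orders, both of which the real tool needs:

```c++
#include <cstdint>
#include <iostream>
#include <map>
#include <random>
#include <string>

/// An order-N byte-level Markov model: counts of the next byte for each
/// context of the previous N bytes, sampled proportionally to the counts.
struct MarkovModel
{
    size_t order = 3;
    std::map<std::string, std::map<char, uint64_t>> counts;

    void train(const std::string & text)
    {
        for (size_t i = order; i < text.size(); ++i)
            ++counts[text.substr(i - order, order)][text[i]];
    }

    /// `seed` must be at least `order` bytes long.
    std::string generate(std::string seed, size_t length, std::mt19937 & rng)
    {
        while (seed.size() < length)
        {
            auto it = counts.find(seed.substr(seed.size() - order));
            if (it == counts.end())
                break;  /// unseen context; the real tool falls back to order N-1
            uint64_t total = 0;
            for (const auto & [byte, count] : it->second)
                total += count;
            uint64_t pick = std::uniform_int_distribution<uint64_t>(0, total - 1)(rng);
            for (const auto & [byte, count] : it->second)
            {
                if (pick < count)
                {
                    seed += byte;
                    break;
                }
                pick -= count;
            }
        }
        return seed;
    }
};

int main()
{
    MarkovModel model;
    model.train("the cat sat on the mat. the cat ran after the rat.");
    std::mt19937 rng(42);
    std::cout << model.generate("the", 40, rng) << '\n';
}
```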
We can calculate statistics from the source data, create a Markov model, and generate new data. Note that the model needs smoothing to avoid disclosing information about rare combinations in the source data, but this is not a problem. We use a combination of models from order 0 to N: if statistics are insufficient for order N, the order N−1 model is used instead.

But we still want to preserve the cardinality of the data. In other words, if the source data had 123456 unique URL values, the result should have approximately the same number of unique values. We can use a deterministically initialized random number generator to achieve this. The easiest way is to use a hash function and apply it to the original value, so we get a pseudorandom result that is explicitly determined by the original value.

Another requirement is that the source data may have many different URLs that start with the same prefix but aren't identical. For example: `https://www.clickhouse.com/images/cats/?id=xxxxxx`. We want the result to also have URLs that all start with the same prefix, but a different one. For example: `http://ftp.google.kz/cgi-bin/index.phtml?item=xxxxxx`. As a random number generator for generating the next character with the Markov model, we take a hash function of a moving window of 8 bytes at the current position (instead of hashing the entire string):
```
https://www.clickhouse.com/images/cats/?id=12345
                           ^^^^^^^^

distribution: [aaaa][b][cc][dddd][e][ff][ggggg][h]...
hash("images/c") % total_count: ^
```
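A minimal sketch of that seeding trick; the hash function, the mixing, and the window size are illustrative rather than the tool's actual choices:

```c++
#include <cstdint>
#include <iostream>
#include <string>

/// Hash of the last (up to) 8 bytes before `pos`, mixed with the key.
uint64_t windowHash(const std::string & s, size_t pos, uint64_t key)
{
    uint64_t x = key;
    size_t begin = pos >= 8 ? pos - 8 : 0;
    for (size_t i = begin; i < pos; ++i)
        x = (x ^ static_cast<uint8_t>(s[i])) * 0x100000001b3ULL;  /// FNV-1a-style step
    return x;
}

int main()
{
    /// Two strings whose last 8 bytes coincide produce the same hash, so
    /// generation continues identically: equal prefixes stay equal, and
    /// the outputs only diverge where the inputs diverge.
    std::string a = "https://www.clickhouse.com/images/c";
    std::string b = "http://other.example/images/c";
    std::cout << (windowHash(a, a.size(), 42) == windowHash(b, b.size(), 42)) << '\n';  /// 1
}
```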
It turns out to be exactly what we need. Here's an example of page titles:
```
PhotoFunia - Haber7 - Have mükemment.net Oynamak içinde şaşıracak haber, Oyunu Oynanılmaz • apród.hu kínálatában - RT Arabic
PhotoFunia - Kinobar.Net - apród: Ingyenes | Posti
PhotoFunia - Peg Perfeo - Castika, Sıradışı Deniz Lokoning Your Code, sire Eminema.tv/
PhotoFunia - TUT.BY - Your Ayakkanın ve Son Dakika Spor,
PhotoFunia - big film izle, Del Meireles offilim, Samsung DealeXtreme Değerler NEWSru.com.tv, Smotri.com Mobile yapmak Okey
PhotoFunia 5 | Galaxy, gt, după ce anal bilgi yarak Ceza RE050A V-Stranç
PhotoFunia :: Miami olacaksını yerel Haberler Oyun Young video
PhotoFunia Monstelli'nin En İyi kisa.com.tr –Star Thunder Ekranı
PhotoFunia Seks - Politika,Ekonomi,Spor GTA SANAYİ VE
PhotoFunia Taker-Rating Star TV Resmi Söylenen Yatağa każdy dzież wierzchnie
PhotoFunia TourIndex.Marketime oyunu Oyna Geldolları Mynet Spor,Magazin,Haberler yerel Haberleri ve Solvia, korkusuz Ev SahneTv
PhotoFunia todo in the Gratis Perky Parti'nin yapıyı by fotogram
PhotoFunian Dünyasın takımız halles en kulları - TEZ
```
## Results

After trying four methods, I got so tired of this problem that it was time to just choose something, make it into a usable tool, and announce the solution. I chose the solution that uses random permutations and Markov models parametrized by a key. It is implemented as the `clickhouse-obfuscator` program, which is very easy to use. The input is a table dump in any supported format (such as CSV or JSONEachRow), and the command-line parameters specify the table structure (column names and types) and the secret key (any string, which you can forget immediately after use). The output is the same number of rows of obfuscated data.

The program is installed with `clickhouse-client`, has no dependencies, and works on almost any flavor of Linux. You can apply it to any database dump, not just ClickHouse. For instance, you can generate test data from MySQL or PostgreSQL databases or create development databases that are similar to your production databases.

```bash
clickhouse-obfuscator \
    --seed "$(head -c16 /dev/urandom | base64)" \
    --input-format TSV --output-format TSV \
    --structure 'CounterID UInt32, URLDomain String, URL String, SearchPhrase String, Title String' \
    < table.tsv > result.tsv
```

```bash
clickhouse-obfuscator --help
```

Of course, everything isn't so cut and dried, because data transformed by this program is almost completely reversible. The question is whether it is possible to perform the reverse transformation without knowing the key. If the transformation used a cryptographic algorithm, this operation would be as difficult as a brute-force search. Although the transformation uses some cryptographic primitives, they are not used in the correct way, and the data is susceptible to certain methods of analysis. To avoid problems, these issues are covered in the documentation for the program (access it using `--help`).

In the end, we transformed the data set we needed [for functional and performance testing](https://clickhouse.com/docs/en/getting-started/example-datasets/metrica/) and received approval from our data security team to publish it.

Our developers and members of our community use this data for real performance testing when optimizing algorithms inside ClickHouse. Third-party users can provide us with their obfuscated data so that we can make ClickHouse even faster for them. We also released an independent open benchmark for hardware and cloud providers on top of this data: [https://benchmark.clickhouse.com/](https://benchmark.clickhouse.com/)

diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index f649e81c50a..118610e4dcd 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -35,10 +35,6 @@ target_link_libraries(clickhouse-odbc-bridge PRIVATE set_target_properties(clickhouse-odbc-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..)
target_compile_options (clickhouse-odbc-bridge PRIVATE -Wno-reserved-id-macro -Wno-keyword-macro) -if (USE_GDB_ADD_INDEX) - add_custom_command(TARGET clickhouse-odbc-bridge POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} ../clickhouse-odbc-bridge COMMENT "Adding .gdb-index to clickhouse-odbc-bridge" VERBATIM) -endif() - if (SPLIT_DEBUG_SYMBOLS) clickhouse_split_debug_symbols(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR} BINARY_PATH ../clickhouse-odbc-bridge) else() diff --git a/programs/odbc-bridge/ColumnInfoHandler.cpp b/programs/odbc-bridge/ColumnInfoHandler.cpp index 6e93246e59a..147ba43a51d 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.cpp +++ b/programs/odbc-bridge/ColumnInfoHandler.cpp @@ -30,7 +30,7 @@ namespace DB namespace ErrorCodes { - extern const int LOGICAL_ERROR; + extern const int UNKNOWN_TABLE; extern const int BAD_ARGUMENTS; } @@ -180,8 +180,19 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ columns.emplace_back(column_name, std::move(column_type)); } + /// Usually this should not happen, since if the table does not + /// exist, the call should succeed. + /// However, it is sometimes possible, because internally there are two + /// queries in the ClickHouse ODBC bridge: + /// - system.tables + /// - system.columns + /// And if the table is removed between these two queries, + /// there will be no columns. + /// + /// Also, system.columns can sometimes return an empty result because of + /// the cached value of the total number of tables to scan. if (columns.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Columns definition was not returned"); + throw Exception(ErrorCodes::UNKNOWN_TABLE, "Columns definition was not returned"); WriteBufferFromHTTPServerResponse out( response, diff --git a/programs/self-extracting/CMakeLists.txt b/programs/self-extracting/CMakeLists.txt index 2cc26926b38..f3ff0bbcd78 100644 --- a/programs/self-extracting/CMakeLists.txt +++ b/programs/self-extracting/CMakeLists.txt @@ -4,10 +4,10 @@ if (NOT( AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL CMAKE_SYSTEM_PROCESSOR ) ) - set (COMPRESSOR "${CMAKE_BINARY_DIR}/native/utils/self-extracting-executable/pre_compressor") - set (DECOMPRESSOR "--decompressor=${CMAKE_BINARY_DIR}/utils/self-extracting-executable/decompressor") + set (COMPRESSOR "${PROJECT_BINARY_DIR}/native/utils/self-extracting-executable/pre_compressor") + set (DECOMPRESSOR "--decompressor=${PROJECT_BINARY_DIR}/utils/self-extracting-executable/decompressor") else () - set (COMPRESSOR "${CMAKE_BINARY_DIR}/utils/self-extracting-executable/compressor") + set (COMPRESSOR "${PROJECT_BINARY_DIR}/utils/self-extracting-executable/compressor") endif () add_custom_target (self-extracting ALL diff --git a/programs/server/MetricsTransmitter.cpp b/programs/server/MetricsTransmitter.cpp index 2f28f0a1d16..ae9fa5ecc2c 100644 --- a/programs/server/MetricsTransmitter.cpp +++ b/programs/server/MetricsTransmitter.cpp @@ -87,7 +87,7 @@ void MetricsTransmitter::transmit(std::vector & prev_count if (send_events) { - for (size_t i = 0, end = ProfileEvents::end(); i < end; ++i) + for (ProfileEvents::Event i = ProfileEvents::Event(0), end = ProfileEvents::end(); i < end; ++i) { const auto counter = ProfileEvents::global_counters[i].load(std::memory_order_relaxed); const auto counter_increment = counter - prev_counters[i]; @@ -100,7 +100,7 @@ void MetricsTransmitter::transmit(std::vector & prev_count if (send_events_cumulative) { - for (size_t i = 0, end = ProfileEvents::end();
i < end; ++i) + for (ProfileEvents::Event i = ProfileEvents::Event(0), end = ProfileEvents::end(); i < end; ++i) { const auto counter = ProfileEvents::global_counters[i].load(std::memory_order_relaxed); std::string key{ProfileEvents::getName(static_cast(i))}; @@ -110,7 +110,7 @@ void MetricsTransmitter::transmit(std::vector & prev_count if (send_metrics) { - for (size_t i = 0, end = CurrentMetrics::end(); i < end; ++i) + for (CurrentMetrics::Metric i = CurrentMetrics::Metric(0), end = CurrentMetrics::end(); i < end; ++i) { const auto value = CurrentMetrics::values[i].load(std::memory_order_relaxed); diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 1486a51c710..03ada89e86f 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -40,10 +39,13 @@ #include #include #include +#include +#include +#include #include #include #include -#include +#include #include #include #include @@ -66,7 +68,6 @@ #include #include #include -#include #include #include #include @@ -99,9 +100,7 @@ #include "config_version.h" #if defined(OS_LINUX) -# include # include -# include # include # include # include @@ -109,7 +108,6 @@ #endif #if USE_SSL -# include # include #endif @@ -135,6 +133,7 @@ namespace CurrentMetrics extern const Metric Revision; extern const Metric VersionInteger; extern const Metric MemoryTracking; + extern const Metric MergesMutationsMemoryTracking; extern const Metric MaxDDLEntryID; extern const Metric MaxPushedDDLEntryID; } @@ -204,40 +203,6 @@ int mainEntryClickHouseServer(int argc, char ** argv) } } - -namespace -{ - -size_t waitServersToFinish(std::vector & servers, size_t seconds_to_wait) -{ - const size_t sleep_max_ms = 1000 * seconds_to_wait; - const size_t sleep_one_ms = 100; - size_t sleep_current_ms = 0; - size_t current_connections = 0; - for (;;) - { - current_connections = 0; - - for (auto & server : servers) - { - server.stop(); - current_connections += server.currentConnections(); - } - - if (!current_connections) - break; - - sleep_current_ms += sleep_one_ms; - if (sleep_current_ms < sleep_max_ms) - std::this_thread::sleep_for(std::chrono::milliseconds(sleep_one_ms)); - else - break; - } - return current_connections; -} - -} - namespace DB { @@ -248,8 +213,6 @@ namespace ErrorCodes extern const int ARGUMENT_OUT_OF_BOUND; extern const int EXCESSIVE_ELEMENT_IN_CONFIG; extern const int INVALID_CONFIG_PARAMETER; - extern const int FAILED_TO_GETPWUID; - extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA; extern const int NETWORK_ERROR; extern const int CORRUPTED_DATA; } @@ -265,54 +228,6 @@ static std::string getCanonicalPath(std::string && path) return std::move(path); } -static std::string getUserName(uid_t user_id) -{ - /// Try to convert user id into user name. 
- auto buffer_size = sysconf(_SC_GETPW_R_SIZE_MAX); - if (buffer_size <= 0) - buffer_size = 1024; - std::string buffer; - buffer.reserve(buffer_size); - - struct passwd passwd_entry; - struct passwd * result = nullptr; - const auto error = getpwuid_r(user_id, &passwd_entry, buffer.data(), buffer_size, &result); - - if (error) - throwFromErrno("Failed to find user name for " + toString(user_id), ErrorCodes::FAILED_TO_GETPWUID, error); - else if (result) - return result->pw_name; - return toString(user_id); -} - -Poco::Net::SocketAddress makeSocketAddress(const std::string & host, UInt16 port, Poco::Logger * log) -{ - Poco::Net::SocketAddress socket_address; - try - { - socket_address = Poco::Net::SocketAddress(host, port); - } - catch (const Poco::Net::DNSException & e) - { - const auto code = e.code(); - if (code == EAI_FAMILY -#if defined(EAI_ADDRFAMILY) - || code == EAI_ADDRFAMILY -#endif - ) - { - LOG_ERROR(log, "Cannot resolve listen_host ({}), error {}: {}. " - "If it is an IPv6 address and your host has disabled IPv6, then consider to " - "specify IPv4 address to listen in element of configuration " - "file. Example: 0.0.0.0", - host, e.code(), e.message()); - } - - throw; - } - return socket_address; -} - Poco::Net::SocketAddress Server::socketBindListen( const Poco::Util::AbstractConfiguration & config, Poco::Net::ServerSocket & socket, @@ -773,6 +688,16 @@ try server_settings.max_io_thread_pool_free_size, server_settings.io_thread_pool_queue_size); + BackupsIOThreadPool::initialize( + server_settings.max_backups_io_thread_pool_size, + server_settings.max_backups_io_thread_pool_free_size, + server_settings.backups_io_thread_pool_queue_size); + + OutdatedPartsLoadingThreadPool::initialize( + server_settings.max_outdated_parts_loading_thread_pool_size, + 0, // We don't need any threads one all the parts will be loaded + server_settings.outdated_part_loading_thread_pool_queue_size); + /// Initialize global local cache for remote filesystem. if (config().has("local_cache_for_remote_fs")) { @@ -810,9 +735,8 @@ try } ); - ConnectionCollector::init(global_context, server_settings.max_threads_for_connection_collector); - - bool has_zookeeper = config().has("zookeeper"); + zkutil::validateZooKeeperConfig(config()); + bool has_zookeeper = zkutil::hasZooKeeperConfig(config()); zkutil::ZooKeeperNodeCache main_config_zk_node_cache([&] { return global_context->getZooKeeper(); }); zkutil::EventPtr main_config_zk_changed_event = std::make_shared(); @@ -954,30 +878,13 @@ try std::string default_database = server_settings.default_database.toString(); /// Check that the process user id matches the owner of the data. 
- const auto effective_user_id = geteuid(); - struct stat statbuf; - if (stat(path_str.c_str(), &statbuf) == 0 && effective_user_id != statbuf.st_uid) - { - const auto effective_user = getUserName(effective_user_id); - const auto data_owner = getUserName(statbuf.st_uid); - std::string message = "Effective user of the process (" + effective_user + - ") does not match the owner of the data (" + data_owner + ")."; - if (effective_user_id == 0) - { - message += " Run under 'sudo -u " + data_owner + "'."; - throw Exception::createDeprecated(message, ErrorCodes::MISMATCHING_USERS_FOR_PROCESS_AND_DATA); - } - else - { - global_context->addWarningMessage(message); - } - } + assertProcessUserMatchesDataOwner(path_str, [&](const std::string & message){ global_context->addWarningMessage(message); }); global_context->setPath(path_str); StatusFile status{path / "status", StatusFile::write_full_info}; - DB::ServerUUID::load(path / "uuid", log); + ServerUUID::load(path / "uuid", log); /// Try to increase limit on number of open files. { @@ -1188,12 +1095,12 @@ try { Settings::checkNoSettingNamesAtTopLevel(*config, config_path); - ServerSettings server_settings; - server_settings.loadSettingsFromConfig(*config); + ServerSettings server_settings_; + server_settings_.loadSettingsFromConfig(*config); - size_t max_server_memory_usage = server_settings.max_server_memory_usage; + size_t max_server_memory_usage = server_settings_.max_server_memory_usage; - double max_server_memory_usage_to_ram_ratio = server_settings.max_server_memory_usage_to_ram_ratio; + double max_server_memory_usage_to_ram_ratio = server_settings_.max_server_memory_usage_to_ram_ratio; size_t default_max_server_memory_usage = static_cast(memory_amount * max_server_memory_usage_to_ram_ratio); if (max_server_memory_usage == 0) @@ -1221,7 +1128,26 @@ try total_memory_tracker.setDescription("(total)"); total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking); - total_memory_tracker.setAllowUseJemallocMemory(server_settings.allow_use_jemalloc_memory); + size_t merges_mutations_memory_usage_soft_limit = server_settings_.merges_mutations_memory_usage_soft_limit; + + size_t default_merges_mutations_server_memory_usage = static_cast(memory_amount * server_settings_.merges_mutations_memory_usage_to_ram_ratio); + if (merges_mutations_memory_usage_soft_limit == 0 || merges_mutations_memory_usage_soft_limit > default_merges_mutations_server_memory_usage) + { + merges_mutations_memory_usage_soft_limit = default_merges_mutations_server_memory_usage; + LOG_WARNING(log, "Setting merges_mutations_memory_usage_soft_limit was set to {}" + " ({} available * {:.2f} merges_mutations_memory_usage_to_ram_ratio)", + formatReadableSizeWithBinarySuffix(merges_mutations_memory_usage_soft_limit), + formatReadableSizeWithBinarySuffix(memory_amount), + server_settings_.merges_mutations_memory_usage_to_ram_ratio); + } + + LOG_INFO(log, "Merges and mutations memory limit is set to {}", + formatReadableSizeWithBinarySuffix(merges_mutations_memory_usage_soft_limit)); + background_memory_tracker.setSoftLimit(merges_mutations_memory_usage_soft_limit); + background_memory_tracker.setDescription("(background)"); + background_memory_tracker.setMetric(CurrentMetrics::MergesMutationsMemoryTracking); + + total_memory_tracker.setAllowUseJemallocMemory(server_settings_.allow_use_jemalloc_memory); auto * global_overcommit_tracker = global_context->getGlobalOvercommitTracker(); total_memory_tracker.setOvercommitTracker(global_overcommit_tracker); @@ -1234,28 +1160,33 @@ try 
global_context->setMacros(std::make_unique(*config, "macros", log)); global_context->setExternalAuthenticatorsConfig(*config); - global_context->loadOrReloadDictionaries(*config); - global_context->loadOrReloadUserDefinedExecutableFunctions(*config); + if (global_context->isServerCompletelyStarted()) + { + /// It does not make sense to reload anything before server has started. + /// Moreover, it may break initialization order. + global_context->loadOrReloadDictionaries(*config); + global_context->loadOrReloadUserDefinedExecutableFunctions(*config); + } global_context->setRemoteHostFilter(*config); - global_context->setMaxTableSizeToDrop(server_settings.max_table_size_to_drop); - global_context->setMaxPartitionSizeToDrop(server_settings.max_partition_size_to_drop); + global_context->setMaxTableSizeToDrop(server_settings_.max_table_size_to_drop); + global_context->setMaxPartitionSizeToDrop(server_settings_.max_partition_size_to_drop); ConcurrencyControl::SlotCount concurrent_threads_soft_limit = ConcurrencyControl::Unlimited; - if (server_settings.concurrent_threads_soft_limit_num > 0 && server_settings.concurrent_threads_soft_limit_num < concurrent_threads_soft_limit) - concurrent_threads_soft_limit = server_settings.concurrent_threads_soft_limit_num; - if (server_settings.concurrent_threads_soft_limit_ratio_to_cores > 0) + if (server_settings_.concurrent_threads_soft_limit_num > 0 && server_settings_.concurrent_threads_soft_limit_num < concurrent_threads_soft_limit) + concurrent_threads_soft_limit = server_settings_.concurrent_threads_soft_limit_num; + if (server_settings_.concurrent_threads_soft_limit_ratio_to_cores > 0) { - auto value = server_settings.concurrent_threads_soft_limit_ratio_to_cores * std::thread::hardware_concurrency(); + auto value = server_settings_.concurrent_threads_soft_limit_ratio_to_cores * std::thread::hardware_concurrency(); if (value > 0 && value < concurrent_threads_soft_limit) concurrent_threads_soft_limit = value; } ConcurrencyControl::instance().setMaxConcurrency(concurrent_threads_soft_limit); - global_context->getProcessList().setMaxSize(server_settings.max_concurrent_queries); - global_context->getProcessList().setMaxInsertQueriesAmount(server_settings.max_concurrent_insert_queries); - global_context->getProcessList().setMaxSelectQueriesAmount(server_settings.max_concurrent_select_queries); + global_context->getProcessList().setMaxSize(server_settings_.max_concurrent_queries); + global_context->getProcessList().setMaxInsertQueriesAmount(server_settings_.max_concurrent_insert_queries); + global_context->getProcessList().setMaxSelectQueriesAmount(server_settings_.max_concurrent_select_queries); if (config->has("keeper_server")) global_context->updateKeeperConfiguration(*config); @@ -1266,34 +1197,34 @@ try /// This is done for backward compatibility. 
if (global_context->areBackgroundExecutorsInitialized()) { - auto new_pool_size = server_settings.background_pool_size; - auto new_ratio = server_settings.background_merges_mutations_concurrency_ratio; - global_context->getMergeMutateExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size * new_ratio); - global_context->getMergeMutateExecutor()->updateSchedulingPolicy(server_settings.background_merges_mutations_scheduling_policy.toString()); + auto new_pool_size = server_settings_.background_pool_size; + auto new_ratio = server_settings_.background_merges_mutations_concurrency_ratio; + global_context->getMergeMutateExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, static_cast(new_pool_size * new_ratio)); + global_context->getMergeMutateExecutor()->updateSchedulingPolicy(server_settings_.background_merges_mutations_scheduling_policy.toString()); } if (global_context->areBackgroundExecutorsInitialized()) { - auto new_pool_size = server_settings.background_move_pool_size; + auto new_pool_size = server_settings_.background_move_pool_size; global_context->getMovesExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size); } if (global_context->areBackgroundExecutorsInitialized()) { - auto new_pool_size = server_settings.background_fetches_pool_size; + auto new_pool_size = server_settings_.background_fetches_pool_size; global_context->getFetchesExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size); } if (global_context->areBackgroundExecutorsInitialized()) { - auto new_pool_size = server_settings.background_common_pool_size; + auto new_pool_size = server_settings_.background_common_pool_size; global_context->getCommonExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size); } - global_context->getBufferFlushSchedulePool().increaseThreadsCount(server_settings.background_buffer_flush_schedule_pool_size); - global_context->getSchedulePool().increaseThreadsCount(server_settings.background_schedule_pool_size); - global_context->getMessageBrokerSchedulePool().increaseThreadsCount(server_settings.background_message_broker_schedule_pool_size); - global_context->getDistributedSchedulePool().increaseThreadsCount(server_settings.background_distributed_schedule_pool_size); + global_context->getBufferFlushSchedulePool().increaseThreadsCount(server_settings_.background_buffer_flush_schedule_pool_size); + global_context->getSchedulePool().increaseThreadsCount(server_settings_.background_schedule_pool_size); + global_context->getMessageBrokerSchedulePool().increaseThreadsCount(server_settings_.background_message_broker_schedule_pool_size); + global_context->getDistributedSchedulePool().increaseThreadsCount(server_settings_.background_distributed_schedule_pool_size); if (config->has("resources")) { @@ -1304,7 +1235,7 @@ try { /// We do not load ZooKeeper configuration on the first config loading /// because TestKeeper server is not started yet. 
- if (config->has("zookeeper")) + if (zkutil::hasZooKeeperConfig(*config)) global_context->reloadZooKeeperIfChanged(config); global_context->reloadAuxiliaryZooKeepersConfigIfChanged(config); @@ -1366,8 +1297,8 @@ try { Poco::Net::ServerSocket socket; auto address = socketBindListen(config(), socket, listen_host, port); - socket.setReceiveTimeout(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC)); - socket.setSendTimeout(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC)); + socket.setReceiveTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0)); + socket.setSendTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0)); return ProtocolServerAdapter( listen_host, port_name, @@ -1389,8 +1320,8 @@ try #if USE_SSL Poco::Net::SecureServerSocket socket; auto address = socketBindListen(config(), socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC)); - socket.setSendTimeout(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC)); + socket.setReceiveTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0)); + socket.setSendTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0)); return ProtocolServerAdapter( listen_host, secure_port_name, @@ -1453,7 +1384,7 @@ try LOG_INFO(log, "Uncompressed cache size was lowered to {} because the system has low amount of memory", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); } - global_context->setUncompressedCache(uncompressed_cache_size, uncompressed_cache_policy); + global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size); /// Load global settings from default_profile and system_profile. global_context->setDefaultProfiles(config()); @@ -1478,7 +1409,7 @@ try LOG_INFO(log, "Mark cache size was lowered to {} because the system has low amount of memory", formatReadableSizeWithBinarySuffix(mark_cache_size)); } - global_context->setMarkCache(mark_cache_size, mark_cache_policy); + global_context->setMarkCache(mark_cache_policy, mark_cache_size); if (server_settings.index_uncompressed_cache_size) global_context->setIndexUncompressedCache(server_settings.index_uncompressed_cache_size); @@ -1843,19 +1774,19 @@ try } if (current_connections) - LOG_INFO(log, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections); + LOG_WARNING(log, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections); else LOG_INFO(log, "Closed all listening sockets."); /// Killing remaining queries. - if (server_settings.shutdown_wait_unfinished_queries) + if (!server_settings.shutdown_wait_unfinished_queries) global_context->getProcessList().killAllQueries(); if (current_connections) current_connections = waitServersToFinish(servers, config().getInt("shutdown_wait_unfinished", 5)); if (current_connections) - LOG_INFO(log, "Closed connections. But {} remain." + LOG_WARNING(log, "Closed connections. But {} remain." 
" Tip: To increase wait time add to config: 60", current_connections); else LOG_INFO(log, "Closed connections."); @@ -1871,7 +1802,7 @@ try /// Dump coverage here, because std::atexit callback would not be called. dumpCoverageReportIfPossible(); - LOG_INFO(log, "Will shutdown forcefully."); + LOG_WARNING(log, "Will shutdown forcefully."); safeExit(0); } }); diff --git a/programs/server/config.xml b/programs/server/config.xml index 85cb299e188..5b69d9f6283 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -348,10 +348,6 @@ 16 --> - - - 0.9 @@ -480,6 +476,14 @@ 1 1 + + sha256_password + + + 12 + false + + + 600 default - - + + SQL_ @@ -1126,6 +1137,16 @@ event_date + INTERVAL 30 DAY DELETE --> + + @@ -1287,10 +1308,14 @@ - *_dictionary.xml + *_dictionary.*ml - *_function.xml + *_function.*ml + + + - + - - + + diff --git a/programs/server/dashboard.html b/programs/server/dashboard.html index fa940e01ad5..97b35ec97c4 100644 --- a/programs/server/dashboard.html +++ b/programs/server/dashboard.html @@ -12,7 +12,7 @@ --chart-background: white; --shadow-color: rgba(0, 0, 0, 0.25); --input-shadow-color: rgba(0, 255, 0, 1); - --error-color: red; + --error-color: white; --legend-background: rgba(255, 255, 255, 0.75); --title-color: #666; --text-color: black; @@ -76,7 +76,7 @@ #charts { height: 100%; - display: none; + display: flex; flex-flow: row wrap; gap: 1rem; } @@ -121,6 +121,19 @@ .unconnected #url { width: 100%; } + .unconnected #button-options { + display: grid; + grid-auto-flow: column; + grid-auto-columns: 1fr; + gap: 0.3rem; + } + .unconnected #user { + margin-right: 0; + width: auto; + } + .unconnected #password { + width: auto; + } #user { margin-right: 0.25rem; width: 50%; @@ -136,7 +149,15 @@ width: 100%; display: flex; - flex-flow: row nowrap; + flex-flow: row nowrap; + } + .unconnected #username-password { + width: 100%; + + gap: 0.3rem; + + display: grid; + grid-template-columns: 1fr 1fr; } .inputs #chart-params { @@ -177,7 +198,10 @@ .themes { float: right; font-size: 20pt; - margin-bottom: 1rem; + gap: 0.3rem; + + display: flex; + justify-content: center; } #toggle-dark, #toggle-light { @@ -206,6 +230,8 @@ } #add, #reload { + padding: .25rem 0.5rem; + text-align: center; font-weight: bold; user-select: none; cursor: pointer; @@ -214,16 +240,24 @@ background: var(--new-chart-background-color); color: var(--new-chart-text-color); float: right; - margin-right: 0 !important; - margin-left: 1rem; + margin-right: 1rem !important; + margin-left: 0rem; margin-bottom: 1rem; } + /* .unconnected #reload { + margin-left: 3px; + } */ + #add:hover, #reload:hover { background: var(--button-background-color); } #auth-error { + align-self: center; + width: 60%; + padding: .5rem; + color: var(--error-color); display: flex; @@ -352,15 +386,15 @@ -
      -1454460241 16 Bruce Willis bwillisf@bluehost.com Male 239.182.219.189 3573030625927601 Brazil 239100.65 -1454461065 17 Emily Andrews eandrewsg@cornell.edu Female 29.231.180.172 30271790537626 Russia 4/13/1990 116800.65 Food Chemist -1454517864 18 Stephen Wallace swallaceh@netvibes.com Male 152.49.213.62 5433943468526428 Ukraine 1/15/1978 248877.99 Account Representative I -1454499954 19 Clarence Lawson clawsoni@vkontakte.ru Male 107.175.15.152 3544052814080964 Russia 177122.99 -1454495436 20 Rebecca Bell rbellj@bandcamp.com Female 172.215.104.127 China 137251.19 -1454505444 21 Diane Stevens dstevensk@cnet.com Female 141.243.73.164 Russia 6/5/1985 87978.22 Food Chemist œ∑´®†¥¨ˆøπ“‘ -1454523505 22 Lawrence Ramos lramosl@sourceforge.net Male 46.72.4.6 3537473810855655 Tanzania 131283.64 -1454525455 23 Gregory Barnes gbarnesm@google.ru Male 220.22.114.145 3538432455620641 Tunisia 1/23/1971 182233.49 Senior Sales Associate 사회과학원 어학연구소 -1454472340 24 Michelle Ellis mellisn@timesonline.co.uk Female 239.81.215.135 3547383558025965 Tanzania 6/5/1964 278001.46 Tax Accountant -1454518347 25 Rachel Perkins rperkinso@lulu.com Female 90.173.28.95 633313663891003209 Russia 176178.75 -1454486554 26 Anthony Lawrence alawrencep@miitbeian.gov.cn Male 121.211.242.99 564182969714151470 Japan 12/10/1979 170085.81 Electrical Engineer -1454488886 27 Henry Henry hhenryq@godaddy.com Male 191.88.236.116 4905730021217853521 China 9/22/1995 284300.15 Nuclear Power Engineer -1454519352 28 Samuel Hunter shunterr@instagram.com Male 72.190.230.173 5002353797389897 Brazil 9/21/1968 108950.24 Environmental Tech -1454469374 29 Jacqueline Holmes jholmess@ustream.tv Female 47.141.224.95 3555934842115316 United States 247939.52 ̗̺͖̹̯͓Ṯ̤͍̥͇͈h̲́e͏͓̼̗̙̼̣͔ ͇̜̱̠͓͍ͅN͕͠e̗̱z̘̝̜̺͙p̤̺̹͍̯͚e̠̻̠͜r̨̤͍̺̖͔̖̖d̠̟̭̬̝͟i̦͖̩͓͔̤a̠̗̬͉̙n͚͜ ̻̞̰͚ͅh̵͉i̳̞v̢͇ḙ͎͟-҉̭̩̼͔m̤̭̫i͕͇̝̦n̗͙ḍ̟ ̯̲͕͞ǫ̟̯̰̲͙̻̝f ̪̰̰̗̖̭̘͘c̦͍̲̞͍̩̙ḥ͚a̮͎̟̙͜ơ̩̹͎s̤.̝̝ ҉Z̡̖̜͖̰̣͉̜a͖̰͙̬͡l̲̫̳͍̩g̡̟̼̱͚̞̬ͅo̗͜.̟ -1454535469 30 Annie Torres atorrest@ning.com Female 202.94.67.27 3530389861801215 Nigeria 5/20/1958 118310.72 Electrical Engineer -1E+02 -1454526588 31 Antonio Berry aberryu@ow.ly Male 5.82.180.4 Thailand 135007.96 -1454533547 32 Nicole Martinez nmartinezv@oakley.com Female 46.32.149.87 United States 149720.75 Z̮̞̠͙͔ͅḀ̗̞͈̻̗Ḷ͙͎̯̹̞͓G̻O̭̗̮ -1454459459 33 Christina Mason cmasonw@nydailynews.com Female 74.214.22.120 Greece 7/21/1986 242593.85 Senior Sales Associate -1454541103 34 Margaret Barnes mbarnesx@angelfire.com Female 133.178.126.244 3582552005871223 South Africa 11/13/1969 109644.23 Human Resources Assistant II -1454487881 35 Melissa Kelly mkellyy@unblog.fr Female 179.132.207.169 6374648559206801 Indonesia 2/6/1968 45639.62 General Manager Ṱ̺̺̕o͞ ̷i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤ ̖t̝͕̳̣̻̪͞h̼͓̲̦̳̘̲e͇̣̰̦̬͎ ̢̼̻̱̘h͚͎͙̜̣̲ͅi̦̲̣̰̤v̻͍e̺̭̳̪̰-m̢iͅn̖̺̞̲̯̰d̵̼̟͙̩̼̘̳ ̞̥̱̳̭r̛̗̘e͙p͠r̼̞̻̭̗e̺̠̣͟s̘͇̳͍̝͉e͉̥̯̞̲͚̬͜ǹ̬͎͎̟̖͇̤t͍̬̤͓̼̭͘ͅi̪̱n͠g̴͉ ͏͉ͅc̬̟h͡a̫̻̯͘o̫̟̖͍̙̝͉s̗̦̲.̨̹͈̣ -1454484472 36 Betty Carr bcarrz@parallels.com Female 159.201.161.49 France 91370.3 -1E2 -1454532399 37 Dorothy Gomez dgomez10@jiathis.com Female 65.111.200.146 493684876859391834 China 57194.86 -1454538878 38 Kathryn Lane klane11@netlog.com Female 169.141.178.89 5308993357499254 Czech Republic 8/20/1964 67783.73 Paralegal -1454511326 39 Jose Murphy jmurphy12@paypal.com Male 118.85.253.180 4994715164232848 Chile 8/8/1991 134708.82 Nuclear Power Engineer -1454458506 40 Jack Flores jflores13@yolasite.com Male 162.215.65.11 3577342788590928 Argentina 1/28/1958 81685.1 Financial Advisor -1454529124 41 Walter Martinez wmartinez14@spotify.com Male 
165.150.92.96 Somalia 3/8/1972 212105.33 Health Coach I -1454473984 42 Todd Alvarez talvarez15@csmonitor.com Male 59.123.34.76 3557102122317535 Japan 12/19/1999 284728.99 Marketing Assistant -1454488466 43 Amanda Gray agray16@cdbaby.com Female 252.20.193.145 3561501596653859 China 8/28/1967 213410.26 Senior Quality Engineer -1454494415 44 Sharon Simpson ssimpson17@weather.com Female 242.68.147.87 France 9/28/1963 133884.94 Analog Circuit Design manager -1454526201 45 Bonnie Collins bcollins18@list-manage.com Female 132.217.56.27 3540813015762450 Germany 7/21/1986 67661.42 Business Systems Development Analyst -1454474597 46 Deborah Armstrong darmstrong19@addthis.com Female 89.44.11.142 Canada 4/8/1969 111569.22 Quality Control Specialist ⁦test⁧ -1454486980 47 Daniel Mccoy dmccoy1a@skype.com Male 115.85.247.190 3554507990607374 Central African Republic 66260.14 ❤️ 💔 💌 💕 💞 💓 💗 💖 💘 💝 💟 💜 💛 💚 💙 -1454505529 48 Jean Flores jflores1b@samsung.com Female 211.70.131.207 5392903051983005 Nepal 4/6/1990 199100.32 Financial Advisor -1454521849 49 Lisa Snyder lsnyder1c@woothemes.com Female 145.202.177.215 30475362189761 Germany 12/12/1974 210631.91 Safety Technician II   -1454469295 50 Sean Alexander salexander1d@dagondesign.com Male 89.83.147.177 Bosnia and Herzegovina 5/29/1978 256068.38 Senior Financial Analyst -1454481568 51 Ernest Carroll ecarroll1e@dailymail.co.uk Male 194.224.39.215 5100172156945078 Portugal 11/1/1992 100269.36 Dental Hygienist -1454492589 52 Louise Dean ldean1f@tamu.edu Female 109.43.178.48 201996646854139 Ethiopia 173300.37 +1454457660 721 Shirley Williams swilliamsk0@sciencedirect.com 132.137.10.218 5610801309305920 Indonesia 8/13/1978 \N Help Desk Technician +1454457663 785 Daniel Spencer dspencerls@cargocollective.com Male 241.143.186.140 China 12/3/1997 194214.08 Internal Auditor +1454457674 880 Lillian Murray lmurrayof@guardian.co.uk Female 222.252.22.1 201713786459078 Norway 4/16/1981 282503.77 Business Systems Development Analyst +1454457684 852 Carol Patterson cpattersonnn@ycombinator.com Female 244.190.113.241 0604512080706322395 Liberia 5/8/1984 263412.02 Assistant Professor +1454457705 244 Sarah Freeman sfreeman6r@wikimedia.org Female 219.8.22.27 30520943172503 United States 3/25/1958 25806.31 Budget/Accounting Analyst II ⁰⁴⁵ +1454457740 633 Maria Fowler mfowlerhk@chronoengine.com Female 246.85.249.122 3584144503415501 China 11/25/1998 276712.79 Staff Scientist ␣ +1454457782 925 Chris Murphy cmurphypo@nature.com 89.217.243.136 5602220700741429 Russia \N +1454457790 788 Nicholas Butler nbutlerlv@thetimes.co.uk Male 77.38.58.165 3575506969751259 Brazil 2/10/1981 192076.79 Data Coordiator +1454457853 301 Jerry Welch jwelch8c@paginegialle.it Male 141.166.33.218 5602252929753349 Latvia 3/14/1973 28731.89 Software Engineer I 1454457952 53 Ralph Price rprice1g@tmall.com Male 152.6.235.33 4844227560658222 China 8/26/1986 168208.4 Teacher -1454467269 54 George Ferguson gferguson1h@51.la Male 129.108.219.50 3539784298399554 Macedonia 6/26/1971 153238.6 Computer Systems Analyst IV パーティーへ行かないか -1454515393 55 Anna Montgomery amontgomery1i@google.cn Female 80.111.141.47 3586860392406446 China 9/6/1957 92837.5 Software Test Engineer IV 1E2 -1454514049 56 Cheryl Lawrence clawrence1j@ameblo.jp Female 171.155.78.116 Finland 5/7/1985 200827.88 Recruiting Manager -1454459605 57 Willie Palmer wpalmer1k@t-online.de Male 164.107.46.161 4026614769857244 China 8/23/1986 184978.64 Environmental Specialist -1454478957 58 Arthur Berry aberry1l@unc.edu Male 52.42.24.55 3542761473624274 China 
144164.88 -1454519593 59 Patricia Marshall pmarshall1m@dell.com Female 47.108.196.175 China 7/21/1984 69236.54 Environmental Specialist -1454466852 60 Cynthia Richards crichards1n@dailymail.co.uk Female 178.236.66.213 3557986543874466 Brazil 179378 -1454496286 61 David Sanders dsanders1o@fda.gov Male 94.143.190.8 3585745042921822 Mexico 2/15/1963 197445.45 Data Coordiator 0️⃣ 1️⃣ 2️⃣ 3️⃣ 4️⃣ 5️⃣ 6️⃣ 7️⃣ 8️⃣ 9️⃣ 🔟 -1454534081 62 Julia Sullivan jsullivan1p@wisc.edu Female 32.183.154.67 6767624411254094 Bolivia 11/28/1963 118311.39 Electrical Engineer -1454530379 63 Kevin Butler kbutler1q@symantec.com Male 21.88.110.64 3551107057688681 Georgia 12/13/1962 129632.55 Database Administrator III -1454475593 64 Dennis Ross dross1r@parallels.com Male 78.25.77.223 Portugal 5/27/1959 280933.71 Biostatistician II -1454478626 65 Raymond Jacobs rjacobs1s@sohu.com Male 188.52.98.175 5048378563875353 Indonesia 13673.35 -1454532460 66 Steven Pierce spierce1t@usgs.gov Male 230.13.54.19 5100178880451481 Namibia 4/10/1965 152382.69 Analyst Programmer -1454480831 67 Jonathan Ellis jellis1u@g.co Male 125.115.227.203 China 4/5/1991 268468.96 Staff Scientist   -1454460516 68 Rachel Price rprice1v@census.gov Female 89.52.192.105 Indonesia 5/6/1982 234502.16 Payment Adjustment Coordinator -1454492257 69 Harold Olson holson1w@chronoengine.com Male 169.173.35.139 China 7/25/1994 146917.43 Occupational Therapist -1454524497 70 Pamela Wagner pwagner1x@gravatar.com Female 184.97.191.144 5593584893781844 Italy 5/3/1964 253108.75 Automation Specialist I 1;DROP TABLE users -1454537805 71 Stephanie Watkins swatkins1y@rakuten.co.jp 124.183.29.113 30552863095190 Burkina Faso 8/29/1971 \N Physical Therapy Assistant -1454530454 72 John Ortiz jortiz1z@mozilla.org Male 4.70.220.127 5194470971764378 Sweden 2/13/1978 91566.02 Analyst Programmer -1454523864 73 Kimberly Wheeler kwheeler20@imgur.com Female 26.46.50.55 China 11/6/1978 31026.94 Junior Executive -1454470404 74 Kathryn Henderson khenderson21@ask.com Female 218.212.63.68 4936394111685353310 Ukraine 4/11/1985 59413.85 Pharmacist -$1.00 -1454527390 75 Catherine Gibson cgibson22@ebay.com Female 204.84.35.26 5402007176101895 Indonesia 12/20/1984 92315.94 Desktop Support Technician -1454509078 76 Carolyn Nelson cnelson23@tiny.cc Female 64.13.61.211 4844223687165886 Estonia 3/9/1985 179193.6 Social Worker -1454479055 77 Denise Nguyen dnguyen24@ovh.net Female 18.208.48.116 201900233821394 China 121013.48 +1454458004 607 Johnny Owens jowensgu@blogspot.com Male 181.25.18.91 5602239825516409 Indonesia 2/14/1960 169429.76 Health Coach III +1454458010 375 Bruce Gonzales bgonzalesae@studiopress.com Male 19.195.169.187 Sweden 7/4/1993 118244.57 Human Resources Manager "<>?:""{}|_+" +1454458170 744 Heather Richardson hrichardsonkn@twitter.com Female 129.15.137.135 Ukraine 12/26/1980 164117.18 GIS Technical Architect +1454458178 635 Willie Dixon wdixonhm@diigo.com Male 27.245.227.220 Japan 8/29/1992 265321.18 Senior Cost Accountant +1454458242 11 Susan Perkins sperkinsa@patch.com Female 180.85.0.62 3573823609854134 Russia 210001.95 +1454458282 175 Samuel Edwards sedwards4u@businessweek.com Male 60.248.106.175 676249211413011686 Russia 10/15/1986 75886.69 Senior Sales Associate 1454458493 78 Mildred Torres mtorres25@alibaba.com Female 38.102.60.15 6399156779396437 Russia 9/24/1960 166987.55 Paralegal -1454507970 79 Linda Shaw lshaw26@psu.edu Female 188.221.197.229 3557917782902346 Russia 9/30/1987 67211.67 Structural Analysis Engineer -1454540546 80 Anna Hudson ahudson27@gmpg.org Female 
153.84.219.15 Indonesia 9/12/1997 110408.87 VP Marketing -1454536800 81 Albert Pierce apierce28@phoca.cz Male 145.148.40.149 Palestinian Territory 11/4/1955 43019.01 Web Developer III 0/0 -1454542995 82 Carol Franklin cfranklin29@marketwatch.com Female 32.189.30.244 67097647572873744 China 6/5/1978 31572.53 Automation Specialist II -1454506472 83 Carlos Washington cwashington2a@phpbb.com Male 90.239.40.124 67063904960748578 United States 11/4/1970 28853.61 Developer I ❤️ 💔 💌 💕 💞 💓 💗 💖 💘 💝 💟 💜 💛 💚 💙 -1454463081 84 Kathryn Austin kaustin2b@livejournal.com Female 152.193.181.90 Philippines 10/8/1990 131855.43 Nurse Practicioner -1454494358 85 Lillian Gardner lgardner2c@hao123.com Female 189.104.46.70 Russia 10/28/1961 145282.64 Occupational Therapist -1454530407 86 Peter Mendoza pmendoza2d@paypal.com Male 77.225.63.206 3562330687037049 Mexico 12/23/1988 40664.88 Staff Scientist -1454466533 87 Dennis Torres dtorres2e@ask.com Male 199.131.129.105 50188330277167912 Croatia 5/25/1986 265985 Account Representative II 社會科學院語學研究所 -1454463286 88 Timothy Watkins twatkins2f@toplist.cz Male 120.52.182.111 Tunisia 6/24/2000 242129.05 Operator -1454498394 89 Nicole Willis nwillis2g@cmu.edu Female 44.196.120.110 6394724888228638 Indonesia 2/1/1966 258772.36 Physical Therapy Assistant -1454525151 90 Jacqueline Carr jcarr2h@freewebs.com Female 197.40.38.49 201939989746686 China 5/31/1961 100733.44 Civil Engineer (。◕ ∀ ◕。) -1454510656 91 Theresa Gonzalez tgonzalez2i@nih.gov Female 237.106.229.219 Argentina 8/10/1970 47723.61 Product Engineer -1454479785 92 Donald Bradley dbradley2j@latimes.com Male 244.82.249.86 3534114122488321 Indonesia 7/8/2000 105051.77 Tax Accountant -1454512853 93 Katherine Little klittle2k@cyberchimps.com Female 61.43.154.182 30218284989094 Poland 1/20/1990 155597.16 Associate Professor -1454516486 94 Ruth Cooper rcooper2l@apache.org Female 114.82.62.61 Indonesia 7/20/1993 181481.5 Civil Engineer -1454498785 95 Stephen Gutierrez sgutierrez2m@walmart.com Male 134.231.189.30 3560204445825528 Guatemala 8/22/1995 83986.79 Structural Engineer -1454473160 96 Kevin Scott kscott2n@histats.com Male 226.59.43.229 3558997916332270 United States 6/5/1966 130054.63 Graphic Designer ÅÍÎÏ˝ÓÔÒÚÆ☃ -1454540928 97 Steven Williamson swilliamson2o@devhub.com Male 122.216.99.88 France 238119.62 -1454473451 98 Shawn Adams sadams2p@imdb.com Male 148.92.123.202 5893564746795315893 Indonesia 11/10/1959 67749.83 Senior Developer ‫test‫ -1454507278 99 Russell Fields rfields2q@google.ca Male 110.74.199.162 Tanzania 1/2/1994 13268.99 Mechanical Systems Engineer -1454514595 100 Willie Weaver wweaver2r@google.de Male 13.54.121.138 3534023246040472 Mexico 8/21/1970 175694.61 Dental Hygienist ̡͓̞ͅI̗̘̦͝n͇͇͙v̮̫ok̲̫̙͈i̖͙̭̹̠̞n̡̻̮̣̺g̲͈͙̭͙̬͎ ̰t͔̦h̞̲e̢̤ ͍̬̲͖f̴̘͕̣è͖ẹ̥̩l͖͔͚i͓͚̦͠n͖͍̗͓̳̮g͍ ̨o͚̪͡f̘̣̬ ̖̘͖̟͙̮c҉͔̫͖͓͇͖ͅh̵̤̣͚͔á̗̼͕ͅo̼̣̥s̱͈̺̖̦̻͢.̛̖̞̠̫̰ +1454458506 40 Jack Flores jflores13@yolasite.com Male 162.215.65.11 3577342788590928 Argentina 1/28/1958 81685.1 Financial Advisor +1454458536 749 Larry Fields lfieldsks@theguardian.com Male 46.57.123.222 3531208154739438 Yemen 139177.38 Œ„´‰ˇÁ¨ˆØ∏”’ +1454458564 521 Roy Palmer rpalmereg@nsw.gov.au Male 255.242.77.68 3589146577885209 Nepal 8/28/1964 262816.87 Software Test Engineer IV +1454458607 314 James Harvey jharvey8p@npr.org Male 96.88.41.248 3589416270039051 China 211553.57 +1454458706 995 Jose Mccoy jmccoyrm@elpais.com Male 117.37.215.98 560222933605513180 Norway 7/30/1987 275898.37 Graphic Designer +1454458727 835 Sean Castillo scastillon6@altervista.org 211.77.61.195 Portugal 
6/15/1979 \N Quality Control Specialist +1454458739 821 Juan Foster jfosterms@reference.com Male 219.231.170.245 5108759901583907 Portugal 2/16/1969 120076.81 Quality Engineer 1E02 +1454458751 670 Irene Hughes ihughesil@topsy.com Female 154.194.86.224 3536739760978536 Netherlands 6/17/1973 274295.42 Structural Analysis Engineer +1454458801 149 Gregory Edwards gedwards44@icq.com Male 5.204.156.34 3548268624172124 Portugal 2/5/1977 236421.33 Librarian +1454458805 683 Joshua Ramirez jramireziy@liveinternet.ru Male 164.224.133.177 3574998106893089 France 10/24/1987 17658.63 Senior Developer +1454458862 226 James Austin jaustin69@istockphoto.com Male 228.107.68.143 4913037818454290 Russia 25084.49 +1454458909 659 Doris Welch dwelchia@about.com Female 195.125.217.107 3537263234825586 Indonesia 3/31/1995 183928.71 Quality Engineer +1454458914 479 Joseph Gordon jgordonda@trellian.com Male 140.193.192.82 3533495991170988 Indonesia 6/30/1960 262448.45 Health Coach II +1454458932 615 Marie Matthews mmatthewsh2@smugmug.com 8.217.73.21 589312447234085155 Indonesia 8/10/1973 \N Chief Design Engineer +1454458946 379 Martha Simmons msimmonsai@tripadvisor.com Female 8.141.39.185 Russia 9/18/1978 92766.32 Staff Scientist +1454458967 730 Anne Perez aperezk9@freewebs.com Female 208.87.2.91 China 8/18/1966 47293.4 Nuclear Power Engineer ❤️ 💔 💌 💕 💞 💓 💗 💖 💘 💝 💟 💜 💛 💚 💙 +1454458979 426 Lois Green lgreenbt@1688.com 39.174.95.97 5100146457712544 Bulgaria 2/22/1955 \N Health Coach III +1454459038 810 Mark Kelley mkelleymh@blog.com 210.153.220.197 3543227090716355 Poland 5/31/1969 \N Programmer Analyst I +1454459045 475 Richard Howell rhowelld6@springer.com Male 176.182.155.97 Central African Republic 138775.31 ‪‪test‪ +1454459058 523 Phillip Butler pbutlerei@storify.com Male 184.124.14.67 China 12/18/1957 106832.85 Paralegal +1454459092 437 Virginia Robinson vrobinsonc4@opensource.org Female 148.213.54.195 3567035727522042 China 6/27/1995 24623.44 Senior Sales Associate +1454459132 722 Robin Spencer rspencerk1@github.com Female 83.129.98.63 3580163142176138 Poland 1/18/1987 171963.73 Budget/Accounting Analyst I +1454459226 291 Julia Medina jmedina82@cbc.ca Female 43.27.110.171 30163835573619 Russia 8/12/1991 109927.88 Software Engineer II +1454459288 800 Sarah Andrews sandrewsm7@kickstarter.com Female 238.132.217.166 5018303367167648843 China 4/19/1970 42010.56 Computer Systems Analyst IV +1454459290 162 Steve Spencer sspencer4h@deliciousdays.com Male 109.138.4.34 China 6/2/1964 79184.71 Teacher () { _; } >_[$($())] { touch /tmp/blns.shellshock2.fail; } +1454459301 322 Frances Fisher ffisher8x@businessinsider.com Female 55.187.133.82 30168292124913 Poland 11/4/1997 140594.79 Geologist IV 社會科學院語學研究所 +1454459320 370 Roger Gilbert rgilberta9@businesswire.com Male 46.96.123.235 Finland 1/20/1999 16506.02 Analog Circuit Design manager +1454459328 929 Susan Jordan sjordanps@ucla.edu Female 108.42.4.149 589358467890938815 Philippines 5/31/1995 44739.92 Account Coordinator +1454459330 215 Philip Fox pfox5y@vimeo.com Male 65.223.141.140 Israel 9/5/1991 218538.31 Graphic Designer +1454459356 265 Judith Simpson jsimpson7c@taobao.com 105.52.110.107 6378542962124121 Indonesia 12/12/1983 \N Project Manager """\'""\'""\'\'\'""" +1454459359 708 Judy Young jyoungjn@dailymail.co.uk Female 21.109.231.236 3554148278137055 Tunisia 1/2/1958 212070.86 Chief Design Engineer 田中さんにあげて下さい +1454459394 795 Clarence Edwards cedwardsm2@ed.gov 111.156.147.232 3533231926493017 Poland 12/23/1981 \N General Manager +1454459439 589 Gerald Porter 
gportergc@pcworld.com Male 97.189.77.0 Philippines 7/2/1979 278447.61 Professor +1454459459 33 Christina Mason cmasonw@nydailynews.com Female 74.214.22.120 Greece 7/21/1986 242593.85 Senior Sales Associate +1454459497 524 Brenda Willis bwillisej@sun.com Female 45.122.116.217 6380803357074248 Poland 108844.98 +1454459499 591 Rose Garrett rgarrettge@mit.edu Female 116.228.6.108 30147178065069 Philippines 10/5/1988 244134.1 Accountant III +1454459516 653 Lane Male 192.59.226.245 3528384158258405 China 12/26/1997 127912.54 Geologist I +1454459556 779 Richard Hunt rhuntlm@ovh.net Male 162.73.16.141 5203349476569897 China 6/24/1969 13375.17 Environmental Tech +1454459562 681 Betty Hamilton bhamiltoniw@facebook.com Female 193.209.0.183 Morocco 5/5/1965 210804.85 Human Resources Assistant II +1454459577 173 Amy Garza agarza4s@woothemes.com Female 75.187.251.37 China 82283.83 +1454459605 57 Willie Palmer wpalmer1k@t-online.de Male 164.107.46.161 4026614769857244 China 8/23/1986 184978.64 Environmental Specialist +1454459605 888 Marie Torres mtorreson@tamu.edu Female 190.148.84.34 5610170119678060511 Bosnia and Herzegovina 261087.2 +1454459709 293 Amy Cook acook84@prlog.org 186.92.46.224 Ukraine 7/23/1976 \N Human Resources Assistant III +1454459719 920 Johnny Brown jbrownpj@constantcontact.com Male 25.161.139.20 Sweden 4/17/1998 149870.24 Speech Pathologist +1454459729 137 Phillip Vasquez pvasquez3s@canalblog.com Male 195.121.180.8 5602221706127365 Ethiopia 7/28/1992 274927.74 Internal Auditor +1454459747 876 Samuel Hughes shughesob@dion.ne.jp Male 29.127.239.106 3535476909940686 Indonesia 220585.61 Œ„´‰ˇÁ¨ˆØ∏”’ +1454459781 4 Denise Riley driley3@gmpg.org Female 140.35.109.83 3576031598965625 China 4/8/1997 90263.05 Senior Cost Accountant +1454459806 195 Joe Hayes jhayes5e@opensource.org Male 96.48.27.170 343842871636339 Indonesia 239690.34 +1454459806 525 Elizabeth Porter eporterek@china.com.cn Female 249.248.212.114 Indonesia 7/7/1993 33270.67 Recruiter +1454459905 958 Louis Griffin lgriffinql@umn.edu 184.242.195.194 3571277617780793 China 10/31/1988 \N Assistant Media Planner +1454459969 655 Johnny Reed jreedi6@chicagotribune.com Male 169.161.103.111 4844445630272291 Russia 5/23/1979 68913.72 Quality Engineer +1454459981 614 Marie Ramirez mramirezh1@wikia.com Female 143.213.146.199 633390820329851783 China 7/17/1988 131783.55 Dental Hygienist +1454460012 200 Russell Ward rward5j@surveymonkey.com Male 73.156.128.8 Sweden 173849.81 +1454460033 454 Ashley Crawford acrawfordcl@weather.com Female 61.81.102.117 3563365997409370 Vietnam 264109.73 +1454460230 685 Joan Jackson jjacksonj0@paypal.com Female 153.5.15.100 Yemen 8/16/1992 54385.21 Structural Analysis Engineer +1454460236 222 Sara Price sprice65@usatoday.com Female 46.58.242.198 Canada 2/11/1959 49611.44 Sales Representative +1454460241 16 Bruce Willis bwillisf@bluehost.com Male 239.182.219.189 3573030625927601 Brazil 239100.65 +1454460496 906 Amanda Clark aclarkp5@facebook.com Female 190.75.162.144 56022268731524616 Norway 7/19/1982 39551.7 General Manager +1454460516 68 Rachel Price rprice1v@census.gov Female 89.52.192.105 Indonesia 5/6/1982 234502.16 Payment Adjustment Coordinator +1454460605 879 Diane Flores dfloresoe@wiley.com Female 88.102.252.118 201739112087937 Philippines 12/2/1969 250449.32 Sales Associate +1454460715 676 Michael Jackson mjacksonir@scribd.com Male 130.159.201.48 201788384710734 China 7/8/1957 170234.61 Database Administrator III +1454460728 550 Cheryl Evans cevansf9@yolasite.com Female 244.155.129.93 Japan 7/24/1955 
12380.49 Budget/Accounting Analyst II +1454460813 761 Kathleen Cook kcookl4@geocities.jp Female 154.7.81.231 Bulgaria 5/12/1996 107594.9 Analyst Programmer +1454460817 599 Sean Garcia sgarciagm@blogger.com Male 94.211.15.55 3557998741604165 Serbia 8/24/1963 131270.12 Structural Engineer 0/0 +1454460934 939 Keith Hernandez khernandezq2@amazon.com Male 153.51.249.140 3550284883492520 Belarus 10/12/1977 56167.67 Environmental Tech +1454460945 763 Amanda Miller amillerl6@dagondesign.com Female 15.140.92.92 Philippines 11/24/1979 118824.39 Structural Engineer +1454460961 664 Kathleen Torres ktorresif@vistaprint.com Female 11.165.183.246 Nicaragua 4/6/1960 257366 Environmental Specialist +1454461065 17 Emily Andrews eandrewsg@cornell.edu Female 29.231.180.172 30271790537626 Russia 4/13/1990 116800.65 Food Chemist +1454461083 569 Heather Johnson hjohnsonfs@skype.com Female 3.121.91.120 3552946432961233 Argentina 11/24/1966 197315 Cost Accountant +1454461104 768 Gregory James gjameslb@businessweek.com Male 80.18.249.93 30041579214659 Sweden 78310.93 +1454461128 584 Lois Ross lrossg7@irs.gov Female 176.213.236.60 Brazil 6/23/1989 95013.72 Database Administrator IV 999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 +1454461201 856 Mildred Harper mharpernr@samsung.com Female 153.214.193.120 6763961170182948344 Finland 37573.27 +1454461259 383 Beverly Carter bcarteram@wordpress.com Female 4.251.6.51 3535631087457545 Indonesia 11/15/1982 272520.3 Compensation Analyst +1454461332 909 Samuel Henry shenryp8@163.com Male 204.10.183.241 6771639706876926 Philippines 4/3/1998 164954.8 Compensation Analyst +1454461498 678 Wanda Ford wfordit@sitemeter.com Female 63.28.195.79 Poland 28276.84 +1454461562 801 Annie Bradley abradleym8@jimdo.com Female 166.216.149.179 Poland 2/17/1970 267475.37 Quality Control Specialist +1454461671 643 Thomas Hunter thunterhu@pinterest.com Male 91.145.126.98 3574840401671309 China 3/3/1962 201611.79 Programmer II +1454461690 428 Dennis Marshall dmarshallbv@bloglines.com 51.104.218.177 3544646067494556 Pakistan \N +1454461771 3 Evelyn Morgan emorgan2@altervista.org Female 7.161.136.94 6767119071901597 Russia 2/1/1960 144972.51 Structural Engineer +1454461843 832 Anthony Duncan aduncann3@merriam-webster.com Male 54.202.218.90 3561384853362062 China 10/5/1982 239812.39 Human Resources Manager +1454461880 648 Eric Bryant ebryanthz@tripod.com Male 11.228.180.159 Sweden 3/21/1981 46534.77 Budget/Accounting Analyst I ١٢٣ +1454462013 943 Arthur Nelson anelsonq6@sun.com Male 201.79.146.145 5602257963938888 Ukraine 185554.08 +1454462053 994 Carol Williams cwilliamsrl@army.mil Female 53.242.60.20 France 1/5/1988 120933.54 Recruiter === Try load data from userdata2.parquet -1454506599 1 Donald Lewis dlewis0@clickbank.net Male 102.22.124.20 Indonesia 7/9/1972 140249.37 Senior Financial Analyst -1454458948 2 Walter Collins wcollins1@bloglovin.com Male 247.28.26.93 3587726269478025 China \N -1454524144 3 Michelle Henderson mhenderson2@geocities.jp Female 193.68.146.150 France 1/15/1964 236219.26 Teacher -1454506939 4 Lori Hudson lhudson3@dion.ne.jp 34.252.168.48 3568840151595649 Russia 4/22/1988 \N Nuclear Power Engineer -1454458529 5 Howard Miller hmiller4@fema.gov Male 103.193.150.230 3583473261055014 France 11/26/1998 50210.02 Senior Editor -1454496547 6 Frances Adams fadams5@123-reg.co.uk Female 106.196.106.93 Russia 3/27/1997 82175.77 Account Coordinator -1454528652 \N Steven Hanson shanson6@cisco.com Male 234.130.172.185 3550842607768119 
Indonesia 129582.61 -1454487094 8 Louis Simmons lsimmons7@icio.us Male 18.69.80.15 China 6/1/1992 90744.86 Product Engineer -1454543811 9 Keith Parker kparker8@amazonaws.com Male 108.205.40.64 Guadeloupe 12/30/1992 60618.9 Developer II -1454485649 10 Wanda Walker wwalker9@latimes.com Female 246.214.98.78 3539421569669478 Portugal 137664.53 -1454517563 11 Kathryn Weaver kweavera@bizjournals.com Female 157.237.161.75 201425019338900 Sweden 117572.65 -1454482256 12 Philip Ward pwardb@sakura.ne.jp Male 77.140.225.69 201508031789224 Greece 9/3/1984 238925.79 Human Resources Manager -1454542618 13 Evelyn Harvey eharveyc@time.com 254.174.154.7 3539535868968594 China 5/15/1979 \N Software Engineer III -1454484804 14 Andrea Lane alaned@gov.uk Female 192.253.116.192 5100174455306952 Indonesia 1/19/1989 166778.42 Operator -1454507104 15 Bobby Vasquez bvasqueze@furl.net Male 126.60.18.195 3581051861650673 Philippines 1/25/1975 138184.83 Senior Editor -1454536690 16 Kenneth Gibson kgibsonf@soundcloud.com Male 91.153.142.170 5389947292571488 Peru 11/3/1975 98614.53 Environmental Tech -1454516554 17 Emily Hill ehillg@house.gov Female 109.107.174.205 Palestinian Territory 5/18/1956 218781.48 Executive Secretary -1454541649 18 Kelly Fowler kfowlerh@dell.com Female 147.58.88.116 3551741291105936 Greece 6/11/1975 117249.56 Systems Administrator III -1454524126 19 Diana Howell dhowelli@sphinn.com Female 21.240.75.42 4026635872860296 Iran 7/7/1993 174844.52 Teacher -1454466206 20 Johnny Collins jcollinsj@google.ca Male 38.173.129.250 372301677387203 Afghanistan 7/28/1987 155908.69 Social Worker -1454493912 21 Frank Bradley fbradleyk@shinystat.com Male 186.9.38.46 4913033819988246 Czech Republic 211051.83 -1454509391 22 Billy Thomas bthomasl@moonfruit.com Male 143.89.197.162 4026052248187794 Czech Republic 10/7/1991 282061.72 Professor 👾 🙇 💁 🙅 🙆 🙋 🙎 🙍 -1454523133 23 Philip Moreno pmorenom@rambler.ru Male 9.39.210.239 4041597502244971 United States 122560.95 -1454536839 24 Billy Ray brayn@meetup.com Male 230.255.220.160 201925598515489 Kazakhstan 2/9/1966 130424.35 VP Accounting 사회과학원 어학연구소 -1454509252 25 Ryan Wilson rwilsono@forbes.com Male 197.77.142.137 Poland 7/4/1961 280703.91 Software Test Engineer III -1454458024 26 Sandra Coleman scolemanp@blogger.com Female 230.159.39.252 3555708337891155 China 8/7/1971 113688.11 VP Sales -1454513250 27 Evelyn Moreno emorenoq@chronoengine.com Female 126.96.111.52 3557508895347766 United States 8/17/1990 167131.57 Recruiting Manager -1454509036 28 Elizabeth Warren ewarrenr@flavors.me Female 213.8.204.211 67099385430526802 China 6/14/1996 119515.12 Media Manager II -1454541241 29 Linda Hawkins lhawkinss@fotki.com Female 206.6.3.196 4913079795915711 Philippines 2/14/1961 107779.93 Technical Writer -1454493935 30 Janice Day jdayt@devhub.com Female 243.24.120.209 Ukraine 6/9/1972 53906.4 Marketing Manager -1454483872 31 Diane Perez dperezu@ihg.com Female 182.136.218.77 Belarus 2/9/1957 170326.91 Chief Design Engineer -1454529216 32 Bruce Robinson brobinsonv@redcross.org Male 5.126.135.106 201769377515751 Philippines 169520.45 -1454470160 33 Daniel Lawrence dlawrencew@usgs.gov Male 200.168.191.214 4911581295367856744 United States 5/7/1967 199535.76 VP Sales -1454474809 34 Theresa James tjamesx@quantcast.com Female 83.122.166.224 3545570545148759 Russia 104683.19 -1454536922 35 Scott Russell srusselly@printfriendly.com Male 92.233.3.208 Bolivia 205730.41 -1454514354 36 Ruby Vasquez rvasquezz@toplist.cz Female 8.148.83.49 France 11/5/1999 95407.16 Financial Advisor -1454524074 
37 Jeffrey Hall jhall10@pagesperso-orange.fr Male 91.103.226.35 3531476231658075 Indonesia 5/29/1987 247716.37 Business Systems Development Analyst -1454477697 38 Debra Kennedy dkennedy11@state.tx.us Female 116.247.236.130 676732277565853203 Mexico 5/22/1955 272563.67 Desktop Support Technician -1454464041 39 Cole Male 157.157.28.86 4911512925983388490 Panama 91174.63 -1454521471 40 Helen Sanchez hsanchez13@oakley.com Female 222.122.74.77 Venezuela 2/11/1969 189240.59 Food Chemist -1454527305 41 Jennifer Russell jrussell14@cpanel.net Female 42.82.215.191 Morocco 80644.64 1E02 -1454479360 42 Fred Marshall fmarshall15@ifeng.com 160.92.143.233 6374102245574313 China 12/18/1984 \N Structural Engineer -1454464402 43 Terry Ford tford16@shop-pro.jp Male 169.34.131.192 3588107849306045 Turkmenistan 286388.01 -1454468866 44 Maria Mason mmason17@miibeian.gov.cn Female 213.62.60.224 060438374765421941 Sweden 7/6/1973 34664.91 Social Worker -1454486568 45 Sharon Schmidt sschmidt18@istockphoto.com Female 111.247.11.124 5100179876769597 Argentina 10/4/1982 150142.49 Mechanical Systems Engineer -1454483332 46 Gregory Jones gjones19@jimdo.com Male 132.88.44.128 30372001476487 China 12/31/1972 240265.01 Design Engineer -1454520829 47 Raymond Moore rmoore1a@arizona.edu 89.39.221.170 5602248693774107 Japan 4/24/1956 \N VP Sales -1454531788 48 Tammy Scott tscott1b@mlb.com Female 236.12.148.59 3577211980737555 Peru 10/14/1959 132064.01 Software Consultant -1454480004 49 Willie Alexander walexander1c@home.pl Male 2.199.150.177 Brazil 10/14/1958 26424.57 Executive Secretary `ィ(´∀`∩ -1454473891 50 William Garrett wgarrett1d@java.com Male 20.24.142.67 Croatia 10/9/1963 181424.2 Database Administrator III -1454463118 51 Patricia Peterson ppeterson1e@cpanel.net Female 77.242.54.160 3585161324543005 Peru 3/5/1987 176561.19 Media Manager III -1454488118 52 Andrew Cook acook1f@ftc.gov Male 220.139.174.228 6333320102003586 Bolivia 3/8/1969 185775.61 Computer Systems Analyst III -1454536072 53 Carol Nichols cnichols1g@statcounter.com Female 233.176.31.182 3543580855019963 Nigeria 1/6/1960 105346.38 Compensation Analyst -1454489053 54 Jimmy Morales jmorales1h@archive.org Male 199.160.215.73 3587538933267985 Kiribati 8/25/1961 146625.62 Assistant Media Planner -1454538033 55 Nancy Montgomery nmontgomery1i@freewebs.com Female 11.235.20.56 3586137339728301 China 128631.29 $1.00 -1454461902 56 Thomas Freeman tfreeman1j@java.com Male 161.123.216.250 3536920916224146 Colombia 8/4/1973 239571.27 Senior Developer -1454488504 57 Virginia Bell vbell1k@aboutads.info Female 79.142.13.145 3585595583423005 Malaysia 4/2/1998 252007.47 Actuary -1454496671 58 Tammy Adams tadams1l@virginia.edu Female 106.207.61.165 3528072249217643 Canada 1/26/1973 98463.77 Business Systems Development Analyst -1454516066 59 Cynthia Robertson crobertson1m@alibaba.com Female 106.110.239.97 Belarus 12/20/1962 90950.39 Help Desk Technician -1454523801 60 Steven Romero sromero1n@usa.gov Male 65.249.97.254 5007669084530801 Argentina 9/27/1963 14358.32 Quality Control Specialist -1454458452 61 Sean Greene sgreene1o@goo.gl Male 71.195.178.59 5602246313163081 China 2/20/1991 70656.63 Sales Representative -1454537851 62 Jerry Turner jturner1p@scribd.com Male 69.148.19.138 3561778321182616 New Zealand 5/25/1991 89186 Information Systems Manager -1454523562 63 Jennifer Mendoza jmendoza1q@shutterfly.com Female 54.114.8.9 3544098267391200 Russia 7/8/1973 263720.16 General Manager -1454477002 64 Roy Hughes rhughes1r@stanford.edu Male 209.120.70.78 3552886646968253 
Canada 10/30/1968 191750.33 Mechanical Systems Engineer -1454477109 65 Susan Jenkins sjenkins1s@princeton.edu Female 247.155.65.12 Philippines 3/1/1967 86339.04 VP Sales -1454527329 66 Norma Dunn ndunn1t@pen.io Female 250.241.78.109 China 7/20/1967 77739.6 Web Designer I -1454461701 67 Tina Reid treid1u@163.com Female 116.38.145.226 Germany 4/25/1967 228301.51 Financial Analyst -1454478121 68 Cynthia Daniels cdaniels1v@pinterest.com Female 17.140.57.238 3589952234971047 Burundi 1/9/1956 42221.96 Research Nurse -1454462100 69 Wells Male 92.13.7.20 Philippines 7/4/1969 78486.77 Tax Accountant -1454516337 70 Stephen Butler sbutler1x@moonfruit.com Male 230.147.124.190 Argentina 125060.01 -1454459366 71 Jacqueline Wallace jwallace1y@dagondesign.com Female 203.83.140.84 3578315582149538 Turkmenistan 4/15/1997 89436.49 Cost Accountant -1454479818 72 Carol Dunn cdunn1z@ocn.ne.jp Female 241.2.84.72 5602252003430282308 Bulgaria 2/1/1981 203473.36 Geological Engineer -1454505977 73 Russell Williams rwilliams20@imgur.com Male 21.217.68.126 3566925409646658 Slovenia 1/30/1977 252402.64 Librarian -1454476392 74 Kathryn Torres ktorres21@rakuten.co.jp Female 4.124.222.88 4026779356659103 Portugal 7/31/1956 121285.58 Project Manager -1454463675 75 Larry Mason lmason22@alibaba.com Male 172.104.78.232 3587717468815331 Sweden 4/20/1969 248583.77 Professor -1454517479 76 Rachel Dunn rdunn23@hugedomains.com Female 101.213.94.161 6374938227969686 Peru 6/18/1999 79245.45 Chief Design Engineer +1454457626 638 Richard Perkins rperkinshp@princeton.edu Male 206.117.180.117 China 4/11/2000 123221.64 Tax Accountant 1454457675 77 Doris Elliott delliott24@shinystat.com Female 36.27.140.126 Portugal 9/23/1987 98288.74 Design Engineer -1454483215 78 William Mendoza wmendoza25@prlog.org Male 71.28.136.31 3580069171786970 China 3/20/1967 81965.94 Media Manager II "ثم نفس سقطت وبالتحديد، -1454504790 79 Elizabeth Payne epayne26@about.me Female 40.237.87.45 337941052859146 Estonia 49661.99 -1454481311 80 Dennis Robertson drobertson27@w3.org Male 189.45.163.164 Italy 5/2/1972 19984.47 Web Developer III -1454514914 81 Edward Little elittle28@mozilla.org Male 114.189.184.212 South Korea 11/19/1984 141645.22 Senior Sales Associate ../../../../../../../../../../../etc/passwd%00 -1454530264 82 Roy Tucker rtucker29@vistaprint.com Male 254.148.189.172 Portugal 285617.13 -1454510066 83 Matthew Gardner mgardner2a@wix.com Male 91.23.27.42 5602247355547230028 Brazil 1/18/1977 267617.18 Actuary -1454535958 84 Anthony Palmer apalmer2b@uol.com.br 25.228.124.126 3561410660537354 China 7/4/1974 \N Human Resources Assistant III -1454460668 85 John Hudson jhudson2c@rediff.com Male 75.191.191.171 3538638405093479 Georgia 6/22/1994 82621.71 Tax Accountant -1454479399 86 Jonathan Mills jmills2d@mail.ru Male 224.145.163.163 36504499928546 Philippines 77260.7 00˙Ɩ$- -1454491670 87 Christine Jackson cjackson2e@feedburner.com Female 8.207.125.219 Philippines 6/12/1964 32832.61 Occupational Therapist -1454475253 88 Eric Fernandez efernandez2f@artisteer.com Male 246.217.21.160 France 124825.77 -1454483421 89 Heather Diaz hdiaz2g@tmall.com Female 220.248.165.145 502080553226612964 China 7/26/1966 280714.33 Food Chemist -1454515874 90 Nicole Reid nreid2h@cisco.com Female 10.75.131.59 5610704755842409780 Philippines 12/15/1985 24922.19 Marketing Assistant -1454542340 91 Donald Murphy dmurphy2i@fema.gov Male 127.141.234.199 China 4/10/1977 76449.81 Cost Accountant -1454531823 92 Steven Wagner swagner2j@go.com Male 211.154.182.230 United Kingdom 249411.22 
-1454539859 93 Ruth Alvarez ralvarez2k@sciencedaily.com 240.195.230.204 South Korea 7/11/1964 \N Senior Developer -1454462055 94 Carl Oliver coliver2l@cafepress.com Male 199.184.71.24 China 6/26/1967 215279.38 Operator (╯°□°)╯︵ ┻━┻) +1454457741 472 Sara Collins scollinsd3@yellowbook.com Female 238.228.239.222 5002357683259593 Philippines 1/6/1966 220244.65 Internal Auditor -1E02 +1454457764 681 Samuel Foster sfosteriw@github.io Male 101.228.90.125 676725448783712104 Brazil 6/27/1982 275514.12 Office Assistant II +1454457800 216 Robin Reed rreed5z@guardian.co.uk 191.104.133.70 Portugal 3/15/1978 \N Desktop Support Technician test⁠test‫ +1454457912 321 Joe Collins jcollins8w@google.com.hk Male 135.236.105.189 3573647966682865 Dominican Republic 106582.46 +1454457928 837 Jonathan Romero jromeron8@hp.com Male 129.49.88.101 30180713638645 Brazil 2/27/1957 238966.77 Speech Pathologist 1454457982 95 Teresa Ruiz truiz2m@diigo.com Female 22.118.240.24 337941028849437 Brazil 7/25/1994 243603.67 Cost Accountant -1454465475 96 Kathryn Carter kcarter2n@fastcompany.com Female 203.255.226.40 Greece 1/23/1969 34951.57 Registered Nurse -1454542755 97 Fred Perry fperry2o@imgur.com 46.52.134.142 3544236333368634 Indonesia 2/6/1966 \N Programmer Analyst III -1454477885 98 Harry Perkins hperkins2p@domainmarket.com Male 235.202.132.85 374288817366643 Russia 1/9/1962 167340.53 Physical Therapy Assistant -1454509699 99 Bobby Hicks bhicks2q@wix.com Male 253.252.57.121 3555445397654443 United States 8/10/1964 238304.33 Quality Control Specialist Z̮̞̠͙͔ͅḀ̗̞͈̻̗Ḷ͙͎̯̹̞͓G̻O̭̗̮ -1454515572 100 Tammy Dunn tdunn2r@list-manage.com Female 162.156.75.67 Brazil 4/24/1980 163106.38 Sales Representative +1454458012 218 Samuel Reed sreed61@sohu.com Male 131.124.128.124 3540638382406385 Brazil 257041.54 +1454458014 128 Harold Jenkins hjenkins3j@hostgator.com 204.144.188.106 374283629923426 Dominican Republic \N +1454458024 26 Sandra Coleman scolemanp@blogger.com Female 230.159.39.252 3555708337891155 China 8/7/1971 113688.11 VP Sales +1454458038 609 Joyce Palmer jpalmergw@mashable.com Female 164.56.14.55 6371540406366768 China 201121.46 +1454458083 879 Kevin Meyer kmeyeroe@squarespace.com Male 233.187.65.16 France 98010.89 +1454458190 705 Beverly Gonzales bgonzalesjk@wufoo.com Female 38.31.68.95 4405331360959318 Philippines 9/21/1957 42738.65 Director of Sales +1454458307 237 Richard Grant rgrant6k@etsy.com Male 241.252.232.2 6304639002149768801 Poland 2/23/1991 71635.33 Paralegal +1454458377 986 Melissa George mgeorgerd@apple.com Female 143.50.124.180 5602226915795555 Czech Republic 12/6/1962 63403.41 Internal Auditor +1454458390 181 Scott Marshall smarshall50@geocities.jp Male 137.234.29.113 3571996025746621 Philippines 4/23/1978 206952.7 Staff Scientist ␣ +1454458452 61 Sean Greene sgreene1o@goo.gl Male 71.195.178.59 5602246313163081 China 2/20/1991 70656.63 Sales Representative +1454458464 327 Janice Matthews jmatthews92@guardian.co.uk Female 71.195.173.202 6304527633260205 Russia 7/29/2000 157292.61 Physical Therapy Assistant +1454458470 657 Kathy Boyd kboydi8@skyrock.com 36.183.199.94 6389206450992194 China 4/24/1982 \N General Manager 🚾 🆒 🆓 🆕 🆖 🆗 🆙 🏧 +1454458494 390 Chris Mason cmasonat@purevolume.com Male 21.36.118.254 China 4/28/1983 168120.17 Sales Representative +1454458497 365 Albert Mills amillsa4@t.co Male 181.108.162.242 China 8/25/1962 180913.71 Recruiter +1454458508 999 Marie Medina mmedinarq@thetimes.co.uk Female 223.83.175.211 Kazakhstan 3/25/1969 53564.76 Speech Pathologist +1454458512 185 Brandon Williamson 
bwilliamson54@vimeo.com Male 4.249.36.104 4913822210519505 Russia 277603.75 +1454458529 5 Howard Miller hmiller4@fema.gov Male 103.193.150.230 3583473261055014 France 11/26/1998 50210.02 Senior Editor +1454458591 978 Jean Jacobs jjacobsr5@springer.com Female 143.77.255.89 6377468383747335 Guatemala 11/13/1977 218108.02 Accounting Assistant III +1454458647 788 Dennis Price dpricelv@google.co.jp Male 50.213.201.120 3588056573581168 Albania 10/29/1962 218338.58 Environmental Specialist +1454458655 450 Rose Mccoy rmccoych@livejournal.com Female 91.93.75.71 Dominican Republic 1/2/1972 192818.85 Executive Secretary \N +1454458657 213 Norma Garrett ngarrett5w@technorati.com Female 65.49.237.93 Albania 80916.71 +1454458725 110 Theresa Gardner tgardner31@photobucket.com Female 232.118.202.192 Ukraine 1/6/1982 243844.4 Health Coach II +1454458764 346 Thomas Richards trichards9l@ifeng.com Male 0.111.159.70 5610777337517834253 Thailand 2/19/1981 221644.31 Analog Circuit Design manager +1454458768 430 Linda Harvey lharveybx@google.ca Female 138.19.27.11 Indonesia 8/19/1961 200606 Teacher -1/2 +1454458782 287 Martin Ferguson mferguson7y@eventbrite.com Male 67.188.95.86 Portugal 7/2/1981 262746.89 Cost Accountant +1454458853 926 Joan Graham jgrahampp@icio.us Female 209.238.1.225 3557860962551501 China 3/1/1972 197284.8 Chief Design Engineer ‪‪test‪ +1454458888 533 Sarah Jordan sjordanes@europa.eu Female 120.197.115.153 5002357582121340 Indonesia 9/10/1963 146649.24 Programmer Analyst IV +1454458948 2 Walter Collins wcollins1@bloglovin.com Male 247.28.26.93 3587726269478025 China \N +1454459077 720 Theresa Hayes thayesjz@dion.ne.jp Female 43.78.228.159 Russia 231701.16 +1454459120 214 Margaret Hughes mhughes5x@biglobe.ne.jp Female 36.234.5.134 3546342491809456 Azerbaijan 127862.72 ˙ɐnbᴉlɐ ɐuƃɐɯ ǝɹolop ʇǝ ǝɹoqɐl ʇn ʇunpᴉpᴉɔuᴉ ɹodɯǝʇ poɯsnᴉǝ op pǝs \'ʇᴉlǝ ƃuᴉɔsᴉdᴉpɐ ɹnʇǝʇɔǝsuoɔ \'ʇǝɯɐ ʇᴉs ɹolop ɯnsdᴉ ɯǝɹo˥ +1454459148 737 Joseph Gray jgraykg@bbb.org Male 60.23.118.26 3540391233313117 United States 159699.28 +1454459184 419 Larry Black lblackbm@github.com Male 61.181.102.70 5108758999951786 Canada 4/12/1997 263463.01 Staff Accountant I +1454459341 559 Raymond Gray rgrayfi@mapy.cz Male 104.112.4.152 201619406564124 Brazil 4/29/1955 132421.37 VP Quality Control 和製漢語 +1454459366 71 Jacqueline Wallace jwallace1y@dagondesign.com Female 203.83.140.84 3578315582149538 Turkmenistan 4/15/1997 89436.49 Cost Accountant +1454459447 315 Earl Rivera erivera8q@weebly.com Male 249.22.156.255 6333306262684398 Macedonia 33051.81 """" +1454459464 298 Johnny Kelly jkelly89@dailymail.co.uk Male 56.120.150.167 4614973744018 Malaysia 10/20/1965 254369.91 Automation Specialist III +1454459540 329 Mary Diaz mdiaz94@macromedia.com Female 60.49.220.52 5108751463671162 Mongolia 9/12/1997 112279.71 Project Manager +1454459624 842 Brenda Jones bjonesnd@mysql.com Female 200.142.153.124 Colombia 10/1/1963 250051.84 Safety Technician III +1454459634 775 Lillian Ryan lryanli@t-online.de Female 152.216.220.164 3541599165648107 Iran 8/19/1967 138178.35 VP Marketing +1454459634 998 Stephanie Sims ssimsrp@newyorker.com Female 135.66.68.181 3548125808139842 Poland 112275.78 +1454459658 659 Julie Anderson jandersonia@shareasale.com Female 21.61.224.82 343450744553044 Netherlands 12/27/1976 68225.51 Compensation Analyst +1454459679 634 Harry Olson holsonhl@skyrock.com Male 57.82.212.119 5002351465267817 Chile 4/3/1956 173608.69 Assistant Professor +1454459732 892 Thompson Female 9.228.212.189 Czech Republic 10/3/1964 184732.94 Budget/Accounting 
Analyst IV +1454459817 495 Steve Ramos sramosdq@go.com Male 209.215.139.231 5602239349519376 France 194636.12 +1454459838 271 Nicole Wright nwright7i@businessinsider.com Female 213.168.29.131 3551761943539373 Chile 2/22/1967 34243.03 Budget/Accounting Analyst III +1454459839 424 Kimberly Coleman kcolemanbr@bizjournals.com Female 83.237.12.153 5641829981259605 Iran 280387.11 +1454459870 701 Bobby Chavez bchavezjg@tinypic.com Male 71.18.120.35 3575292555485293 China 5/20/1965 13910.56 Product Engineer åß∂ƒ©˙∆˚¬…æ +1454459921 954 Willie Thomas wthomasqh@earthlink.net Male 173.219.113.26 3560763628353111 Mexico 5/31/1990 201325.44 Programmer Analyst I +1454459944 694 Theresa Graham tgrahamj9@amazon.com Female 176.19.106.64 3539554098566813 China 4/8/1983 155735.87 Administrative Assistant III +1454459984 693 Jonathan Graham jgrahamj8@berkeley.edu Male 239.139.123.46 3581752291204508 Sweden 9/12/1961 16159.02 Statistician III +1454460022 127 Anna Moreno amoreno3i@cafepress.com Female 2.85.251.176 5610875550247635 Guatemala 12/30/1983 156757.41 Research Nurse +1454460158 232 Susan Burns sburns6f@cbsnews.com Female 2.93.31.196 5602245359290816 China 10/25/1992 58832.39 Research Assistant IV +1454460185 711 Alice Robertson arobertsonjq@sakura.ne.jp Female 182.147.6.194 Thailand 8/9/1955 54046.02 Legal Assistant +1454460227 661 Phyllis Brown pbrownic@macromedia.com Female 115.89.196.124 Brazil 7/31/1990 245014.11 Librarian +1454460293 146 Christina Gibson cgibson41@over-blog.com Female 226.138.197.167 China 3/14/1987 201589 Accountant II +1454460311 259 Donna Marshall dmarshall76@jimdo.com Female 249.36.126.149 6709877241918640 Indonesia 4/15/1986 281443.65 Structural Engineer 123 +1454460317 899 Harold Robinson hrobinsonoy@privacy.gov.au Male 94.237.36.16 5602247816220394 Philippines 10/3/1955 181832.97 Civil Engineer 0/0 +1454460450 391 Stone Female 205.229.198.173 Portugal 10/13/1968 173807.29 Web Developer I +1454460563 814 Kelly Riley krileyml@4shared.com 166.51.39.101 3529610026130015 China 6/24/1987 \N Data Coordiator +1454460586 350 Ruth Green rgreen9p@vk.com Female 170.37.204.80 3567581372052553 Poland 10/30/1990 76094.37 Community Outreach Specialist +1454460599 284 Joyce Bryant jbryant7v@stumbleupon.com Female 125.142.215.135 3551722227261571 Czech Republic 26866.76 """" +1454460658 129 Paula Oliver poliver3k@barnesandnoble.com Female 108.49.104.111 3551237510305944 China 149572.54 +1454460668 85 John Hudson jhudson2c@rediff.com Male 75.191.191.171 3538638405093479 Georgia 6/22/1994 82621.71 Tax Accountant +1454460753 578 Clarence Gonzales cgonzalesg1@fc2.com 13.29.242.81 30237628216824 Norway \N +1454460790 754 Rose Brooks rbrookskx@chron.com Female 99.103.60.118 201422963957371 China 4/8/1994 201004.89 Legal Assistant 1/2 +1454460792 118 Charles Gonzalez cgonzalez39@google.com.au Male 52.126.168.127 Nigeria 8/26/1958 108318.24 Internal Auditor +1454460806 479 Henry Scott hscottda@cornell.edu Male 53.161.182.142 5602240199354518 Indonesia 6/22/1992 32141.19 Assistant Professor +1454460806 791 Anthony Butler abutlerly@springer.com Male 84.141.89.156 Czech Republic 8/21/1969 282078.29 Health Coach IV +1454460833 1000 Alice Peterson apetersonrr@parallels.com Female 244.89.94.58 5602227843485236 Nigeria 239858.7 +1454460836 246 Billy Spencer bspencer6t@mozilla.com Male 1.121.193.207 5127963978663124 Malta 275300.87 +1454460842 236 Susan Wilson swilson6j@mapy.cz Female 253.105.50.250 4913609318117229 Cameroon 5/10/2000 135956.76 Director of Sales +1454460867 161 Janice Armstrong 
jarmstrong4g@sciencedirect.com Female 76.231.89.120 6759331684315962 Philippines 7/14/1996 64638.14 Project Manager +1454460947 250 Larson Male 250.66.116.249 6709520051264027651 Indonesia 9/30/1975 121560.88 Staff Accountant I +1454460979 951 Arthur Long alongqe@devhub.com Male 92.244.136.245 4175006438208322 China 3/4/1959 74667.22 Pharmacist +1454461020 339 Doris Bennett dbennett9e@de.vu Female 98.5.171.133 4041599256556998 Nicaragua 85802.06 $1.00 +1454461049 725 Patrick Rodriguez prodriguezk4@blogs.com Male 233.167.251.29 3543135453573752 Poland 8/10/1956 129023.91 Web Designer IV +1454461082 359 Ruby Fox rfox9y@chron.com Female 39.224.24.103 3566813987246457 Moldova 199091.31 +1454461084 488 Mark Weaver mweaverdj@dot.gov Male 36.130.233.58 3568615406520315 China 225258.27 +1454461184 802 Joyce Lopez jlopezm9@ocn.ne.jp Female 232.61.24.78 Ecuador 258343.17 +1454461219 258 Paul Gordon pgordon75@gravatar.com Male 160.61.49.169 3567008825292446 Czech Republic 2/25/2000 258680.6 Structural Analysis Engineer +1454461293 230 Victor Campbell vcampbell6d@stumbleupon.com Male 212.43.106.70 China 9/19/1993 42985.78 Analog Circuit Design manager +1454461314 421 Timothy Gomez tgomezbo@examiner.com Male 33.5.250.113 373343849259778 Czech Republic 215485.48 +1454461350 944 Kelly Hanson khansonq7@phpbb.com 250.78.86.48 United States 1/2/1969 \N Account Executive +1454461510 985 Rachel Holmes rholmesrc@hubpages.com Female 182.16.233.193 3578965006812598 Nigeria 4/1/1980 273229.15 Assistant Professor +1454461537 400 Arthur Smith asmithb3@accuweather.com Male 107.97.38.111 5602233710304252 China 1/30/1985 114652.62 Mechanical Systems Engineer +1454461604 993 Christina Hayes chayesrk@xing.com Female 199.58.20.93 North Korea 10/30/1967 121659.5 Librarian +1454461701 67 Tina Reid treid1u@163.com Female 116.38.145.226 Germany 4/25/1967 228301.51 Financial Analyst +1454461723 708 Carlos Mason cmasonjn@state.tx.us Male 171.189.25.159 5402971302511824 Thailand 4/8/1965 163810.9 Business Systems Development Analyst +1454461756 816 Sara Sanders ssandersmn@cornell.edu Female 54.250.225.134 Netherlands 7/26/1998 261953.95 Quality Engineer +1454461763 299 Diane Watkins dwatkins8a@netvibes.com 141.246.209.93 Yemen \N +1454461897 976 Paula Ross prossr3@tumblr.com 39.229.193.40 3535447138661799 Jordan 8/19/1990 \N Budget/Accounting Analyst IV +1454461902 56 Thomas Freeman tfreeman1j@java.com Male 161.123.216.250 3536920916224146 Colombia 8/4/1973 239571.27 Senior Developer === Try load data from userdata3.parquet -1454515666 1 Ernest Fuller efuller0@examiner.com Male 106.72.28.74 5610608195667267 Israel 140639.36 -1454536327 2 Anthony Foster afoster1@weibo.com Male 156.243.130.166 4508242795214771 Indonesia 1/16/1998 172843.61 Developer II 👾 🙇 💁 🙅 🙆 🙋 🙎 🙍 -1454466139 3 Ryan Montgomery rmontgomery2@mozilla.org Male 28.55.168.128 Colombia 11/21/1978 204620.66 Developer I ␢ -1454473204 4 Brenda Nelson bnelson3@photobucket.com Female 185.81.160.85 Guatemala 10/29/1998 260474.12 GIS Technical Architect -1454458516 5 Jacqueline Ellis jellis4@amazon.com Female 158.137.238.6 Russia 7/12/1959 286038.78 Marketing Assistant -1454528894 6 Paul Ferguson pferguson5@gmpg.org Male 141.122.136.144 30501574577558 Thailand 241518.24 -1454489945 7 Linda Hunt lhunt6@prlog.org Female 104.179.97.82 Russia 3/30/1988 192756.38 Professor -1454486691 8 Frances Kim fkim7@blog.com Female 28.77.158.48 676306013856639159 Indonesia 188511.28 -1454487153 9 Jason Matthews jmatthews8@google.co.uk Male 72.129.239.24 3534550235909507 China 7/29/1982 
238068.56 Web Designer III -1454519282 10 Carolyn Elliott celliott9@cpanel.net Female 51.211.70.30 3563436733386899 Indonesia 4/28/1977 132718.26 Research Nurse -1454473379 11 Thomas Mills tmillsa@psu.edu Male 104.114.227.199 5018278895598921190 Russia 236386.69 -1454534367 12 Russell Lee rleeb@howstuffworks.com Male 193.165.137.217 China 280252.36 🐵 🙈 🙉 🙊 -1454525264 13 Chris Bailey cbaileyc@redcross.org Male 246.109.118.154 30485245023962 Thailand 11/26/1970 200218.34 Research Assistant I +1454457607 457 Clarence Hunt chuntco@drupal.org Male 89.135.47.216 Zambia 9/27/1977 97179.31 Staff Accountant III 1E02 +1454457613 723 Arthur Jones ajonesk2@theguardian.com Male 31.151.216.146 France 2/6/1986 12068.96 Teacher +1454457706 234 Doris Grant dgrant6h@nasa.gov Female 195.132.180.36 5602256096038525 Colombia 7/14/1969 283813.79 Senior Cost Accountant 1454457712 14 Eric Parker eparkerd@usa.gov Male 25.73.91.135 5602249431899032 Russia 8/12/1986 102832.54 Tax Accountant -1454526788 15 Anne Robertson arobertsone@geocities.jp Female 209.77.27.30 Armenia 168201.04   -1454494278 16 Angela Gonzalez agonzalezf@state.gov Female 118.77.43.191 Sweden 7/1/1972 161220.37 Database Administrator I -1454488522 17 Edward Moreno emorenog@hp.com Male 200.50.125.67 3559979696602303 France 8/17/1966 144551.41 Chief Design Engineer -1454496145 18 Roy Murray rmurrayh@sphinn.com Male 91.52.226.221 3546330084792460 Portugal 285872.87 𠜎𠜱𠝹𠱓𠱸𠲖𠳏 -1454492939 19 Louis Willis lwillisi@hp.com 14.132.82.250 Philippines 8/1/1980 \N Director of Sales -1454530172 20 Edward Perez eperezj@china.com.cn Male 24.152.201.59 3571014044514515 Indonesia 29515.23 -1454518522 21 Nicole Price npricek@cpanel.net Female 4.21.204.142 Peru 5/8/1978 154023.3 Office Assistant III -1454496552 22 Virginia Nichols vnicholsl@ning.com Female 160.202.18.170 30166467912021 Greece 5/10/1966 145509.34 Programmer II -1454474290 23 Katherine Roberts krobertsm@hostgator.com Female 247.21.118.188 Cuba 192723.43 -1454522256 24 Emily Sullivan esullivann@sakura.ne.jp Female 33.152.103.14 4074771539744796 Indonesia 6/28/1965 36127.55 VP Sales -1454527958 25 Susan Turner sturnero@google.pl 150.94.47.96 374283138983226 United States \N -1454540961 26 Fred Jenkins fjenkinsp@walmart.com Male 219.195.7.86 China 3/23/1965 69388.75 Human Resources Assistant I -1454496916 27 Jane Torres jtorresq@photobucket.com Female 147.220.219.158 5002353015111222 Indonesia 9/29/1997 226788.25 Occupational Therapist -1454508711 28 Louis Patterson lpattersonr@wp.com Male 158.176.255.43 5100145505218793 China 9/20/1993 30309.45 VP Quality Control -1454538643 29 Brandon Wagner bwagners@slashdot.org Male 124.203.101.37 6771208405057819279 Iraq 10/3/1959 95522.88 Research Associate -1454484725 30 Amy Jenkins ajenkinst@wikia.com Female 21.0.126.111 3542005201579396 Ethiopia 9/26/1984 167682.84 Tax Accountant """\'""\'""\'\'\'""" -1454513613 31 Timothy Frazier tfrazieru@toplist.cz 100.218.94.178 China 5/17/1963 \N Director of Sales 0.00 -1454463548 32 Phillip Meyer pmeyerv@live.com Male 184.208.76.39 3541248561759148 France 11/3/1974 245572.41 Nurse -1454528692 33 Joe Wallace jwallacew@mail.ru Male 167.122.66.246 5602246900361320 Russia 64311.11 -1454466352 34 Walter Rivera wriverax@de.vu Male 67.169.221.120 5366484318587717 Russia 1/28/1983 271690.8 Programmer Analyst I -1454480715 35 Lois Mcdonald lmcdonaldy@paypal.com 44.140.199.251 Portugal \N -1454499439 36 William Edwards wedwardsz@acquirethisname.com Male 69.187.29.7 3528411636358679 Egypt 2/23/1958 252476.42 Financial Analyst 
Œ„´‰ˇÁ¨ˆØ∏”’ -1454460587 37 Frank Stevens fstevens10@samsung.com Male 61.182.84.178 Philippines 3/19/1958 47326.14 VP Product Management -1454536874 38 Albert Martinez amartinez11@godaddy.com Male 76.139.124.119 Ukraine 11/11/1994 57220.55 Software Engineer III -1454504601 39 Stephanie Stewart sstewart12@elpais.com Female 104.98.138.203 4905603900430425379 Syria 2/11/1975 250118.59 Developer I -1454521301 40 Annie Stevens astevens13@slate.com Female 214.146.163.79 3553338148582934 South Africa 11/8/1983 12963.52 Systems Administrator I -1E2 -1454460788 41 Joyce Butler jbutler14@csmonitor.com Female 88.243.175.236 Indonesia 135825.27 -1454460615 42 Carlos Armstrong carmstrong15@technorati.com Male 85.22.216.153 3532000356234436 Indonesia 23446.58 -1454537073 43 Frances Kelly fkelly16@springer.com Female 146.38.150.164 4026344347458956 China 242916.36 -1454507861 44 Amanda Pierce apierce17@phpbb.com Female 214.208.248.216 201678379872880 Faroe Islands 6/1/1990 38037.1 Software Test Engineer II 
test
 -1454464352 45 Alan Torres atorres18@histats.com Male 117.124.224.32 4844818559255911 Israel 114759.77 -1454528513 46 Nancy Brown nbrown19@lycos.com Female 98.103.84.222 4041378619584967 Portugal 9/16/1972 170596.79 GIS Technical Architect -1454518979 47 Kenneth Larson klarson1a@cnet.com Male 71.35.49.21 Philippines 2/3/1990 178010.01 Staff Scientist -1454536052 48 Thomas Lawson tlawson1b@canalblog.com Male 209.50.87.12 50201361710870252 Ukraine 10/5/1987 35118.14 Software Test Engineer II -1454488725 49 Debra Gomez dgomez1c@lycos.com Female 26.107.134.220 30508009555281 China 9/10/1979 129186.15 Electrical Engineer -1454489047 50 Deborah Price dprice1d@google.nl Female 207.145.225.232 4055636387933119 Russia 1/26/1983 165945.4 Dental Hygienist ␡ -1454478467 51 Diane Banks dbanks1e@wikispaces.com Female 22.253.228.131 China 39139.44 -1454468949 52 Marie Woods mwoods1f@bbc.co.uk 41.109.183.128 Russia 2/20/1989 \N Human Resources Manager -1454489570 53 Randy Romero rromero1g@tamu.edu Male 134.90.91.230 Indonesia 11/30/1960 230039.26 Professor -1454528266 54 Brandon Fox bfox1h@ocn.ne.jp Male 157.130.211.215 6391404048298002 China 2/1/1979 223567.43 Programmer III -1454513948 55 Albert Smith asmith1i@jalbum.net Male 167.84.86.133 3530479136988416 Ukraine 263457.42 -1454467976 56 Jeremy Black jblack1j@sphinn.com Male 181.85.144.139 Poland 194896.66 -1454463146 57 Marilyn Shaw mshaw1k@bloomberg.com Female 141.42.43.91 30110642387063 China 178473.04 -1454540383 58 Stephanie Diaz sdiaz1l@who.int Female 127.174.128.199 3571927033182087 Indonesia 3/25/1974 135570.75 Paralegal -1454492347 59 Christopher Reynolds creynolds1m@sun.com Male 81.89.26.14 China 5/29/1956 147519.69 Account Executive -1454529565 60 Douglas Holmes dholmes1n@weather.com Male 99.22.29.208 Honduras 11/29/2000 45372.51 VP Accounting œ∑´®†¥¨ˆøπ“‘ -1454485707 61 Howard Rogers hrogers1o@sciencedirect.com Male 222.229.220.65 Ukraine 2/26/1995 143231.21 Account Executive -1454489894 62 Melissa Washington mwashington1p@cmu.edu Female 32.151.71.144 374288910553246 Czech Republic 2/24/1966 266547.15 Human Resources Manager -1454541195 63 Margaret Flores mflores1q@usnews.com Female 108.42.248.249 France 8/25/1999 110594.3 Data Coordiator +1454457781 846 Sharon Porter sporternh@yelp.com Female 206.179.138.50 6706029727013149 Colombia 7/3/1966 175902.84 Project Manager +1454457884 637 Frank Hudson fhudsonho@walmart.com Male 52.37.91.110 4405081678166102 China 2/7/1997 126102.31 Senior Developer +1454457968 134 Teresa Gray tgray3p@ox.ac.uk Female 60.117.57.222 China 9/18/1994 159276.6 Assistant Media Planner +1454458022 549 Aaron Reid areidf8@topsy.com Male 117.148.230.113 Russia 3/25/1983 211580.8 Product Engineer +1454458079 156 Ann Morris amorris4b@newyorker.com Female 14.165.90.97 3553147941910493 Indonesia 6/4/1956 158396.75 Engineer I +1454458121 794 Joshua Flores jfloresm1@sphinn.com Male 84.212.10.197 3587575297567030 China 2/9/1989 267751.84 Developer III +1454458182 604 Steve Castillo scastillogr@ezinearticles.com Male 159.158.95.181 3545937730645529 China 6/8/1993 86028 Programmer III 1454458233 64 Rose Fernandez rfernandez1r@usgs.gov Female 199.141.221.229 3564435193511524 Brazil 5/5/1972 196329.18 Senior Cost Accountant -1454472500 65 Julie Mendoza jmendoza1s@unesco.org Female 137.192.7.121 3586331607810566 Cuba 149157.14 -1454515883 66 Earl Sanders esanders1t@github.com Male 179.122.203.141 3561742181897127 Vietnam 215545.14 𠜎𠜱𠝹𠱓𠱸𠲖𠳏 -1454460569 67 Eric Armstrong earmstrong1u@arizona.edu Male 128.202.252.112 4041590574307 
Indonesia 5/30/1973 75347.18 Web Designer II -1454532395 68 Joyce Perez jperez1v@dmoz.org Female 145.86.183.96 Canada 3/29/1975 115579.36 Director of Sales -1454524697 69 Sanchez Female 100.163.22.106 Russia 127045.66 -1454489862 70 Laura Romero lromero1x@godaddy.com Female 237.131.116.77 3539134691869631 Madagascar 12/20/1957 208213.96 Business Systems Development Analyst -1454538359 71 Maria Thomas mthomas1y@lycos.com Female 12.113.23.220 5602229580950679 China 10/29/1990 88961.11 Nurse -1454520121 72 Victor Romero vromero1z@reference.com Male 208.79.116.61 6767842086446946518 Brazil 209207.14 -1454510241 73 Betty Hayes bhayes20@goo.ne.jp Female 153.254.225.4 201881044698306 Jordan 3/9/1970 173372.32 VP Accounting -1454465142 74 Roger Jacobs rjacobs21@rediff.com Male 51.122.147.153 36548589951538 Benin 7/18/1977 18545.32 Paralegal 1/2 -1454470850 75 Ruth Thompson rthompson22@reuters.com Female 220.41.116.217 67067442144878124 Croatia 6/30/1972 167279 Account Executive ヽ༼ຈل͜ຈ༽ノ ヽ༼ຈل͜ຈ༽ノ -1454515259 76 Theresa James tjames23@un.org Female 31.135.76.146 China 12/28/1974 188732.88 Financial Advisor -1454517695 77 Pamela Collins pcollins24@nih.gov Female 21.45.74.249 490591529416018576 Moldova 7/28/1998 252394.72 Marketing Assistant 🚾 🆒 🆓 🆕 🆖 🆗 🆙 🏧 -1454523543 78 Adam Ward award25@telegraph.co.uk Male 242.85.131.30 201794641891036 Brazil 276446.24 1454458334 79 Robin Price rprice26@jugem.jp Female 235.141.108.176 5610389618618837 Russia 1/7/1977 120293.75 Biostatistician IV -1454529469 80 Barbara Ryan bryan27@usda.gov Female 58.0.103.48 30526192141883 Philippines 198959.68 -1454497076 81 Melissa Gibson mgibson28@census.gov Female 54.212.104.159 3529828486403520 Bhutan 7/29/1990 224163.74 Senior Developer -1454467979 82 Carolyn Morris cmorris29@cbslocal.com Female 86.106.24.230 Portugal 2/12/1958 87727.95 Quality Engineer 0.00 -1454484623 83 Stephen Harris sharris2a@un.org Male 247.19.48.100 Russia 4/9/1983 284559.55 Product Engineer ١٢٣ -1454476730 84 Linda Campbell lcampbell2b@mapy.cz Female 28.62.77.24 6759510168753943 Peru 2/27/1982 16435.84 VP Quality Control ␡ -1454463822 85 Brian Daniels bdaniels2c@ovh.net Male 143.36.66.196 Ecuador 7/6/1966 148952.4 Information Systems Manager 1454458337 86 West Female 247.72.186.254 3541609903446548 Indonesia 12/11/1984 132544.98 Physical Therapy Assistant -1454518267 87 Timothy Moore tmoore2e@printfriendly.com Male 109.229.170.253 Samoa 42697.58 -1454523368 88 Eric Walker ewalker2f@mozilla.com Male 243.173.35.155 Thailand 5/29/1970 48715.81 Engineer IV -1454486082 89 Maria Arnold marnold2g@google.com.br Female 58.58.77.228 3589928770150089 Uruguay 3/14/1956 64067 Geological Engineer -1454541738 90 Edward Garza egarza2h@moonfruit.com Male 43.21.138.236 New Zealand 3/27/1965 139025.58 Structural Analysis Engineer -1454490484 91 Alice Young ayoung2i@typepad.com Female 120.255.189.145 630468343049978318 Serbia 4/18/1981 17663.49 Automation Specialist I -1454512586 92 Kenneth Powell kpowell2j@unicef.org Male 238.251.71.34 3586683330377036 Philippines 2/10/1955 68010.82 Social Worker -1454472784 93 Kelly Bell kbell2k@hud.gov Female 176.210.241.20 Russia 11/17/1984 57640.41 Web Developer I   -1454490007 94 David Garcia dgarcia2l@tmall.com Male 100.18.61.166 Paraguay 201297.71 -1454504627 95 Maria Harvey mharvey2m@nydailynews.com Female 192.209.117.213 67593619471737741 Mongolia 283649.67 -1454505519 96 Chris Hall chall2n@imageshack.us Male 241.96.162.44 5594268668744901 Russia 1/3/1964 67656.08 Web Designer II -1454481847 97 Roger Simpson 
rsimpson2o@nymag.com Male 80.110.89.28 493618903455317947 Indonesia 76354.79 -1454515032 98 Richard Nelson rnelson2p@simplemachines.org Male 43.54.4.82 Brazil 237205.58 NIL +1454458375 939 Craig Jones cjonesq2@de.vu Male 154.208.206.255 Indonesia 1/29/1989 266312.01 Safety Technician II +1454458415 805 George Meyer gmeyermc@google.nl Male 146.59.222.51 Syria 5/28/1973 242409.4 Analog Circuit Design manager +1454458434 914 Earl Martinez emartinezpd@squidoo.com Male 150.29.51.94 677135530260451546 Philippines 10/25/1970 257708.77 Software Engineer II 1E+02 +1454458516 5 Jacqueline Ellis jellis4@amazon.com Female 158.137.238.6 Russia 7/12/1959 286038.78 Marketing Assistant +1454458597 371 Heather Fisher hfisheraa@printfriendly.com Female 190.23.234.91 6304245587473860 Portugal 4/24/1955 101118.28 Associate Professor +1454458619 680 Mildred Dean mdeaniv@alibaba.com Female 173.255.221.184 3576992005749797 Armenia 4/3/1979 78889.63 Desktop Support Technician "__ロ( +1454458806 695 Ashley Olson aolsonja@noaa.gov Female 233.175.155.3 376319939588935 Indonesia 5/8/1979 256795.8 Systems Administrator III +1454458825 946 Beverly Henderson bhendersonq9@amazon.com Female 96.37.213.162 3554635936579520 Russia 8/4/1979 65339.1 VP Marketing +1454458897 881 Collins Male 100.212.189.244 3531552235272517 South Korea 7/5/1981 72539.92 VP Sales +1454458915 332 Raymond Ward rward97@drupal.org Male 89.82.25.71 3538744508795034 South Africa 5/4/1994 163739.08 Data Coordiator +1454458981 216 Judy Gutierrez jgutierrez5z@ftc.gov Female 120.107.239.171 China 11/13/1965 36744.51 Statistician I 🐵 🙈 🙉 🙊 +1454458994 539 Donald Holmes dholmesey@examiner.com Male 24.129.145.78 3532611982139532 Czech Republic 11/7/1988 256744.28 Administrative Assistant I +1454459168 721 Christopher Hunt chuntk0@blogtalkradio.com Male 69.240.85.94 201463274401428 Indonesia 6/8/1968 32269.1 Data Coordiator +1454459172 733 Bonnie Hawkins bhawkinskc@vinaora.com Female 150.107.139.217 5010121004388204 China 8/28/1971 133958.72 Information Systems Manager +1454459204 768 Victor Nichols vnicholslb@blogs.com Male 231.113.119.58 3587933684998468 France 13777.53 +1454459243 803 Donald Wood dwoodma@parallels.com Male 212.8.149.51 67610717455795070 Mexico 6/22/1971 20752.43 Chief Design Engineer Œ„´‰ˇÁ¨ˆØ∏”’ +1454459252 752 Mark Gomez mgomezkv@hud.gov Male 116.39.31.225 337941154145279 Indonesia 1/12/1965 232731.06 Professor +1454459281 282 Jason Kelly jkelly7t@themeforest.net Male 129.110.129.46 3532753335256769 Botswana 122812.35 +1454459307 681 Carlos Fields cfieldsiw@trellian.com Male 253.69.168.229 3573119954905542 Japan 121346.35 +1454459430 892 Gloria Fowler gfowleror@apache.org Female 31.26.133.176 5602245069101311 Jamaica 5/31/1962 172923.11 Desktop Support Technician -1E+02 +1454459462 728 Jacqueline Porter jporterk7@example.com Female 183.189.204.28 3558636209028613 China 2/18/1966 60948.17 VP Marketing +1454459482 847 Brenda Hall bhallni@craigslist.org Female 239.232.28.195 Sweden 12/5/1962 14658.92 Senior Quality Engineer +1454459511 512 Phyllis Rice pricee7@t-online.de Female 141.247.60.33 4041591621552 China 3/9/1992 74670.8 Web Developer I +1454459535 331 Patrick White pwhite96@sina.com.cn Male 145.132.114.239 3534146356970178 Ukraine 1/19/1994 96246.01 Executive Secretary +1454459549 611 Elizabeth Day edaygy@archive.org Female 244.129.35.183 4903539550370988748 China 6/28/1974 217382.97 Paralegal 𠜎𠜱𠝹𠱓𠱸𠲖𠳏 +1454459623 424 Lillian Vasquez lvasquezbr@about.me Female 15.233.130.74 6706936038940735306 Netherlands 6/28/2000 256419.66 Account 
Representative I +1454459691 579 Irene Day idayg2@theglobeandmail.com Female 124.253.55.20 3564632724049897 Argentina 9/3/1974 58715.23 Teacher +1454459729 362 Melissa Stephens mstephensa1@comsenz.com Female 105.158.98.174 3534057744078246 Philippines 1/22/1974 210781.96 Cost Accountant ᠎ +1454459735 103 Justin Grant jgrant2u@lycos.com Male 251.111.132.81 3542141314461899 China 1/7/2001 140911.2 Project Manager +1454459793 662 Jesse Gonzales jgonzalesid@google.fr Male 215.192.238.90 3550826252709387 Peru 7/22/1978 260505.75 Environmental Specialist +1454459819 866 Andrea Carpenter acarpentero1@taobao.com Female 246.154.31.121 Japan 3/6/1984 248740.81 Senior Quality Engineer +1454459841 923 Marilyn Long mlongpm@walmart.com Female 215.6.99.179 5602241011840536 Cameroon 10/28/1964 110571.54 Social Worker +1454459858 560 Judy Wright jwrightfj@blogs.com 7.139.209.42 560222806370845260 Colombia 3/6/1961 \N Software Test Engineer IV +1454459862 244 Diane Hawkins dhawkins6r@hatena.ne.jp Female 90.247.138.242 4026763155071942 China 5/10/1968 171218.47 Help Desk Operator +1454459921 639 Gloria Fields gfieldshq@mlb.com Female 76.62.183.159 6334660493144630501 Peru 5/7/1996 210991.41 Accounting Assistant II +1454459945 193 Catherine Rivera crivera5c@liveinternet.ru Female 197.164.37.102 4903900636714991 China 10/17/1984 240545.5 Cost Accountant +1454459958 186 Larry Coleman lcoleman55@imdb.com Male 139.205.254.237 3549906950974212 Germany 12/19/1958 182376.29 Compensation Analyst +1454459959 195 Andrew Henderson ahenderson5e@ftc.gov Male 44.116.118.204 United States 5/27/1977 108242.9 Accountant I +1454460044 743 Mildred Clark mclarkkm@issuu.com Female 179.135.234.32 3589587359210761 Philippines 268426 -1E+02 +1454460050 189 Samuel Fox sfox58@bing.com Male 220.161.213.119 3535192418612498 Argentina 9/2/1991 56084.78 Marketing Assistant +1454460053 209 Anne Flores aflores5s@marketwatch.com Female 8.136.212.14 Canada 6/17/1964 195673.07 Occupational Therapist +1454460230 956 John Baker jbakerqj@exblog.jp Male 96.167.232.236 Spain 9/29/1992 177531.95 Sales Representative +1454460278 683 Paula Johnston pjohnstoniy@marketwatch.com Female 246.57.43.147 560221588257454843 Mongolia 10/20/1978 227145.54 Administrative Officer +1454460325 341 Samuel Jordan sjordan9g@jimdo.com Male 183.29.32.119 3535569167756420 China 3/29/1975 130541.17 Safety Technician IV +1454460330 654 Michael Sims msimsi5@discuz.net Male 169.136.209.75 Bulgaria 6/14/1982 277854.98 Recruiting Manager +1454460342 814 Deborah Hudson dhudsonml@parallels.com 186.205.3.210 Ukraine 11/3/2000 \N Marketing Manager +1454460373 813 Mildred Harris mharrismk@vistaprint.com Female 250.65.167.151 3577530968521354 Greece 238399.8 +1454460382 624 Wayne Henry whenryhb@dedecms.com Male 173.2.93.236 China 147631.62 +1454460446 1000 Wanda Brooks wbrooksrr@yellowpages.com Female 241.43.62.149 3539260761630759 Japan 158607.84 +1454460471 685 Joe Rivera jriveraj0@ebay.com Male 101.130.15.106 4903855508114581 Thailand 74067.89 +1454460482 330 Robin Campbell rcampbell95@stanford.edu Female 144.152.165.130 4662544509352 Sierra Leone 4/9/1969 64481.72 Quality Engineer +1454460569 67 Eric Armstrong earmstrong1u@arizona.edu Male 128.202.252.112 4041590574307 Indonesia 5/30/1973 75347.18 Web Designer II +1454460587 37 Frank Stevens fstevens10@samsung.com Male 61.182.84.178 Philippines 3/19/1958 47326.14 VP Product Management +1454460615 42 Carlos Armstrong carmstrong15@technorati.com Male 85.22.216.153 3532000356234436 Indonesia 23446.58 +1454460668 556 Lisa Turner 
lturnerff@ustream.tv Female 192.4.71.81 3579076936527626 China 127717.62 +1454460696 958 Howard Gomez hgomezql@people.com.cn 226.78.136.12 6706662408386172373 Philippines \N test⁠test‫ +1454460697 959 Kimberly Alvarez kalvarezqm@gizmodo.com Female 244.177.51.246 30135810163038 Philippines 8/5/1976 211292 Design Engineer +1454460701 612 Dorothy Hanson dhansongz@i2i.jp Female 165.73.75.69 Azerbaijan 9/5/1971 246728.41 Information Systems Manager +1454460759 126 Amy Roberts aroberts3h@dyndns.org Female 166.99.225.202 Costa Rica 273960.79 𠜎𠜱𠝹𠱓𠱸𠲖𠳏 +1454460768 822 Jane Tucker jtuckermt@arizona.edu Female 43.88.112.223 Sweden 55680.59 +1454460788 41 Joyce Butler jbutler14@csmonitor.com Female 88.243.175.236 Indonesia 135825.27 +1454460812 496 Jesse Cole jcoledr@sogou.com Male 106.227.88.115 50184107778776571 Peru 6/2/1965 205296.96 Actuary +1454460898 516 Wayne Carter wcartereb@g.co Male 151.122.136.210 3547971451281253 Portugal 1/22/1992 122139.24 Cost Accountant +1454460912 571 Joan Chavez jchavezfu@com.com Female 17.161.255.139 Poland 10/16/1972 277679.98 Safety Technician I +1454460930 166 Pamela Perkins pperkins4l@wsj.com Female 237.225.95.141 378608444146629 China 141169.54 +1454460959 128 Wayne Kim wkim3j@cdc.gov 196.5.87.192 5007668319479461 Malaysia 1/27/1979 \N Internal Auditor +1454460980 465 Julie Phillips jphillipscw@ning.com Female 186.219.160.248 5602251286921119 Spain 6/10/1976 120755.68 Marketing Manager /dev/null; touch /tmp/blns.fail ; echo +1454460991 144 Martha Martin mmartin3z@sakura.ne.jp Female 220.126.107.146 201779098970730 New Zealand 5/23/1985 88724.94 Administrative Officer +1454461001 874 Laura Wells lwellso9@mit.edu Female 135.67.140.204 5482317399663099 Sweden 12/4/1993 262303.96 Environmental Tech +1454461065 833 Lois Lee lleen4@zdnet.com Female 31.87.204.102 5602245033844400 Bulgaria 113425.72 +1454461292 575 Jessica Watkins jwatkinsfy@marketwatch.com Female 165.50.211.193 201566979007298 Macedonia 7/12/1989 253506.67 Food Chemist +1454461361 184 Clarence Moore cmoore53@bloglines.com Male 212.30.218.42 Indonesia 6/16/1974 283539.78 Internal Auditor +1454461642 406 Frances Ray frayb9@theguardian.com Female 24.12.13.133 3555958533555779 Colombia 9/19/2000 282052.82 Staff Accountant III +1454461847 446 Helen Ward hwardcd@indiegogo.com Female 249.175.182.167 3550054667502541 Colombia 2/15/1959 115934.54 Graphic Designer +1454461863 101 Irene Adams iadams2s@biblegateway.com Female 135.79.211.166 Palestinian Territory 7/29/1994 73723.8 Help Desk Technician 00˙Ɩ$- 1454461907 99 Ruth Howell rhowell2q@cornell.edu Female 190.170.191.14 China 5/2/1969 286113.38 Senior Quality Engineer -1454524115 100 Judith Garza jgarza2r@usnews.com Female 204.216.154.40 Ecuador 6/22/1962 256786.42 Teacher +1454461978 340 Gloria Wilson gwilson9f@soup.io Female 116.58.188.151 3539542269827494 Croatia 206401.2 +1454462106 132 Amanda Porter aporter3n@cloudflare.com Female 64.254.17.111 Brazil 7/26/1964 41956.4 Nurse +1454462425 102 Ralph Walker rwalker2t@sitemeter.com 101.111.216.188 Peru 4/15/1959 \N VP Accounting +1454462469 188 Christine Rodriguez crodriguez57@sciencedaily.com 240.122.189.81 6397046163164230 China 12/13/1998 \N Sales Representative +1454462692 106 Cynthia Vasquez cvasquez2x@washingtonpost.com Female 70.52.238.194 Kazakhstan 175907.62 1E+02 +1454462763 121 Heather Davis hdavis3c@hhs.gov Female 154.156.181.140 Poland 71140.46 +1454462944 704 Patrick Torres ptorresjj@ask.com Male 122.10.211.188 5602254083107544 Russia 10/28/1995 119841.99 Environmental Tech +1454463056 718 
Tammy Simpson tsimpsonjx@imdb.com Female 28.114.238.250 5602250512089980 Russia 4/30/1987 240161.08 Human Resources Manager -1/2 +1454463110 548 Ryan 48.44.183.147 Russia 12/7/1999 \N Recruiting Manager +1454463111 206 Jeremy Boyd jboyd5p@sciencedirect.com Male 190.221.209.41 Mexico 8/17/1963 169562.93 Legal Assistant $1.00 === Try load data from userdata4.parquet -1454599685 1 Howard Morgan hmorgan0@typepad.com 158.178.195.62 Colombia 12/2/1992 \N Data Coordiator -1454581720 2 Jessica Schmidt jschmidt1@google.com Female 168.118.247.35 3565285464047941 Luxembourg 4/14/1995 222396.46 Research Nurse nil -1454608896 3 Beverly Flores bflores2@wikipedia.org Female 51.97.88.173 Sweden 2/15/1965 141112.8 Actuary -1454575874 4 Marilyn Sanchez msanchez3@intel.com Female 186.206.142.162 China 8/6/1969 87914.29 Structural Engineer -1454567588 5 Janice Mitchell jmitchell4@sina.com.cn Female 205.187.116.241 5610719759939376962 Poland 7/4/1995 269297.4 Systems Administrator I +1454544135 174 Arthur Bishop abishop4t@deliciousdays.com Male 23.143.216.45 3543731590226021 Portugal 74352.02 +1454544166 397 Adam Harrison aharrisonb0@symantec.com Male 24.23.251.104 30250631299455 United States 10/14/1976 220537.78 Systems Administrator IV +1454544275 676 Julia Turner jturnerir@tripadvisor.com Female 246.75.105.64 3573355428855000 Philippines 9/23/1975 43244.37 Engineer I +1454544290 694 Carol Griffin cgriffinj9@zimbio.com 4.106.189.110 Philippines 5/5/1958 \N Quality Engineer +1454544350 790 Michael Mitchell mmitchelllx@blog.com Male 142.112.74.125 China 74089.46 +1454544355 372 Brandon Hicks bhicksab@unicef.org Male 14.1.141.83 564182403737341280 China 10/4/1985 62678.54 Sales Representative +1454544427 582 Annie Spencer aspencerg5@gizmodo.com Female 193.135.127.103 Philippines 7/29/1965 32342.28 Cost Accountant +1454544628 802 Lois Gibson lgibsonm9@mayoclinic.com Female 226.250.177.108 5610916546870112 Thailand 5/16/1955 149273.02 Occupational Therapist +1454544647 382 Paul Sanders psandersal@photobucket.com Male 216.84.37.205 6385564398040268 Sweden 6/9/1980 240223.98 Mechanical Systems Engineer 1 +1454544648 364 Jason Fox jfoxa3@unesco.org Male 184.48.48.126 Japan 8/9/1976 84483.3 Mechanical Systems Engineer +1454544719 716 Diana Little dlittlejv@shop-pro.jp Female 168.15.235.95 Argentina 267712.23 +1454544765 766 Lisa Harper lharperl9@boston.com Female 26.253.184.166 4903454632131201206 China 9/30/1986 177862.14 Analog Circuit Design manager +1454544797 471 Linda Arnold larnoldd2@yellowbook.com Female 25.72.220.19 3573669257084239 Indonesia 2/6/1983 249094.03 GIS Technical Architect " +1454544833 508 Andrea Alvarez aalvareze3@amazon.co.uk Female 94.93.141.212 Indonesia 165484.69   +1454544883 991 Mary Willis mwillisri@i2i.jp Female 188.83.241.84 Russia 9/4/1992 133498.3 Payment Adjustment Coordinator +1454544907 137 Harry Thomas hthomas3s@edublogs.org Male 203.181.156.216 3586074069338235 Poland 6/6/1979 159098.74 Chemical Engineer +1454545008 824 Jack Hudson jhudsonmv@hp.com Male 195.27.62.30 Ukraine 9/19/1970 163426.27 Community Outreach Specialist +1454545044 173 Ruth Welch rwelch4s@spotify.com Female 7.253.134.135 3543426983427878 Japan 8/6/1964 203330.7 Paralegal +1454545053 225 Judy Greene jgreene68@discovery.com 246.203.234.47 589310636256482728 Dominica \N +1454545135 948 Janet Lawson jlawsonqb@indiatimes.com Female 90.48.142.31 4026186827051821 Philippines 197991.65 +1454545185 757 James Pierce jpiercel0@meetup.com Male 14.116.62.43 5018717793434778 Greece 12/25/1989 17173.34 Assistant Manager 
+1454545221 995 Philip Mcdonald pmcdonaldrm@tripadvisor.com Male 224.59.55.103 5108753554344402 France 4/22/1955 59331.14 Recruiting Manager 1454545227 6 William Williamson wwilliamson5@trellian.com Male 44.86.73.201 201849487683564 Indonesia 12/6/1993 95352.25 Librarian 1E+02 -1454602212 7 Jack James jjames6@sogou.com Male 59.184.76.208 3552911855395632 Indonesia 11/25/1968 82549.73 Compensation Analyst ‪‪test‪ -1454556325 8 Jesse Arnold jarnold7@soup.io Male 7.25.90.13 5100177285965756 Brazil 10/19/1987 257968.86 Executive Secretary -1454622627 9 Lori Woods lwoods8@fastcompany.com Female 147.157.215.9 4844532485570190 Indonesia 12/26/1975 186145.91 Health Coach I -1454601455 10 Juan Evans jevans9@zimbio.com Male 150.132.218.181 3578802610769023 Philippines 5/29/1988 129369.52 Social Worker -1454579490 11 Roy Matthews rmatthewsa@ucsd.edu Male 203.239.85.224 5100135134598509 Russia 192057.84 -1454586145 12 Kenneth King kkingb@zimbio.com 9.103.96.206 675913564329481832 Greece \N -1454568600 13 Raymond Green rgreenc@fc2.com Male 163.9.101.43 United States 1/28/1984 225094.01 Budget/Accounting Analyst III -1454603300 14 Lillian Stephens lstephensd@psu.edu Female 31.50.183.23 630455284969060148 Finland 6/1/1973 19354.85 Information Systems Manager -1454560697 15 Mary Gonzales mgonzalese@wired.com Female 91.42.17.109 3560985473023370 France 5/7/1966 23746.36 Compensation Analyst -1454561895 16 Roger Mason rmasonf@newyorker.com Male 169.33.172.204 3545036194973129 Norway 165855.47 -1454604198 17 Diane Cole dcoleg@unesco.org Female 157.11.85.209 Philippines 6/9/1994 105028.67 Assistant Manager -1454601270 18 Annie Hunt ahunth@ocn.ne.jp Female 169.47.232.187 5100177440436305 Poland 3/30/1992 266071.6 Legal Assistant -1454600872 19 Jacqueline Bradley jbradleyi@epa.gov Female 83.241.214.77 5100131814165289 Indonesia 12/1/1971 55440.88 Dental Hygienist -1454600248 20 Kathy Russell krussellj@joomla.org Female 158.32.89.44 3585627581021729 Indonesia 11/20/1999 29602.23 Sales Representative -1454551378 21 Beverly Barnes bbarnesk@europa.eu Female 189.157.45.179 3548552521258155 Bulgaria 4/21/1956 37295.89 Human Resources Assistant II -1454604764 22 Roy Morris rmorrisl@scribd.com 201.51.139.86 China \N -1454569146 23 Alice Ramos aramosm@utexas.edu Female 185.168.142.9 374622349140748 Philippines 4/20/1966 138021.54 Paralegal -1454597325 24 Todd Kelly tkellyn@fotki.com Male 46.19.203.86 4041599550654 Portugal 3/14/1998 84343.96 Executive Secretary () { _; } >_[$($())] { touch /tmp/blns.shellshock2.fail; } -1454551797 25 Lawrence Ramos lramoso@imageshack.us Male 5.96.81.47 5010121401502407 Palestinian Territory 1/26/1994 265545.92 Operator -1454605654 26 Jennifer Rogers jrogersp@so-net.ne.jp Female 31.48.54.193 5610097864736794573 Yemen 6/5/1992 138365.1 Computer Systems Analyst II -1454603775 27 Kimberly Morgan kmorganq@seesaa.net Female 154.61.255.47 China 14486.75 0/0 -1454606635 28 Jessica Marshall jmarshallr@mtv.com Female 164.101.35.148 3531025977662047 Brazil 7/2/1987 216211.96 VP Accounting -1454597817 29 Katherine Gordon kgordons@phoca.cz Female 248.30.182.15 5602230546469168 Italy 10/11/1956 48478.51 Librarian -1454557995 30 Jennifer Phillips jphillipst@pcworld.com Female 61.30.215.16 5100179891124018 Sweden 9/3/1967 254808.27 Software Consultant -1454613512 31 Gerald Nguyen gnguyenu@seesaa.net Male 9.13.167.17 67717376159922001 China 9/3/1972 285571.49 Tax Accountant -1454625134 32 Rose Ellis rellisv@walmart.com Female 250.88.7.15 3580333318847248 China 4/23/1987 47695.25 Systems Administrator II 
和製漢語 -1454622672 33 Margaret Grant mgrantw@bbb.org Female 227.165.116.192 3565645038486711 Slovenia 12/10/1992 106452.61 Account Coordinator -1454568796 34 Jessica Wells jwellsx@blogtalkradio.com Female 185.189.187.186 Azerbaijan 9/13/1996 173164.24 Project Manager -1454582324 35 Henry Jenkins hjenkinsy@mit.edu Male 10.83.90.235 5602221853972654 China 11/12/1975 25740.85 Recruiter 田中さんにあげて下さい +1454545361 770 Gregory Henderson ghendersonld@issuu.com Male 233.65.87.175 Philippines 79047.27 +1454545379 713 Ruth Barnes rbarnesjs@google.it Female 29.37.239.173 56108753791531632 Sweden 8/23/1965 268965.5 Occupational Therapist +1454545666 430 Stephen Knight sknightbx@so-net.ne.jp Male 233.213.210.160 China 7/7/1969 183842.12 Quality Control Specialist +1454545825 470 Carl Freeman cfreemand1@de.vu Male 40.13.20.8 5002357075956137 Armenia 1/6/1984 140264.63 Accountant III +1454545841 736 Ashley Black ablackkf@freewebs.com Female 130.87.75.86 30046346841197 China 5/8/1991 263407.66 Senior Developer 1454545876 36 Earl Mccoy emccoyz@bigcartel.com Male 161.179.122.154 5038877150819047588 Japan 10/12/1976 114766.43 Software Test Engineer IV 0.00 -1454618571 37 Paul Knight pknight10@google.cn Male 182.38.37.173 5020715558032859593 Ukraine 10/25/1971 199366 Social Worker -1454576590 38 Martha Clark mclark11@usda.gov 189.166.203.239 South Korea \N -1454601033 39 Clarence Bryant cbryant12@bigcartel.com Male 120.218.175.241 Poland 9/1/1968 257075.65 Professor 田中さんにあげて下さい -1454548319 40 Joan Price jprice13@mtv.com 233.4.158.135 3584182571037112 Portugal \N -1454573152 41 Anthony Ford aford14@chicagotribune.com Male 100.240.61.163 Iran 6/26/1992 152800.71 Senior Cost Accountant -1454595667 42 Roger Henderson rhenderson15@sitemeter.com Male 206.185.213.252 3560757094744860 Brazil 6/26/1970 40949.78 Nurse -1454591751 43 Kenneth Butler kbutler16@youtu.be Male 2.12.57.207 3586795027670612 Thailand 3/26/1987 165121.43 Research Assistant IV -1454566774 44 Kenneth Wright kwright17@google.de Male 241.213.136.95 5602246924892961 Belarus 10/15/1995 227583.86 Speech Pathologist -1454617513 45 Aaron Smith asmith18@flickr.com Male 185.244.9.145 China 11/25/1972 286108.94 Paralegal -1454574169 46 Amy Matthews amatthews19@t.co Female 206.172.83.152 5002357749310919 China 39365.73 -1454586102 47 Janet Cooper jcooper1a@dailymotion.com Female 9.148.129.197 Comoros 8/2/1968 168391.72 Senior Cost Accountant -1454601994 48 Russell Stewart rstewart1b@edublogs.org Male 113.23.229.63 675993663890158630 Thailand 4/17/1963 57609.96 Senior Editor -1454582839 49 Howard Elliott helliott1c@illinois.edu Male 225.208.151.89 3577055641640512 Mongolia 176999.03 -1454573932 50 Keith Lane klane1d@eventbrite.com Male 250.24.9.55 Russia 5/27/1983 80452.19 Budget/Accounting Analyst II -1454583292 51 Jimmy Richardson jrichardson1e@vimeo.com Male 152.87.188.99 China 6/30/1960 194774.28 Assistant Manager ❤️ 💔 💌 💕 💞 💓 💗 💖 💘 💝 💟 💜 💛 💚 💙 -1454623280 52 Justin Bryant jbryant1f@github.com Male 245.48.63.169 3562259518717901 Guatemala 10/28/1960 144419.21 Database Administrator III -1454582337 53 Ruby Allen rallen1g@cyberchimps.com Female 238.148.148.156 3541217939068433 Japan 248388.64 -1454578101 54 Ward Male 120.88.247.59 Russia 125075.78 +1454545911 981 Martin Hudson mhudsonr8@senate.gov Male 103.7.125.212 3580063273741488 Azerbaijan 55371.91 +1454545934 728 Brandon Oliver boliverk7@tuttocitta.it Male 190.202.45.71 3561315827587251 Norway 10/31/1960 157819.05 Structural Engineer +1454545941 224 Julia Lane jlane67@networksolutions.com Female 
126.98.58.100 3566544839563357 Brazil 9/24/1975 77279.09 Business Systems Development Analyst +1454545957 112 Mildred Martinez mmartinez33@wufoo.com 206.47.25.150 Brazil \N +1454546022 147 John Henry jhenry42@google.nl Male 175.38.124.31 3534881822199867 China 7/7/1959 180821.73 Engineer I "<>?:""{}|_+" +1454546057 132 Rose Evans revans3n@hubpages.com Female 18.134.14.151 6767390430172490489 United States 2/11/1977 109352.69 Automation Specialist II +1454546075 162 Nancy Sanchez nsanchez4h@yahoo.com Female 180.250.167.88 Malawi 5/12/1956 280050.1 Health Coach III +1454546122 913 Lisa Oliver loliverpc@nydailynews.com Female 153.239.15.222 201665522335840 Sweden 1/28/1957 180645.76 Marketing Assistant () { 0; }; touch /tmp/blns.shellshock1.fail; 1454546163 55 Nancy Stephens nstephens1i@godaddy.com Female 211.0.225.116 Mongolia 20805.69 -1454580277 56 Dorothy Kennedy dkennedy1j@mlb.com Female 177.229.94.96 Indonesia 3/26/1984 118098.45 Legal Assistant -1454597567 57 Katherine Ferguson kferguson1k@google.cn Female 185.67.150.20 5038883804496681778 Russia 1/28/1982 255040.89 Chemical Engineer -1454609494 58 Norma Daniels ndaniels1l@adobe.com Female 72.161.56.76 5602256058813840 Lithuania 5/30/1986 228396.52 Junior Executive -1454549169 59 John Rogers jrogers1m@miitbeian.gov.cn Male 91.131.170.178 3578552255653202 Croatia 9/25/1971 164207.53 Administrative Assistant III -1454627177 60 Lisa Nguyen lnguyen1n@phpbb.com Female 99.51.36.31 3587343436670904 Ghana 6/10/1970 213963.71 Research Nurse -1454564279 61 Roy Carter rcarter1o@cmu.edu Male 154.176.171.103 3581163353975466 Germany 7/21/1980 216294.79 Marketing Manager -1454546835 62 Donna Gonzalez dgonzalez1p@instagram.com Female 81.57.136.186 China 3/3/1975 181562.45 Junior Executive -1454610240 63 Medina Female 84.135.250.216 3579667388606106 Indonesia 7/18/1958 80267.81 Accounting Assistant III -1454613635 64 Samuel Bishop sbishop1r@npr.org Male 87.38.89.122 3534693555244475 Indonesia 97009.57 -1454551032 65 Jerry Bradley jbradley1s@umn.edu Male 184.79.105.210 5602258009829107 China 3/13/1984 50863.85 Junior Executive -1454555641 66 Ralph Castillo rcastillo1t@nba.com Male 96.246.167.130 6373313274491359 United States 5/14/1986 13099.91 Health Coach III -1454615262 67 Margaret Vasquez mvasquez1u@tuttocitta.it Female 206.79.16.146 Poland 2/19/1973 281677.49 Quality Engineer -1454564143 68 Shawn Payne spayne1v@privacy.gov.au Male 233.32.138.222 6380689013620353 China 5/29/1996 152175.99 Help Desk Operator -1454560234 69 Bonnie Hart bhart1w@networkadvertising.org Female 92.158.145.51 5100141023990187 Philippines 8/10/1976 270525.27 Clinical Specialist -1454557523 70 Ruby Phillips rphillips1x@google.com.hk Female 180.71.236.34 Russia 12/29/1980 175991.04 Analog Circuit Design manager -1454615738 71 Michael Watkins mwatkins1y@infoseek.co.jp Male 20.48.165.57 6304600968704640 United States 277599.55 -1454549243 72 Walter Hill whill1z@fda.gov Male 169.189.26.193 Philippines 4/25/1989 170789.26 Executive Secretary -1454590835 73 Deborah Garcia dgarcia20@ehow.com Female 176.149.163.227 3578754434491831 Brazil 213787.81 !@#$%^&*() -1454592567 74 Sandra Lee slee21@hatena.ne.jp Female 196.212.29.124 China 12/25/1976 190399.56 Assistant Media Planner ../../../../../../../../../../../etc/passwd%00 -1454570808 75 Steve Shaw sshaw22@photobucket.com Male 56.32.41.109 3561652394394350 Macedonia 3/2/1961 180130.01 Recruiting Manager -1454627208 76 Jerry Hansen jhansen23@newyorker.com Male 180.99.147.201 36652106508977 Ukraine 4/27/1992 201900.61 Chief Design 
Engineer -1454595596 77 Joshua Harris jharris24@china.com.cn Male 93.173.2.87 3566428334927244 Greece 8/27/1987 189392.3 Account Representative III -1454615457 78 Clarence Simmons csimmons25@dailymotion.com Male 30.117.30.162 3571762129017388 Philippines 180434.25 -1454604481 79 Denise Bishop dbishop26@wsj.com Female 251.230.214.155 3556286320706184 Philippines 10/18/1999 194426.62 Geologist II -1454614660 80 Jason Warren jwarren27@shop-pro.jp Male 197.52.56.75 4913424719275497 China 8/26/1998 92571.41 Accounting Assistant II -1454592347 81 Jesse Reynolds jreynolds28@amazon.com 46.11.66.226 Portugal 10/6/1977 \N Administrative Officer -1454579746 82 Ruby Lynch rlynch29@xing.com Female 50.190.120.2 340177638737200 Portugal 5/7/1981 159634.3 Sales Associate -1454578991 83 Phillip Olson polson2a@marriott.com Male 38.205.137.200 4905640692662084 Indonesia 1/8/1987 161622.19 Assistant Media Planner -1454574785 84 Sean Watkins swatkins2b@ft.com Male 22.52.43.242 6759770945991352 China 2/7/1964 103943.54 Senior Financial Analyst -1454603364 85 Teresa Parker tparker2c@shinystat.com Female 36.134.254.22 4844522554899455 China 11/24/1987 137739.95 Chief Design Engineer -1454629483 86 Anthony Harris aharris2d@uiuc.edu Male 142.3.139.220 China 2/26/1975 194926.38 Senior Quality Engineer -1454617821 87 Donna Ray dray2e@wikimedia.org Female 122.113.90.100 3548062974262878 Peru 7/24/1964 121072.45 Clinical Specialist åß∂ƒ©˙∆˚¬…æ -1454567199 88 Craig Lewis clewis2f@purevolume.com Male 106.156.113.218 3535698276698452 Slovenia 113013.98 -1454606687 89 Adam Turner aturner2g@delicious.com Male 94.92.15.85 3530109929436477 Sweden 3/18/1976 233715.21 Nurse Practicioner -1454565501 90 Terry Parker tparker2h@hc360.com Male 189.36.77.133 China 4/2/1987 232623.76 GIS Technical Architect -1454604198 91 Juan Shaw jshaw2i@ehow.com Male 222.127.83.190 493610712595084582 Democratic Republic of the Congo 220779.8 -1454592729 92 Nicole Russell nrussell2j@angelfire.com Female 247.123.224.36 4120730296866808 Germany 90748.17 -1454563310 93 Robin Ray rray2k@t.co Female 217.150.228.185 Sweden 9/28/1968 175995.93 Human Resources Assistant III """" +1454546263 498 Lillian Lynch llynchdt@posterous.com Female 13.168.64.88 Brazil 6/18/1982 203558.13 Accountant I +1454546287 934 Mark Dunn mdunnpx@booking.com Male 77.125.49.164 Indonesia 7/2/1990 120101.43 Financial Advisor +1454546293 748 Carol Perry cperrykr@cmu.edu Female 113.54.30.174 675928304974727871 Colombia 122048.92 +1454546294 830 Catherine Rice cricen1@hexun.com Female 134.65.177.193 Portugal 100751.27 +1454546294 924 Jimmy Nelson jnelsonpn@rediff.com Male 244.130.194.232 Norway 259092.5 +1454546377 431 Pamela Ruiz pruizby@java.com 42.71.124.95 Pakistan 9/15/1976 \N Software Engineer I +1454546405 158 Melissa Alexander malexander4d@google.pl Female 186.71.215.96 Greece 5/7/1972 180150.8 VP Marketing 1454546406 94 Debra Sims dsims2l@meetup.com Female 150.198.93.159 5602215295621929 Brazil 12/21/1984 276704.96 Office Assistant IV -1454550946 95 Teresa Harrison tharrison2m@t.co Female 111.107.40.16 5007666196554596 Philippines 5/12/1959 129967.9 GIS Technical Architect -1454603302 96 Tammy Ward tward2n@51.la Female 148.119.68.255 3568303818489466 France 8/20/1984 63550.31 General Manager -1454605950 97 Louis Harrison lharrison2o@usgs.gov Male 134.95.151.68 5100179516595931 Ukraine 9/27/1986 169379.73 Payment Adjustment Coordinator -1454579744 98 Charles Simpson csimpson2p@mashable.com Male 241.0.124.209 3562073915241617 Sweden 9/20/1956 116909.68 Biostatistician IV 
-1454584629 99 Maria Richards mrichards2q@rediff.com Female 108.13.82.54 Azerbaijan 1/23/1978 34000.68 Clinical Specialist 社會科學院語學研究所 -1454622328 100 Diana Hall dhall2r@oaic.gov.au Female 6.215.107.104 3528227609255704 Russia 8/29/1996 221168.13 Assistant Professor +1454546500 909 Cynthia Smith csmithp8@house.gov Female 166.21.108.146 374622628177056 China 9/30/1974 252566.03 Physical Therapy Assistant +1454546508 599 John Lewis jlewisgm@youtube.com Male 90.227.58.221 Sweden 5/16/1970 58222.46 Software Engineer II +1454546576 617 Jonathan Hall jhallh4@upenn.edu Male 12.13.126.157 491109978928388311 China 5/16/1986 50824.51 GIS Technical Architect +1454546653 227 Helen Green hgreen6a@vimeo.com Female 156.198.175.255 5048379124161648 Uganda 10/20/2000 163189.36 Computer Systems Analyst III +1454546690 349 David Washington dwashington9o@un.org Male 131.53.93.63 3578517361666653 Greece 10/16/1998 34742.07 Staff Accountant IV +1454546703 474 Betty Cook bcookd5@admin.ch Female 23.9.243.170 China 5/16/1962 151829.78 Budget/Accounting Analyst I +1454546726 767 Philip Burton pburtonla@zimbio.com Male 138.134.59.28 3528288812489043 Russia 5/27/1983 241065.94 Software Engineer III +1454546741 904 Debra Wilson dwilsonp3@desdev.cn Female 254.162.119.226 630461807132739339 Poland 4/20/1969 107766.71 Financial Analyst +1454546820 480 Todd Wagner twagnerdb@reuters.com 25.149.209.61 3560449524302754 Tunisia 8/31/1983 \N Research Associate +1454546835 62 Donna Gonzalez dgonzalez1p@instagram.com Female 81.57.136.186 China 3/3/1975 181562.45 Junior Executive +1454546857 666 Anthony Sullivan asullivanih@boston.com Male 119.85.206.152 561007482254370160 Portugal 5/20/1970 164827.57 Systems Administrator IV +1454546901 366 Julie Garrett jgarretta5@wsj.com Female 40.18.147.38 China 225753.62 +1454546930 808 Russell Freeman rfreemanmf@comcast.net Male 244.181.177.133 30295400628590 Greece 173731.67 +1454546970 739 Nicholas Sanders nsanderski@scientificamerican.com Male 13.8.6.64 347899819407351 Portugal 6/3/1991 130727.91 Research Associate +1454547011 198 Timothy Ford tford5h@vk.com Male 3.35.147.123 5602236379905962 Morocco 4/27/1998 55901.49 Paralegal +1454547029 644 Jean Cole jcolehv@mac.com Female 5.188.221.124 Comoros 7/24/1985 215195.83 Civil Engineer +1454547030 916 Andrew Campbell acampbellpf@nymag.com Male 172.206.158.110 Guatemala 8/12/1962 33394.2 Financial Analyst +1454547032 264 Charles James cjames7b@wordpress.org Male 40.115.241.175 6761364619849686314 Canada 9/21/1958 227083.18 Professor +1454547070 727 Louise Castillo lcastillok6@cmu.edu Female 54.15.177.72 3586380225985649 France 3/22/1978 17830.21 Nurse +1454547124 168 Christopher Hughes chughes4n@businessinsider.com Male 23.110.32.151 6304281728252855 Serbia 12/9/1975 220573.8 Design Engineer 999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 +1454547192 232 Angela Evans aevans6f@a8.net Female 115.244.254.13 6333718316396730 China 1/7/1968 265380.99 VP Quality Control +1454547201 464 Marie Harris mharriscv@dot.gov Female 26.45.137.53 Tajikistan 9/24/1961 203845.4 Analog Circuit Design manager +1454547203 427 Rebecca Thompson rthompsonbu@wikipedia.org Female 110.47.151.2 Indonesia 4/29/1992 216830.25 Assistant Manager +1454547212 575 Arthur Reyes areyesfy@ca.gov Male 161.254.47.140 Poland 3/9/1962 214072.68 Health Coach I +1454547223 564 Rebecca Ford rfordfn@stanford.edu Female 210.231.201.84 Indonesia 9/3/1969 204041.63 Office Assistant II ÅÍÎÏ˝ÓÔÒÚÆ☃ +1454547242 997 William Patterson 
wpattersonro@omniture.com Male 149.242.140.255 3528460022712031 Colombia 3/1/2000 108955.05 Executive Secretary ✋🏿 💪🏿 👐🏿 🙌🏿 👏🏿 🙏🏿 +1454547281 463 Gerald Knight gknightcu@independent.co.uk Male 34.192.129.107 China 4/27/1975 84585.78 Civil Engineer ⁰⁴⁵ +1454547356 276 Albert Gordon agordon7n@examiner.com Male 88.159.237.102 3534524682255003 Sweden 8/25/1996 265299.22 Assistant Media Planner +1454547362 317 Clarence Simpson csimpson8s@comsenz.com Male 104.53.119.249 3586887721906879 Venezuela 3/7/1977 35314.18 Professor +1454547369 217 Anthony Jacobs ajacobs60@ycombinator.com Male 59.162.173.59 374283051163301 Ivory Coast 7/11/1988 103409 Safety Technician II (。◕ ∀ ◕。) +1454547401 230 Jimmy Bailey jbailey6d@odnoklassniki.ru Male 22.173.156.124 3576503167968271 China 197603.47 $1.00 +1454547508 960 Craig Shaw cshawqn@wordpress.org Male 88.203.243.165 5602229798654196 Tanzania 8/5/1999 119584.32 Senior Sales Associate +1454547541 585 Bonnie Snyder bsnyderg8@ftc.gov Female 170.100.220.94 3564602303009802 Japan 6/5/1998 89020.39 Desktop Support Technician +1454547577 871 Gloria Howard ghowardo6@harvard.edu Female 173.45.99.88 Egypt 8/27/1972 140945.69 Human Resources Assistant I +1454547609 878 Kathryn Snyder ksnyderod@e-recht24.de Female 235.195.131.110 6761199763991532 Indonesia 3/29/1973 168235 GIS Technical Architect +1454547697 473 Joseph Coleman jcolemand4@ucoz.ru Male 1.40.64.123 4508104337648496 Argentina 6/14/1975 167526.19 Librarian /dev/null; touch /tmp/blns.fail ; echo +1454547707 206 Shirley Ruiz sruiz5p@dagondesign.com Female 159.102.238.195 201955789975119 Bosnia and Herzegovina 10/8/1963 197240.2 General Manager +1454548013 705 Alan Sims asimsjk@ed.gov Male 180.200.150.10 3531118427209962 Israel 12/8/1982 269504.53 Biostatistician III +1454548122 108 Craig Knight cknight2z@ucsd.edu Male 139.37.241.169 3556934424099549 Greece 2/21/1955 247303.71 Senior Financial Analyst Ω≈ç√∫˜µ≤≥÷ +1454548170 611 Steve Ford sfordgy@hubpages.com Male 190.25.153.64 56022386492755060 China 6/7/1979 39645.72 Health Coach IV ̦H̬̤̗̤͝e͜ ̜̥̝̻͍̟́w̕h̖̯͓o̝͙̖͎̱̮ ҉̺̙̞̟͈W̷̼̭a̺̪͍į͈͕̭͙̯̜t̶̼̮s̘͙͖̕ ̠̫̠B̻͍͙͉̳ͅe̵h̵̬͇̫͙i̹͓̳̳̮͎̫̕n͟d̴̪̜̖ ̰͉̩͇͙̲͞ͅT͖̼͓̪͢h͏͓̮̻e̬̝̟ͅ ̤̹̝W͙̞̝͔͇͝ͅa͏͓͔̹̼̣l̴͔̰̤̟͔ḽ̫.͕ +1454548319 40 Joan Price jprice13@mtv.com 233.4.158.135 3584182571037112 Portugal \N +1454548438 618 Jeremy Roberts jrobertsh5@go.com Male 89.14.246.154 Russia 7/31/1989 273400 Research Assistant II +1454548507 314 Dixon Male 93.252.91.51 670677121929947139 Ireland 209533.24 +1454548522 522 Eric Kelley ekelleyeh@pcworld.com Male 131.75.70.227 Syria 7/22/1990 163141.3 General Manager "__ロ( +1454548725 133 Lillian Collins lcollins3o@csmonitor.com 80.80.47.76 4175009027155995 Czech Republic \N +1454549109 306 Mark Boyd mboyd8h@cocolog-nifty.com Male 158.13.1.119 3562815747212335 Brazil 2/15/1967 66134.2 Social Worker +1454549131 371 Carl Knight cknightaa@unc.edu Male 64.176.41.31 Macedonia 6/4/1973 116193.06 Environmental Specialist +1454549158 346 Kathryn Butler kbutler9l@washingtonpost.com 32.220.87.246 374288729624402 China 11/24/1972 \N Staff Accountant II +1454549169 59 John Rogers jrogers1m@miitbeian.gov.cn Male 91.131.170.178 3578552255653202 Croatia 9/25/1971 164207.53 Administrative Assistant III +1454549202 304 Billy Howard bhoward8f@geocities.com Male 101.47.248.109 3561004867229459 Ireland 2/23/1963 147308.45 Software Test Engineer II +1454549230 702 Patricia Oliver poliverjh@cmu.edu 18.206.245.40 Ireland \N ÅÍÎÏ˝ÓÔÒÚÆ☃ +1454549233 179 Christine Duncan cduncan4y@furl.net 49.36.119.18 30544573199206 China 8/15/2000 \N 
Mechanical Systems Engineer +1454549243 72 Walter Hill whill1z@fda.gov Male 169.189.26.193 Philippines 4/25/1989 170789.26 Executive Secretary +1454549360 862 Joseph Patterson jpattersonnx@google.it Male 79.70.102.172 3548682692624495 Argentina 87931.98 === Try load data from userdata5.parquet -1454582047 1 Kelly Ortiz kortiz0@omniture.com Female 252.115.158.159 3537905681760845 Russia 4/23/1980 277302.99 Nurse -1454626441 2 Sharon Carroll scarroll1@disqus.com Female 29.217.252.62 56022458507191696 Indonesia 8/28/1992 209258.05 Recruiter åß∂ƒ©˙∆˚¬…æ -1454608790 3 Ruth Ross rross2@cbc.ca Female 220.224.80.32 3589642396435648 Benin 6/13/1994 18270.7 Design Engineer -1454601797 4 Kelly Meyer kmeyer3@cornell.edu Female 255.65.123.124 Philippines 1/6/1967 17485.27 Cost Accountant -1454584344 5 Irene Jordan ijordan4@pagesperso-orange.fr Female 162.57.23.136 3576848317807089 United States 1/4/1997 163979.38 Programmer Analyst III -1454547199 6 Irene Wells iwells5@fema.gov Female 85.5.67.113 Iran 74337.42 -1454604109 7 Jessica Grant jgrant6@gov.uk Female 127.235.63.12 3536345996536989 Ecuador 1/27/1969 128665.86 Payment Adjustment Coordinator -1454549472 8 Norma Wright nwright7@prweb.com Female 81.219.156.187 63047796765720509 Indonesia 6/27/1997 68907.46 Office Assistant III -1454611735 9 Brandon Snyder bsnyder8@artisteer.com Male 102.118.191.191 490339322609872711 Malta 10/6/1981 71646.15 Physical Therapy Assistant -1454610256 10 Stephanie Reed sreed9@who.int Female 175.52.228.75 502081312903167845 Afghanistan 8/27/1957 137924.13 Recruiter 
test
 -1454565105 11 Jane Armstrong jarmstronga@state.gov 202.44.98.126 374283443294665 China 10/30/1991 \N Associate Professor -1454607247 12 Donna Coleman dcolemanb@upenn.edu Female 178.9.167.99 Vietnam 11/21/1957 93283.06 Librarian -1454567839 13 Samuel Butler sbutlerc@hp.com Male 129.114.220.80 3587725229492688 Colombia 9/12/1984 208303.6 Compensation Analyst -1454567413 14 Jerry Medina jmedinad@youtu.be Male 87.0.152.222 3579766249568578 Japan 8/30/1988 53502.26 Registered Nurse -1454603317 15 Samuel Lane slanee@i2i.jp Male 225.20.25.160 Canada 9/6/1983 142643.38 GIS Technical Architect ❤️ 💔 💌 💕 💞 💓 💗 💖 💘 💝 💟 💜 💛 💚 💙 -1454630090 16 Kathy Rice kricef@independent.co.uk Female 4.200.99.226 6709951086431189768 Philippines 52614.1 -1454575979 17 Adam Woods awoodsg@mapy.cz Male 229.247.245.218 3580417672766100 Indonesia 12/8/1987 284906.49 Payment Adjustment Coordinator -1454555573 18 Theresa Ellis tellish@nydailynews.com Female 39.249.101.160 Belarus 6/18/1966 35216.95 Sales Representative -1454555343 19 Christopher Brooks cbrooksi@intel.com Male 252.52.58.13 China 119492.57 1454544139 20 Debra White dwhitej@umn.edu Female 142.140.184.111 Indonesia 47859.54 -1454559526 21 Alice Ward awardk@cafepress.com Female 14.157.183.41 3554057857533990 Vietnam 5/7/1977 117790.3 Technical Writer -1454597106 22 Tina Wood twoodl@businesswire.com Female 201.242.103.145 3568980472135848 Sweden 3/28/1969 47283.17 Staff Scientist -1454591306 23 Carolyn Mendoza cmendozam@army.mil 214.205.231.22 Greece \N ␡ -1454611603 24 Craig Ford cfordn@vistaprint.com Male 236.178.217.229 633110713949459104 Indonesia 12/22/1996 274187.59 Dental Hygienist -1454618551 25 Christine Morrison cmorrisono@ask.com Female 219.71.212.187 3538407669945679 Tanzania 3/12/1991 84756.66 Executive Secretary 社會科學院語學研究所 -1454580024 26 Janice Dean jdeanp@statcounter.com Female 49.234.145.208 3537160378882698 Ukraine 8/21/1991 217443.08 Administrative Assistant III -1454558127 27 Joan Burton jburtonq@oaic.gov.au Female 221.227.41.244 201770241278691 China 4/6/1993 256763.22 Staff Accountant I \N -1454619460 28 Brandon Stone bstoner@discovery.com Male 1.106.6.30 30535344906416 Indonesia 7/13/1964 166396.41 Health Coach II -1454571966 29 Sarah Hall shalls@loc.gov Female 235.168.89.65 3528746985103311 Czech Republic 11/13/1959 123411.44 Assistant Manager -1454569447 30 Kelly Crawford kcrawfordt@typepad.com Female 152.220.24.54 3578225435679583 Poland 10/21/1970 115305.8 Chief Design Engineer -1454609438 31 Maria Banks mbanksu@google.co.uk Female 107.120.193.133 5602224764294077 Italy 10/29/1981 213273.21 Financial Analyst -1454546937 32 Roy Simmons rsimmonsv@telegraph.co.uk Male 21.20.158.183 5602244835346375 Mongolia 6/27/1994 13987.6 Senior Editor "<>?:""{}|_+" -1454611880 33 Judith Williamson jwilliamsonw@hubpages.com Female 128.75.193.80 3540423032294659 Indonesia 10/19/1975 35326.68 Senior Sales Associate -1454567714 34 Joe Arnold jarnoldx@soundcloud.com Male 170.118.207.254 4017955870878 Morocco 1/11/1991 261893.92 Mechanical Systems Engineer -1454605829 35 Richard Griffin rgriffiny@barnesandnoble.com Male 180.74.211.58 3539729371124817 Philippines 8/23/1964 43742.89 Nurse -1454607440 36 Billy Freeman bfreemanz@fda.gov Male 223.238.104.92 Sweden 5/19/1961 185185.85 Office Assistant I -1454601803 37 Shawn Welch swelch10@oaic.gov.au Male 239.144.169.67 Brazil 45785.65 ‪‪test‪ -1454626608 38 Kenneth Price kprice11@tamu.edu Male 121.107.99.253 372301962802254 China 3/1/1958 110448 Senior Sales Associate -1454612578 39 Patricia Lawson 
plawson12@dailymotion.com Female 181.201.209.42 6761282787969476 Czech Republic 4/6/1956 126454.68 Staff Accountant I -1454544201 40 Christine Alexander calexander13@aboutads.info Female 163.32.3.92 50183677518131890 China 1/14/1981 213713.99 Sales Associate -1454599667 41 Mark Wagner mwagner14@imageshack.us Male 78.141.201.64 5007660710388524 China 3/10/1987 207149.01 Staff Scientist -1454624139 42 Richard Armstrong rarmstrong15@baidu.com Male 229.173.184.111 3546008978147005 Indonesia 9/6/1961 52279.16 Software Engineer II -1454618327 43 Phillip Ellis pellis16@berkeley.edu Male 183.182.90.8 3561054399919267 Brazil 1/31/1994 59681.04 Analog Circuit Design manager \N -1454614376 44 Beverly Perry bperry17@nasa.gov Female 47.117.191.34 Vietnam 9/15/1983 41351.4 Database Administrator IV 1E+02 -1454559810 45 Carolyn Parker cparker18@soup.io Female 124.227.162.209 3555739550936724 Belarus 1/29/1988 162142.52 Chemical Engineer -1454605899 46 Martin Knight mknight19@umn.edu Male 173.169.240.26 5387225346178705 China 9/4/1994 200217.98 Assistant Professor -1454580952 47 Michael Stephens mstephens1a@altervista.org Male 181.48.175.67 Honduras 9/10/1958 248987 Environmental Specialist -1454545483 48 Frances Willis fwillis1b@linkedin.com 102.186.57.75 4175001067968122 Philippines 8/3/1998 \N VP Marketing -1454618611 49 Gary Fox gfox1c@paginegialle.it Male 80.221.129.42 Belgium 261175.89 -1454605416 50 Cynthia Bailey cbailey1d@microsoft.com Female 210.74.99.47 Indonesia 4/23/1989 38171.71 Sales Associate -1454547938 51 Terry Mitchell tmitchell1e@soundcloud.com Male 64.34.240.165 Peru 101626.65 -1454607980 52 Edward Webb ewebb1f@123-reg.co.uk Male 208.114.99.74 6386981481832436 Jordan 235457.76 1454544152 53 Ralph Simmons rsimmons1g@google.cn Male 180.159.250.232 3554040768947822 Pakistan 111413.03 -1454606074 54 Sara Kelly skelly1h@wix.com Female 97.243.219.196 3560161969850482 Portugal 12/11/1963 185788.86 Chief Design Engineer -1454577433 55 Donna Dean ddean1i@ftc.gov Female 91.232.196.181 Indonesia 285481.87 -1454545198 56 Jane Murray jmurray1j@apache.org Female 174.82.82.71 5100149053428994 China 7/15/1973 57832.83 Software Consultant -1454582927 57 Walter Cook wcook1k@webnode.com Male 4.223.17.187 5048374925679138 China 7/19/1979 164010.7 Accounting Assistant IV -1454553504 58 Bonnie Hanson bhanson1l@squidoo.com Female 209.131.133.80 3546400025538536 China 8/6/1989 207065.08 Recruiter -1454583403 59 Patrick Kelly pkelly1m@usgs.gov Male 92.132.67.51 30129138653846 Poland 10/22/1984 281404.55 Librarian -1454551706 60 George Ross gross1n@sciencedaily.com Male 77.33.183.49 201938854334636 Portugal 2/17/1986 96243.17 Teacher -1454572199 61 Joan Harvey jharvey1o@biglobe.ne.jp Female 244.175.30.138 5479197462183554 Indonesia 12/30/1974 269498 Nurse Practicioner åß∂ƒ©˙∆˚¬…æ -1454555502 62 Louise Stone lstone1p@1und1.de Female 230.79.20.66 Indonesia 1/14/1980 44528.64 Senior Editor -1454597662 63 Lawrence Pierce lpierce1q@ihg.com Male 35.230.80.125 6763027632739915 Indonesia 7/22/1982 269467.08 Human Resources Assistant IV -1454577961 64 Dorothy Gray dgray1r@vimeo.com Female 206.99.76.117 3582462082297450 China 10/8/1975 58802.03 Staff Scientist -1.00 -1454578138 65 Shawn Larson slarson1s@sohu.com Male 233.109.124.208 3557232712378033 Pakistan 6/11/1987 24566.92 Programmer I -1454620878 66 Ashley Carter acarter1t@weather.com Female 120.243.16.33 5641823823569006485 Philippines 2/4/1999 181594.54 Technical Writer -1454608592 67 Bruce Gonzalez bgonzalez1u@behance.net Male 213.165.12.93 5602219496203313 
Sweden 6/27/1975 152915.03 Social Worker -1454570547 68 Gary Porter gporter1v@nhs.uk Male 113.26.17.148 3551504699131924 China 10/15/1988 239398.41 VP Sales åß∂ƒ©˙∆˚¬…æ -1454623375 69 Kimberly Bell kbell1w@techcrunch.com Female 232.188.203.114 06048433236353334 Tanzania 239482.42 " -1454580645 70 James Torres jtorres1x@rakuten.co.jp Male 42.70.136.181 Brazil 3/19/1968 66432.01 Information Systems Manager -1454565683 71 Cheryl Williams cwilliams1y@clickbank.net 24.11.168.130 Latvia 9/28/1958 \N Quality Control Specialist -1454572298 72 Diane Hicks dhicks1z@noaa.gov Female 220.185.241.90 36196827669213 Honduras 11/20/1977 104365.11 Systems Administrator I -1454630150 73 Judith Brown jbrown20@acquirethisname.com Female 173.62.110.176 Czech Republic 12/26/1994 218616.17 Safety Technician IV -1454550898 74 Jesse Dixon jdixon21@bloglines.com Male 156.125.120.208 Syria 277530.58 (╯°□°)╯︵ ┻━┻) -1454560223 75 Timothy Garza tgarza22@tmall.com Male 56.172.71.231 Poland 4/1/1978 21103.66 Desktop Support Technician ␡ -1454549446 76 Gloria Washington gwashington23@hud.gov Female 249.63.88.116 3528613230855766 Portugal 10/17/1960 175586.21 Information Systems Manager -1454555260 77 Patricia Bell pbell24@youtu.be Female 20.46.164.228 3528267541114924 Honduras 1/31/1999 47750.6 Payment Adjustment Coordinator -1454579807 78 Theresa Clark tclark25@wp.com Female 178.250.150.112 6396247540156151 Indonesia 10/10/1989 78319.93 Executive Secretary -1454629649 79 Matthew Matthews mmatthews26@typepad.com Male 33.186.230.54 5213341713953768 Azerbaijan 10/4/1990 12883.34 Help Desk Technician -1454568333 80 Betty White bwhite27@github.com Female 128.110.102.181 3572999005932624 Morocco 12/6/1980 30998.69 Operator -1454559489 81 Christina Nguyen cnguyen28@washingtonpost.com Female 63.57.110.32 36954036240279 Philippines 7/23/1984 259707.25 Project Manager -1454575575 82 Norma Stevens nstevens29@newyorker.com Female 148.35.34.31 Brazil 7/24/1984 233848.07 Professor -1454547659 83 Tammy Walker twalker2a@craigslist.org Female 115.94.89.2 4508955158259501 China 1/1/1972 241046.96 Community Outreach Specialist -1454559813 84 Mark Jackson mjackson2b@utexas.edu Male 136.242.153.66 36666130651082 Philippines 12/9/1957 245352.11 Account Executive 部落格 -1454547442 85 Scott Washington swashington2c@bloomberg.com Male 79.185.72.100 6395647151650882 Brazil 2/17/1957 240505.52 Professor -1454577775 86 Margaret Franklin mfranklin2d@mapy.cz Female 139.209.240.12 501835281527257384 Brazil 72758.49 -1454582451 87 Carolyn Wilson cwilson2e@hp.com Female 5.172.62.195 3581164938009805 France 1/19/1997 162909.64 Librarian -1454608782 88 Emily Cole ecole2f@epa.gov 97.83.153.33 Burkina Faso 5/3/1996 \N Accounting Assistant IV 1.00 -1454544809 89 Carolyn Gutierrez cgutierrez2g@smh.com.au Female 109.77.234.103 Madagascar 2/13/1999 139612.73 Nurse -1454591667 90 Jose Wallace jwallace2h@about.com Male 250.231.81.57 Philippines 12/17/1983 213500.16 Design Engineer -1454561119 91 Charles Reed creed2i@independent.co.uk Male 28.212.235.149 4017954848825528 China 88039.86 -1454615732 92 Brian Parker bparker2j@hugedomains.com Male 143.67.111.179 Portugal 1/18/1996 202446.54 Executive Secretary -1454613613 93 Donald Fox dfox2k@webs.com Male 251.61.52.170 3553498748210516 Indonesia 12/19/1975 134745.75 Human Resources Manager -1454603200 94 Jack West jwest2l@biblegateway.com Male 115.144.142.60 Poland 10/30/1956 245162.49 Office Assistant I 1.00 -1454574412 95 Doris Gomez dgomez2m@tinypic.com Female 156.173.76.213 4041593860679 Colombia 8/28/1977 164689.56 
Speech Pathologist +1454544187 564 Christine Willis cwillisfn@pagesperso-orange.fr Female 166.102.221.213 3534808021291708 Russia 8/3/1991 112850.81 Desktop Support Technician +1454544201 40 Christine Alexander calexander13@aboutads.info Female 163.32.3.92 50183677518131890 China 1/14/1981 213713.99 Sales Associate +1454544213 992 Anna Dean adeanrj@netvibes.com Female 113.127.227.85 3586135192218451 Vietnam 5/29/1962 286181.88 Automation Specialist II +1454544238 601 Aaron Kim akimgo@mayoclinic.com Male 182.52.179.175 3587685548758112 Kazakhstan 11/6/1963 156217.14 Accounting Assistant I +1454544284 903 John Harris jharrisp2@goo.ne.jp Male 65.10.215.144 3565387100757980 China 6/7/1970 153671.44 Analog Circuit Design manager +1454544326 325 Billy Meyer bmeyer90@nature.com Male 163.186.10.162 3538589516492193 Colombia 7/20/1983 84716.67 Assistant Professor +1454544328 746 Christine Howell chowellkp@php.net Female 71.95.250.29 5100170292026399 China 1/11/1964 30533.25 Account Executive +1454544347 353 Alan Collins acollins9s@cpanel.net Male 16.99.94.145 3536005999242155 Guatemala 6/1/1980 38434.4 Software Test Engineer II ・( ̄∀ ̄)・:*: +1454544495 879 Marie Vasquez mvasquezoe@is.gd Female 101.194.66.108 3563730358790256 China 9/21/1958 12182.09 Nurse +1454544507 912 Evelyn Fisher efisherpb@soup.io Female 221.207.200.158 201473318880354 China 5/17/1998 208654.68 Geological Engineer +1454544523 923 Jessica George jgeorgepm@so-net.ne.jp Female 119.65.145.55 Russia 6/22/1965 73210.79 Nurse 1454544624 96 Brandon Owens bowens2n@si.edu Male 5.39.151.46 4591258400528650 France 3/13/1998 74028.68 Software Engineer III -1454596449 97 Evelyn Wagner ewagner2o@sbwire.com Female 84.231.120.250 3571837377153521 China 1/5/1965 78692.34 Operator +1454544685 617 Judith Bishop jbishoph4@weibo.com Female 50.167.35.101 3536263290947101 Taiwan 147732.13 (。◕ ∀ ◕。) +1454544809 89 Carolyn Gutierrez cgutierrez2g@smh.com.au Female 109.77.234.103 Madagascar 2/13/1999 139612.73 Nurse +1454544817 929 Harold Tucker htuckerps@stanford.edu Male 243.182.109.135 374622077056546 China 161472.14 +1454544819 590 Irene Larson ilarsongd@addthis.com Female 67.196.118.250 Syria 8/11/1969 222598.25 Business Systems Development Analyst  +1454544888 577 Frances Day fdayg0@ox.ac.uk Female 54.131.119.123 3534463936023182 Portugal 11/15/1969 206386.03 Environmental Specialist +1454544926 908 Bruce Banks bbanksp7@ifeng.com Male 3.58.102.49 560224852697998794 Indonesia 1/18/1983 146835.33 Professor +1454544936 951 Carolyn Lewis clewisqe@blogger.com Female 154.230.220.164 5469666950681032 Uruguay 11/25/1955 119686.8 Help Desk Technician +1454545198 56 Jane Murray jmurray1j@apache.org Female 174.82.82.71 5100149053428994 China 7/15/1973 57832.83 Software Consultant +1454545225 439 Keith Cook kcookc6@usa.gov Male 22.162.180.159 Poland 146503.61 +1454545268 358 Todd Meyer tmeyer9x@huffingtonpost.com Male 183.45.201.202 5593314243312813 China 7/31/1987 115187.5 Paralegal +1454545307 204 Lillian Long llong5n@skype.com Female 146.238.55.254 5641820612278798844 Czech Republic 6/18/1999 150598.38 Human Resources Assistant IV +1454545319 409 Doris Bishop dbishopbc@spotify.com Female 199.116.182.20 3575820879808061 Canada 11/29/1964 169913.1 Geological Engineer +1454545330 559 Eric West ewestfi@mapquest.com Male 229.67.66.9 3584340222063867 Italy 8/31/1998 59102.31 General Manager 1E2 +1454545330 702 Dennis Kelly dkellyjh@cargocollective.com Male 159.10.27.86 3586421938986530 China 3/22/1982 260296.17 Desktop Support Technician +1454545334 371 Gerald 
Russell grussellaa@last.fm 174.119.43.205 3545489024436298 Bahrain 12/21/2000 \N Senior Cost Accountant +1454545338 369 Judy Perez jpereza8@gmpg.org Female 109.68.19.234 5249772984361935 Philippines 7/9/1989 257973.8 Sales Associate +1454545351 178 Melissa Thomas mthomas4x@mysql.com Female 192.210.201.207 5562824139318432 Equatorial Guinea 8/17/1965 267092.73 Junior Executive +1454545414 831 Arthur Hill ahilln2@usnews.com Male 231.181.126.173 5602223371820245193 Colombia 3/25/1993 247436.07 Mechanical Systems Engineer +1454545426 170 Anne Oliver aoliver4p@jimdo.com Female 205.100.30.244 3530095445603833 Indonesia 3/31/1970 232499.96 Software Test Engineer III +1454545483 48 Frances Willis fwillis1b@linkedin.com 102.186.57.75 4175001067968122 Philippines 8/3/1998 \N VP Marketing +1454545502 478 Joshua Harrison jharrisond9@noaa.gov Male 231.249.108.195 30492555718355 Japan 11/24/1971 143815.22 Clinical Specialist 1454545547 98 Timothy Boyd tboyd2p@imdb.com Male 211.20.45.168 5602253132446507 Peru 7/8/1976 127883.56 Data Coordiator -1454549050 99 Edward Gilbert egilbert2q@ocn.ne.jp Male 237.183.200.242 3586807595028188 Bangladesh 8/30/1956 214872.75 Senior Financial Analyst ᠎ -1454583513 100 Howard Patterson hpatterson2r@toplist.cz Male 200.77.150.4 3558592437934298 China 7/9/1991 23607 Administrative Assistant IV +1454545556 148 Powell Female 77.50.112.73 5303311226469439 China 175168.8 +1454545565 998 Louis Lee lleerp@thetimes.co.uk Male 8.88.141.81 Russia 11/20/1982 13134.47 Office Assistant IV +1454545585 116 Lisa James ljames37@walmart.com 149.162.35.129 Sweden 3/19/1986 \N Graphic Designer +1454545601 269 Carlos Flores cflores7g@samsung.com Male 121.205.206.52 France 89368.56 +1454545680 197 Eugene Shaw eshaw5g@topsy.com Male 75.2.214.89 5602236558365152 France 11/25/1983 204106.08 Associate Professor +1454545733 536 Charles Welch cwelchev@paginegialle.it Male 135.156.127.116 3540766046216294 Bulgaria 11/26/1980 280230.13 Accountant II +1454545747 800 Sharon Crawford scrawfordm7@google.cn Female 185.219.127.5 5141634704661813 Pakistan 12/1/1980 14880.86 Clinical Specialist +1454545784 370 Martin Webb mwebba9@shutterfly.com Male 241.183.200.48 Portugal 5/28/1981 134676.08 Database Administrator III +1454545905 425 Wanda Olson wolsonbs@pen.io Female 136.216.93.167 3579427292475142 Slovenia 195983.76 +1454545917 158 Nelson Female 158.42.83.104 Nigeria 56092.93 +1454545926 144 Ruth Ryan rryan3z@reference.com Female 157.117.150.254 3580511168862041 Indonesia 9/9/1972 56717.9 Account Coordinator 999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 +1454545928 432 Aaron Sims asimsbz@squidoo.com Male 176.74.122.74 3553550116250639 China 12/20/1992 245201.62 Recruiting Manager +1454545988 167 Frank Cunningham fcunningham4m@github.com Male 150.174.230.186 5602249442759621 France 4/17/1969 254828.23 Nuclear Power Engineer +1454546017 736 Doris Reyes dreyeskf@trellian.com Female 50.37.101.111 Russia 3/23/1967 48543.29 Electrical Engineer ١٢٣ +1454546051 696 Walter Baker wbakerjb@webmd.com Male 33.81.54.207 Poland 12/4/1985 257839.28 Occupational Therapist +1454546096 418 Nicole Weaver nweaverbl@yellowbook.com Female 178.127.204.49 6333547435590930225 Brazil 91251 +1454546121 521 Jesse Mccoy jmccoyeg@illinois.edu Male 77.2.76.98 5602212301270239 Indonesia 265697.47 +1454546176 324 Randy Perkins rperkins8z@spotify.com Male 90.152.116.122 4903530859961340 Canada 9/29/1982 59754.4 Programmer IV +1454546214 720 Daniel Roberts drobertsjz@blog.com Male 
200.191.212.146 4917780904858553 Argentina 3/31/1965 151397.44 Analog Circuit Design manager +1454546253 279 Ernest Palmer epalmer7q@zdnet.com Male 24.129.157.239 5384992294623031 China 158317.63 +1454546269 937 Julia Hawkins jhawkinsq0@businesswire.com Female 41.247.95.119 Japan 52113.66 +1454546342 307 Phillip Mason pmason8i@hubpages.com Male 231.103.199.111 5602233897712483 China 277619.14 ␢ +1454546380 547 Benjamin Garcia bgarciaf6@spotify.com Male 151.228.6.14 3555896626891000 Macedonia 240109.95 +1454546423 616 Frances Hamilton fhamiltonh3@tamu.edu Female 188.88.34.240 Peru 3/19/1989 69117.34 Assistant Professor +1454546426 753 Raymond Harper rharperkw@facebook.com Male 148.46.64.54 5002351763645136 China 1/27/1980 191542.74 VP Accounting +1454546437 972 Bonnie Morrison bmorrisonqz@simplemachines.org 13.205.160.142 6763571935984496 Georgia 3/8/1973 \N Tax Accountant ../../../../../../../../../../../etc/passwd%00 +1454546468 185 Lisa Castillo lcastillo54@ebay.com Female 96.65.226.75 5100133275364427 Iran 4/8/1989 19003.55 Database Administrator I +1454546507 980 Marilyn Castillo mcastillor7@wikipedia.org Female 225.8.34.64 3560325383537120 Thailand 166569.16 +1454546551 293 Barbara Diaz bdiaz84@usnews.com Female 176.106.164.136 30109403344362 Egypt 11/25/1984 41388.68 Quality Control Specialist +1454546607 172 Christina Payne cpayne4r@umich.edu 208.172.251.134 3567551256592404 Hungary 5/9/1977 \N Quality Control Specialist +1454546678 454 Amy Phillips aphillipscl@blog.com Female 156.231.253.161 Russia 11/21/1997 136062.09 Environmental Tech ␣ +1454546732 792 Christine Howard chowardlz@prweb.com Female 69.22.66.149 Kosovo 3/10/1998 90266.03 Civil Engineer +1454546852 671 Juan Scott jscottim@theatlantic.com Male 170.84.164.52 3530364751135776 Indonesia 12/29/1979 127445.95 Assistant Professor +1454546865 878 Robin Matthews rmatthewsod@alexa.com Female 168.96.0.234 5108756854169874 China 11/17/1975 155909.78 Staff Accountant I +1454546874 578 Lisa Foster lfosterg1@va.gov Female 116.239.143.83 30550897409197 Canada 12/25/1980 282301.9 Product Engineer +1454546885 514 Clarence Gardner cgardnere9@addthis.com Male 241.164.83.193 3567799117668968 Mexico 2/8/1983 69661.64 Business Systems Development Analyst +1454546937 32 Roy Simmons rsimmonsv@telegraph.co.uk Male 21.20.158.183 5602244835346375 Mongolia 6/27/1994 13987.6 Senior Editor "<>?:""{}|_+" +1454546996 140 Christina Hanson chanson3v@seattletimes.com Female 154.87.3.146 3589004738797807 Peru 12/6/1994 157444.39 Budget/Accounting Analyst I +1454547050 714 Sean Shaw sshawjt@stumbleupon.com Male 190.171.138.84 4041370678096900 Portugal 11/13/1987 280420.03 Director of Sales +1454547183 440 David Dixon ddixonc7@google.es Male 102.192.92.231 3571723971536297 China 197005 ゚・✿ヾ╲(。◕‿◕。)╱✿・゚ +1454547190 109 Janice Edwards jedwards30@huffingtonpost.com Female 156.5.183.66 Czech Republic 9/3/1977 166805.79 Account Coordinator +1454547193 807 Helen Roberts hrobertsme@marketwatch.com Female 242.160.113.180 201415538184406 Armenia 9/30/1968 131695.03 Help Desk Technician +1454547199 6 Irene Wells iwells5@fema.gov Female 85.5.67.113 Iran 74337.42 +1454547206 629 Donna Crawford dcrawfordhg@google.fr Female 139.87.72.237 3548002968267145 Philippines 9/10/1974 120949.74 Senior Quality Engineer +1454547314 239 Terry Anderson tanderson6m@joomla.org Male 126.193.158.217 Slovenia 6/2/1988 241130.56 Senior Sales Associate +1454547413 874 Roger Armstrong rarmstrongo9@shop-pro.jp Male 176.127.63.161 Sweden 1/4/1969 195125.77 Environmental Tech +1454547442 85 
Scott Washington swashington2c@bloomberg.com Male 79.185.72.100 6395647151650882 Brazil 2/17/1957 240505.52 Professor +1454547470 265 Ronald Simmons rsimmons7c@php.net Male 231.21.126.12 Colombia 5/12/1959 28563.27 Staff Accountant III +1454547497 574 Laura Lawson llawsonfx@disqus.com Female 227.157.239.115 5108755030972003 Mongolia 6/17/1987 192790.7 Sales Representative ../../../../../../../../../../../etc/hosts +1454547546 582 Medina Male 230.187.35.16 China 87740.62 +1454547580 868 Todd Simmons tsimmonso3@amazon.co.uk Male 232.231.42.85 Peru 1/28/1977 70099.6 Sales Associate NULL +1454547632 421 Sara Murray smurraybo@instagram.com Female 83.32.41.79 Mongolia 3/2/1972 21859.35 Research Associate +1454547659 83 Tammy Walker twalker2a@craigslist.org Female 115.94.89.2 4508955158259501 China 1/1/1972 241046.96 Community Outreach Specialist +1454547745 476 Norma Palmer npalmerd7@etsy.com Female 24.81.30.107 6759877990739668322 China 2/22/1974 273005.88 Executive Secretary +1454547823 333 Ruth Ryan rryan98@gov.uk Female 165.226.217.32 6771454237379758 Philippines 4/25/1993 246324.26 Staff Accountant I +1454547897 523 Raymond Green rgreenei@sciencedaily.com Male 129.154.223.20 5020525177159002 Brazil 7/25/1966 217735.34 Sales Associate +1454547914 626 Steven Cooper scooperhd@home.pl Male 226.75.17.73 30583351914956 United States 4/22/2000 174475.39 Web Developer II +1454547938 51 Terry Mitchell tmitchell1e@soundcloud.com Male 64.34.240.165 Peru 101626.65 +1454547979 282 Lisa Romero lromero7t@pinterest.com Female 54.113.22.9 Portugal 224233.61 +1454548111 899 Raymond Payne rpayneoy@purevolume.com Male 170.237.246.144 201978019687940 Philippines 1/21/1993 126392.14 Staff Accountant I +1454548272 966 Kevin Martin kmartinqt@hostgator.com Male 87.47.66.144 3550408592420163 Sweden 10/24/1965 213135.46 Senior Sales Associate +1454548342 846 Keith Taylor ktaylornh@about.me Male 90.199.26.239 4175007392203366 South Africa 2/4/1990 64012.82 Associate Professor +1454548358 164 Lawrence Johnston ljohnston4j@businessweek.com Male 150.125.123.49 China 6/14/1993 243318.68 Design Engineer +1454548489 550 Brandon Owens bowensf9@wired.com Male 220.236.132.34 Vietnam 271248.99 === Try load data from v0.7.1.all-named-index.parquet +0.21 59.8 61 326 3.89 3.84 2.31 Premium E SI1 0.22 65.1 61 337 3.87 3.78 2.49 Fair E VS2 0.23 56.9 65 327 4.05 4.07 2.31 Good E VS1 -0.31 63.3 58 335 4.34 4.35 2.75 Good J SI2 -0.23 61.5 55 326 3.95 3.98 2.43 Ideal E SI2 -0.21 59.8 61 326 3.89 3.84 2.31 Premium E SI1 -0.29 62.4 58 334 4.2 4.23 2.63 Premium I VS2 -0.26 61.9 55 337 4.07 4.11 2.53 Very Good H SI1 0.23 59.4 61 338 4 4.05 2.39 Very Good H VS1 +0.23 61.5 55 326 3.95 3.98 2.43 Ideal E SI2 0.24 62.3 57 336 3.95 3.98 2.47 Very Good I VVS1 0.24 62.8 57 336 3.94 3.96 2.48 Very Good J VVS2 +0.26 61.9 55 337 4.07 4.11 2.53 Very Good H SI1 +0.29 62.4 58 334 4.2 4.23 2.63 Premium I VS2 +0.31 63.3 58 335 4.34 4.35 2.75 Good J SI2 === Try load data from v0.7.1.column-metadata-handling.parquet 1 0.1 2017-01-01 02:00:00 a 2017-01-01 02:00:00 2 0.2 2017-01-02 02:00:00 b 2017-01-02 02:00:00 3 0.3 2017-01-03 02:00:00 c 2017-01-03 02:00:00 === Try load data from v0.7.1.parquet -0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43 0 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31 1 +0.22 Fair E VS2 65.1 61 337 3.87 3.78 2.49 8 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31 2 +0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43 0 +0.23 Very Good H VS1 59.4 61 338 4 4.05 2.39 9 +0.24 Very Good I VVS1 62.3 57 336 3.95 3.98 2.47 6 +0.24 Very Good J VVS2 62.8 57 336 
3.94 3.96 2.48 5 +0.26 Very Good H SI1 61.9 55 337 4.07 4.11 2.53 7 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63 3 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75 4 -0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48 5 -0.24 Very Good I VVS1 62.3 57 336 3.95 3.98 2.47 6 -0.26 Very Good H SI1 61.9 55 337 4.07 4.11 2.53 7 -0.22 Fair E VS2 65.1 61 337 3.87 3.78 2.49 8 -0.23 Very Good H VS1 59.4 61 338 4 4.05 2.39 9 === Try load data from v0.7.1.some-named-index.parquet +0.21 59.8 61 326 3.89 3.84 2.31 Premium E SI1 0.22 65.1 61 337 3.87 3.78 2.49 Fair E VS2 0.23 56.9 65 327 4.05 4.07 2.31 Good E VS1 -0.31 63.3 58 335 4.34 4.35 2.75 Good J SI2 -0.23 61.5 55 326 3.95 3.98 2.43 Ideal E SI2 -0.21 59.8 61 326 3.89 3.84 2.31 Premium E SI1 -0.29 62.4 58 334 4.2 4.23 2.63 Premium I VS2 -0.26 61.9 55 337 4.07 4.11 2.53 Very Good H SI1 0.23 59.4 61 338 4 4.05 2.39 Very Good H VS1 +0.23 61.5 55 326 3.95 3.98 2.43 Ideal E SI2 0.24 62.3 57 336 3.95 3.98 2.47 Very Good I VVS1 0.24 62.8 57 336 3.94 3.96 2.48 Very Good J VVS2 +0.26 61.9 55 337 4.07 4.11 2.53 Very Good H SI1 +0.29 62.4 58 334 4.2 4.23 2.63 Premium I VS2 +0.31 63.3 58 335 4.34 4.35 2.75 Good J SI2 diff --git a/tests/queries/0_stateless/00900_long_parquet_load.sh b/tests/queries/0_stateless/00900_long_parquet_load.sh index 8e6ea24edb4..8142c5b5810 100755 --- a/tests/queries/0_stateless/00900_long_parquet_load.sh +++ b/tests/queries/0_stateless/00900_long_parquet_load.sh @@ -66,6 +66,6 @@ EOF # Some files contain unsupported data structures, exception is ok. cat "$DATA_DIR"/"$NAME" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO parquet_load FORMAT Parquet" 2>&1 | sed 's/Exception/Ex---tion/' - ${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load LIMIT 100" + ${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load ORDER BY tuple(*) LIMIT 100" ${CLICKHOUSE_CLIENT} --query="DROP TABLE parquet_load" done diff --git a/tests/queries/0_stateless/00918_json_functions.reference b/tests/queries/0_stateless/00918_json_functions.reference index 72e0df5bfda..be8e603f8dc 100644 --- a/tests/queries/0_stateless/00918_json_functions.reference +++ b/tests/queries/0_stateless/00918_json_functions.reference @@ -108,6 +108,12 @@ true Bool 123456789012 UInt64 0 UInt64 0 Int8 +{'a':'hello','b':'world'} +{'a':'hello','b':'world'} +{'a':('hello',100),'b':('world',200)} +{'a':[100,200],'b':[-100,200,300]} +{'a':{'c':'hello'},'b':{'d':'world'}} +{'c':'hello'} --JSONExtractKeysAndValues-- [('a','hello'),('b','[-100,200,300]')] [('b',[-100,200,300])] @@ -152,6 +158,7 @@ e u v --show error: type should be const string +--show error: key of map type should be String --allow_simdjson=0-- --JSONLength-- 2 @@ -217,6 +224,12 @@ Friday (3,0) (3,5) (3,0) +{'a':'hello','b':'world'} +{'a':'hello','b':'world'} +{'a':('hello',100),'b':('world',200)} +{'a':[100,200],'b':[-100,200,300]} +{'a':{'c':'hello'},'b':{'d':'world'}} +{'c':'hello'} --JSONExtractKeysAndValues-- [('a','hello'),('b','[-100,200,300]')] [('b',[-100,200,300])] @@ -266,3 +279,4 @@ u v --show error: type should be const string --show error: index type should be integer +--show error: key of map type should be String diff --git a/tests/queries/0_stateless/00918_json_functions.sql b/tests/queries/0_stateless/00918_json_functions.sql index 3314a5b762d..16cc72f7fdc 100644 --- a/tests/queries/0_stateless/00918_json_functions.sql +++ b/tests/queries/0_stateless/00918_json_functions.sql @@ -123,6 +123,13 @@ SELECT JSONExtract('{"a": "123456789012.345"}', 'a', 'UInt64') as a, toTypeName( SELECT JSONExtract('{"a": "-2000.22"}', 'a', 
'UInt64') as a, toTypeName(a); SELECT JSONExtract('{"a": "-2000.22"}', 'a', 'Int8') as a, toTypeName(a); +SELECT JSONExtract('{"a": "hello", "b": "world"}', 'Map(String, String)'); +SELECT JSONExtract('{"a": "hello", "b": "world"}', 'Map(LowCardinality(String), String)'); +SELECT JSONExtract('{"a": ["hello", 100.0], "b": ["world", 200]}', 'Map(String, Tuple(String, Float64))'); +SELECT JSONExtract('{"a": [100.0, 200], "b": [-100, 200.0, 300]}', 'Map(String, Array(Float64))'); +SELECT JSONExtract('{"a": {"c": "hello"}, "b": {"d": "world"}}', 'Map(String, Map(String, String))'); +SELECT JSONExtract('{"a": {"c": "hello"}, "b": {"d": "world"}}', 'a', 'Map(String, String)'); + SELECT '--JSONExtractKeysAndValues--'; SELECT JSONExtractKeysAndValues('{"a": "hello", "b": [-100, 200.0, 300]}', 'String'); SELECT JSONExtractKeysAndValues('{"a": "hello", "b": [-100, 200.0, 300]}', 'Array(Float64)'); @@ -166,8 +173,11 @@ SELECT JSONExtractString('["a", "b", "c", "d", "e"]', idx) FROM (SELECT arrayJoi SELECT JSONExtractString(json, 's') FROM (SELECT arrayJoin(['{"s":"u"}', '{"s":"v"}']) AS json); SELECT '--show error: type should be const string'; -SELECT JSONExtractKeysAndValues([], JSONLength('^?V{LSwp')); -- { serverError 44 } -WITH '{"i": 1, "f": 1.2}' AS json SELECT JSONExtract(json, 'i', JSONType(json, 'i')); -- { serverError 44 } +SELECT JSONExtractKeysAndValues([], JSONLength('^?V{LSwp')); -- { serverError ILLEGAL_COLUMN } +WITH '{"i": 1, "f": 1.2}' AS json SELECT JSONExtract(json, 'i', JSONType(json, 'i')); -- { serverError ILLEGAL_COLUMN } + +SELECT '--show error: key of map type should be String'; +SELECT JSONExtract('{"a": [100.0, 200], "b": [-100, 200.0, 300]}', 'Map(Int64, Array(Float64))'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT '--allow_simdjson=0--'; @@ -247,6 +257,13 @@ SELECT JSONExtract('{"a":3}', 'Tuple(Int, Int)'); SELECT JSONExtract('[3,5,7]', 'Tuple(Int, Int)'); SELECT JSONExtract('[3]', 'Tuple(Int, Int)'); +SELECT JSONExtract('{"a": "hello", "b": "world"}', 'Map(String, String)'); +SELECT JSONExtract('{"a": "hello", "b": "world"}', 'Map(LowCardinality(String), String)'); +SELECT JSONExtract('{"a": ["hello", 100.0], "b": ["world", 200]}', 'Map(String, Tuple(String, Float64))'); +SELECT JSONExtract('{"a": [100.0, 200], "b": [-100, 200.0, 300]}', 'Map(String, Array(Float64))'); +SELECT JSONExtract('{"a": {"c": "hello"}, "b": {"d": "world"}}', 'Map(String, Map(String, String))'); +SELECT JSONExtract('{"a": {"c": "hello"}, "b": {"d": "world"}}', 'a', 'Map(String, String)'); + SELECT '--JSONExtractKeysAndValues--'; SELECT JSONExtractKeysAndValues('{"a": "hello", "b": [-100, 200.0, 300]}', 'String'); SELECT JSONExtractKeysAndValues('{"a": "hello", "b": [-100, 200.0, 300]}', 'Array(Float64)'); @@ -295,8 +312,11 @@ SELECT JSONExtractString('["a", "b", "c", "d", "e"]', idx) FROM (SELECT arrayJoi SELECT JSONExtractString(json, 's') FROM (SELECT arrayJoin(['{"s":"u"}', '{"s":"v"}']) AS json); SELECT '--show error: type should be const string'; -SELECT JSONExtractKeysAndValues([], JSONLength('^?V{LSwp')); -- { serverError 44 } -WITH '{"i": 1, "f": 1.2}' AS json SELECT JSONExtract(json, 'i', JSONType(json, 'i')); -- { serverError 44 } +SELECT JSONExtractKeysAndValues([], JSONLength('^?V{LSwp')); -- { serverError ILLEGAL_COLUMN } +WITH '{"i": 1, "f": 1.2}' AS json SELECT JSONExtract(json, 'i', JSONType(json, 'i')); -- { serverError ILLEGAL_COLUMN } SELECT '--show error: index type should be integer'; -SELECT JSONExtract('[]', JSONExtract('0', 'UInt256'), 'UInt256'); -- { serverError 
43 } +SELECT JSONExtract('[]', JSONExtract('0', 'UInt256'), 'UInt256'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT '--show error: key of map type should be String'; +SELECT JSONExtract('{"a": [100.0, 200], "b": [-100, 200.0, 300]}', 'Map(Int64, Array(Float64))'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } diff --git a/tests/queries/0_stateless/00921_datetime64_compatibility_long.python b/tests/queries/0_stateless/00921_datetime64_compatibility_long.python index 2706c0f5b12..3db80aef845 100644 --- a/tests/queries/0_stateless/00921_datetime64_compatibility_long.python +++ b/tests/queries/0_stateless/00921_datetime64_compatibility_long.python @@ -83,7 +83,7 @@ CAST(N as DateTime64(9, 'Europe/Minsk')) # CAST(N as DateTime64(12, 'Asia/Istanbul')) # DateTime64(18) will always fail due to zero precision, but it is Ok to test here: # CAST(N as DateTime64(18, 'Asia/Istanbul')) -formatDateTime(N, '%C %d %D %e %F %H %I %j %m %M %p %R %S %T %u %V %w %y %Y %%', 'Asia/Istanbul') +formatDateTime(N, '%C %d %D %e %F %H %I %j %m %i %p %R %S %T %u %V %w %y %Y %%', 'Asia/Istanbul') """.splitlines() # Expanded later to cartesian product of all arguments, using format string. diff --git a/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference b/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference index 8a168ed0e9e..2f56230db37 100644 --- a/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference +++ b/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference @@ -49,7 +49,7 @@ Code: 43 "UInt8",11 ------------------------------------------ SELECT toUnixTimestamp(N) -Code: 44 +"UInt32",1568592000 "UInt32",1568650811 "UInt32",1568650811 ------------------------------------------ @@ -135,13 +135,13 @@ Code: 43 ------------------------------------------ SELECT date_trunc(\'year\', N, \'Asia/Istanbul\') Code: 43 -"Date","2019-01-01" -"Date","2019-01-01" +"DateTime('Asia/Istanbul')","2019-01-01 00:00:00" +"DateTime('Asia/Istanbul')","2019-01-01 00:00:00" ------------------------------------------ SELECT date_trunc(\'month\', N, \'Asia/Istanbul\') Code: 43 -"Date","2019-09-01" -"Date","2019-09-01" +"DateTime('Asia/Istanbul')","2019-09-01 00:00:00" +"DateTime('Asia/Istanbul')","2019-09-01 00:00:00" ------------------------------------------ SELECT date_trunc(\'day\', N, \'Asia/Istanbul\') "DateTime('Asia/Istanbul')","2019-09-16 00:00:00" @@ -353,7 +353,7 @@ SELECT CAST(N as DateTime64(9, \'Europe/Minsk\')) "DateTime64(9, 'Europe/Minsk')","2019-09-16 19:20:11.000000000" "DateTime64(9, 'Europe/Minsk')","2019-09-16 19:20:11.234000000" ------------------------------------------ -SELECT formatDateTime(N, \'%C %d %D %e %F %H %I %j %m %M %p %R %S %T %u %V %w %y %Y %%\', \'Asia/Istanbul\') +SELECT formatDateTime(N, \'%C %d %D %e %F %H %I %j %m %i %p %R %S %T %u %V %w %y %Y %%\', \'Asia/Istanbul\') "String","20 16 09/16/19 16 2019-09-16 00 12 259 09 00 AM 00:00 00 00:00:00 1 38 1 19 2019 %" "String","20 16 09/16/19 16 2019-09-16 19 07 259 09 20 PM 19:20 11 19:20:11 1 38 1 19 2019 %" "String","20 16 09/16/19 16 2019-09-16 19 07 259 09 20 PM 19:20 11 19:20:11 1 38 1 19 2019 %" diff --git a/tests/queries/0_stateless/00926_adaptive_index_granularity_merge_tree.sql b/tests/queries/0_stateless/00926_adaptive_index_granularity_merge_tree.sql index e812a6cae5c..3512a0fb8db 100644 --- a/tests/queries/0_stateless/00926_adaptive_index_granularity_merge_tree.sql +++ b/tests/queries/0_stateless/00926_adaptive_index_granularity_merge_tree.sql @@ -1,4 +1,4 @@ --- Tags: 
no-parallel, no-random-merge-tree-settings +-- Tags: no-random-merge-tree-settings ----- Group of very similar simple tests ------ select '----HORIZONTAL MERGE TESTS----'; diff --git a/tests/queries/0_stateless/00926_adaptive_index_granularity_pk.sql b/tests/queries/0_stateless/00926_adaptive_index_granularity_pk.sql index 636cd6589ce..2a7b6934e9a 100644 --- a/tests/queries/0_stateless/00926_adaptive_index_granularity_pk.sql +++ b/tests/queries/0_stateless/00926_adaptive_index_granularity_pk.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-random-merge-tree-settings +-- Tags: no-random-merge-tree-settings SET send_logs_level = 'fatal'; SELECT '----00489----'; diff --git a/tests/queries/0_stateless/00926_adaptive_index_granularity_replacing_merge_tree.sql b/tests/queries/0_stateless/00926_adaptive_index_granularity_replacing_merge_tree.sql index a5170aa8058..20b2e3dba6d 100644 --- a/tests/queries/0_stateless/00926_adaptive_index_granularity_replacing_merge_tree.sql +++ b/tests/queries/0_stateless/00926_adaptive_index_granularity_replacing_merge_tree.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-random-merge-tree-settings +-- Tags: no-random-merge-tree-settings ----- Group of very similar simple tests ------ DROP TABLE IF EXISTS zero_rows_per_granule; diff --git a/tests/queries/0_stateless/00933_alter_ttl.sql b/tests/queries/0_stateless/00933_alter_ttl.sql index 934d33660de..b0e697d024b 100644 --- a/tests/queries/0_stateless/00933_alter_ttl.sql +++ b/tests/queries/0_stateless/00933_alter_ttl.sql @@ -1,5 +1,3 @@ --- Tags: no-parallel - set send_logs_level = 'fatal'; drop table if exists ttl; diff --git a/tests/queries/0_stateless/00933_reserved_word.sql b/tests/queries/0_stateless/00933_reserved_word.sql index 3a26bc6ac98..447a618bfc9 100644 --- a/tests/queries/0_stateless/00933_reserved_word.sql +++ b/tests/queries/0_stateless/00933_reserved_word.sql @@ -1,5 +1,3 @@ --- Tags: no-parallel - DROP TABLE IF EXISTS reserved_word_table; CREATE TABLE reserved_word_table (`index` UInt8) ENGINE = MergeTree ORDER BY `index`; diff --git a/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.reference b/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.reference index f1839bae259..e142c6c79fe 100644 --- a/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.reference +++ b/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.reference @@ -1 +1,3 @@ +99999 +99999 0 0 13 diff --git a/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.sh b/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.sh index 390d6a70ef1..7bf4a88e972 100755 --- a/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.sh +++ b/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.sh @@ -13,15 +13,24 @@ $CLICKHOUSE_CLIENT --query="CREATE TABLE small_table (a UInt64 default 0, n UInt $CLICKHOUSE_CLIENT --query="INSERT INTO small_table (n) SELECT * from system.numbers limit 100000;" $CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE small_table FINAL;" -cached_query="SELECT count() FROM small_table where n > 0;" +cached_query="SELECT count() FROM small_table WHERE n > 0;" -$CLICKHOUSE_CLIENT --use_uncompressed_cache=1 --query="$cached_query" &> /dev/null - -$CLICKHOUSE_CLIENT --use_uncompressed_cache=1 --allow_prefetched_read_pool_for_remote_filesystem=0 --allow_prefetched_read_pool_for_local_filesystem=0 --query_id="test-query-uncompressed-cache" --query="$cached_query" &> /dev/null 
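# Aside, not part of the patch: a minimal sketch of the query_id/query_log
# pattern the rewritten script below relies on; 'example-run' is an
# illustrative query_id. Tagging a run with an explicit --query_id lets a
# follow-up query find that exact run in system.query_log once SYSTEM FLUSH
# LOGS has persisted it, and type = 2 selects the QueryFinish record, where
# the ProfileEvents counters are final.
#
#   $CLICKHOUSE_CLIENT --query_id="example-run" --query="SELECT count() FROM small_table WHERE n > 0"
#   $CLICKHOUSE_CLIENT --query="SYSTEM FLUSH LOGS"
#   $CLICKHOUSE_CLIENT --query="SELECT ProfileEvents['Seek'] FROM system.query_log WHERE query_id = 'example-run' AND type = 2 ORDER BY event_time DESC LIMIT 1"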
+$CLICKHOUSE_CLIENT --log_queries 1 --use_uncompressed_cache 1 --query="$cached_query" +$CLICKHOUSE_CLIENT --log_queries 1 --use_uncompressed_cache 1 --allow_prefetched_read_pool_for_remote_filesystem 0 --allow_prefetched_read_pool_for_local_filesystem 0 --query_id="test-query-uncompressed-cache" --query="$cached_query" $CLICKHOUSE_CLIENT --query="SYSTEM FLUSH LOGS" - -$CLICKHOUSE_CLIENT --query="SELECT ProfileEvents['Seek'], ProfileEvents['ReadCompressedBytes'], ProfileEvents['UncompressedCacheHits'] AS hit FROM system.query_log WHERE (query_id = 'test-query-uncompressed-cache') and current_database = currentDatabase() AND (type = 2) AND event_date >= yesterday() ORDER BY event_time DESC LIMIT 1" +$CLICKHOUSE_CLIENT --query=" + SELECT + ProfileEvents['Seek'], + ProfileEvents['ReadCompressedBytes'], + ProfileEvents['UncompressedCacheHits'] AS hit + FROM system.query_log + WHERE query_id = 'test-query-uncompressed-cache' + AND current_database = currentDatabase() + AND type = 2 + AND event_date >= yesterday() + ORDER BY event_time DESC + LIMIT 1" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS small_table" diff --git a/tests/queries/0_stateless/00933_ttl_with_default.sql b/tests/queries/0_stateless/00933_ttl_with_default.sql index 5504e33235c..e6c0a6e700c 100644 --- a/tests/queries/0_stateless/00933_ttl_with_default.sql +++ b/tests/queries/0_stateless/00933_ttl_with_default.sql @@ -1,5 +1,3 @@ --- Tags: no-parallel - drop table if exists ttl_00933_2; create table ttl_00933_2 (d DateTime, a Int default 111 ttl d + interval 1 DAY) engine = MergeTree order by tuple() partition by toDayOfMonth(d); diff --git a/tests/queries/0_stateless/00936_function_result_with_operator_in.sql b/tests/queries/0_stateless/00936_function_result_with_operator_in.sql index dfb19c1f3ec..0b253021f39 100644 --- a/tests/queries/0_stateless/00936_function_result_with_operator_in.sql +++ b/tests/queries/0_stateless/00936_function_result_with_operator_in.sql @@ -22,8 +22,8 @@ SELECT 'a' IN splitByChar('c', 'abcdef'); SELECT 'errors:'; -- non-constant expressions in the right side of IN -SELECT count() FROM samples WHERE 1 IN range(samples.value); -- { serverError 47 } -SELECT count() FROM samples WHERE 1 IN range(rand() % 1000); -- { serverError 36 } +SELECT count() FROM samples WHERE 1 IN range(samples.value); -- { serverError 1, 47 } +SELECT count() FROM samples WHERE 1 IN range(rand() % 1000); -- { serverError 1, 36 } -- index is not used SELECT count() FROM samples WHERE value IN range(3); -- { serverError 277 } @@ -31,4 +31,4 @@ SELECT count() FROM samples WHERE value IN range(3); -- { serverError 277 } -- wrong type SELECT 123 IN splitByChar('c', 'abcdef'); -- { serverError 53 } -DROP TABLE samples; \ No newline at end of file +DROP TABLE samples; diff --git a/tests/queries/0_stateless/00955_test_final_mark.sql b/tests/queries/0_stateless/00955_test_final_mark.sql index 44eb4a69c2d..f29c61eef4c 100644 --- a/tests/queries/0_stateless/00955_test_final_mark.sql +++ b/tests/queries/0_stateless/00955_test_final_mark.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-random-merge-tree-settings +-- Tags: no-random-merge-tree-settings SET send_logs_level = 'fatal'; diff --git a/tests/queries/0_stateless/00955_test_final_mark_use.sh b/tests/queries/0_stateless/00955_test_final_mark_use.sh index b1bccd2d6e6..2c2589c83bd 100755 --- a/tests/queries/0_stateless/00955_test_final_mark_use.sh +++ b/tests/queries/0_stateless/00955_test_final_mark_use.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, 
no-random-merge-tree-settings +# Tags: no-random-merge-tree-settings CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/00960_live_view_watch_events_live.py b/tests/queries/0_stateless/00960_live_view_watch_events_live.py deleted file mode 100755 index 46c561516ba..00000000000 --- a/tests/queries/0_stateless/00960_live_view_watch_events_live.py +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env python3 -# Tags: no-replicated-database, no-parallel, no-fasttest - -import os -import sys -import signal - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, "helpers")) - -from client import client, prompt, end_of_block - -log = None -# uncomment the line below for debugging -# log=sys.stdout - -with client(name="client1>", log=log) as client1, client( - name="client2>", log=log -) as client2: - client1.expect(prompt) - client2.expect(prompt) - - client1.send("SET allow_experimental_live_view = 1") - client1.expect(prompt) - client2.send("SET allow_experimental_live_view = 1") - client2.expect(prompt) - - client1.send("DROP TABLE IF EXISTS test.lv") - client1.expect(prompt) - client1.send(" DROP TABLE IF EXISTS test.mt") - client1.expect(prompt) - client1.send("CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()") - client1.expect(prompt) - client1.send("CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt") - client1.expect(prompt) - client1.send("WATCH test.lv EVENTS") - client1.expect("version") - client1.expect("1.*" + end_of_block) - client2.send("INSERT INTO test.mt VALUES (1),(2),(3)") - client1.expect("2.*" + end_of_block) - client2.send("INSERT INTO test.mt VALUES (4),(5),(6)") - client1.expect("3.*" + end_of_block) - # send Ctrl-C - client1.send("\x03", eol="") - match = client1.expect("(%s)|([#\$] )" % prompt) - if match.groups()[1]: - client1.send(client1.command) - client1.expect(prompt) - client1.send("DROP TABLE test.lv") - client1.expect(prompt) - client1.send("DROP TABLE test.mt") - client1.expect(prompt) diff --git a/tests/queries/0_stateless/00962_live_view_periodic_refresh.py b/tests/queries/0_stateless/00962_live_view_periodic_refresh.py deleted file mode 100755 index ac399d3c4c8..00000000000 --- a/tests/queries/0_stateless/00962_live_view_periodic_refresh.py +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env python3 -# Tags: no-replicated-database, no-parallel, no-fasttest - -import os -import sys -import signal - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, "helpers")) - -from client import client, prompt, end_of_block - -log = None -# uncomment the line below for debugging -# log=sys.stdout - -with client(name="client1>", log=log) as client1, client( - name="client2>", log=log -) as client2: - client1.expect(prompt) - client2.expect(prompt) - - client1.send("SET allow_experimental_live_view = 1") - client1.expect(prompt) - client2.send("SET allow_experimental_live_view = 1") - client2.expect(prompt) - - client1.send("DROP TABLE IF EXISTS test.lv") - client1.expect(prompt) - client1.send( - "CREATE LIVE VIEW test.lv WITH REFRESH 1" - " AS SELECT value FROM system.events WHERE event = 'OSCPUVirtualTimeMicroseconds'" - ) - client1.expect(prompt) - client1.send("WATCH test.lv FORMAT JSONEachRow") - client1.expect(r'"_version":' + end_of_block) - client1.expect(r'"_version":' + end_of_block) - client1.expect(r'"_version":' + end_of_block) - # send Ctrl-C - client1.send("\x03", eol="") - match = 
client1.expect("(%s)|([#\$] )" % prompt) - if match.groups()[1]: - client1.send(client1.command) - client1.expect(prompt) - client1.send("DROP TABLE test.lv") - client1.expect(prompt) diff --git a/tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.py b/tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.py deleted file mode 100755 index 983b330e24a..00000000000 --- a/tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.py +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env python3 -# Tags: no-replicated-database, no-parallel, no-fasttest - -import os -import sys -import time -import signal - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, "helpers")) - -from client import client, prompt, end_of_block - -log = None -# uncomment the line below for debugging -# log=sys.stdout - -with client(name="client1>", log=log) as client1, client( - name="client2>", log=log -) as client2: - client1.expect(prompt) - client2.expect(prompt) - - client1.send("SET allow_experimental_live_view = 1") - client1.expect(prompt) - client2.send("SET allow_experimental_live_view = 1") - client2.expect(prompt) - - client1.send("DROP TABLE IF EXISTS test.lv") - client1.expect(prompt) - client1.send( - "CREATE LIVE VIEW test.lv WITH REFRESH 1" - " AS SELECT value FROM system.events WHERE event = 'OSCPUVirtualTimeMicroseconds'" - ) - client1.expect(prompt) - client1.send("WATCH test.lv FORMAT JSONEachRow") - client1.expect(r'"_version":' + end_of_block) - client1.expect(r'"_version":' + end_of_block) - client1.expect(r'"_version":' + end_of_block) - # send Ctrl-C - client1.send("\x03", eol="") - match = client1.expect("(%s)|([#\$] )" % prompt) - if match.groups()[1]: - client1.send(client1.command) - client1.expect(prompt) diff --git a/tests/queries/0_stateless/00962_live_view_periodic_refresh_dictionary.py b/tests/queries/0_stateless/00962_live_view_periodic_refresh_dictionary.py deleted file mode 100755 index 9d2a26c83c0..00000000000 --- a/tests/queries/0_stateless/00962_live_view_periodic_refresh_dictionary.py +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/env python3 -# Tags: no-replicated-database, no-parallel, no-fasttest - -import os -import sys -import signal - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, "helpers")) - -from client import client, prompt, end_of_block - -log = None -# uncomment the line below for debugging -# log=sys.stdout - -with client(name="client1>", log=log) as client1, client( - name="client2>", log=log -) as client2: - client1.expect(prompt) - client2.expect(prompt) - - client1.send("SET allow_experimental_live_view = 1") - client1.expect(prompt) - client2.send("SET allow_experimental_live_view = 1") - client2.expect(prompt) - - client1.send("DROP TABLE IF EXISTS test.lv") - client1.expect(prompt) - client1.send("DROP TABLE IF EXISTS test.mt") - client1.expect(prompt) - client1.send("DROP DICTIONARY IF EXITS test.dict") - client1.expect(prompt) - - client1.send( - "CREATE TABLE test.mt (a Int32, b Int32) Engine=MergeTree order by tuple()" - ) - client1.expect(prompt) - client1.send( - "CREATE DICTIONARY test.dict(a Int32, b Int32) PRIMARY KEY a LAYOUT(FLAT()) " - + "SOURCE(CLICKHOUSE(db 'test' table 'mt')) LIFETIME(1)" - ) - client1.expect(prompt) - client1.send("CREATE LIVE VIEW test.lv WITH REFRESH 1 AS SELECT * FROM test.dict") - client1.expect(prompt) - - client2.send("INSERT INTO test.mt VALUES (1,2)") - client2.expect(prompt) - - 
client1.send("WATCH test.lv FORMAT JSONEachRow") - client1.expect(r'"_version":"1"') - - client2.send("INSERT INTO test.mt VALUES (2,2)") - client2.expect(prompt) - client1.expect(r'"_version":"2"') - - client2.send("INSERT INTO test.mt VALUES (3,2)") - client2.expect(prompt) - client1.expect(r'"_version":"3"') - - # send Ctrl-C - client1.send("\x03", eol="") - match = client1.expect("(%s)|([#\$] )" % prompt) - if match.groups()[1]: - client1.send(client1.command) - client1.expect(prompt) - - client1.send("DROP TABLE IF EXISTS test.lv") - client1.expect(prompt) - client1.send("DROP DICTIONARY IF EXISTS test.dict") - client1.expect(prompt) - client1.send("DROP TABLE IF EXISTS test.mt") - client1.expect(prompt) diff --git a/tests/queries/0_stateless/00964_live_view_watch_events_heartbeat.py b/tests/queries/0_stateless/00964_live_view_watch_events_heartbeat.py deleted file mode 100755 index c8902203a3d..00000000000 --- a/tests/queries/0_stateless/00964_live_view_watch_events_heartbeat.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env python3 -# Tags: no-replicated-database, no-parallel, no-fasttest - -import os -import sys -import signal - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, "helpers")) - -from client import client, prompt, end_of_block - -log = None -# uncomment the line below for debugging -# log=sys.stdout - -with client(name="client1>", log=log) as client1, client( - name="client2>", log=log -) as client2: - client1.expect(prompt) - client2.expect(prompt) - - client1.send("SET allow_experimental_live_view = 1") - client1.expect(prompt) - client2.send("SET allow_experimental_live_view = 1") - client2.expect(prompt) - - client1.send("DROP TABLE IF EXISTS test.lv") - client1.expect(prompt) - client1.send("DROP TABLE IF EXISTS test.mt") - client1.expect(prompt) - client1.send("SET live_view_heartbeat_interval=1") - client1.expect(prompt) - client1.send("CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()") - client1.expect(prompt) - client1.send("CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt") - client1.expect(prompt) - client1.send("WATCH test.lv EVENTS FORMAT CSV") - client1.expect("Progress: 1.00 rows.*\)") - client2.send("INSERT INTO test.mt VALUES (1)") - client2.expect(prompt) - client1.expect("Progress: 2.00 rows.*\)") - client2.send("INSERT INTO test.mt VALUES (2),(3)") - client2.expect(prompt) - # wait for heartbeat - client1.expect("Progress: 3.00 rows.*\)") - # send Ctrl-C - client1.send("\x03", eol="") - match = client1.expect("(%s)|([#\$] )" % prompt) - if match.groups()[1]: - client1.send(client1.command) - client1.expect(prompt) - client1.send("DROP TABLE test.lv") - client1.expect(prompt) - client1.send("DROP TABLE test.mt") - client1.expect(prompt) diff --git a/tests/queries/0_stateless/00965_live_view_watch_heartbeat.py b/tests/queries/0_stateless/00965_live_view_watch_heartbeat.py deleted file mode 100755 index b499f673cc0..00000000000 --- a/tests/queries/0_stateless/00965_live_view_watch_heartbeat.py +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env python3 -# Tags: no-replicated-database, no-parallel, no-fasttest - -import os -import sys -import signal - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, "helpers")) - -from client import client, prompt, end_of_block - -log = None -# uncomment the line below for debugging -# log=sys.stdout - -with client(name="client1>", log=log) as client1, client( - name="client2>", log=log -) as client2: - 
client1.expect(prompt) - client2.expect(prompt) - - client1.send("SET allow_experimental_live_view = 1") - client1.expect(prompt) - client2.send("SET allow_experimental_live_view = 1") - client2.expect(prompt) - - client1.send("DROP TABLE IF EXISTS test.lv") - client1.expect(prompt) - client1.send(" DROP TABLE IF EXISTS test.mt") - client1.expect(prompt) - client1.send("SET live_view_heartbeat_interval=1") - client1.expect(prompt) - client1.send("CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()") - client1.expect(prompt) - client1.send("CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt") - client1.expect(prompt) - client1.send("WATCH test.lv") - client1.expect("_version") - client1.expect(r"0.*1" + end_of_block) - client2.send("INSERT INTO test.mt VALUES (1),(2),(3)") - client1.expect(r"6.*2" + end_of_block) - # wait for heartbeat - client1.expect("Progress: 2.00 rows.*\)") - # send Ctrl-C - client1.send("\x03", eol="") - match = client1.expect("(%s)|([#\$] )" % prompt) - if match.groups()[1]: - client1.send(client1.command) - client1.expect(prompt) - client1.send("DROP TABLE test.lv") - client1.expect(prompt) - client1.send("DROP TABLE test.mt") - client1.expect(prompt) diff --git a/tests/queries/0_stateless/00965_logs_level_bugfix.reference b/tests/queries/0_stateless/00965_logs_level_bugfix.reference index 52396b3fe79..affd41b780b 100644 --- a/tests/queries/0_stateless/00965_logs_level_bugfix.reference +++ b/tests/queries/0_stateless/00965_logs_level_bugfix.reference @@ -2,7 +2,6 @@ . . - . - diff --git a/tests/queries/0_stateless/00966_live_view_watch_events_http.py b/tests/queries/0_stateless/00966_live_view_watch_events_http.py deleted file mode 100755 index 1f2ddae23d6..00000000000 --- a/tests/queries/0_stateless/00966_live_view_watch_events_http.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python3 -# Tags: no-replicated-database, no-parallel, no-fasttest - -import os -import sys - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, "helpers")) - -from client import client, prompt, end_of_block -from httpclient import client as http_client - -log = None -# uncomment the line below for debugging -# log=sys.stdout - -with client(name="client1>", log=log) as client1: - client1.expect(prompt) - - client1.send("SET allow_experimental_live_view = 1") - client1.expect(prompt) - - client1.send("DROP TABLE IF EXISTS test.lv") - client1.expect(prompt) - client1.send(" DROP TABLE IF EXISTS test.mt") - client1.expect(prompt) - client1.send("CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()") - client1.expect(prompt) - client1.send("CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt") - client1.expect(prompt) - - try: - with http_client( - { - "method": "GET", - "url": "/?allow_experimental_live_view=1&query=WATCH%20test.lv%20EVENTS", - }, - name="client2>", - log=log, - ) as client2: - client2.expect(".*1\n") - client1.send("INSERT INTO test.mt VALUES (1),(2),(3)") - client1.expect(prompt) - client2.expect(".*2\n") - finally: - client1.send("DROP TABLE test.lv") - client1.expect(prompt) - client1.send("DROP TABLE test.mt") - client1.expect(prompt) diff --git a/tests/queries/0_stateless/00967_live_view_watch_http.py b/tests/queries/0_stateless/00967_live_view_watch_http.py deleted file mode 100755 index 92e192cc7f2..00000000000 --- a/tests/queries/0_stateless/00967_live_view_watch_http.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python3 -# Tags: no-replicated-database, no-parallel, no-fasttest - -import 
os -import sys - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, "helpers")) - -from client import client, prompt, end_of_block -from httpclient import client as http_client - -log = None -# uncomment the line below for debugging -# log=sys.stdout - -with client(name="client1>", log=log) as client1: - client1.expect(prompt) - - client1.send("SET allow_experimental_live_view = 1") - client1.expect(prompt) - - client1.send("DROP TABLE IF EXISTS test.lv") - client1.expect(prompt) - client1.send(" DROP TABLE IF EXISTS test.mt") - client1.expect(prompt) - client1.send("CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()") - client1.expect(prompt) - client1.send("CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt") - client1.expect(prompt) - - try: - with http_client( - { - "method": "GET", - "url": "/?allow_experimental_live_view=1&query=WATCH%20test.lv", - }, - name="client2>", - log=log, - ) as client2: - client2.expect(".*0\t1\n") - client1.send("INSERT INTO test.mt VALUES (1),(2),(3)") - client1.expect(prompt) - client2.expect(".*6\t2\n") - finally: - client1.send("DROP TABLE test.lv") - client1.expect(prompt) - client1.send("DROP TABLE test.mt") - client1.expect(prompt) diff --git a/tests/queries/0_stateless/00968_live_view_select_format_jsoneachrowwithprogress.reference b/tests/queries/0_stateless/00968_live_view_select_format_jsoneachrowwithprogress.reference deleted file mode 100644 index 5f48ead3147..00000000000 --- a/tests/queries/0_stateless/00968_live_view_select_format_jsoneachrowwithprogress.reference +++ /dev/null @@ -1,4 +0,0 @@ -{"row":{"a":1}} -{"row":{"a":2}} -{"row":{"a":3}} -{"progress":{"read_rows":"3","read_bytes":"36","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","result_rows":"0","result_bytes":"0"}} diff --git a/tests/queries/0_stateless/00968_live_view_select_format_jsoneachrowwithprogress.sql b/tests/queries/0_stateless/00968_live_view_select_format_jsoneachrowwithprogress.sql deleted file mode 100644 index 77b0f37e10f..00000000000 --- a/tests/queries/0_stateless/00968_live_view_select_format_jsoneachrowwithprogress.sql +++ /dev/null @@ -1,16 +0,0 @@ --- Tags: no-replicated-database, no-parallel, no-fasttest - -SET allow_experimental_live_view = 1; - -DROP TABLE IF EXISTS lv; -DROP TABLE IF EXISTS mt; - -CREATE TABLE mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW lv AS SELECT * FROM mt; - -INSERT INTO mt VALUES (1),(2),(3); - -SELECT * FROM lv FORMAT JSONEachRowWithProgress; - -DROP TABLE lv; -DROP TABLE mt; diff --git a/tests/queries/0_stateless/00969_live_view_watch_format_jsoneachrowwithprogress.reference b/tests/queries/0_stateless/00969_live_view_watch_format_jsoneachrowwithprogress.reference deleted file mode 100644 index 80ec35990d6..00000000000 --- a/tests/queries/0_stateless/00969_live_view_watch_format_jsoneachrowwithprogress.reference +++ /dev/null @@ -1,6 +0,0 @@ -{"row":{"sum(a)":"0","_version":"1"}} -{"progress":{"read_rows":"1","read_bytes":"16","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","result_rows":"0","result_bytes":"0"}} -{"row":{"sum(a)":"6","_version":"2"}} -{"progress":{"read_rows":"1","read_bytes":"16","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","result_rows":"0","result_bytes":"0"}} -{"row":{"sum(a)":"21","_version":"3"}} -{"progress":{"read_rows":"1","read_bytes":"16","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","result_rows":"0","result_bytes":"0"}} diff --git 
a/tests/queries/0_stateless/00969_live_view_watch_format_jsoneachrowwithprogress.sql b/tests/queries/0_stateless/00969_live_view_watch_format_jsoneachrowwithprogress.sql deleted file mode 100644 index 2d5d5f48b34..00000000000 --- a/tests/queries/0_stateless/00969_live_view_watch_format_jsoneachrowwithprogress.sql +++ /dev/null @@ -1,22 +0,0 @@ --- Tags: no-replicated-database, no-parallel, no-fasttest - -SET allow_experimental_live_view = 1; - -DROP TABLE IF EXISTS lv; -DROP TABLE IF EXISTS mt; - -CREATE TABLE mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW lv AS SELECT sum(a) FROM mt; - -WATCH lv LIMIT 0 FORMAT JSONEachRowWithProgress; - -INSERT INTO mt VALUES (1),(2),(3); - -WATCH lv LIMIT 0 FORMAT JSONEachRowWithProgress; - -INSERT INTO mt VALUES (4),(5),(6); - -WATCH lv LIMIT 0 FORMAT JSONEachRowWithProgress; - -DROP TABLE lv; -DROP TABLE mt; diff --git a/tests/queries/0_stateless/00970_live_view_watch_events_http_heartbeat.py b/tests/queries/0_stateless/00970_live_view_watch_events_http_heartbeat.py deleted file mode 100755 index febe439e63b..00000000000 --- a/tests/queries/0_stateless/00970_live_view_watch_events_http_heartbeat.py +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env python3 -# Tags: no-replicated-database, no-parallel, no-fasttest - -import os -import sys - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, "helpers")) - -from client import client, prompt, end_of_block -from httpclient import client as http_client - -log = None -# uncomment the line below for debugging -# log=sys.stdout - -with client(name="client1>", log=log) as client1: - client1.expect(prompt) - - client1.send("SET allow_experimental_live_view = 1") - client1.expect(prompt) - - client1.send("DROP TABLE IF EXISTS test.lv") - client1.expect(prompt) - client1.send(" DROP TABLE IF EXISTS test.mt") - client1.expect(prompt) - client1.send("CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()") - client1.expect(prompt) - client1.send("CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt") - client1.expect(prompt) - - with http_client( - { - "method": "GET", - "url": "/?allow_experimental_live_view=1&live_view_heartbeat_interval=1&query=WATCH%20test.lv%20EVENTS%20FORMAT%20JSONEachRowWithProgress", - }, - name="client2>", - log=log, - ) as client2: - client2.expect( - '{"progress":{"read_rows":"1","read_bytes":"8","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","result_rows":"0","result_bytes":"0"}}\n', - escape=True, - ) - client2.expect('{"row":{"version":"1"}', escape=True) - client2.expect( - '{"progress":{"read_rows":"1","read_bytes":"8","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","result_rows":"0","result_bytes":"0"}}', - escape=True, - ) - # heartbeat is provided by progress message - client2.expect( - '{"progress":{"read_rows":"1","read_bytes":"8","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","result_rows":"0","result_bytes":"0"}}', - escape=True, - ) - - client1.send("INSERT INTO test.mt VALUES (1),(2),(3)") - client1.expect(prompt) - - client2.expect('{"row":{"version":"2"}}\n', escape=True) - - client1.send("DROP TABLE test.lv") - client1.expect(prompt) - client1.send("DROP TABLE test.mt") - client1.expect(prompt) diff --git a/tests/queries/0_stateless/00971_live_view_watch_http_heartbeat.py b/tests/queries/0_stateless/00971_live_view_watch_http_heartbeat.py deleted file mode 100755 index 117f7b7c786..00000000000 --- 
a/tests/queries/0_stateless/00971_live_view_watch_http_heartbeat.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python3 -# Tags: no-replicated-database, no-parallel, no-fasttest - -import os -import sys - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, "helpers")) - -from client import client, prompt, end_of_block -from httpclient import client as http_client - -log = None -# uncomment the line below for debugging -# log=sys.stdout - -with client(name="client1>", log=log) as client1: - client1.expect(prompt) - - client1.send("SET allow_experimental_live_view = 1") - client1.expect(prompt) - - client1.send("DROP TABLE IF EXISTS test.lv") - client1.expect(prompt) - client1.send(" DROP TABLE IF EXISTS test.mt") - client1.expect(prompt) - client1.send("CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()") - client1.expect(prompt) - client1.send("CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt") - client1.expect(prompt) - - with http_client( - { - "method": "GET", - "url": "/?allow_experimental_live_view=1&live_view_heartbeat_interval=1&query=WATCH%20test.lv%20FORMAT%20JSONEachRowWithProgress", - }, - name="client2>", - log=log, - ) as client2: - client2.expect( - '"progress".*', - ) - client2.expect('{"row":{"sum(a)":"0","_version":"1"}}\n', escape=True) - client2.expect('"progress".*\n') - # heartbeat is provided by progress message - client2.expect('"progress".*\n') - - client1.send("INSERT INTO test.mt VALUES (1),(2),(3)") - client1.expect(prompt) - - client2.expect('"progress".*"read_rows":"2".*\n') - client2.expect('{"row":{"sum(a)":"6","_version":"2"}}\n', escape=True) - - client1.send("DROP TABLE test.lv") - client1.expect(prompt) - client1.send("DROP TABLE test.mt") - client1.expect(prompt) diff --git a/tests/queries/0_stateless/00972_live_view_select_1.sql b/tests/queries/0_stateless/00972_live_view_select_1.sql deleted file mode 100644 index 20344466856..00000000000 --- a/tests/queries/0_stateless/00972_live_view_select_1.sql +++ /dev/null @@ -1,11 +0,0 @@ --- Tags: no-replicated-database, no-parallel, no-fasttest - -SET allow_experimental_live_view = 1; - -DROP TABLE IF EXISTS lv; - -CREATE LIVE VIEW lv AS SELECT 1; - -SELECT * FROM lv; - -DROP TABLE lv; diff --git a/tests/queries/0_stateless/00973_live_view_select.reference b/tests/queries/0_stateless/00973_live_view_select.reference deleted file mode 100644 index 75236c0daf7..00000000000 --- a/tests/queries/0_stateless/00973_live_view_select.reference +++ /dev/null @@ -1,4 +0,0 @@ -6 1 -6 1 -12 2 -12 2 diff --git a/tests/queries/0_stateless/00973_live_view_select.sql b/tests/queries/0_stateless/00973_live_view_select.sql deleted file mode 100644 index 0844c730b26..00000000000 --- a/tests/queries/0_stateless/00973_live_view_select.sql +++ /dev/null @@ -1,22 +0,0 @@ --- Tags: no-replicated-database, no-parallel, no-fasttest - -SET allow_experimental_live_view = 1; - -DROP TABLE IF EXISTS lv; -DROP TABLE IF EXISTS mt; - -CREATE TABLE mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW lv AS SELECT sum(a) FROM mt; - -INSERT INTO mt VALUES (1),(2),(3); - -SELECT *,_version FROM lv; -SELECT *,_version FROM lv; - -INSERT INTO mt VALUES (1),(2),(3); - -SELECT *,_version FROM lv; -SELECT *,_version FROM lv; - -DROP TABLE lv; -DROP TABLE mt; diff --git a/tests/queries/0_stateless/00973_live_view_select_prewhere.reference b/tests/queries/0_stateless/00973_live_view_select_prewhere.reference deleted file mode 100644 index 3a6fe59ae6d..00000000000 --- 
a/tests/queries/0_stateless/00973_live_view_select_prewhere.reference +++ /dev/null @@ -1,2 +0,0 @@ -5 1 -10 2 diff --git a/tests/queries/0_stateless/00973_live_view_select_prewhere.sql b/tests/queries/0_stateless/00973_live_view_select_prewhere.sql deleted file mode 100644 index d0d27464618..00000000000 --- a/tests/queries/0_stateless/00973_live_view_select_prewhere.sql +++ /dev/null @@ -1,28 +0,0 @@ --- Tags: no-replicated-database, no-parallel, no-fasttest - -SET allow_experimental_live_view = 1; - -DROP TABLE IF EXISTS lv; -DROP TABLE IF EXISTS lv2; -DROP TABLE IF EXISTS mt; - -CREATE TABLE mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW lv AS SELECT sum(a) AS sum_a FROM mt PREWHERE a > 1; -CREATE LIVE VIEW lv2 AS SELECT sum(number) AS sum_number FROM system.numbers PREWHERE number > 1; - -INSERT INTO mt VALUES (1),(2),(3); - -SELECT *,_version FROM lv; -SELECT *,_version FROM lv PREWHERE sum_a > 5; -- { serverError 182 } - -INSERT INTO mt VALUES (1),(2),(3); - -SELECT *,_version FROM lv; -SELECT *,_version FROM lv PREWHERE sum_a > 10; -- { serverError 182 } - -SELECT *,_version FROM lv2; -- { serverError 182 } -SELECT *,_version FROM lv2 PREWHERE sum_number > 10; -- { serverError 182 } - -DROP TABLE lv; -DROP TABLE lv2; -DROP TABLE mt; diff --git a/tests/queries/0_stateless/00973_live_view_with_subquery_select.reference b/tests/queries/0_stateless/00973_live_view_with_subquery_select.reference deleted file mode 100644 index ebf18a51290..00000000000 --- a/tests/queries/0_stateless/00973_live_view_with_subquery_select.reference +++ /dev/null @@ -1,18 +0,0 @@ -1 1 -2 1 -3 1 -1 1 -2 1 -3 1 -1 2 -2 2 -3 2 -1 2 -2 2 -3 2 -1 2 -2 2 -3 2 -1 2 -2 2 -3 2 diff --git a/tests/queries/0_stateless/00973_live_view_with_subquery_select.sql b/tests/queries/0_stateless/00973_live_view_with_subquery_select.sql deleted file mode 100644 index 9d644f245f6..00000000000 --- a/tests/queries/0_stateless/00973_live_view_with_subquery_select.sql +++ /dev/null @@ -1,22 +0,0 @@ --- Tags: no-replicated-database, no-parallel, no-fasttest - -SET allow_experimental_live_view = 1; - -DROP TABLE IF EXISTS lv; -DROP TABLE IF EXISTS mt; - -CREATE TABLE mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW lv AS SELECT a FROM (SELECT a FROM mt); - -INSERT INTO mt VALUES (1),(2),(3); - -SELECT *,_version FROM lv; -SELECT *,_version FROM lv; - -INSERT INTO mt VALUES (1),(2),(3); - -SELECT *,_version FROM lv; -SELECT *,_version FROM lv; - -DROP TABLE lv; -DROP TABLE mt; diff --git a/tests/queries/0_stateless/00973_live_view_with_subquery_select_join.reference b/tests/queries/0_stateless/00973_live_view_with_subquery_select_join.reference deleted file mode 100644 index 7a596e87ed6..00000000000 --- a/tests/queries/0_stateless/00973_live_view_with_subquery_select_join.reference +++ /dev/null @@ -1,6 +0,0 @@ -1 hello 2 -1 hello 2 -1 hello 3 -2 hello 3 -1 hello 3 -2 hello 3 diff --git a/tests/queries/0_stateless/00973_live_view_with_subquery_select_join.sql b/tests/queries/0_stateless/00973_live_view_with_subquery_select_join.sql deleted file mode 100644 index 562e6df1bfd..00000000000 --- a/tests/queries/0_stateless/00973_live_view_with_subquery_select_join.sql +++ /dev/null @@ -1,30 +0,0 @@ --- Tags: no-replicated-database, no-parallel, no-fasttest - -SET allow_experimental_live_view = 1; - -DROP TABLE IF EXISTS lv; -DROP TABLE IF EXISTS A; -DROP TABLE IF EXISTS B; - -CREATE TABLE A (id Int32) Engine=Memory; -CREATE TABLE B (id Int32, name String) Engine=Memory; - -CREATE LIVE VIEW lv AS SELECT id, 
name FROM ( SELECT A.id, B.name FROM A as A, B as B WHERE A.id = B.id ); - -SELECT * FROM lv; - -INSERT INTO A VALUES (1); -INSERT INTO B VALUES (1, 'hello'); - -SELECT *,_version FROM lv ORDER BY id; -SELECT *,_version FROM lv ORDER BY id; - -INSERT INTO A VALUES (2) -INSERT INTO B VALUES (2, 'hello') - -SELECT *,_version FROM lv ORDER BY id; -SELECT *,_version FROM lv ORDER BY id; - -DROP TABLE lv; -DROP TABLE A; -DROP TABLE B; diff --git a/tests/queries/0_stateless/00973_live_view_with_subquery_select_join_no_alias.reference b/tests/queries/0_stateless/00973_live_view_with_subquery_select_join_no_alias.reference deleted file mode 100644 index 7a596e87ed6..00000000000 --- a/tests/queries/0_stateless/00973_live_view_with_subquery_select_join_no_alias.reference +++ /dev/null @@ -1,6 +0,0 @@ -1 hello 2 -1 hello 2 -1 hello 3 -2 hello 3 -1 hello 3 -2 hello 3 diff --git a/tests/queries/0_stateless/00973_live_view_with_subquery_select_join_no_alias.sql b/tests/queries/0_stateless/00973_live_view_with_subquery_select_join_no_alias.sql deleted file mode 100644 index 60078ccff30..00000000000 --- a/tests/queries/0_stateless/00973_live_view_with_subquery_select_join_no_alias.sql +++ /dev/null @@ -1,30 +0,0 @@ --- Tags: no-replicated-database, no-parallel, no-fasttest - -SET allow_experimental_live_view = 1; - -DROP TABLE IF EXISTS lv; -DROP TABLE IF EXISTS A; -DROP TABLE IF EXISTS B; - -CREATE TABLE A (id Int32) Engine=Memory; -CREATE TABLE B (id Int32, name String) Engine=Memory; - -CREATE LIVE VIEW lv AS SELECT id, name FROM ( SELECT A.id, B.name FROM A, B WHERE A.id = B.id); - -SELECT * FROM lv; - -INSERT INTO A VALUES (1); -INSERT INTO B VALUES (1, 'hello'); - -SELECT *,_version FROM lv ORDER BY id; -SELECT *,_version FROM lv ORDER BY id; - -INSERT INTO A VALUES (2) -INSERT INTO B VALUES (2, 'hello') - -SELECT *,_version FROM lv ORDER BY id; -SELECT *,_version FROM lv ORDER BY id; - -DROP TABLE lv; -DROP TABLE A; -DROP TABLE B; diff --git a/tests/queries/0_stateless/00973_live_view_with_subquery_select_nested.reference b/tests/queries/0_stateless/00973_live_view_with_subquery_select_nested.reference deleted file mode 100644 index ebf18a51290..00000000000 --- a/tests/queries/0_stateless/00973_live_view_with_subquery_select_nested.reference +++ /dev/null @@ -1,18 +0,0 @@ -1 1 -2 1 -3 1 -1 1 -2 1 -3 1 -1 2 -2 2 -3 2 -1 2 -2 2 -3 2 -1 2 -2 2 -3 2 -1 2 -2 2 -3 2 diff --git a/tests/queries/0_stateless/00973_live_view_with_subquery_select_nested.sql b/tests/queries/0_stateless/00973_live_view_with_subquery_select_nested.sql deleted file mode 100644 index 700de5955f9..00000000000 --- a/tests/queries/0_stateless/00973_live_view_with_subquery_select_nested.sql +++ /dev/null @@ -1,22 +0,0 @@ --- Tags: no-replicated-database, no-parallel, no-fasttest - -SET allow_experimental_live_view = 1; - -DROP TABLE IF EXISTS lv; -DROP TABLE IF EXISTS mt; - -CREATE TABLE mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW lv AS SELECT a FROM ( SELECT * FROM ( SELECT a FROM (SELECT a FROM mt) ) ); - -INSERT INTO mt VALUES (1),(2),(3); - -SELECT *,_version FROM lv; -SELECT *,_version FROM lv; - -INSERT INTO mt VALUES (1),(2),(3); - -SELECT *,_version FROM lv; -SELECT *,_version FROM lv; - -DROP TABLE lv; -DROP TABLE mt; diff --git a/tests/queries/0_stateless/00973_live_view_with_subquery_select_nested_with_aggregation.reference b/tests/queries/0_stateless/00973_live_view_with_subquery_select_nested_with_aggregation.reference deleted file mode 100644 index 75236c0daf7..00000000000 --- 
a/tests/queries/0_stateless/00973_live_view_with_subquery_select_nested_with_aggregation.reference +++ /dev/null @@ -1,4 +0,0 @@ -6 1 -6 1 -12 2 -12 2 diff --git a/tests/queries/0_stateless/00973_live_view_with_subquery_select_nested_with_aggregation.sql b/tests/queries/0_stateless/00973_live_view_with_subquery_select_nested_with_aggregation.sql deleted file mode 100644 index 3a9408a62fa..00000000000 --- a/tests/queries/0_stateless/00973_live_view_with_subquery_select_nested_with_aggregation.sql +++ /dev/null @@ -1,22 +0,0 @@ --- Tags: no-replicated-database, no-parallel, no-fasttest - -SET allow_experimental_live_view = 1; - -DROP TABLE IF EXISTS lv; -DROP TABLE IF EXISTS mt; - -CREATE TABLE mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW lv AS SELECT * FROM ( SELECT sum(a) FROM ( SELECT a FROM (SELECT a FROM mt) ) ); - -INSERT INTO mt VALUES (1),(2),(3); - -SELECT *,_version FROM lv; -SELECT *,_version FROM lv; - -INSERT INTO mt VALUES (1),(2),(3); - -SELECT *,_version FROM lv; -SELECT *,_version FROM lv; - -DROP TABLE lv; -DROP TABLE mt; diff --git a/tests/queries/0_stateless/00973_live_view_with_subquery_select_nested_with_aggregation_table_alias.reference b/tests/queries/0_stateless/00973_live_view_with_subquery_select_nested_with_aggregation_table_alias.reference deleted file mode 100644 index 75236c0daf7..00000000000 --- a/tests/queries/0_stateless/00973_live_view_with_subquery_select_nested_with_aggregation_table_alias.reference +++ /dev/null @@ -1,4 +0,0 @@ -6 1 -6 1 -12 2 -12 2 diff --git a/tests/queries/0_stateless/00973_live_view_with_subquery_select_nested_with_aggregation_table_alias.sql b/tests/queries/0_stateless/00973_live_view_with_subquery_select_nested_with_aggregation_table_alias.sql deleted file mode 100644 index c0f218edbd8..00000000000 --- a/tests/queries/0_stateless/00973_live_view_with_subquery_select_nested_with_aggregation_table_alias.sql +++ /dev/null @@ -1,22 +0,0 @@ --- Tags: no-replicated-database, no-parallel, no-fasttest - -SET allow_experimental_live_view = 1; - -DROP TABLE IF EXISTS lv; -DROP TABLE IF EXISTS mt; - -CREATE TABLE mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW lv AS SELECT * FROM ( SELECT sum(boo.x) FROM ( SELECT foo.x FROM (SELECT a AS x FROM mt) AS foo) AS boo ); - -INSERT INTO mt VALUES (1),(2),(3); - -SELECT *,_version FROM lv; -SELECT *,_version FROM lv; - -INSERT INTO mt VALUES (1),(2),(3); - -SELECT *,_version FROM lv; -SELECT *,_version FROM lv; - -DROP TABLE lv; -DROP TABLE mt; diff --git a/tests/queries/0_stateless/00973_live_view_with_subquery_select_table_alias.reference b/tests/queries/0_stateless/00973_live_view_with_subquery_select_table_alias.reference deleted file mode 100644 index ebf18a51290..00000000000 --- a/tests/queries/0_stateless/00973_live_view_with_subquery_select_table_alias.reference +++ /dev/null @@ -1,18 +0,0 @@ -1 1 -2 1 -3 1 -1 1 -2 1 -3 1 -1 2 -2 2 -3 2 -1 2 -2 2 -3 2 -1 2 -2 2 -3 2 -1 2 -2 2 -3 2 diff --git a/tests/queries/0_stateless/00973_live_view_with_subquery_select_table_alias.sql b/tests/queries/0_stateless/00973_live_view_with_subquery_select_table_alias.sql deleted file mode 100644 index 2d7da9d6df5..00000000000 --- a/tests/queries/0_stateless/00973_live_view_with_subquery_select_table_alias.sql +++ /dev/null @@ -1,22 +0,0 @@ --- Tags: no-replicated-database, no-parallel, no-fasttest - -SET allow_experimental_live_view = 1; - -DROP TABLE IF EXISTS lv; -DROP TABLE IF EXISTS mt; - -CREATE TABLE mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW lv 
AS SELECT foo.x FROM (SELECT a AS x FROM mt) AS foo; - -INSERT INTO mt VALUES (1),(2),(3); - -SELECT *,_version FROM lv; -SELECT *,_version FROM lv; - -INSERT INTO mt VALUES (1),(2),(3); - -SELECT *,_version FROM lv; -SELECT *,_version FROM lv; - -DROP TABLE lv; -DROP TABLE mt; diff --git a/tests/queries/0_stateless/00973_live_view_with_subquery_select_with_aggregation.reference b/tests/queries/0_stateless/00973_live_view_with_subquery_select_with_aggregation.reference deleted file mode 100644 index 75236c0daf7..00000000000 --- a/tests/queries/0_stateless/00973_live_view_with_subquery_select_with_aggregation.reference +++ /dev/null @@ -1,4 +0,0 @@ -6 1 -6 1 -12 2 -12 2 diff --git a/tests/queries/0_stateless/00973_live_view_with_subquery_select_with_aggregation.sql b/tests/queries/0_stateless/00973_live_view_with_subquery_select_with_aggregation.sql deleted file mode 100644 index bdf74949f57..00000000000 --- a/tests/queries/0_stateless/00973_live_view_with_subquery_select_with_aggregation.sql +++ /dev/null @@ -1,22 +0,0 @@ --- Tags: no-replicated-database, no-parallel, no-fasttest - -SET allow_experimental_live_view = 1; - -DROP TABLE IF EXISTS lv; -DROP TABLE IF EXISTS mt; - -CREATE TABLE mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW lv AS SELECT sum(a) FROM (SELECT a FROM mt); - -INSERT INTO mt VALUES (1),(2),(3); - -SELECT *,_version FROM lv; -SELECT *,_version FROM lv; - -INSERT INTO mt VALUES (1),(2),(3); - -SELECT *,_version FROM lv; -SELECT *,_version FROM lv; - -DROP TABLE lv; -DROP TABLE mt; diff --git a/tests/queries/0_stateless/00973_live_view_with_subquery_select_with_aggregation_in_subquery.reference b/tests/queries/0_stateless/00973_live_view_with_subquery_select_with_aggregation_in_subquery.reference deleted file mode 100644 index 75236c0daf7..00000000000 --- a/tests/queries/0_stateless/00973_live_view_with_subquery_select_with_aggregation_in_subquery.reference +++ /dev/null @@ -1,4 +0,0 @@ -6 1 -6 1 -12 2 -12 2 diff --git a/tests/queries/0_stateless/00973_live_view_with_subquery_select_with_aggregation_in_subquery.sql b/tests/queries/0_stateless/00973_live_view_with_subquery_select_with_aggregation_in_subquery.sql deleted file mode 100644 index a80af3e3337..00000000000 --- a/tests/queries/0_stateless/00973_live_view_with_subquery_select_with_aggregation_in_subquery.sql +++ /dev/null @@ -1,22 +0,0 @@ --- Tags: no-replicated-database, no-parallel, no-fasttest - -SET allow_experimental_live_view = 1; - -DROP TABLE IF EXISTS lv; -DROP TABLE IF EXISTS mt; - -CREATE TABLE mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW lv AS SELECT * FROM (SELECT sum(a) FROM mt); - -INSERT INTO mt VALUES (1),(2),(3); - -SELECT *,_version FROM lv; -SELECT *,_version FROM lv; - -INSERT INTO mt VALUES (1),(2),(3); - -SELECT *,_version FROM lv; -SELECT *,_version FROM lv; - -DROP TABLE lv; -DROP TABLE mt; diff --git a/tests/queries/0_stateless/00974_live_view_select_with_aggregation.reference b/tests/queries/0_stateless/00974_live_view_select_with_aggregation.reference deleted file mode 100644 index 6d50f0e9c3a..00000000000 --- a/tests/queries/0_stateless/00974_live_view_select_with_aggregation.reference +++ /dev/null @@ -1,2 +0,0 @@ -6 -21 diff --git a/tests/queries/0_stateless/00974_live_view_select_with_aggregation.sql b/tests/queries/0_stateless/00974_live_view_select_with_aggregation.sql deleted file mode 100644 index 09eea37de60..00000000000 --- a/tests/queries/0_stateless/00974_live_view_select_with_aggregation.sql +++ /dev/null @@ -1,20 +0,0 @@ --- Tags: 
no-replicated-database, no-parallel, no-fasttest - -SET allow_experimental_live_view = 1; - -DROP TABLE IF EXISTS lv; -DROP TABLE IF EXISTS mt; - -CREATE TABLE mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW lv AS SELECT * FROM mt; - -INSERT INTO mt VALUES (1),(2),(3); - -SELECT sum(a) FROM lv; - -INSERT INTO mt VALUES (4),(5),(6); - -SELECT sum(a) FROM lv; - -DROP TABLE lv; -DROP TABLE mt; diff --git a/tests/queries/0_stateless/00975_live_view_create.sql b/tests/queries/0_stateless/00975_live_view_create.sql deleted file mode 100644 index 1e7eb674bcc..00000000000 --- a/tests/queries/0_stateless/00975_live_view_create.sql +++ /dev/null @@ -1,11 +0,0 @@ --- Tags: no-replicated-database, no-parallel, no-fasttest - -SET allow_experimental_live_view = 1; - -DROP TABLE IF EXISTS mt; - -CREATE TABLE mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW lv AS SELECT * FROM mt; - -DROP TABLE lv; -DROP TABLE mt; diff --git a/tests/queries/0_stateless/00975_values_list.sql b/tests/queries/0_stateless/00975_values_list.sql index 40c86898966..35afc99e93e 100644 --- a/tests/queries/0_stateless/00975_values_list.sql +++ b/tests/queries/0_stateless/00975_values_list.sql @@ -12,8 +12,8 @@ SELECT * FROM VALUES('n UInt64, s String, ss String', (1 + 22, '23', toString(23 SELECT * FROM VALUES('a Decimal(4, 4), b String, c String', (divide(toDecimal32(5, 3), 3), 'a', 'b')); -SELECT * FROM VALUES('x Float64', toUInt64(-1)); -- { serverError 69; } -SELECT * FROM VALUES('x Float64', NULL); -- { serverError 53; } +SELECT * FROM VALUES('x Float64', toUInt64(-1)); -- { serverError 69 } +SELECT * FROM VALUES('x Float64', NULL); -- { serverError 53 } SELECT * FROM VALUES('x Nullable(Float64)', NULL); DROP TABLE values_list; diff --git a/tests/queries/0_stateless/00976_live_view_select_version.reference b/tests/queries/0_stateless/00976_live_view_select_version.reference deleted file mode 100644 index 453bd800469..00000000000 --- a/tests/queries/0_stateless/00976_live_view_select_version.reference +++ /dev/null @@ -1,3 +0,0 @@ -1 1 -2 1 -3 1 diff --git a/tests/queries/0_stateless/00976_live_view_select_version.sql b/tests/queries/0_stateless/00976_live_view_select_version.sql deleted file mode 100644 index 39ca587a49f..00000000000 --- a/tests/queries/0_stateless/00976_live_view_select_version.sql +++ /dev/null @@ -1,16 +0,0 @@ --- Tags: no-replicated-database, no-parallel, no-fasttest - -SET allow_experimental_live_view = 1; - -DROP TABLE IF EXISTS lv; -DROP TABLE IF EXISTS mt; - -CREATE TABLE mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW lv AS SELECT * FROM mt; - -INSERT INTO mt VALUES (1),(2),(3); - -SELECT *,_version FROM lv; - -DROP TABLE lv; -DROP TABLE mt; diff --git a/tests/queries/0_stateless/00976_ttl_with_old_parts.sql b/tests/queries/0_stateless/00976_ttl_with_old_parts.sql index c224ca30a3c..084112681e2 100644 --- a/tests/queries/0_stateless/00976_ttl_with_old_parts.sql +++ b/tests/queries/0_stateless/00976_ttl_with_old_parts.sql @@ -1,5 +1,3 @@ --- Tags: no-parallel - drop table if exists ttl; create table ttl (d Date, a Int) engine = MergeTree order by a partition by toDayOfMonth(d) settings remove_empty_parts = 0; diff --git a/tests/queries/0_stateless/00977_live_view_watch_events.sql b/tests/queries/0_stateless/00977_live_view_watch_events.sql deleted file mode 100644 index 3fe395b45fa..00000000000 --- a/tests/queries/0_stateless/00977_live_view_watch_events.sql +++ /dev/null @@ -1,22 +0,0 @@ --- Tags: no-replicated-database, no-parallel, no-fasttest - 
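-- Aside, not part of the patch: the live-view test files deleted in this
-- stretch all exercised the same experimental pattern. A minimal sketch,
-- assuming allow_experimental_live_view = 1; 'mt' and 'lv' are illustrative
-- names:
--
--   CREATE TABLE mt (a Int32) ENGINE = MergeTree ORDER BY tuple();
--   CREATE LIVE VIEW lv AS SELECT sum(a) FROM mt;
--   WATCH lv LIMIT 0;                  -- current sum and its _version, then returns
--   INSERT INTO mt VALUES (1),(2),(3);
--   WATCH lv LIMIT 0;                  -- refreshed sum (6) with an incremented _version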
-SET allow_experimental_live_view = 1;
-
-DROP TABLE IF EXISTS lv;
-DROP TABLE IF EXISTS mt;
-
-CREATE TABLE mt (a Int32) Engine=MergeTree order by tuple();
-CREATE LIVE VIEW lv AS SELECT sum(a) FROM mt;
-
-WATCH lv EVENTS LIMIT 0;
-
-INSERT INTO mt VALUES (1),(2),(3);
-
-WATCH lv EVENTS LIMIT 0;
-
-INSERT INTO mt VALUES (4),(5),(6);
-
-WATCH lv EVENTS LIMIT 0;
-
-DROP TABLE lv;
-DROP TABLE mt;
diff --git a/tests/queries/0_stateless/00978_live_view_watch.reference b/tests/queries/0_stateless/00978_live_view_watch.reference
deleted file mode 100644
index 6fbbedf1b21..00000000000
--- a/tests/queries/0_stateless/00978_live_view_watch.reference
+++ /dev/null
@@ -1,3 +0,0 @@
-0 1
-6 2
-21 3
diff --git a/tests/queries/0_stateless/00978_live_view_watch.sql b/tests/queries/0_stateless/00978_live_view_watch.sql
deleted file mode 100644
index a76b8aef0d0..00000000000
--- a/tests/queries/0_stateless/00978_live_view_watch.sql
+++ /dev/null
@@ -1,22 +0,0 @@
--- Tags: no-replicated-database, no-parallel, no-fasttest
-
-SET allow_experimental_live_view = 1;
-
-DROP TABLE IF EXISTS lv;
-DROP TABLE IF EXISTS mt;
-
-CREATE TABLE mt (a Int32) Engine=MergeTree order by tuple();
-CREATE LIVE VIEW lv AS SELECT sum(a) FROM mt;
-
-WATCH lv LIMIT 0;
-
-INSERT INTO mt VALUES (1),(2),(3);
-
-WATCH lv LIMIT 0;
-
-INSERT INTO mt VALUES (4),(5),(6);
-
-WATCH lv LIMIT 0;
-
-DROP TABLE lv;
-DROP TABLE mt;
diff --git a/tests/queries/0_stateless/00979_live_view_watch_continuous_aggregates.py b/tests/queries/0_stateless/00979_live_view_watch_continuous_aggregates.py
deleted file mode 100755
index ef144d044c2..00000000000
--- a/tests/queries/0_stateless/00979_live_view_watch_continuous_aggregates.py
+++ /dev/null
@@ -1,84 +0,0 @@
-#!/usr/bin/env python3
-# Tags: no-replicated-database, no-parallel, no-fasttest
-
-import os
-import sys
-import signal
-
-CURDIR = os.path.dirname(os.path.realpath(__file__))
-sys.path.insert(0, os.path.join(CURDIR, "helpers"))
-
-from client import client, prompt, end_of_block
-
-log = None
-# uncomment the line below for debugging
-# log=sys.stdout
-
-with client(name="client1>", log=log) as client1, client(
-    name="client2>", log=log
-) as client2:
-    client1.expect(prompt)
-    client2.expect(prompt)
-
-    client1.send("SET allow_experimental_live_view = 1")
-    client1.expect(prompt)
-    client2.send("SET allow_experimental_live_view = 1")
-    client2.expect(prompt)
-
-    client1.send("DROP TABLE IF EXISTS test.lv")
-    client1.expect(prompt)
-    client1.send("DROP TABLE IF EXISTS test.mt")
-    client1.expect(prompt)
-    client1.send(
-        "CREATE TABLE test.mt (time DateTime, location String, temperature UInt32) Engine=MergeTree order by tuple()"
-    )
-    client1.expect(prompt)
-    client1.send(
-        "CREATE LIVE VIEW test.lv AS SELECT toStartOfDay(time) AS day, location, avg(temperature) FROM test.mt GROUP BY day, location ORDER BY day, location"
-    )
-    client1.expect(prompt)
-    client1.send("WATCH test.lv FORMAT CSVWithNames")
-    client2.send(
-        "INSERT INTO test.mt VALUES ('2019-01-01 00:00:00','New York',60),('2019-01-01 00:10:00','New York',70)"
-    )
-    client2.expect(prompt)
-    client1.expect(r'"2019-01-01 00:00:00","New York",65')
-    client2.send(
-        "INSERT INTO test.mt VALUES ('2019-01-01 00:00:00','Moscow',30),('2019-01-01 00:10:00', 'Moscow', 40)"
-    )
-    client2.expect(prompt)
-    client1.expect(r'"2019-01-01 00:00:00","Moscow",35')
-    client1.expect(r'"2019-01-01 00:00:00","New York",65')
-    client2.send(
-        "INSERT INTO test.mt VALUES ('2019-01-02 00:00:00','New York',50),('2019-01-02 00:10:00','New York',60)"
-    )
-    client2.expect(prompt)
-    client1.expect(r'"2019-01-01 00:00:00","Moscow",35')
-    client1.expect(r'"2019-01-01 00:00:00","New York",65')
-    client1.expect(r'"2019-01-02 00:00:00","New York",55')
-    client2.send(
-        "INSERT INTO test.mt VALUES ('2019-01-02 00:00:00','Moscow',20),('2019-01-02 00:10:00', 'Moscow', 30)"
-    )
-    client2.expect(prompt)
-    client1.expect(r'"2019-01-01 00:00:00","Moscow",35')
-    client1.expect(r'"2019-01-01 00:00:00","New York",65')
-    client1.expect(r'"2019-01-02 00:00:00","Moscow",25')
-    client1.expect(r'"2019-01-02 00:00:00","New York",55')
-    client2.send(
-        "INSERT INTO test.mt VALUES ('2019-01-02 00:03:00','New York',40),('2019-01-02 00:06:00','New York',30)"
-    )
-    client2.expect(prompt)
-    client1.expect(r'"2019-01-01 00:00:00","Moscow",35')
-    client1.expect(r'"2019-01-01 00:00:00","New York",65')
-    client1.expect(r'"2019-01-02 00:00:00","Moscow",25')
-    client1.expect(r'"2019-01-02 00:00:00","New York",45')
-    # send Ctrl-C
-    client1.send("\x03", eol="")
-    match = client1.expect("(%s)|([#\$] )" % prompt)
-    if match.groups()[1]:
-        client1.send(client1.command)
-        client1.expect(prompt)
-    client1.send("DROP TABLE test.lv")
-    client1.expect(prompt)
-    client1.send("DROP TABLE test.mt")
-    client1.expect(prompt)
diff --git a/tests/queries/0_stateless/00979_live_view_watch_live.py b/tests/queries/0_stateless/00979_live_view_watch_live.py
deleted file mode 100755
index b099b56ae48..00000000000
--- a/tests/queries/0_stateless/00979_live_view_watch_live.py
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/usr/bin/env python3
-# Tags: no-replicated-database, no-parallel, no-fasttest
-
-import os
-import sys
-import signal
-
-CURDIR = os.path.dirname(os.path.realpath(__file__))
-sys.path.insert(0, os.path.join(CURDIR, "helpers"))
-
-from client import client, prompt, end_of_block
-
-log = None
-# uncomment the line below for debugging
-# log=sys.stdout
-
-with client(name="client1>", log=log) as client1, client(
-    name="client2>", log=log
-) as client2:
-    client1.expect(prompt)
-    client2.expect(prompt)
-
-    client1.send("SET allow_experimental_live_view = 1")
-    client1.expect(prompt)
-    client2.send("SET allow_experimental_live_view = 1")
-    client2.expect(prompt)
-
-    client1.send("DROP TABLE IF EXISTS test.lv")
-    client1.expect(prompt)
-    client1.send("DROP TABLE IF EXISTS test.mt")
-    client1.expect(prompt)
-    client1.send("CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()")
-    client1.expect(prompt)
-    client1.send("CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt")
-    client1.expect(prompt)
-    client1.send("WATCH test.lv")
-    client1.expect("_version")
-    client1.expect(r"0.*1" + end_of_block)
-    client2.send("INSERT INTO test.mt VALUES (1),(2),(3)")
-    client1.expect(r"6.*2" + end_of_block)
-    client2.expect(prompt)
-    client2.send("INSERT INTO test.mt VALUES (4),(5),(6)")
-    client1.expect(r"21.*3" + end_of_block)
-    client2.expect(prompt)
-    for i in range(1, 129):
-        client2.send("INSERT INTO test.mt VALUES (1)")
-        client1.expect(r"%d.*%d" % (21 + i, 3 + i) + end_of_block)
-        client2.expect(prompt)
-    # send Ctrl-C
-    client1.send("\x03", eol="")
-    match = client1.expect("(%s)|([#\$] )" % prompt)
-    if match.groups()[1]:
-        client1.send(client1.command)
-        client1.expect(prompt)
-    client1.send("DROP TABLE test.lv")
-    client1.expect(prompt)
-    client1.send("DROP TABLE test.mt")
-    client1.expect(prompt)
diff --git a/tests/queries/0_stateless/00979_live_view_watch_live_with_subquery.py b/tests/queries/0_stateless/00979_live_view_watch_live_with_subquery.py
deleted file mode 100755
index a7c1adac214..00000000000
--- a/tests/queries/0_stateless/00979_live_view_watch_live_with_subquery.py
+++ /dev/null
@@ -1,60 +0,0 @@
-#!/usr/bin/env python3
-# Tags: no-replicated-database, no-parallel, no-fasttest
-
-import os
-import sys
-import signal
-
-CURDIR = os.path.dirname(os.path.realpath(__file__))
-sys.path.insert(0, os.path.join(CURDIR, "helpers"))
-
-from client import client, prompt, end_of_block
-
-log = None
-# uncomment the line below for debugging
-# log=sys.stdout
-
-with client(name="client1>", log=log) as client1, client(
-    name="client2>", log=log
-) as client2:
-    client1.expect(prompt)
-    client2.expect(prompt)
-
-    client1.send("SET allow_experimental_live_view = 1")
-    client1.expect(prompt)
-    client2.send("SET allow_experimental_live_view = 1")
-    client2.expect(prompt)
-
-    client1.send("DROP TABLE IF EXISTS test.lv")
-    client1.expect(prompt)
-    client1.send(" DROP TABLE IF EXISTS test.mt")
-    client1.expect(prompt)
-    client1.send("CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()")
-    client1.expect(prompt)
-    client1.send(
-        "CREATE LIVE VIEW test.lv AS SELECT * FROM ( SELECT sum(A.a) FROM (SELECT * FROM test.mt) AS A )"
-    )
-    client1.expect(prompt)
-    client1.send("WATCH test.lv")
-    client1.expect("_version")
-    client1.expect(r"0.*1" + end_of_block)
-    client2.send("INSERT INTO test.mt VALUES (1),(2),(3)")
-    client1.expect(r"6.*2" + end_of_block)
-    client2.expect(prompt)
-    client2.send("INSERT INTO test.mt VALUES (4),(5),(6)")
-    client1.expect(r"21.*3" + end_of_block)
-    client2.expect(prompt)
-    for i in range(1, 129):
-        client2.send("INSERT INTO test.mt VALUES (1)")
-        client1.expect(r"%d.*%d" % (21 + i, 3 + i) + end_of_block)
-        client2.expect(prompt)
-    # send Ctrl-C
-    client1.send("\x03", eol="")
-    match = client1.expect("(%s)|([#\$] )" % prompt)
-    if match.groups()[1]:
-        client1.send(client1.command)
-        client1.expect(prompt)
-    client1.send("DROP TABLE test.lv")
-    client1.expect(prompt)
-    client1.send("DROP TABLE test.mt")
-    client1.expect(prompt)
diff --git a/tests/queries/0_stateless/00980_merge_alter_settings.sql b/tests/queries/0_stateless/00980_merge_alter_settings.sql
index 02728a6ba8b..174d1fcd508 100644
--- a/tests/queries/0_stateless/00980_merge_alter_settings.sql
+++ b/tests/queries/0_stateless/00980_merge_alter_settings.sql
@@ -1,4 +1,4 @@
--- Tags: no-replicated-database, no-parallel
+-- Tags: no-replicated-database
 -- Tag no-replicated-database: Unsupported type of ALTER query
 
 DROP TABLE IF EXISTS log_for_alter;
diff --git a/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.sql b/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.sql
index 1b291bf84d2..b049e20cb6d 100644
--- a/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.sql
+++ b/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.sql
@@ -1,4 +1,4 @@
--- Tags: zookeeper, no-replicated-database, no-parallel
+-- Tags: zookeeper, no-replicated-database
 -- Tag no-replicated-database: Unsupported type of ALTER query
 
 DROP TABLE IF EXISTS replicated_table_for_alter1;
diff --git a/tests/queries/0_stateless/00988_expansion_aliases_limit.sql b/tests/queries/0_stateless/00988_expansion_aliases_limit.sql
index 15c9f82da6f..3c2442b15b5 100644
--- a/tests/queries/0_stateless/00988_expansion_aliases_limit.sql
+++ b/tests/queries/0_stateless/00988_expansion_aliases_limit.sql
@@ -1 +1 @@
-SELECT 1 AS a, a + a AS b, b + b AS c, c + c AS d, d + d AS e, e + e AS f, f + f AS g, g + g AS h, h + h AS i, i + i AS j, j + j AS k, k + k AS l, l + l AS m, m + m AS n, n + n AS o, o + o AS p, p + p AS q, q + q AS r, r + r AS s, s + s AS t, t + t AS u, u + u AS v, v + v AS w, w + w AS x, x + x AS y, y + y AS z; -- { serverError 168 }
+SELECT 1 AS a, a + a AS b, b + b AS c, c + c AS d, d + d AS e, e + e AS f, f + f AS g, g + g AS h, h + h AS i, i + i AS j, j + j AS k, k + k AS l, l + l AS m, m + m AS n, n + n AS o, o + o AS p, p + p AS q, q + q AS r, r + r AS s, s + s AS t, t + t AS u, u + u AS v, v + v AS w, w + w AS x, x + x AS y, y + y AS z; -- { serverError 36, 168 }
diff --git a/tests/queries/0_stateless/00990_hasToken.python b/tests/queries/0_stateless/00990_hasToken.python
index 7d3775adc9d..e9bc514474a 100644
--- a/tests/queries/0_stateless/00990_hasToken.python
+++ b/tests/queries/0_stateless/00990_hasToken.python
@@ -12,35 +12,46 @@ HAYSTACKS = [
 
 NEEDLE = "needle"
 
-HAY_RE = re.compile(r'\bhay\b', re.IGNORECASE)
-NEEDLE_RE = re.compile(r'\bneedle\b', re.IGNORECASE)
+HAY_RE = re.compile(r"\bhay\b", re.IGNORECASE)
+NEEDLE_RE = re.compile(r"\bneedle\b", re.IGNORECASE)
+
 
 def replace_follow_case(replacement):
     def func(match):
         g = match.group()
-        if g.islower(): return replacement.lower()
-        if g.istitle(): return replacement.title()
-        if g.isupper(): return replacement.upper()
+        if g.islower():
+            return replacement.lower()
+        if g.istitle():
+            return replacement.title()
+        if g.isupper():
+            return replacement.upper()
         return replacement
+
     return func
+
 
 def replace_separators(query, new_sep):
-    SEP_RE = re.compile('\\s+')
+    SEP_RE = re.compile("\\s+")
     result = SEP_RE.sub(new_sep, query)
     return result
 
-def enlarge_haystack(query, times, separator=''):
-    return HAY_RE.sub(replace_follow_case(('hay' + separator) * times), query)
+
+def enlarge_haystack(query, times, separator=""):
+    return HAY_RE.sub(replace_follow_case(("hay" + separator) * times), query)
+
 
 def small_needle(query):
-    return NEEDLE_RE.sub(replace_follow_case('n'), query)
+    return NEEDLE_RE.sub(replace_follow_case("n"), query)
+
 
 def remove_needle(query):
-    return NEEDLE_RE.sub('', query)
+    return NEEDLE_RE.sub("", query)
+
 
 def replace_needle(query, new_needle):
     return NEEDLE_RE.sub(new_needle, query)
 
+
 # with str.lower, str.upper, str.title and such
 def transform_needle(query, string_transformation_func):
     def replace_with_transformation(match):
@@ -49,19 +60,21 @@ def transform_needle(query, string_transformation_func):
 
     return NEEDLE_RE.sub(replace_with_transformation, query)
 
-def create_cases(case_sensitive_func, case_insensitive_func, table_row_template, table_query_template, const_query_template):
+
+def create_cases(
+    case_sensitive_func,
+    case_insensitive_func,
+    table_row_template,
+    table_query_template,
+    const_query_template,
+):
     const_queries = []
     table_rows = []
     table_queries = set()
 
     def add_case(func, haystack, needle, match):
         match = int(match)
-        args = dict(
-            func = func,
-            haystack = haystack,
-            needle = needle,
-            match = match
-        )
+        args = dict(func=func, haystack=haystack, needle=needle, match=match)
         const_queries.append(const_query_template.substitute(args))
         table_queries.add(table_query_template.substitute(args))
         table_rows.append(table_row_template.substitute(args))
@@ -69,14 +82,28 @@ def create_cases(case_sensitive_func, case_insensitive_func, table_row_template,
     def add_case_sensitive(haystack, needle, match):
         add_case(case_sensitive_func, haystack, needle, match)
         if match:
-            add_case(case_sensitive_func, transform_needle(haystack, str.swapcase), transform_needle(needle, str.swapcase), match)
+            add_case(
+                case_sensitive_func,
+                transform_needle(haystack, str.swapcase),
+                transform_needle(needle, str.swapcase),
+                match,
+            )
 
     def add_case_insensitive(haystack, needle, match):
         add_case(case_insensitive_func, haystack, needle, match)
         if match:
-            add_case(case_insensitive_func, transform_needle(haystack, str.swapcase), needle, match)
-            add_case(case_insensitive_func, haystack, transform_needle(needle, str.swapcase), match)
-
+            add_case(
+                case_insensitive_func,
+                transform_needle(haystack, str.swapcase),
+                needle,
+                match,
+            )
+            add_case(
+                case_insensitive_func,
+                haystack,
+                transform_needle(needle, str.swapcase),
+                match,
+            )
 
     # Negative cases
     add_case_sensitive(remove_needle(HAYSTACKS[0]), NEEDLE, False)
@@ -85,7 +112,7 @@ def create_cases(case_sensitive_func, case_insensitive_func, table_row_template,
     for haystack in HAYSTACKS:
         add_case_sensitive(transform_needle(haystack, str.swapcase), NEEDLE, False)
 
-        sep = ''
+        sep = ""
         h = replace_separators(haystack, sep)
         add_case_sensitive(h, NEEDLE, False)
@@ -102,8 +129,7 @@ def create_cases(case_sensitive_func, case_insensitive_func, table_row_template,
         add_case_sensitive(haystack, NEEDLE, True)
         add_case_insensitive(haystack, NEEDLE, True)
 
-
-        for sep in list(''' ,'''):
+        for sep in list(""" ,"""):
             h = replace_separators(haystack, sep)
             add_case_sensitive(h, NEEDLE, True)
             add_case_sensitive(small_needle(h), small_needle(NEEDLE), True)
@@ -114,32 +140,43 @@ def create_cases(case_sensitive_func, case_insensitive_func, table_row_template,
             add_case_insensitive(enlarge_haystack(h, 200, sep), NEEDLE, True)
 
         # case insensitivity works only on ASCII strings
-        add_case_sensitive(replace_needle(h, 'иголка'), replace_needle(NEEDLE, 'иголка'), True)
-        add_case_sensitive(replace_needle(h, '指针'), replace_needle(NEEDLE, '指针'), True)
+        add_case_sensitive(
+            replace_needle(h, "иголка"), replace_needle(NEEDLE, "иголка"), True
+        )
+        add_case_sensitive(
+            replace_needle(h, "指针"), replace_needle(NEEDLE, "指针"), True
+        )
 
-    for sep in list('''~!@$%^&*()-=+|]}[{";:/?.><\t''') + [r'\\\\']:
+    for sep in list("""~!@$%^&*()-=+|]}[{";:/?.><\t""") + [r"\\\\"]:
         h = replace_separators(HAYSTACKS[0], sep)
         add_case(case_sensitive_func, h, NEEDLE, True)
 
     return table_rows, table_queries, const_queries
 
-def main():
+
+def main():
     def query(x):
         print(x)
 
-    CONST_QUERY = Template("""SELECT ${func}('${haystack}', '${needle}'), ' expecting ', ${match};""")
-    TABLE_QUERY = Template("""WITH '${needle}' as n
+    CONST_QUERY = Template(
+        """SELECT ${func}('${haystack}', '${needle}'), ' expecting ', ${match};"""
+    )
+    TABLE_QUERY = Template(
+        """WITH '${needle}' as n
 SELECT haystack, needle, ${func}(haystack, n) as result
 FROM ht
-WHERE func = '${func}' AND needle = n AND result != match;""")
+WHERE func = '${func}' AND needle = n AND result != match;"""
+    )
     TABLE_ROW = Template("""('${haystack}', '${needle}', ${match}, '${func}')""")
 
-    rows, table_queries, const_queries = create_cases('hasToken', 'hasTokenCaseInsensitive', TABLE_ROW, TABLE_QUERY, CONST_QUERY)
+    rows, table_queries, const_queries = create_cases(
+        "hasToken", "hasTokenCaseInsensitive", TABLE_ROW, TABLE_QUERY, CONST_QUERY
+    )
    for q in const_queries:
         query(q)
 
-    query("""DROP TABLE IF EXISTS ht;
+    query(
+        """DROP TABLE IF EXISTS ht;
 CREATE TABLE IF NOT EXISTS ht
 (
@@ -150,11 +187,15 @@ def main():
 ) ENGINE MergeTree()
 ORDER BY haystack;
-INSERT INTO ht VALUES {values};""".format(values=", ".join(rows)))
+INSERT INTO ht VALUES {values};""".format(
+            values=", ".join(rows)
+        )
+    )
     for q in sorted(table_queries):
         query(q)
 
     query("""DROP TABLE ht""")
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     main()
diff --git a/tests/queries/0_stateless/00990_hasToken_and_tokenbf.reference b/tests/queries/0_stateless/00990_hasToken_and_tokenbf.reference
index 4b3beccf5f1..c462030edbf 100644
--- a/tests/queries/0_stateless/00990_hasToken_and_tokenbf.reference
+++ b/tests/queries/0_stateless/00990_hasToken_and_tokenbf.reference
@@ -2,6 +2,12 @@
 0
 2007
 2007
+0
+2007
+2007
+2007
+2007
+2007
 2007
 0
 2007
diff --git a/tests/queries/0_stateless/00990_hasToken_and_tokenbf.sql b/tests/queries/0_stateless/00990_hasToken_and_tokenbf.sql
index 41676905771..8e88af40046 100644
--- a/tests/queries/0_stateless/00990_hasToken_and_tokenbf.sql
+++ b/tests/queries/0_stateless/00990_hasToken_and_tokenbf.sql
@@ -18,15 +18,35 @@ SELECT max(id) FROM bloom_filter WHERE hasTokenCaseInsensitive(s, 'abc,def,zzz')
 SELECT max(id) FROM bloom_filter WHERE hasTokenOrNull(s, 'abc,def,zzz');
 SELECT max(id) FROM bloom_filter WHERE hasTokenCaseInsensitiveOrNull(s, 'abc,def,zzz');
 
-select max(id) from bloom_filter where hasTokenCaseInsensitive(s, 'ABC');
-select max(id) from bloom_filter where hasTokenCaseInsensitive(s, 'zZz');
+-- as table "bloom_filter" but w/o index_granularity_bytes
+drop table if exists bloom_filter2;
+create table bloom_filter2
+(
+    id UInt64,
+    s String,
+    index tok_bf3 (s, lower(s)) type tokenbf_v1(512, 3, 0) GRANULARITY 1
+) engine = MergeTree() order by id settings index_granularity = 8;
+
+insert into bloom_filter2 select number, 'yyy,uuu' from numbers(1024);
+insert into bloom_filter2 select number+2000, 'ABC,def,zzz' from numbers(8);
+insert into bloom_filter2 select number+3000, 'yyy,uuu' from numbers(1024);
+insert into bloom_filter2 select number+3000, 'abcdefzzz' from numbers(1024);
 
 set max_rows_to_read = 16;
 
 SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'abc');
+SELECT max(id) FROM bloom_filter WHERE hasTokenOrNull(s, 'abc');
 SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'ABC');
+select max(id) from bloom_filter where hasTokenCaseInsensitive(s, 'ABC');
+select max(id) from bloom_filter where hasTokenCaseInsensitiveOrNull(s, 'ABC');
 SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'def');
 SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'zzz');
+select max(id) from bloom_filter where hasTokenCaseInsensitive(s, 'zZz');
+
+select max(id) from bloom_filter2 where hasToken(s, 'ABC');
+select max(id) from bloom_filter2 where hasToken(s, 'abc');
+select max(id) from bloom_filter2 where hasTokenCaseInsensitive(s, 'abc');
+select max(id) from bloom_filter2 where hasTokenCaseInsensitive(s, 'ABC');
 
 -- invert result
 -- this does not work as expected, reading more rows than it should
diff --git a/tests/queries/0_stateless/00991_live_view_watch_event_live.python b/tests/queries/0_stateless/00991_live_view_watch_event_live.python
deleted file mode 100644
index 901d388ec01..00000000000
--- a/tests/queries/0_stateless/00991_live_view_watch_event_live.python
+++ /dev/null
@@ -1,81 +0,0 @@
-#!/usr/bin/env python3
-
-import subprocess
-import threading
-import queue as queue
-import os
-import sys
-import signal
-
-
-CLICKHOUSE_CLIENT = os.environ.get('CLICKHOUSE_CLIENT')
-CLICKHOUSE_CURL = os.environ.get('CLICKHOUSE_CURL')
-CLICKHOUSE_URL = os.environ.get('CLICKHOUSE_URL')
-
-
-def send_query(query):
-    cmd = list(CLICKHOUSE_CLIENT.split())
-    cmd += ['--query', query]
-    # print(cmd)
-    return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout
-
-
-def send_query_in_process_group(query):
-    cmd = list(CLICKHOUSE_CLIENT.split())
-    cmd += ['--query', query]
-    # print(cmd)
-    return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, preexec_fn=os.setsid)
-
-
-def read_lines_and_push_to_queue(pipe, queue):
-    try:
-        for line in iter(pipe.readline, ''):
-            line = line.strip()
-            print(line)
-            sys.stdout.flush()
-            queue.put(line)
-    except KeyboardInterrupt:
-        pass
-
-    queue.put(None)
-
-
-def test():
-    send_query('DROP TABLE IF EXISTS test.lv').read()
-    send_query('DROP TABLE IF EXISTS test.mt').read()
-    send_query('CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()').read()
-    send_query('CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt').read()
-
-    q = queue.Queue()
-    p = send_query_in_process_group('WATCH test.lv')
-    thread = threading.Thread(target=read_lines_and_push_to_queue, args=(p.stdout, q))
-    thread.start()
-
-    line = q.get()
-    print(line)
-    assert (line == '0\t1')
-
-    send_query('INSERT INTO test.mt VALUES (1),(2),(3)').read()
-    line = q.get()
-    print(line)
-    assert (line == '6\t2')
-
-    send_query('INSERT INTO test.mt VALUES (4),(5),(6)').read()
-    line = q.get()
-    print(line)
-    assert (line == '21\t3')
-
-    # Send Ctrl+C to client.
-    os.killpg(os.getpgid(p.pid), signal.SIGINT)
-    # This insert shouldn't affect lv.
-    send_query('INSERT INTO test.mt VALUES (7),(8),(9)').read()
-    line = q.get()
-    print(line)
-    assert (line is None)
-
-    send_query('DROP TABLE if exists test.lv').read()
-    send_query('DROP TABLE if exists test.lv').read()
-
-    thread.join()
-
-test()
diff --git a/tests/queries/0_stateless/00991_live_view_watch_event_live.reference b/tests/queries/0_stateless/00991_live_view_watch_event_live.reference
deleted file mode 100644
index 1e94cdade41..00000000000
--- a/tests/queries/0_stateless/00991_live_view_watch_event_live.reference
+++ /dev/null
@@ -1,7 +0,0 @@
-0 1
-0 1
-6 2
-6 2
-21 3
-21 3
-None
diff --git a/tests/queries/0_stateless/00991_live_view_watch_http.python b/tests/queries/0_stateless/00991_live_view_watch_http.python
deleted file mode 100755
index d5a1e6e8ed9..00000000000
--- a/tests/queries/0_stateless/00991_live_view_watch_http.python
+++ /dev/null
@@ -1,63 +0,0 @@
-#!/usr/bin/env python3
-
-import subprocess
-import threading
-import queue as queue
-import os
-import sys
-
-
-CLICKHOUSE_CLIENT = os.environ.get('CLICKHOUSE_CLIENT')
-CLICKHOUSE_CURL = os.environ.get('CLICKHOUSE_CURL')
-CLICKHOUSE_URL = os.environ.get('CLICKHOUSE_URL')
-
-
-def send_query(query):
-    cmd = list(CLICKHOUSE_CLIENT.split())
-    cmd += ['--query', query]
-    # print(cmd)
-    return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout
-
-
-def send_http_query(query):
-    cmd = list(CLICKHOUSE_CURL.split())  # list(['curl', '-sSN', '--max-time', '10'])
-    cmd += ['-sSN', CLICKHOUSE_URL, '-d', query]
-    return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout
-
-
-def read_lines_and_push_to_queue(pipe, queue):
-    for line in iter(pipe.readline, ''):
-        line = line.strip()
-        print(line)
-        sys.stdout.flush()
-        queue.put(line)
-
-    queue.put(None)
-
-
-def test():
-    send_query('DROP TABLE IF EXISTS test.lv').read()
-    send_query('DROP TABLE IF EXISTS test.mt').read()
-    send_query('CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()').read()
-    send_query('CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt').read()
-
-    q = queue.Queue()
-    pipe = send_http_query('WATCH test.lv')
-    thread = threading.Thread(target=read_lines_and_push_to_queue, args=(pipe, q))
-    thread.start()
-
-    line = q.get()
-    print(line)
-    assert (line == '0\t1')
-
-    send_query('INSERT INTO test.mt VALUES (1),(2),(3)').read()
-    line = q.get()
-    print(line)
-    assert (line == '6\t2')
-
-    send_query('DROP TABLE if exists test.lv').read()
-    send_query('DROP TABLE if exists test.lv').read()
-
-    thread.join()
-
-test()
diff --git a/tests/queries/0_stateless/00991_live_view_watch_http.reference b/tests/queries/0_stateless/00991_live_view_watch_http.reference
deleted file mode 100644
index 489457d751b..00000000000
--- a/tests/queries/0_stateless/00991_live_view_watch_http.reference
+++ /dev/null
@@ -1,4 +0,0 @@
-0 1
-0 1
-6 2
-6 2
diff --git a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh
index 8dbd10fc27b..5b1c50262bf 100755
--- a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh
+++ b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh
@@ -13,8 +13,8 @@ $CLICKHOUSE_CLIENT -n -q "
     DROP TABLE IF EXISTS alter_table0;
     DROP TABLE IF EXISTS alter_table1;
 
-    CREATE TABLE alter_table0 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r1') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0;
-    CREATE TABLE alter_table1 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r2') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0
+    CREATE TABLE alter_table0 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r1') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0, replicated_max_mutations_in_one_entry = $(($RANDOM / 50 + 100));
+    CREATE TABLE alter_table1 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r2') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0, replicated_max_mutations_in_one_entry = $(($RANDOM / 50 + 200));
 "
 
 function thread1()
diff --git a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh
index f143c97bdf4..f4f38ad9c83 100755
--- a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh
+++ b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-# Tags: race, zookeeper, no-parallel, no-upgrade-check
+# Tags: race, zookeeper, no-parallel, no-upgrade-check, no-replicated-database
 
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
@@ -56,12 +56,13 @@ function thread6()
     while true; do
         REPLICA=$(($RANDOM % 10))
         $CLICKHOUSE_CLIENT -n -q "DROP TABLE IF EXISTS alter_table_$REPLICA;
-            CREATE TABLE alter_table_$REPLICA (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r_$REPLICA') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0;";
+            CREATE TABLE alter_table_$REPLICA (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16))
+            ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r_$REPLICA') ORDER BY a PARTITION BY b % 10
+            SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0, replicated_max_mutations_in_one_entry = $(($RANDOM / 50));";
         sleep 0.$RANDOM;
     done
 }
 
-
 # https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout
 export -f thread1;
 export -f thread2;
diff --git a/tests/queries/0_stateless/00995_order_by_with_fill.reference b/tests/queries/0_stateless/00995_order_by_with_fill.reference
index 0036aabda40..4863c83c544 100644
--- a/tests/queries/0_stateless/00995_order_by_with_fill.reference
+++ b/tests/queries/0_stateless/00995_order_by_with_fill.reference
@@ -1,4 +1,9 @@
-*** table without fill to compare ***
+--{ echoOn }
+DROP TABLE IF EXISTS fill;
+CREATE TABLE fill (date Date, val Int, str String) ENGINE = Memory;
+INSERT INTO fill VALUES (toDate('2019-05-24'), 13, 'sd0')(toDate('2019-05-10'), 16, 'vp7')(toDate('2019-05-25'), 17, '0ei')(toDate('2019-05-30'), 18, '3kd')(toDate('2019-05-15'), 27, 'enb')(toDate('2019-06-04'), 5, '6az')(toDate('2019-05-23'), 15, '01v')(toDate('2019-05-08'), 28, 'otf')(toDate('2019-05-19'), 20, 'yfh')(toDate('2019-05-07'), 26, '2ke')(toDate('2019-05-07'), 18, 'prh')(toDate('2019-05-09'), 25, '798')(toDate('2019-05-10'), 1, 'myj')(toDate('2019-05-11'), 18, '3s2')(toDate('2019-05-23'), 29, '72y');
+-- *** table without fill to compare ***
+SELECT * FROM fill ORDER BY date, val;
 2019-05-07 18 prh
 2019-05-07 26 2ke
 2019-05-08 28 otf
@@ -14,7 +19,9 @@
 2019-05-25 17 0ei
 2019-05-30 18 3kd
 2019-06-04 5 6az
-*** date WITH FILL, val ***
+-- Some useful cases
+
+SELECT * FROM fill ORDER BY date WITH FILL, val;
 2019-05-07 18 prh
 2019-05-07 26 2ke
 2019-05-08 28 otf
@@ -47,7 +54,7 @@
 2019-06-02 0
 2019-06-03 0
 2019-06-04 5 6az
-*** date WITH FILL FROM 2019-05-01 TO 2019-05-31, val WITH FILL ***
+SELECT * FROM fill ORDER BY date WITH FILL FROM toDate('2019-05-01') TO toDate('2019-05-31'), val WITH FILL;
 2019-05-01 0
 2019-05-02 0
 2019-05-03 0
@@ -116,7 +123,7 @@
 2019-05-29 0
 2019-05-30 18 3kd
 2019-06-04 5 6az
-*** date DESC WITH FILL, val WITH FILL FROM 1 TO 6 ***
+SELECT * FROM fill ORDER BY date DESC WITH FILL, val WITH FILL FROM 1 TO 6;
 2019-06-04 1
 2019-06-04 2
 2019-06-04 3
@@ -275,7 +282,9 @@
 2019-05-07 5
 2019-05-07 18 prh
 2019-05-07 26 2ke
-*** date DESC WITH FILL TO 2019-05-01 STEP -2, val DESC WITH FILL FROM 10 TO -5 STEP -3 ***
+-- Some weird cases
+
+SELECT * FROM fill ORDER BY date DESC WITH FILL TO toDate('2019-05-01') STEP -2, val DESC WITH FILL FROM 10 TO -5 STEP -3;
 2019-06-04 10
 2019-06-04 7
 2019-06-04 5 6az
@@ -376,7 +385,7 @@
 2019-05-03 4
 2019-05-03 1
 2019-05-03 -2
-*** date WITH FILL TO 2019-06-23 STEP 3, val WITH FILL FROM -10 STEP 2
+SELECT * FROM fill ORDER BY date WITH FILL TO toDate('2019-06-23') STEP 3, val WITH FILL FROM -10 STEP 2;
 2019-05-07 -10
 2019-05-07 -8
 2019-05-07 -6
@@ -463,14 +472,18 @@
 2019-06-15 -10
 2019-06-18 -10
 2019-06-21 -10
-*** table without fill to compare ***
+DROP TABLE fill;
+CREATE TABLE fill (a UInt32, b Int32) ENGINE = Memory;
+INSERT INTO fill VALUES (1, -2), (1, 3), (3, 2), (5, -1), (6, 5), (8, 0);
+-- *** table without fill to compare ***
+SELECT * FROM fill ORDER BY a, b;
 1 -2
 1 3
 3 2
 5 -1
 6 5
 8 0
-*** a WITH FILL, b WITH fill ***
+SELECT * FROM fill ORDER BY a WITH FILL, b WITH fill;
 1 -2
 1 -1
 1 0
@@ -484,7 +497,7 @@
 6 5
 7 0
 8 0
-*** a WITH FILL, b WITH fill TO 6 STEP 2 ***
+SELECT * FROM fill ORDER BY a WITH FILL, b WITH fill TO 6 STEP 2;
 1 -2
 1 0
 1 2
@@ -503,3 +516,8 @@
 8 0
 8 2
 8 4
+SELECT * FROM fill ORDER BY a WITH FILL STEP -1; -- { serverError 475 }
+SELECT * FROM fill ORDER BY a WITH FILL FROM 10 TO 1; -- { serverError 475 }
+SELECT * FROM fill ORDER BY a DESC WITH FILL FROM 1 TO 10; -- { serverError 475 }
+SELECT * FROM fill ORDER BY a WITH FILL FROM -10 to 10; -- { serverError 475 }
+DROP TABLE fill;
diff --git a/tests/queries/0_stateless/00995_order_by_with_fill.sql b/tests/queries/0_stateless/00995_order_by_with_fill.sql
index 7f7f85bdb5b..fe7a6e5d4ce 100644
--- a/tests/queries/0_stateless/00995_order_by_with_fill.sql
+++ b/tests/queries/0_stateless/00995_order_by_with_fill.sql
@@ -1,40 +1,34 @@
+--{ echoOn }
 DROP TABLE IF EXISTS fill;
 CREATE TABLE fill (date Date, val Int, str String) ENGINE = Memory;
 INSERT INTO fill VALUES (toDate('2019-05-24'), 13, 'sd0')(toDate('2019-05-10'), 16, 'vp7')(toDate('2019-05-25'), 17, '0ei')(toDate('2019-05-30'), 18, '3kd')(toDate('2019-05-15'), 27, 'enb')(toDate('2019-06-04'), 5, '6az')(toDate('2019-05-23'), 15, '01v')(toDate('2019-05-08'), 28, 'otf')(toDate('2019-05-19'), 20, 'yfh')(toDate('2019-05-07'), 26, '2ke')(toDate('2019-05-07'), 18, 'prh')(toDate('2019-05-09'), 25, '798')(toDate('2019-05-10'), 1, 'myj')(toDate('2019-05-11'), 18, '3s2')(toDate('2019-05-23'), 29, '72y');
 
-SELECT '*** table without fill to compare ***';
+-- *** table without fill to compare ***
 SELECT * FROM fill ORDER BY date, val;
 
 -- Some useful cases
 
-SELECT '*** date WITH FILL, val ***';
 SELECT * FROM fill ORDER BY date WITH FILL, val;
 
-SELECT '*** date WITH FILL FROM 2019-05-01 TO 2019-05-31, val WITH FILL ***';
 SELECT * FROM fill ORDER BY date WITH FILL FROM toDate('2019-05-01') TO toDate('2019-05-31'), val WITH FILL;
 
-SELECT '*** date DESC WITH FILL, val WITH FILL FROM 1 TO 6 ***';
 SELECT * FROM fill ORDER BY date DESC WITH FILL, val WITH FILL FROM 1 TO 6;
 
 -- Some weird cases
 
-SELECT '*** date DESC WITH FILL TO 2019-05-01 STEP -2, val DESC WITH FILL FROM 10 TO -5 STEP -3 ***';
 SELECT * FROM fill ORDER BY date DESC WITH FILL TO toDate('2019-05-01') STEP -2, val DESC WITH FILL FROM 10 TO -5 STEP -3;
 
-SELECT '*** date WITH FILL TO 2019-06-23 STEP 3, val WITH FILL FROM -10 STEP 2';
 SELECT * FROM fill ORDER BY date WITH FILL TO toDate('2019-06-23') STEP 3, val WITH FILL FROM -10 STEP 2;
 
 DROP TABLE fill;
 
 CREATE TABLE fill (a UInt32, b Int32) ENGINE = Memory;
 INSERT INTO fill VALUES (1, -2), (1, 3), (3, 2), (5, -1), (6, 5), (8, 0);
 
-SELECT '*** table without fill to compare ***';
+-- *** table without fill to compare ***
 SELECT * FROM fill ORDER BY a, b;
 
-SELECT '*** a WITH FILL, b WITH fill ***';
 SELECT * FROM fill ORDER BY a WITH FILL, b WITH fill;
 
-SELECT '*** a WITH FILL, b WITH fill TO 6 STEP 2 ***';
 SELECT * FROM fill ORDER BY a WITH FILL, b WITH fill TO 6 STEP 2;
 
 SELECT * FROM fill ORDER BY a WITH FILL STEP -1; -- { serverError 475 }
diff --git a/tests/queries/0_stateless/01000_subquery_requires_alias.reference b/tests/queries/0_stateless/01000_subquery_requires_alias.reference
index 8018af28430..b6ceb2a2a72 100644
--- a/tests/queries/0_stateless/01000_subquery_requires_alias.reference
+++ b/tests/queries/0_stateless/01000_subquery_requires_alias.reference
@@ -1,2 +1,2 @@
-1 2 3
-1 2 3
+2 1 3
+2 1 3
diff --git a/tests/queries/0_stateless/01000_subquery_requires_alias.sql b/tests/queries/0_stateless/01000_subquery_requires_alias.sql
index d14d4637be8..27320fab933 100644
--- a/tests/queries/0_stateless/01000_subquery_requires_alias.sql
+++ b/tests/queries/0_stateless/01000_subquery_requires_alias.sql
@@ -1,3 +1,4 @@
+SET allow_experimental_analyzer = 1;
 SET joined_subquery_requires_alias = 1;
 
 SELECT * FROM (SELECT 1 as A, 2 as B) X
diff --git a/tests/queries/0_stateless/01009_insert_select_data_loss.sql b/tests/queries/0_stateless/01009_insert_select_data_loss.sql
index a153d7dbc30..cbf87c4d0f1 100644
--- a/tests/queries/0_stateless/01009_insert_select_data_loss.sql
+++ b/tests/queries/0_stateless/01009_insert_select_data_loss.sql
@@ -1,7 +1,7 @@
 drop table if exists tab;
 create table tab (x UInt64) engine = MergeTree order by tuple();
 
-insert into tab select number as n from numbers(20) nums
+insert into tab select n from (SELECT number AS n FROM numbers(20)) nums
 semi left join (select number * 10 as n from numbers(2)) js2 using(n)
 settings max_block_size = 5;
 select * from tab order by x;
diff --git a/tests/queries/0_stateless/01010_pm_join_all_join_bug.sql b/tests/queries/0_stateless/01010_pm_join_all_join_bug.sql
index 18a67f41194..278aa46a479 100644
--- a/tests/queries/0_stateless/01010_pm_join_all_join_bug.sql
+++ b/tests/queries/0_stateless/01010_pm_join_all_join_bug.sql
@@ -10,6 +10,6 @@ SELECT '-';
 SELECT * FROM ints l INNER JOIN ints r USING i64 ORDER BY l.i32, r.i32;
 SELECT '-';
 
-SELECT count() FROM ( SELECT [1], count(1) ) AS t1 ALL RIGHT JOIN ( SELECT number AS s FROM numbers(2) ) AS t2 USING (s); -- { serverError NOT_FOUND_COLUMN_IN_BLOCK }
+SELECT count() FROM ( SELECT [1], count(1) ) AS t1 ALL RIGHT JOIN ( SELECT number AS s FROM numbers(2) ) AS t2 USING (s); -- { serverError NOT_FOUND_COLUMN_IN_BLOCK, UNKNOWN_IDENTIFIER }
 
 DROP TABLE ints;
diff --git a/tests/queries/0_stateless/01010_pmj_on_disk.sql b/tests/queries/0_stateless/01010_pmj_on_disk.sql
index d4fb9184896..4925f78f82f 100644
--- a/tests/queries/0_stateless/01010_pmj_on_disk.sql
+++ b/tests/queries/0_stateless/01010_pmj_on_disk.sql
@@ -1,6 +1,6 @@
 SET join_algorithm = 'hash';
 
-SELECT number as n, j FROM numbers(4) nums
+SELECT n, j FROM (SELECT number as n FROM numbers(4)) nums
 ANY LEFT JOIN (
     SELECT number * 2 AS n, number + 10 AS j
     FROM numbers(4000)
@@ -10,7 +10,7 @@ ORDER BY n;
 
 SET max_rows_in_join = 1000;
 
-SELECT number as n, j FROM numbers(4) nums
+SELECT n, j FROM (SELECT number AS n FROM numbers(4)) nums
 ANY LEFT JOIN (
     SELECT number * 2 AS n, number + 10 AS j
     FROM numbers(4000)
@@ -20,7 +20,7 @@ ORDER BY n; -- { serverError 191 }
 
 SET join_algorithm = 'partial_merge';
 
-SELECT number as n, j FROM numbers(4) nums
+SELECT n, j FROM (SELECT number as n FROM numbers(4)) nums
 ANY LEFT JOIN (
     SELECT number * 2 AS n, number + 10 AS j
     FROM numbers(4000)
@@ -30,7 +30,7 @@ ORDER BY n;
 
 SET partial_merge_join_optimizations = 1;
 
-SELECT number as n, j FROM numbers(4) nums
+SELECT n, j FROM (SELECT number AS n FROM numbers(4)) nums
 ANY LEFT JOIN (
     SELECT number * 2 AS n, number + 10 AS j
     FROM numbers(4000)
@@ -40,7 +40,7 @@ ORDER BY n;
 
 SET join_algorithm = 'auto';
 
-SELECT number as n, j FROM numbers(4) nums
+SELECT n, j FROM (SELECT number AS n FROM numbers(4)) nums
 ANY LEFT JOIN (
     SELECT number * 2 AS n, number + 10 AS j
     FROM numbers(4000)
@@ -50,7 +50,7 @@ ORDER BY n;
 
 SET max_rows_in_join = '10';
 
-SELECT number as n, j FROM numbers(4) nums
+SELECT n, j FROM (SELECT number AS n FROM numbers(4)) nums
 ANY LEFT JOIN (
     SELECT number * 2 AS n, number + 10 AS j
     FROM numbers(4000)
diff --git a/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql b/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql
index 7804ce32a5a..f9f30b44700 100644
--- a/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql
+++ b/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql
@@ -3,7 +3,10 @@
 SET max_memory_usage = 32000000;
 SET join_on_disk_max_files_to_merge = 4;
 
-SELECT number * 200000 as n, j FROM numbers(5) nums
+SELECT n, j FROM
+(
+    SELECT number * 200000 as n FROM numbers(5)
+) nums
 ANY LEFT JOIN (
     SELECT number * 2 AS n, number AS j
     FROM numbers(1000000)
@@ -13,14 +16,20 @@ USING n; -- { serverError 241 }
 SET join_algorithm = 'partial_merge';
 SET default_max_bytes_in_join = 0;
 
-SELECT number * 200000 as n, j FROM numbers(5) nums
+SELECT n, j FROM
+(
+    SELECT number * 200000 as n FROM numbers(5)
+) nums
 ANY LEFT JOIN (
     SELECT number * 2 AS n, number AS j
     FROM numbers(1000000)
 ) js2
 USING n; -- { serverError 12 }
 
-SELECT number * 200000 as n, j FROM numbers(5) nums
+SELECT n, j FROM
+(
+    SELECT number * 200000 as n FROM numbers(5)
+) nums
 ANY LEFT JOIN (
     SELECT number * 2 AS n, number AS j
     FROM numbers(1000000)
@@ -28,7 +37,10 @@ ANY LEFT JOIN (
 USING n
 SETTINGS max_bytes_in_join = 30000000; -- { serverError 241 }
 
-SELECT number * 200000 as n, j FROM numbers(5) nums
+SELECT n, j FROM
+(
+    SELECT number * 200000 as n FROM numbers(5)
+) nums
 ANY LEFT JOIN (
     SELECT number * 2 AS n, number AS j
     FROM numbers(1000000)
@@ -39,7 +51,10 @@ SETTINGS max_bytes_in_join = 10000000;
 
 SET partial_merge_join_optimizations = 1;
 
-SELECT number * 200000 as n, j FROM numbers(5) nums
+SELECT n, j FROM
+(
+    SELECT number * 200000 as n FROM numbers(5)
+) nums
 LEFT JOIN (
     SELECT number * 2 AS n, number AS j
     FROM numbers(1000000)
@@ -50,7 +65,10 @@ SETTINGS max_rows_in_join = 100000;
 
 SET default_max_bytes_in_join = 10000000;
 
-SELECT number * 200000 as n, j FROM numbers(5) nums
+SELECT n, j FROM
+(
+    SELECT number * 200000 as n FROM numbers(5)
+) nums
 JOIN (
     SELECT number * 2 AS n, number AS j
     FROM numbers(1000000)
diff --git a/tests/queries/0_stateless/01013_repeat_function.reference b/tests/queries/0_stateless/01013_repeat_function.reference
index 46bb248a99a..ea0dadd524f 100644
--- a/tests/queries/0_stateless/01013_repeat_function.reference
+++ b/tests/queries/0_stateless/01013_repeat_function.reference
@@ -1,7 +1,7 @@
 abcabcabcabcabcabcabcabcabcabc
 abcabcabc
-sdfggsdfgg
-xywq
+
+
 abcabcabcabcabcabcabcabcabcabcabcabc
 sdfggsdfggsdfggsdfggsdfggsdfggsdfggsdfggsdfggsdfgg
@@ -20,8 +20,8 @@ sdfggsdfggsdfggsdfggsdfggsdfggsdfggsdfggsdfggsdfgg
 xywqxywqxywqxywqxywqxywqxywqxywqxywqxywq
 plkfplkfplkfplkfplkfplkfplkfplkfplkfplkf
 abcabcabc
-abcabc
-abc
+
+
 abcabcabcabcabcabcabcabcabcabcabcabc
 abcabcabcabcabcabcabcabcabcabc
diff --git a/tests/queries/0_stateless/01013_repeat_function.sql b/tests/queries/0_stateless/01013_repeat_function.sql
index 85b0c16b4ab..b29cc032f28 100644
--- a/tests/queries/0_stateless/01013_repeat_function.sql
+++ b/tests/queries/0_stateless/01013_repeat_function.sql
@@ -3,20 +3,20 @@ DROP TABLE IF EXISTS defaults;
 CREATE TABLE defaults
 (
     strings String,
-    u8 UInt8,
+    i8 Int8,
     u16 UInt16,
     u32 UInt32,
     u64 UInt64
 )ENGINE = Memory();
 
-INSERT INTO defaults values ('abc', 3, 12, 4, 56) ('sdfgg', 2, 10, 21, 200) ('xywq', 1, 4, 9, 5) ('plkf', 0, 5, 7,77);
+INSERT INTO defaults values ('abc', 3, 12, 4, 56) ('sdfgg', -2, 10, 21, 200) ('xywq', -1, 4, 9, 5) ('plkf', 0, 5, 7,77);
 
-SELECT repeat(strings, u8) FROM defaults;
+SELECT repeat(strings, i8) FROM defaults;
 SELECT repeat(strings, u16) FROM defaults;
 SELECT repeat(strings, u32) from defaults;
 SELECT repeat(strings, u64) FROM defaults;
 SELECT repeat(strings, 10) FROM defaults;
 
-SELECT repeat('abc', u8) FROM defaults;
+SELECT repeat('abc', i8) FROM defaults;
 SELECT repeat('abc', u16) FROM defaults;
 SELECT repeat('abc', u32) FROM defaults;
 SELECT repeat('abc', u64) FROM defaults;
diff --git a/tests/queries/0_stateless/01013_totals_without_aggregation.reference b/tests/queries/0_stateless/01013_totals_without_aggregation.reference
index 6dddf22a467..a1f5c1aa914 100644
--- a/tests/queries/0_stateless/01013_totals_without_aggregation.reference
+++ b/tests/queries/0_stateless/01013_totals_without_aggregation.reference
@@ -1,7 +1,7 @@
 11
-11
-12
+0
 12
+0
 13
-13
+0
diff --git a/tests/queries/0_stateless/01013_totals_without_aggregation.sql b/tests/queries/0_stateless/01013_totals_without_aggregation.sql
index 584a8994767..291f95c6bd6 100644
--- a/tests/queries/0_stateless/01013_totals_without_aggregation.sql
+++ b/tests/queries/0_stateless/01013_totals_without_aggregation.sql
@@ -1,3 +1,5 @@
+SET allow_experimental_analyzer = 1;
+
 SELECT 11 AS n GROUP BY n WITH TOTALS;
 SELECT 12 AS n GROUP BY n WITH ROLLUP;
 SELECT 13 AS n GROUP BY n WITH CUBE;
diff --git a/tests/queries/0_stateless/01017_bithamming_distance.reference b/tests/queries/0_stateless/01017_bithamming_distance.reference
index cc2d4f39154..3e82e0d2864 100644
--- a/tests/queries/0_stateless/01017_bithamming_distance.reference
+++ b/tests/queries/0_stateless/01017_bithamming_distance.reference
@@ -13,3 +13,18 @@
 5
 9
 9
+0
+6
+6
+0
+6
+47
+0
+6
+47
+0
+6
+26
+0
+6
+26
diff --git a/tests/queries/0_stateless/01017_bithamming_distance.sql b/tests/queries/0_stateless/01017_bithamming_distance.sql
index 4b36894b97c..11f4f27d9a8 100644
--- a/tests/queries/0_stateless/01017_bithamming_distance.sql
+++ b/tests/queries/0_stateless/01017_bithamming_distance.sql
@@ -18,3 +18,27 @@ SELECT bitHammingDistance(n2, 100) FROM defaults;
 SELECT bitHammingDistance(n3, n4) FROM defaults;
 
 DROP TABLE defaults;
+
+DROP TABLE IF EXISTS test_string;
+
+CREATE TABLE test_string
+(
+    s1 String,
+    s2 String,
+    s3 FixedString(10),
+    s4 FixedString(10),
+) ENGINE = Memory;
+
+INSERT INTO test_string VALUES ('hello', 'hello', 'hello', 'hello') ('hello', 'hellow', 'hello', 'hellow') ('clickhouse', '012345', 'clickhouse', '012345');
+
+SELECT bitHammingDistance('hello', 'hello');
+SELECT bitHammingDistance('hello', 'hellow');
+SELECT bitHammingDistance(toFixedString('hello', 6), toFixedString('hellow', 6));
+
+SELECT bitHammingDistance(s1, s2) FROM test_string;
+SELECT bitHammingDistance(s3, s4) FROM test_string;
+
+SELECT bitHammingDistance('hello', s2) FROM test_string;
+SELECT bitHammingDistance(s4, toFixedString('hello', 10)) FROM test_string;
+
+DROP TABLE test_string;
diff --git a/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.sh b/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.sh
index e4d091ea0bb..4f35b69da0b 100755
--- a/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.sh
+++ b/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.sh
@@ -60,7 +60,7 @@ ${CLICKHOUSE_CLIENT} --query "ALTER TABLE $T1 UPDATE y = y + rand() % 1 WHERE no
 # hm... it looks like joinGet is considered deterministic
 ${CLICKHOUSE_CLIENT} --query "ALTER TABLE $R1 UPDATE y = joinGet('${CLICKHOUSE_DATABASE}.lookup_table', 'y_new', y) WHERE x=1" 2>&1 \
-&& echo 'OK' || echo 'FAIL'
+| grep -F -q "must use only deterministic functions" && echo 'OK' || echo 'FAIL'
 
 ${CLICKHOUSE_CLIENT} --query "ALTER TABLE $R1 DELETE WHERE dictHas('${CLICKHOUSE_DATABASE}.dict1', toUInt64(x))" 2>&1 \
 | grep -F -q "must use only deterministic functions" && echo 'OK' || echo 'FAIL'
diff --git a/tests/queries/0_stateless/01018_Distributed__shard_num.reference b/tests/queries/0_stateless/01018_Distributed__shard_num.reference
index 46963a006ec..232f12ed101 100644
--- a/tests/queries/0_stateless/01018_Distributed__shard_num.reference
+++ b/tests/queries/0_stateless/01018_Distributed__shard_num.reference
@@ -85,7 +85,7 @@ SELECT a._shard_num, a.key, b.host_name, b.host_address IN ('::1', '127.0.0.1'),
 FROM dist_1 a
 JOIN system.clusters b
 ON a._shard_num = b.shard_num
-WHERE b.cluster = 'test_cluster_two_shards_localhost'; -- { serverError 47; }
+WHERE b.cluster = 'test_cluster_two_shards_localhost'; -- { serverError 47, 403 }
 SELECT 'dist_3';
 dist_3
 SELECT * FROM dist_3;
diff --git a/tests/queries/0_stateless/01018_Distributed__shard_num.sql b/tests/queries/0_stateless/01018_Distributed__shard_num.sql
index d3f4e1ac527..7e31062348d 100644
--- a/tests/queries/0_stateless/01018_Distributed__shard_num.sql
+++ b/tests/queries/0_stateless/01018_Distributed__shard_num.sql
@@ -79,7 +79,7 @@ SELECT a._shard_num, a.key, b.host_name, b.host_address IN ('::1', '127.0.0.1'),
 FROM dist_1 a
 JOIN system.clusters b
 ON a._shard_num = b.shard_num
-WHERE b.cluster = 'test_cluster_two_shards_localhost'; -- { serverError 47; }
+WHERE b.cluster = 'test_cluster_two_shards_localhost'; -- { serverError 47, 403 }
 
 SELECT 'dist_3';
 SELECT * FROM dist_3;
diff --git a/tests/queries/0_stateless/01018_ambiguous_column.reference b/tests/queries/0_stateless/01018_ambiguous_column.reference
index a2a1d6ea4f6..308726fa184 100644
--- a/tests/queries/0_stateless/01018_ambiguous_column.reference
+++ b/tests/queries/0_stateless/01018_ambiguous_column.reference
@@ -1,12 +1,15 @@
 0 0
 0 0
+0
 0
 0
 0
 0
 0
-┌─one.dummy─┬─A.dummy─┬─B.dummy─┐
-│         0 │       0 │       0 │
-└───────────┴─────────┴─────────┘
+0
+0
+┌─system.one.dummy─┬─A.dummy─┬─B.dummy─┐
+│                0 │       0 │       0 │
+└──────────────────┴─────────┴─────────┘
 ┌─A.dummy─┬─one.dummy─┬─two.dummy─┐
 │       0 │         0 │         0 │
 └─────────┴───────────┴───────────┘
diff --git a/tests/queries/0_stateless/01018_ambiguous_column.sql b/tests/queries/0_stateless/01018_ambiguous_column.sql
index 54603aab810..620bdb6ba3f 100644
--- a/tests/queries/0_stateless/01018_ambiguous_column.sql
+++ b/tests/queries/0_stateless/01018_ambiguous_column.sql
@@ -1,4 +1,6 @@
-select * from system.one cross join system.one; -- { serverError 352 }
+SET allow_experimental_analyzer = 1;
+
+select * from system.one cross join system.one;
 select * from system.one cross join system.one r;
 select * from system.one l cross join system.one;
 select * from system.one left join system.one using dummy;
@@ -8,10 +10,10 @@ USE system;
 SELECT dummy FROM one AS A JOIN one ON A.dummy = one.dummy;
 SELECT dummy FROM one JOIN one AS A ON A.dummy = one.dummy;
 
-SELECT dummy FROM one l JOIN one r ON dummy = r.dummy; -- { serverError 352 }
-SELECT dummy FROM one l JOIN one r ON l.dummy = dummy; -- { serverError 352 }
-SELECT dummy FROM one l JOIN one r ON one.dummy = r.dummy; -- { serverError 352 }
-SELECT dummy FROM one l JOIN one r ON l.dummy = one.dummy; -- { serverError 352 }
+SELECT dummy FROM one l JOIN one r ON dummy = r.dummy;
+SELECT dummy FROM one l JOIN one r ON l.dummy = dummy; -- { serverError 403 }
+SELECT dummy FROM one l JOIN one r ON one.dummy = r.dummy;
+SELECT dummy FROM one l JOIN one r ON l.dummy = one.dummy; -- { serverError 403 }
 
 SELECT * from one JOIN one A ON one.dummy = A.dummy
diff --git a/tests/queries/0_stateless/01018_empty_aggregation_filling.reference b/tests/queries/0_stateless/01018_empty_aggregation_filling.reference
index c29807a7e15..975b48c57f9 100644
--- a/tests/queries/0_stateless/01018_empty_aggregation_filling.reference
+++ b/tests/queries/0_stateless/01018_empty_aggregation_filling.reference
@@ -54,7 +54,7 @@ hello
 2011-04-05 14:19:19
 -123.45
 -123.45
-inf
-inf
+nan
+nan
 -123.45
 -123.45
diff --git a/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh b/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh
index e90085f4e8e..3a2eac1f38f 100755
--- a/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh
+++ b/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh
@@ -50,12 +50,20 @@ function insert_thread() {
 
 function alter_thread() {
     trap 'exit' INT
 
-    ALTER[0]="ALTER TABLE mv MODIFY QUERY SELECT v == 1 as test, v as case FROM src_a;"
-    ALTER[1]="ALTER TABLE mv MODIFY QUERY SELECT v == 2 as test, v as case FROM src_b;"
+    # Generate random ALTERs, but make sure that at least one of them is for each source table.
+    for i in {0..5}; do
+        ALTER[$i]="ALTER TABLE mv MODIFY QUERY SELECT v == 1 as test, v as case FROM src_a;"
+    done
+    # Insert 3 ALTERs to src_b, one in the first half of the array and two in arbitrary positions.
+    ALTER[$RANDOM % 3]="ALTER TABLE mv MODIFY QUERY SELECT v == 2 as test, v as case FROM src_b;"
+    ALTER[$RANDOM % 6]="ALTER TABLE mv MODIFY QUERY SELECT v == 2 as test, v as case FROM src_b;"
+    ALTER[$RANDOM % 6]="ALTER TABLE mv MODIFY QUERY SELECT v == 2 as test, v as case FROM src_b;"
+
+    i=0
     while true; do
-        $CLICKHOUSE_CLIENT --allow_experimental_alter_materialized_view_structure=1 \
-            -q "${ALTER[$RANDOM % 2]}"
+        $CLICKHOUSE_CLIENT --allow_experimental_alter_materialized_view_structure=1 -q "${ALTER[$i % 6]}"
+        ((i=i+1))
+
         sleep "0.0$RANDOM"
 
         is_done=$($CLICKHOUSE_CLIENT -q "SELECT countIf(case = 1) > 0 AND countIf(case = 2) > 0 FROM mv;")
diff --git a/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh
index 4c0afc4c439..8ef03be02b6 100755
--- a/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh
+++ b/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh
@@ -70,8 +70,8 @@ timeout $TIMEOUT bash -c thread5 2> /dev/null &
 
 wait
 
-echo "DROP TABLE src NO DELAY" | ${CLICKHOUSE_CLIENT}
-echo "DROP TABLE dst NO DELAY" | ${CLICKHOUSE_CLIENT}
+echo "DROP TABLE src SYNC" | ${CLICKHOUSE_CLIENT}
+echo "DROP TABLE dst SYNC" | ${CLICKHOUSE_CLIENT}
 sleep 5
 
 # Check for deadlocks
diff --git a/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.reference b/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.reference
index c89fe48d9f9..8d40aebacf2 100644
--- a/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.reference
+++ b/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.reference
@@ -1,5 +1,5 @@
 122
 
-Table dictdb_01041_01040.dict_invalidate doesn\'t exist
+1
 
 133
diff --git a/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.sh b/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.sh
index 7249d5e1a82..6856f952a47 100755
--- a/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.sh
+++ b/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.sh
@@ -53,7 +53,7 @@ function check_exception_detected()
 export -f check_exception_detected;
 timeout 30 bash -c check_exception_detected 2> /dev/null
 
-$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1 | grep -Eo "Table dictdb_01041_01040.dict_invalidate .* exist"
+$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1 | grep -Eo "dictdb_01041_01040.dict_invalidate.*UNKNOWN_TABLE" | wc -l
 
 $CLICKHOUSE_CLIENT --query "
 CREATE TABLE dictdb_01041_01040.dict_invalidate
diff --git a/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql b/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql
index 2d9911287a3..bf1ac254783 100644
--- a/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql
+++ b/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql
@@ -1,5 +1,6 @@
 -- Tags: no-parallel
 
+SET allow_experimental_analyzer = 0;
 SET allow_experimental_window_view = 1;
 DROP DATABASE IF EXISTS test_01047;
 set allow_deprecated_database_ordinary=1;
diff --git a/tests/queries/0_stateless/01048_window_view_parser.sql b/tests/queries/0_stateless/01048_window_view_parser.sql
index 4c329f99f6e..f87d9aa023e 100644
--- a/tests/queries/0_stateless/01048_window_view_parser.sql
+++ b/tests/queries/0_stateless/01048_window_view_parser.sql
@@ -1,5 +1,6 @@
 -- Tags: no-parallel
 
+SET allow_experimental_analyzer = 0;
 SET allow_experimental_window_view = 1;
 DROP DATABASE IF EXISTS test_01048;
 set allow_deprecated_database_ordinary=1;
diff --git a/tests/queries/0_stateless/01050_window_view_parser_tumble.sql b/tests/queries/0_stateless/01050_window_view_parser_tumble.sql
index d9604bb2b52..c52a6fefacb 100644
--- a/tests/queries/0_stateless/01050_window_view_parser_tumble.sql
+++ b/tests/queries/0_stateless/01050_window_view_parser_tumble.sql
@@ -1,3 +1,4 @@
+SET allow_experimental_analyzer = 0;
 SET allow_experimental_window_view = 1;
 
 DROP TABLE IF EXISTS mt;
@@ -5,29 +6,29 @@ DROP TABLE IF EXISTS mt;
 CREATE TABLE mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple();
 
 SELECT '---WATERMARK---';
-DROP TABLE IF EXISTS wv NO DELAY;
+DROP TABLE IF EXISTS wv SYNC;
 CREATE WINDOW VIEW wv ENGINE Memory WATERMARK=INTERVAL '1' SECOND AS SELECT count(a), tumbleStart(wid) AS w_start, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid;
 
 SELECT '---With w_end---';
-DROP TABLE IF EXISTS wv NO DELAY;
+DROP TABLE IF EXISTS wv SYNC;
 CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), tumbleStart(tumble(timestamp, INTERVAL '3' SECOND)) AS w_start, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid;
 
 SELECT '---WithOut w_end---';
-DROP TABLE IF EXISTS wv NO DELAY;
+DROP TABLE IF EXISTS wv SYNC;
 CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), tumbleStart(wid) AS w_start FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid;
 
 SELECT '---WITH---';
-DROP TABLE IF EXISTS wv NO DELAY;
+DROP TABLE IF EXISTS wv SYNC;
 CREATE WINDOW VIEW wv ENGINE Memory AS WITH toDateTime('2018-01-01 00:00:00') AS date_time SELECT count(a), tumbleStart(wid) AS w_start, tumbleEnd(wid) AS w_end, date_time FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid;
 
 SELECT '---WHERE---';
-DROP TABLE IF EXISTS wv NO DELAY;
+DROP TABLE IF EXISTS wv SYNC;
 CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), tumbleStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid;
 
 SELECT '---ORDER_BY---';
-DROP TABLE IF EXISTS wv NO DELAY;
+DROP TABLE IF EXISTS wv SYNC;
 CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), tumbleStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid ORDER BY w_start;
 
 SELECT '---With now---';
-DROP TABLE IF EXISTS wv NO DELAY;
+DROP TABLE IF EXISTS wv SYNC;
 CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), tumbleStart(wid) AS w_start, tumbleEnd(tumble(now(), INTERVAL '3' SECOND)) AS w_end FROM mt GROUP BY tumble(now(), INTERVAL '3' SECOND) AS wid;
diff --git a/tests/queries/0_stateless/01051_system_stack_trace.sql b/tests/queries/0_stateless/01051_system_stack_trace.sql
index e322462a46a..7eb2a05dc87 100644
--- a/tests/queries/0_stateless/01051_system_stack_trace.sql
+++ b/tests/queries/0_stateless/01051_system_stack_trace.sql
@@ -1,4 +1,4 @@
--- Tags: race
+SET storage_system_stack_trace_pipe_read_timeout_ms = 1000;
 
 -- { echo }
 SELECT count() > 0 FROM system.stack_trace WHERE query_id != '';
diff --git a/tests/queries/0_stateless/01051_window_view_parser_hop.sql b/tests/queries/0_stateless/01051_window_view_parser_hop.sql
index 472dc66f1a2..b37e4ed3095 100644
--- a/tests/queries/0_stateless/01051_window_view_parser_hop.sql
+++ b/tests/queries/0_stateless/01051_window_view_parser_hop.sql
@@ -1,3 +1,4 @@
+SET allow_experimental_analyzer = 0;
 SET allow_experimental_window_view = 1;
 
 DROP TABLE IF EXISTS mt;
@@ -5,29 +6,29 @@ DROP TABLE IF EXISTS mt;
 CREATE TABLE mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple();
 
 SELECT '---WATERMARK---';
-DROP TABLE IF EXISTS wv NO DELAY;
+DROP TABLE IF EXISTS wv SYNC;
 CREATE WINDOW VIEW wv ENGINE Memory WATERMARK=INTERVAL '1' SECOND AS SELECT count(a), hopStart(wid) AS w_start, hopEnd(wid) AS w_end FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid;
 
 SELECT '---With w_end---';
-DROP TABLE IF EXISTS wv NO DELAY;
+DROP TABLE IF EXISTS wv SYNC;
 CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), hopStart(wid) AS w_start, hopEnd(wid) AS w_end FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid;
 
 SELECT '---WithOut w_end---';
-DROP TABLE IF EXISTS wv NO DELAY;
+DROP TABLE IF EXISTS wv SYNC;
 CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), hopStart(wid) AS w_start FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid;
 
 SELECT '---WITH---';
-DROP TABLE IF EXISTS wv NO DELAY;
+DROP TABLE IF EXISTS wv SYNC;
 CREATE WINDOW VIEW wv ENGINE Memory AS WITH toDateTime('2018-01-01 00:00:00') AS date_time SELECT count(a), hopStart(wid) AS w_start, hopEnd(wid) AS w_end, date_time FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid;
 
 SELECT '---WHERE---';
-DROP TABLE IF EXISTS wv NO DELAY;
+DROP TABLE IF EXISTS wv SYNC;
 CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), hopStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid;
 
 SELECT '---ORDER_BY---';
-DROP TABLE IF EXISTS wv NO DELAY;
+DROP TABLE IF EXISTS wv SYNC;
 CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), hopStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid ORDER BY w_start;
 
 SELECT '---With now---';
-DROP TABLE IF EXISTS wv NO DELAY;
+DROP TABLE IF EXISTS wv SYNC;
 CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), hopStart(wid) AS w_start, hopEnd(hop(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND)) as w_end FROM mt GROUP BY hop(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid;
diff --git a/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sh b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sh
index 9fdc66191d7..e75b7d9570b 100755
--- a/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sh
+++ b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sh
@@ -4,7 +4,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
-$CLICKHOUSE_CLIENT --multiquery < 1 as ok from (select * from odbc('DSN={ClickHouse DSN (Unicode)}','system','tables'))"
+# ODBC will do HEAD request because of progress bar
+# in normal situation, 501 will be returned and no Error is logged
+# but sometimes we get I/O broken pipe producing an Error log but it doesn't affect the run of the test
+${CLICKHOUSE_CLIENT} --query "select count() > 1 as ok from (select * from odbc('DSN={ClickHouse DSN (Unicode)}','system','tables'))" 2> >(grep -Fv "Failed to make HTTP_HEAD request")
 
 ${CLICKHOUSE_CLIENT} --query "CREATE TABLE t (x UInt8, y Float32, z String) ENGINE = Memory"
 ${CLICKHOUSE_CLIENT} --query "INSERT INTO t VALUES (1,0.1,'a я'),(2,0.2,'b ą'),(3,0.3,'c d')"
 
-${CLICKHOUSE_CLIENT} --query "SELECT x, y, z FROM odbc('DSN={ClickHouse DSN (ANSI)}','$CLICKHOUSE_DATABASE','t') ORDER BY x"
-${CLICKHOUSE_CLIENT} --query "SELECT x, y, z FROM odbc('DSN={ClickHouse DSN (Unicode)}','$CLICKHOUSE_DATABASE','t') ORDER BY x"
+${CLICKHOUSE_CLIENT} --query "SELECT x, y, z FROM odbc('DSN={ClickHouse DSN (ANSI)}','$CLICKHOUSE_DATABASE','t') ORDER BY x" 2> >(grep -Fv "Failed to make HTTP_HEAD request")
+${CLICKHOUSE_CLIENT} --query "SELECT x, y, z FROM odbc('DSN={ClickHouse DSN (Unicode)}','$CLICKHOUSE_DATABASE','t') ORDER BY x" 2> >(grep -Fv "Failed to make HTTP_HEAD request")
 
 ${CLICKHOUSE_CLIENT} --query "DROP TABLE t"
diff --git a/tests/queries/0_stateless/01086_window_view_cleanup.sh b/tests/queries/0_stateless/01086_window_view_cleanup.sh
index c85455616e1..b078b4718c0 100755
--- a/tests/queries/0_stateless/01086_window_view_cleanup.sh
+++ b/tests/queries/0_stateless/01086_window_view_cleanup.sh
@@ -5,7 +5,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
-$CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=1 --multiquery <&1| grep -Fa "Exception: " | grep -Fv "REPLICA_ALREADY_EXISTS" | grep -Fiv "Will not try to start it up" | \
     grep -Fv "Coordination::Exception" | grep -Fv "already contains some data and it does not look like Replicated database path"
 sleep 0.$RANDOM
diff --git a/tests/queries/0_stateless/01115_join_with_dictionary.sql b/tests/queries/0_stateless/01115_join_with_dictionary.sql
index cde1385eaae..83227aa555a 100644
--- a/tests/queries/0_stateless/01115_join_with_dictionary.sql
+++ b/tests/queries/0_stateless/01115_join_with_dictionary.sql
@@ -88,11 +88,11 @@ SELECT * FROM (SELECT number + 2 AS key FROM numbers(4)) s1 FULL JOIN dict_flat
 SELECT '-';
 SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 ANY INNER JOIN dict_flat d USING(key) ORDER BY s1.key;
 SELECT '-';
-SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 ANY RIGHT JOIN dict_flat d USING(key) ORDER BY s1.key;
+SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 ANY RIGHT JOIN dict_flat d USING(key) ORDER BY key;
 SELECT '-';
 SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 SEMI RIGHT JOIN dict_flat d USING(key) ORDER BY s1.key;
 SELECT '-';
-SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 ANTI RIGHT JOIN dict_flat d USING(key) ORDER BY s1.key;
+SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 ANTI RIGHT JOIN dict_flat d USING(key) ORDER BY key;
 
 SET join_use_nulls = 0;
diff --git a/tests/queries/0_stateless/01120_join_constants.sql b/tests/queries/0_stateless/01120_join_constants.sql
index fdf297f5934..d8c8b5757f8 100644
--- a/tests/queries/0_stateless/01120_join_constants.sql
+++ b/tests/queries/0_stateless/01120_join_constants.sql
@@ -1,7 +1,7 @@
 SELECT t1.*,
     t2.*,
-    'world',
+    'world' AS constant,
     isConstant('world')
 FROM
 (
@@ -19,7 +19,7 @@ LEFT JOIN
 
 SELECT t1.*,
     t2.*,
-    123,
+    123 AS constant,
     isConstant('world')
 FROM
 (
diff --git a/tests/queries/0_stateless/01129_dict_get_join_lose_constness.sql b/tests/queries/0_stateless/01129_dict_get_join_lose_constness.sql
index 4f7197dc5ce..fd3e12f7a15 100644
--- a/tests/queries/0_stateless/01129_dict_get_join_lose_constness.sql
+++ b/tests/queries/0_stateless/01129_dict_get_join_lose_constness.sql
@@ -13,9 +13,9 @@ SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' PASSWORD '' TAB
 LIFETIME(0)
 LAYOUT(hashed());
 
-SELECT dictGetInt32('system.dict1', 'element_count', toUInt64(dict_key)) AS join_key,
+SELECT join_key,
        toTimeZone(dictGetDateTime('system.dict1', 'loading_start_time', toUInt64(dict_key)), 'UTC') AS datetime
-FROM (select 1 AS dict_key) js1
+FROM (select dictGetInt32('system.dict1', 'element_count', toUInt64(dict_key)) AS join_key, 1 AS dict_key) js1
 LEFT JOIN (SELECT toInt32(2) AS join_key) js2
 USING (join_key)
 WHERE now() >= datetime;
diff --git a/tests/queries/0_stateless/01130_in_memory_parts.reference b/tests/queries/0_stateless/01130_in_memory_parts.reference
deleted file mode 100644
index ad5435abb59..00000000000
--- a/tests/queries/0_stateless/01130_in_memory_parts.reference
+++ /dev/null
@@ -1,39 +0,0 @@
-system.parts
-InMemory 2
-1
-1
-Simple selects
-0 0
-1 1
-2 2
-3 0
-4 1
-50 2
-51 0
-52 1
-53 2
-54 0
-34
-0
-20
-10
-Mutations and Alters
-66
-1 1
-2 2
-4 1
-5 2
-7 1
-[1,1]
-[]
-[4,16]
-[]
-[7,49]
-1 1
-2 1
-1 [1,1]
-2 []
-4 [4,16]
-5 []
-7 [7,49]
-0
diff --git a/tests/queries/0_stateless/01130_in_memory_parts.sql b/tests/queries/0_stateless/01130_in_memory_parts.sql
deleted file mode 100644
2b15ae24763..00000000000 --- a/tests/queries/0_stateless/01130_in_memory_parts.sql +++ /dev/null @@ -1,48 +0,0 @@ --- Tags: no-s3-storage - -DROP TABLE IF EXISTS in_memory; -CREATE TABLE in_memory (a UInt32, b UInt32) - ENGINE = MergeTree ORDER BY a - SETTINGS min_rows_for_compact_part = 1000, min_rows_for_compact_part = 1000; - -INSERT INTO in_memory SELECT number, number % 3 FROM numbers(100); -SELECT 'system.parts'; -SELECT DISTINCT part_type, marks FROM system.parts WHERE database = currentDatabase() AND table = 'in_memory' AND active; -SELECT DISTINCT data_uncompressed_bytes > 0 FROM system.parts WHERE database = currentDatabase() AND table = 'in_memory' AND active; -SELECT DISTINCT column_data_uncompressed_bytes > 0 FROM system.parts_columns WHERE database = currentDatabase() AND table = 'in_memory' AND active; - -SELECT 'Simple selects'; - -SELECT * FROM in_memory ORDER BY a LIMIT 5; -SELECT * FROM in_memory ORDER BY a LIMIT 5 OFFSET 50; -SELECT count() FROM in_memory WHERE b = 0 SETTINGS max_block_size = 10; --- Check index -SELECT count() FROM in_memory WHERE a > 100 SETTINGS max_rows_to_read = 0, force_primary_key = 1; -SELECT count() FROM in_memory WHERE a >= 10 AND a < 30 SETTINGS force_primary_key = 1; -SELECT DISTINCT blockSize() FROM in_memory SETTINGS max_block_size = 10; - -SELECT 'Mutations and Alters'; -SET mutations_sync = 1; - -ALTER TABLE in_memory DELETE WHERE b = 0; - -SELECT count() FROM in_memory; -SELECT * FROM in_memory ORDER BY a LIMIT 5; - -ALTER TABLE in_memory ADD COLUMN arr Array(UInt64); -ALTER TABLE in_memory UPDATE arr = [a, a * a] WHERE b = 1; - -SELECT arr FROM in_memory ORDER BY a LIMIT 5; - -ALTER TABLE in_memory MODIFY COLUMN b String; -ALTER TABLE in_memory RENAME COLUMN b to str; -SELECT DISTINCT str, length(str) FROM in_memory ORDER BY str; -ALTER TABLE in_memory DROP COLUMN str; - -SELECT * FROM in_memory ORDER BY a LIMIT 5; - --- in-memory parts works if they're empty. 
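The deleted 01130_in_memory_parts test above asserted part_type = 'InMemory' through system.parts. The same introspection still applies to the surviving part types; a sketch with a placeholder table name:

```bash
# Expect 'Compact' or 'Wide' now that in-memory parts are gone ('mt' is a
# placeholder for any MergeTree table in the current database).
${CLICKHOUSE_CLIENT} --query "
    SELECT DISTINCT part_type, marks
    FROM system.parts
    WHERE database = currentDatabase() AND table = 'mt' AND active"
```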
-ALTER TABLE in_memory DELETE WHERE 1; -SELECT count() FROM in_memory; - -DROP TABLE in_memory; diff --git a/tests/queries/0_stateless/01130_in_memory_parts_check.reference b/tests/queries/0_stateless/01130_in_memory_parts_check.reference deleted file mode 100644 index 15f72836ff1..00000000000 --- a/tests/queries/0_stateless/01130_in_memory_parts_check.reference +++ /dev/null @@ -1 +0,0 @@ -201901_1_1_0 1 diff --git a/tests/queries/0_stateless/01130_in_memory_parts_check.sql b/tests/queries/0_stateless/01130_in_memory_parts_check.sql deleted file mode 100644 index c2f5eba5949..00000000000 --- a/tests/queries/0_stateless/01130_in_memory_parts_check.sql +++ /dev/null @@ -1,13 +0,0 @@ --- Tags: no-s3-storage - --- Part of 00961_check_table test, but with in-memory parts - -SET check_query_single_value_result = 0; -DROP TABLE IF EXISTS mt_table; -CREATE TABLE mt_table (d Date, key UInt64, data String) ENGINE = MergeTree() PARTITION BY toYYYYMM(d) ORDER BY key - SETTINGS min_rows_for_compact_part = 1000, min_rows_for_compact_part = 1000; - -CHECK TABLE mt_table; -INSERT INTO mt_table VALUES (toDate('2019-01-02'), 1, 'Hello'), (toDate('2019-01-02'), 2, 'World'); -CHECK TABLE mt_table; -DROP TABLE mt_table; diff --git a/tests/queries/0_stateless/01130_in_memory_parts_default.reference b/tests/queries/0_stateless/01130_in_memory_parts_default.reference deleted file mode 100644 index c51afdb6658..00000000000 --- a/tests/queries/0_stateless/01130_in_memory_parts_default.reference +++ /dev/null @@ -1 +0,0 @@ -0 bbb_aaa diff --git a/tests/queries/0_stateless/01130_in_memory_parts_default.sql b/tests/queries/0_stateless/01130_in_memory_parts_default.sql deleted file mode 100644 index 776d5f89fcf..00000000000 --- a/tests/queries/0_stateless/01130_in_memory_parts_default.sql +++ /dev/null @@ -1,21 +0,0 @@ --- Tags: no-s3-storage --- Test 01266_default_prewhere_reqq, but with in-memory parts -DROP TABLE IF EXISTS t1; - -CREATE TABLE t1 -( - date Date, - s1 String, - s2 String -) ENGINE = MergeTree() PARTITION BY toYYYYMMDD(date) ORDER BY (date, s1) -SETTINGS index_granularity = 8192, min_rows_for_compact_part = 1000, min_rows_for_wide_part = 1000; - - -set max_threads=1; - -insert into t1 (date, s1,s2) values(today()-1,'aaa','bbb'); -alter table t1 add column s3 String DEFAULT concat(s2,'_',s1); --- insert into t1 (date, s1,s2) values(today(),'aaa2','bbb2'); -select ignore(date), s3 from t1 where s2='bbb'; - -DROP TABLE t1; diff --git a/tests/queries/0_stateless/01130_in_memory_parts_nested.reference b/tests/queries/0_stateless/01130_in_memory_parts_nested.reference deleted file mode 100644 index 3882abb9467..00000000000 --- a/tests/queries/0_stateless/01130_in_memory_parts_nested.reference +++ /dev/null @@ -1,25 +0,0 @@ -[0] -[0,0,0] -[0,0,0,0,0] -[0,0,0,0,0,0,0] -[0,0,0,0,0,0,0,0,0] -[0] -[0,0,0] -[0,0,0,0,0] -[0,0,0,0,0,0,0] -[0,0,0,0,0,0,0,0,0] -[0] -[0,0,0] -[0,0,0,0,0] -[0,0,0,0,0,0,0] -[0,0,0,0,0,0,0,0,0] -[0] -[0,2,4] -[0,2,4,6,8] -[0,2,4,6,8,10,12] -[0,2,4,6,8,10,12,14,16] -[0] [0] -[0,1,2] [0,2,4] -[0,1,2,3,4] [0,2,4,6,8] -[0,1,2,3,4,5,6] [0,2,4,6,8,10,12] -[0,1,2,3,4,5,6,7,8] [0,2,4,6,8,10,12,14,16] diff --git a/tests/queries/0_stateless/01130_in_memory_parts_nested.sql b/tests/queries/0_stateless/01130_in_memory_parts_nested.sql deleted file mode 100644 index 45e778b9f04..00000000000 --- a/tests/queries/0_stateless/01130_in_memory_parts_nested.sql +++ /dev/null @@ -1,19 +0,0 @@ --- Tags: no-s3-storage --- Test 00576_nested_and_prewhere, but with in-memory parts. 
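The deleted 01130_in_memory_parts_check test above was a variant of 00961_check_table. Its pattern survives for ordinary parts; a minimal sketch without the in-memory settings:

```bash
${CLICKHOUSE_CLIENT} --multiquery <<'SQL'
-- One result row per part instead of a single 0/1 verdict.
SET check_query_single_value_result = 0;
CREATE TABLE mt_table (d Date, key UInt64, data String)
    ENGINE = MergeTree PARTITION BY toYYYYMM(d) ORDER BY key;
INSERT INTO mt_table VALUES (toDate('2019-01-02'), 1, 'Hello');
CHECK TABLE mt_table;
DROP TABLE mt_table;
SQL
```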
-DROP TABLE IF EXISTS nested; - -CREATE TABLE nested (x UInt64, filter UInt8, n Nested(a UInt64)) ENGINE = MergeTree ORDER BY x - SETTINGS min_rows_for_compact_part = 200000, min_rows_for_wide_part = 300000; - -INSERT INTO nested SELECT number, number % 2, range(number % 10) FROM system.numbers LIMIT 100000; - -ALTER TABLE nested ADD COLUMN n.b Array(UInt64); -SELECT DISTINCT n.b FROM nested PREWHERE filter; -SELECT DISTINCT n.b FROM nested PREWHERE filter SETTINGS max_block_size = 123; -SELECT DISTINCT n.b FROM nested PREWHERE filter SETTINGS max_block_size = 1234; - -ALTER TABLE nested ADD COLUMN n.c Array(UInt64) DEFAULT arrayMap(x -> x * 2, n.a); -SELECT DISTINCT n.c FROM nested PREWHERE filter; -SELECT DISTINCT n.a, n.c FROM nested PREWHERE filter; - -DROP TABLE nested; diff --git a/tests/queries/0_stateless/01130_in_memory_parts_partitons.reference b/tests/queries/0_stateless/01130_in_memory_parts_partitons.reference deleted file mode 100644 index 44cbbed3f57..00000000000 --- a/tests/queries/0_stateless/01130_in_memory_parts_partitons.reference +++ /dev/null @@ -1,60 +0,0 @@ -1 2 foo -1 3 bar -2 4 aa -2 5 bb -2 6 cc -3 7 qq -3 8 ww -3 9 ee -3 10 rr -1_1_1_0 InMemory 2 -2_2_2_0 InMemory 3 -3_3_3_0 InMemory 4 -^ init ================== -2 4 aa -2 5 bb -2 6 cc -3 7 qq -3 8 ww -3 9 ee -3 10 rr -2_2_2_0 InMemory 3 -3_3_3_0 InMemory 4 -^ drop 1 ================== -3 7 qq -3 8 ww -3 9 ee -3 10 rr -3_3_3_0 InMemory 4 -^ detach 2 ================== -2 4 aa -2 5 bb -2 6 cc -3 7 qq -3 8 ww -3 9 ee -3 10 rr -2_4_4_0 Compact 3 -3_3_3_0 InMemory 4 -^ attach 2 ================= -2 4 aa -2 5 bb -2 6 cc -3 7 qq -3 8 ww -3 9 ee -3 10 rr -2_4_4_0 Compact 3 -3_3_3_0 InMemory 4 -^ detach attach ================== -2 4 aa -2 5 bb -2 6 cc -3 11 tt -3 12 yy -t2 2_4_4_0 Compact 3 -t2 3_6_6_0 Compact 2 -t3 3_1_1_0 InMemory 2 -^ replace ================== -3_1_1_0 InMemory 1 2 -^ freeze ================== diff --git a/tests/queries/0_stateless/01130_in_memory_parts_partitons.sql b/tests/queries/0_stateless/01130_in_memory_parts_partitons.sql deleted file mode 100644 index 18da2d2bd30..00000000000 --- a/tests/queries/0_stateless/01130_in_memory_parts_partitons.sql +++ /dev/null @@ -1,59 +0,0 @@ --- Tags: no-parallel, no-s3-storage - -DROP TABLE IF EXISTS t2; - -CREATE TABLE t2(id UInt32, a UInt64, s String) - ENGINE = MergeTree ORDER BY a PARTITION BY id - SETTINGS min_rows_for_compact_part = 1000, min_rows_for_wide_part = 2000; - -SYSTEM STOP MERGES t2; - -INSERT INTO t2 VALUES (1, 2, 'foo'), (1, 3, 'bar'); -INSERT INTO t2 VALUES (2, 4, 'aa'), (2, 5, 'bb'), (2, 6, 'cc'); -INSERT INTO t2 VALUES (3, 7, 'qq'), (3, 8, 'ww'), (3, 9, 'ee'), (3, 10, 'rr'); - -SELECT * FROM t2 ORDER BY a; -SELECT name, part_type, rows FROM system.parts WHERE table = 't2' AND active AND database = currentDatabase() ORDER BY name; -SELECT '^ init =================='; - -ALTER TABLE t2 DROP PARTITION 1; -SELECT * FROM t2 ORDER BY a; -SELECT name, part_type, rows FROM system.parts WHERE table = 't2' AND active AND database = currentDatabase() ORDER BY name; -SELECT '^ drop 1 =================='; - -ALTER TABLE t2 DETACH PARTITION 2; -SELECT * FROM t2 ORDER BY a; -SELECT name, part_type, rows FROM system.parts WHERE table = 't2' AND active AND database = currentDatabase() ORDER BY name; -SELECT '^ detach 2 =================='; - -ALTER TABLE t2 ATTACH PARTITION 2; -SELECT * FROM t2 ORDER BY a; -SELECT name, part_type, rows FROM system.parts WHERE table = 't2' AND active AND database = currentDatabase() ORDER BY name; -SELECT '^ attach 2 
================='; - -DETACH TABLE t2; -ATTACH TABLE t2; - -SELECT * FROM t2 ORDER BY a; -SELECT name, part_type, rows FROM system.parts WHERE table = 't2' AND active AND database = currentDatabase() ORDER BY name; -SELECT '^ detach attach =================='; - -DROP TABLE IF EXISTS t3; - -CREATE TABLE t3(id UInt32, a UInt64, s String) - ENGINE = MergeTree ORDER BY a PARTITION BY id - SETTINGS min_rows_for_compact_part = 1000, min_rows_for_wide_part = 2000; - -INSERT INTO t3 VALUES (3, 11, 'tt'), (3, 12, 'yy'); -ALTER TABLE t2 REPLACE PARTITION 3 FROM t3; -SELECT * FROM t2 ORDER BY a; -SELECT table, name, part_type, rows FROM system.parts WHERE table = 't2' AND active AND database = currentDatabase() ORDER BY name; -SELECT table, name, part_type, rows FROM system.parts WHERE table = 't3' AND active AND database = currentDatabase() ORDER BY name; -SELECT '^ replace =================='; - -ALTER TABLE t3 FREEZE PARTITION 3; -SELECT name, part_type, is_frozen, rows FROM system.parts WHERE table = 't3' AND active AND database = currentDatabase() ORDER BY name; -SELECT '^ freeze =================='; - -DROP TABLE t2; -DROP TABLE t3; diff --git a/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.reference b/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.reference index 01efbb7c64b..6d32c20909b 100644 --- a/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.reference +++ b/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.reference @@ -3,15 +3,27 @@ 1 l \N Nullable(String) 2 \N Nullable(String) - +1 l Nullable(String) \N Nullable(String) +0 \N Nullable(String) \N Nullable(String) +0 \N Nullable(String) \N Nullable(String) +1 l Nullable(String) \N Nullable(String) +- +1 l LowCardinality(String) \N Nullable(String) +0 LowCardinality(String) \N Nullable(String) +0 LowCardinality(String) \N Nullable(String) +1 l LowCardinality(String) \N Nullable(String) +- +1 l \N Nullable(String) +0 \N \N Nullable(String) +0 \N \N Nullable(String) +1 l \N Nullable(String) +- 1 l \N Nullable(String) 0 \N Nullable(String) 0 \N Nullable(String) 1 l \N Nullable(String) - -1 l \N Nullable(String) -0 \N Nullable(String) -0 \N Nullable(String) -1 l \N Nullable(String) +0 \N - 0 - diff --git a/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.sql b/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.sql index 38b72837174..2464b7a57cf 100644 --- a/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.sql +++ b/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.sql @@ -15,19 +15,37 @@ SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (x) ORD SELECT '-'; -SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x; -SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x; -SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x; +-- lc should be supertype for l.lc and r.lc, so expect Nullable(String) +SELECT x, lc, toTypeName(lc), r.lc, toTypeName(r.lc) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; +SELECT x, lc, toTypeName(lc), r.lc, toTypeName(r.lc) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; +SELECT x, lc, toTypeName(lc), r.lc, toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; SELECT '-'; -SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l LEFT JOIN nr AS r USING 
(lc) ORDER BY x; -SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x; -SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x; +-- old behavior is different +SELECT x, lc, toTypeName(lc), r.lc, toTypeName(r.lc) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; +SELECT x, lc, toTypeName(lc), r.lc, toTypeName(r.lc) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; +SELECT x, lc, toTypeName(lc), r.lc, toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; SELECT '-'; -SELECT x, lc FROM t AS l RIGHT JOIN nr AS r USING (lc); +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; + +SELECT '-'; + +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; + +SELECT '-'; + +SELECT x, lc FROM t AS l RIGHT JOIN nr AS r USING (lc) SETTINGS allow_experimental_analyzer = 1; + +SELECT '-'; + +SELECT x, lc FROM t AS l RIGHT JOIN nr AS r USING (lc) SETTINGS allow_experimental_analyzer = 0; SELECT '-'; diff --git a/tests/queries/0_stateless/01142_merge_join_lc_and_nullable_in_key.reference b/tests/queries/0_stateless/01142_merge_join_lc_and_nullable_in_key.reference index c6bdcb773b2..bb29ec9becd 100644 --- a/tests/queries/0_stateless/01142_merge_join_lc_and_nullable_in_key.reference +++ b/tests/queries/0_stateless/01142_merge_join_lc_and_nullable_in_key.reference @@ -4,6 +4,16 @@ 2 \N Nullable(String) - 1 l \N Nullable(String) +0 \N \N Nullable(String) +0 \N \N Nullable(String) +1 l \N Nullable(String) +- +1 l \N Nullable(String) +0 \N \N Nullable(String) +0 \N \N Nullable(String) +1 l \N Nullable(String) +- +1 l \N Nullable(String) 0 \N Nullable(String) 0 \N Nullable(String) 1 l \N Nullable(String) diff --git a/tests/queries/0_stateless/01142_merge_join_lc_and_nullable_in_key.sql b/tests/queries/0_stateless/01142_merge_join_lc_and_nullable_in_key.sql index dbc2d7c9f5d..718e8358c64 100644 --- a/tests/queries/0_stateless/01142_merge_join_lc_and_nullable_in_key.sql +++ b/tests/queries/0_stateless/01142_merge_join_lc_and_nullable_in_key.sql @@ -17,15 +17,27 @@ SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (x) ORD SELECT '-'; -SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x; -SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x; -SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x; +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER 
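The rewritten 01142 tests above pin each expected result to a planner by attaching SETTINGS to the individual SELECT rather than issuing a session-wide SET, so old and new behavior can be asserted side by side in one file. The same mechanism in isolation:

```bash
# Any query-level setting can be toggled per statement this way.
${CLICKHOUSE_CLIENT} --query "SELECT 1 SETTINGS allow_experimental_analyzer = 1"
${CLICKHOUSE_CLIENT} --query "SELECT 1 SETTINGS allow_experimental_analyzer = 0"
```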
BY x SETTINGS allow_experimental_analyzer = 1; +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; SELECT '-'; -SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x; -SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x; -SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; + +SELECT '-'; + +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; + +SELECT '-'; + +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; SELECT '-'; diff --git a/tests/queries/0_stateless/01148_zookeeper_path_macros_unfolding.sql b/tests/queries/0_stateless/01148_zookeeper_path_macros_unfolding.sql index 7234cee96e0..505c406c2cc 100644 --- a/tests/queries/0_stateless/01148_zookeeper_path_macros_unfolding.sql +++ b/tests/queries/0_stateless/01148_zookeeper_path_macros_unfolding.sql @@ -12,7 +12,7 @@ DETACH TABLE rmt1; ATTACH TABLE rmt1; SHOW CREATE TABLE rmt1; -CREATE TABLE rmt (n UInt64, s String) ENGINE = ReplicatedMergeTree('{default_path_test}{uuid}', '{default_name_test}') ORDER BY n; -- { serverError 62 } +CREATE TABLE rmt (n UInt64, s String) ENGINE = ReplicatedMergeTree('{default_path_test}{uuid}', '{default_name_test}') ORDER BY n; -- { serverError 36 } CREATE TABLE rmt (n UInt64, s String) ENGINE = ReplicatedMergeTree('{default_path_test}test_01148', '{default_name_test}') ORDER BY n; SHOW CREATE TABLE rmt; RENAME TABLE rmt TO rmt2; -- { serverError 48 } @@ -24,7 +24,7 @@ SET distributed_ddl_output_mode='none'; DROP DATABASE IF EXISTS test_01148_atomic; CREATE DATABASE test_01148_atomic ENGINE=Atomic; CREATE TABLE test_01148_atomic.rmt2 ON CLUSTER test_shard_localhost (n int, PRIMARY KEY n) ENGINE=ReplicatedMergeTree; -CREATE TABLE test_01148_atomic.rmt3 AS test_01148_atomic.rmt2; -- { serverError 62 } +CREATE TABLE test_01148_atomic.rmt3 AS test_01148_atomic.rmt2; -- { serverError 36 } CREATE TABLE test_01148_atomic.rmt4 ON CLUSTER test_shard_localhost AS test_01148_atomic.rmt2; SHOW CREATE TABLE test_01148_atomic.rmt2; RENAME TABLE test_01148_atomic.rmt4 to test_01148_atomic.rmt3; diff --git a/tests/queries/0_stateless/01158_zookeeper_log_long.reference b/tests/queries/0_stateless/01158_zookeeper_log_long.reference index a0088610c9d..7ec52cb3366 
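In the 01148 hunk above, the expected code for an undefined macro moves from 62 to 36 (from memory, SYNTAX_ERROR and BAD_ARGUMENTS respectively; treat those names as an assumption). The harness matches the number inside the braces, so only the hint changes, not the query. The hint grammar, printed as plain text since it is interpreted by clickhouse-test rather than by the server:

```bash
# '{undefined_macro}' is a hypothetical macro name used only for illustration.
cat <<'SQL'
CREATE TABLE rmt (n UInt64) ENGINE = ReplicatedMergeTree('{undefined_macro}', 'r1') ORDER BY n; -- { serverError 36 }
SQL
```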
100644 --- a/tests/queries/0_stateless/01158_zookeeper_log_long.reference +++ b/tests/queries/0_stateless/01158_zookeeper_log_long.reference @@ -18,22 +18,18 @@ Response 0 Create /test/01158/default/rmt/replicas/1/parts/all_0_0_0 0 0 \N 0 4 Request 0 Exists /test/01158/default/rmt/replicas/1/parts/all_0_0_0 0 0 \N 0 0 \N \N \N 0 0 0 0 Response 0 Exists /test/01158/default/rmt/replicas/1/parts/all_0_0_0 0 0 \N 0 0 ZOK \N \N 0 0 96 0 blocks -Request 0 Multi 0 0 \N 3 0 \N \N \N 0 0 0 0 -Request 0 Create /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 \N 0 1 \N \N \N 0 0 0 0 -Request 0 Remove /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 -1 0 2 \N \N \N 0 0 0 0 -Request 0 Create /test/01158/default/rmt/block_numbers/all/block- 1 1 \N 0 3 \N \N \N 0 0 0 0 -Response 0 Multi 0 0 \N 3 0 ZOK \N \N 0 0 0 0 -Response 0 Create /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 \N 0 1 ZOK \N \N /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 0 0 -Response 0 Remove /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 -1 0 2 ZOK \N \N 0 0 0 0 -Response 0 Create /test/01158/default/rmt/block_numbers/all/block- 1 1 \N 0 3 ZOK \N \N /test/01158/default/rmt/block_numbers/all/block-0000000000 0 0 0 0 -Request 0 Multi 0 0 \N 3 0 \N \N \N 0 0 0 0 -Request 0 Create /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 \N 0 1 \N \N \N 0 0 0 0 -Request 0 Remove /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 -1 0 2 \N \N \N 0 0 0 0 -Request 0 Create /test/01158/default/rmt/block_numbers/all/block- 1 1 \N 0 3 \N \N \N 0 0 0 0 -Response 0 Multi 0 0 \N 3 0 ZNODEEXISTS \N \N 0 0 0 0 -Response 0 Error /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 \N 0 1 ZNODEEXISTS \N \N 0 0 0 0 -Response 0 Error /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 -1 0 2 ZRUNTIMEINCONSISTENCY \N \N 0 0 0 0 -Response 0 Error /test/01158/default/rmt/block_numbers/all/block- 1 1 \N 0 3 ZRUNTIMEINCONSISTENCY \N \N 0 0 0 0 +Request 0 Multi 0 0 \N 2 0 \N \N \N 0 0 0 0 +Request 0 CheckNotExists /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 -1 0 1 \N \N \N 0 0 0 0 +Request 0 Create /test/01158/default/rmt/block_numbers/all/block- 1 1 \N 0 2 \N \N \N 0 0 0 0 +Response 0 Multi 0 0 \N 2 0 ZOK \N \N 0 0 0 0 +Response 0 CheckNotExists /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 -1 0 1 ZOK \N \N 0 0 0 0 +Response 0 Create /test/01158/default/rmt/block_numbers/all/block- 1 1 \N 0 2 ZOK \N \N /test/01158/default/rmt/block_numbers/all/block-0000000000 0 0 0 0 +Request 0 Multi 0 0 \N 2 0 \N \N \N 0 0 0 0 +Request 0 CheckNotExists /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 -1 0 1 \N \N \N 0 0 0 0 +Request 0 Create /test/01158/default/rmt/block_numbers/all/block- 1 1 \N 0 2 \N \N \N 0 0 0 0 +Response 0 Multi 0 0 \N 2 0 ZNODEEXISTS \N \N 0 0 0 0 +Response 0 Error /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 -1 0 1 ZNODEEXISTS \N \N 0 0 0 0 +Response 0 Error /test/01158/default/rmt/block_numbers/all/block- 1 1 \N 0 2 ZRUNTIMEINCONSISTENCY \N \N 0 0 0 0 Request 0 Get /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 \N 0 0 \N \N \N 0 0 0 0 Response 0 Get /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 \N 
0 0 ZOK \N \N 0 0 9 0 duration_ms diff --git a/tests/queries/0_stateless/01160_table_dependencies.reference b/tests/queries/0_stateless/01160_table_dependencies.reference index ead8377abc5..9fcd9bc504c 100644 --- a/tests/queries/0_stateless/01160_table_dependencies.reference +++ b/tests/queries/0_stateless/01160_table_dependencies.reference @@ -30,6 +30,14 @@ mv [] [] [] s [] ['join'] ['t'] t ['mv'] ['dict1','join','s'] [] OK +UNKNOWN_DATABASE +dict1 [] ['dict_src'] ['join','t'] +dict2 [] ['join'] [] +dict_src [] [] ['dict1'] +join [] ['dict1'] ['dict2','s','t'] +mv [] [] [] +s [] ['join'] ['t'] +t ['mv'] ['dict1','join','s'] [] dict1 [] ['dict_src'] ['join','t'] dict2 [] ['join'] [] dict_src [] [] ['dict1'] diff --git a/tests/queries/0_stateless/01160_table_dependencies.sh b/tests/queries/0_stateless/01160_table_dependencies.sh index a0a3f05c6a9..acb6522e9e2 100755 --- a/tests/queries/0_stateless/01160_table_dependencies.sh +++ b/tests/queries/0_stateless/01160_table_dependencies.sh @@ -64,6 +64,10 @@ else echo "OK" fi +$CLICKHOUSE_CLIENT -q "rename table t to ${CLICKHOUSE_DATABASE}_2.t" |& grep -m1 -F -o UNKNOWN_DATABASE +$CLICKHOUSE_CLIENT -q "select table, arraySort(dependencies_table), +arraySort(loading_dependencies_table), arraySort(loading_dependent_table) from system.tables where database in (currentDatabase(), '$t_database') order by table" + $CLICKHOUSE_CLIENT -q "drop table mv" $CLICKHOUSE_CLIENT -q "create database ${CLICKHOUSE_DATABASE}_1" diff --git a/tests/queries/0_stateless/01164_detach_attach_partition_race.sh b/tests/queries/0_stateless/01164_detach_attach_partition_race.sh index 086f409c9a3..7640b9dddf2 100755 --- a/tests/queries/0_stateless/01164_detach_attach_partition_race.sh +++ b/tests/queries/0_stateless/01164_detach_attach_partition_race.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
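The 01160_table_dependencies hunk above adds a dependency dump after a failed cross-database RENAME. The added query stands on its own as a way to inspect table dependencies; a sketch restricted to the current database:

```bash
${CLICKHOUSE_CLIENT} --query "
    SELECT table,
           arraySort(dependencies_table),
           arraySort(loading_dependencies_table),
           arraySort(loading_dependent_table)
    FROM system.tables
    WHERE database = currentDatabase()
    ORDER BY table"
```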
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -q "create table mt (n int) engine=MergeTree order by n" +$CLICKHOUSE_CLIENT -q "create table mt (n int) engine=MergeTree order by n settings parts_to_throw_insert=1000" $CLICKHOUSE_CLIENT -q "insert into mt values (1)" $CLICKHOUSE_CLIENT -q "insert into mt values (2)" $CLICKHOUSE_CLIENT -q "insert into mt values (3)" diff --git a/tests/queries/0_stateless/01165_lost_part_empty_partition.reference b/tests/queries/0_stateless/01165_lost_part_empty_partition.reference index e69de29bb2d..6ed281c757a 100644 --- a/tests/queries/0_stateless/01165_lost_part_empty_partition.reference +++ b/tests/queries/0_stateless/01165_lost_part_empty_partition.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/01165_lost_part_empty_partition.sql b/tests/queries/0_stateless/01165_lost_part_empty_partition.sql index 9279de1a5b3..924798b0050 100644 --- a/tests/queries/0_stateless/01165_lost_part_empty_partition.sql +++ b/tests/queries/0_stateless/01165_lost_part_empty_partition.sql @@ -5,11 +5,12 @@ create table rmt2 (d DateTime, n int) engine=ReplicatedMergeTree('/test/01165/{d system stop replicated sends rmt1; insert into rmt1 values (now(), arrayJoin([1, 2])); -- { clientError 36 } -insert into rmt1(n) select * from system.numbers limit arrayJoin([1, 2]); -- { serverError 36 } +insert into rmt1(n) select * from system.numbers limit arrayJoin([1, 2]); -- { serverError 36, 440 } insert into rmt1 values (now(), rand()); drop table rmt1; system sync replica rmt2; +select lost_part_count from system.replicas where database = currentDatabase() and table = 'rmt2'; drop table rmt2; @@ -21,6 +22,7 @@ insert into rmt1 values (now(), rand()); drop table rmt1; system sync replica rmt2; +select lost_part_count from system.replicas where database = currentDatabase() and table = 'rmt2'; drop table rmt2; diff --git a/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh b/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh index a385fc81fe4..508ad05224c 100755 --- a/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh +++ b/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh @@ -2,6 +2,7 @@ # Tags: long, no-replicated-database, no-ordinary-database # shellcheck disable=SC2015 +# shellcheck disable=SC2119 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh index 12b654f4215..199c2b5389f 100755 --- a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh +++ b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash # Tags: long, no-parallel, no-ordinary-database # Test is too heavy, avoid parallel run in Flaky Check +# shellcheck disable=SC2119 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01174_select_insert_isolation.sh b/tests/queries/0_stateless/01174_select_insert_isolation.sh index dc5c1d7a722..29ccfbb1ccb 100755 --- a/tests/queries/0_stateless/01174_select_insert_isolation.sh +++ b/tests/queries/0_stateless/01174_select_insert_isolation.sh @@ -2,6 +2,7 @@ # Tags: long, no-ordinary-database # shellcheck disable=SC2015 +# shellcheck disable=SC2119 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git 
a/tests/queries/0_stateless/01181_db_atomic_drop_on_cluster.sql b/tests/queries/0_stateless/01181_db_atomic_drop_on_cluster.sql index fbb67a268ae..6edaaa5c602 100644 --- a/tests/queries/0_stateless/01181_db_atomic_drop_on_cluster.sql +++ b/tests/queries/0_stateless/01181_db_atomic_drop_on_cluster.sql @@ -1,8 +1,8 @@ -- Tags: no-replicated-database -- Tag no-replicated-database: ON CLUSTER is not allowed -DROP TABLE IF EXISTS test_repl ON CLUSTER test_shard_localhost SYNC; +DROP TABLE IF EXISTS test_repl ON CLUSTER test_shard_localhost NO DELAY; CREATE TABLE test_repl ON CLUSTER test_shard_localhost (n UInt64) ENGINE ReplicatedMergeTree('/clickhouse/test_01181/{database}/test_repl','r1') ORDER BY tuple(); -DETACH TABLE test_repl ON CLUSTER test_shard_localhost SYNC; +DETACH TABLE test_repl ON CLUSTER test_shard_localhost NO DELAY; ATTACH TABLE test_repl ON CLUSTER test_shard_localhost; -DROP TABLE test_repl ON CLUSTER test_shard_localhost SYNC; +DROP TABLE test_repl ON CLUSTER test_shard_localhost NO DELAY; diff --git a/tests/queries/0_stateless/01190_full_attach_syntax.reference b/tests/queries/0_stateless/01190_full_attach_syntax.reference deleted file mode 100644 index 61a0603975a..00000000000 --- a/tests/queries/0_stateless/01190_full_attach_syntax.reference +++ /dev/null @@ -1,13 +0,0 @@ -CREATE DICTIONARY test_01190.dict\n(\n `key` UInt64 DEFAULT 0,\n `col` UInt8 DEFAULT 1\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'[HIDDEN]\' DB \'test_01190\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) -CREATE DICTIONARY test_01190.dict\n(\n `key` UInt64 DEFAULT 0,\n `col` UInt8 DEFAULT 1\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'[HIDDEN]\' DB \'test_01190\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) -CREATE TABLE test_01190.log\n(\n `s` String\n)\nENGINE = Log -CREATE TABLE test_01190.log\n(\n `s` String\n)\nENGINE = Log -test -CREATE TABLE test_01190.mt\n(\n `key` Array(UInt8),\n `s` String,\n `n` UInt64,\n `d` Date MATERIALIZED \'2000-01-01\'\n)\nENGINE = MergeTree(d, (key, s, n), 1) -[1,2] Hello 2 -CREATE TABLE test_01190.mt\n(\n `key` Array(UInt8),\n `s` String,\n `n` UInt64,\n `d` Date\n)\nENGINE = MergeTree(d, (key, s, n), 1) -CREATE MATERIALIZED VIEW test_01190.mv\n(\n `s` String\n)\nENGINE = Null AS\nSELECT *\nFROM test_01190.log -CREATE MATERIALIZED VIEW test_01190.mv\n(\n `s` String\n)\nENGINE = Null AS\nSELECT *\nFROM test_01190.log -CREATE MATERIALIZED VIEW test_01190.mv\n(\n `key` Array(UInt8),\n `s` String,\n `n` UInt64,\n `d` Date\n)\nENGINE = Null AS\nSELECT *\nFROM test_01190.mt -CREATE LIVE VIEW test_01190.lv\n(\n `1` UInt8\n) AS\nSELECT 1 -CREATE LIVE VIEW test_01190.lv\n(\n `1` UInt8\n) AS\nSELECT 1 diff --git a/tests/queries/0_stateless/01190_full_attach_syntax.sql b/tests/queries/0_stateless/01190_full_attach_syntax.sql deleted file mode 100644 index e66978e22e1..00000000000 --- a/tests/queries/0_stateless/01190_full_attach_syntax.sql +++ /dev/null @@ -1,60 +0,0 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS test_01190; -set allow_deprecated_database_ordinary=1; -set allow_deprecated_syntax_for_merge_tree=1; -CREATE DATABASE test_01190 ENGINE=Ordinary; -- Full ATTACH requires UUID with Atomic -USE test_01190; - -CREATE TABLE test_01190.table_for_dict (key UInt64, col UInt8) ENGINE = Memory; - -CREATE DICTIONARY test_01190.dict (key UInt64 DEFAULT 0, col UInt8 DEFAULT 1) PRIMARY KEY key SOURCE(CLICKHOUSE(HOST 'localhost' 
PORT tcpPort() USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'test_01190')) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT()); - -SHOW CREATE DICTIONARY test_01190.dict; - -DETACH DICTIONARY test_01190.dict; -ATTACH TABLE test_01190.dict; -- { serverError 80 } --- Full ATTACH syntax is not allowed for dictionaries -ATTACH DICTIONARY test_01190.dict (key UInt64 DEFAULT 0, col UInt8 DEFAULT 42) PRIMARY KEY key SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'test_01190')) LIFETIME(MIN 1 MAX 100) LAYOUT(FLAT()); -- { clientError 62 } -ATTACH DICTIONARY test_01190.dict; -SHOW CREATE DICTIONARY test_01190.dict; - -CREATE TABLE log ENGINE = Log AS SELECT 'test' AS s; -SHOW CREATE log; -DETACH TABLE log; -ATTACH DICTIONARY log; -- { serverError 80 } -ATTACH TABLE log (s String) ENGINE = Log(); -SHOW CREATE log; -SELECT * FROM log; - -DROP TABLE IF EXISTS mt; -CREATE TABLE mt (key Array(UInt8), s String, n UInt64, d Date MATERIALIZED '2000-01-01') ENGINE = MergeTree(d, (key, s, n), 1); -INSERT INTO mt VALUES ([1, 2], 'Hello', 2); -DETACH TABLE mt; -ATTACH TABLE mt (key Array(UInt8), s String, n UInt64, d Date MATERIALIZED '2000-01-01') ENGINE = MergeTree ORDER BY (key, s, n) PARTITION BY toYYYYMM(d); -- { serverError 342 } -ATTACH TABLE mt (key Array(UInt8), s String, n UInt64, d Date MATERIALIZED '2000-01-01') ENGINE = MergeTree(d, (key, s, n), 1); -SHOW CREATE mt; -SELECT * FROM mt; -DETACH TABLE mt; -ATTACH TABLE mt (key Array(UInt8), s String, n UInt64, d Date) ENGINE = MergeTree(d, (key, s, n), 1); -- It works (with Ordinary database), but probably it shouldn't -SHOW CREATE mt; - -CREATE MATERIALIZED VIEW mv ENGINE = Null AS SELECT * FROM log; -SHOW CREATE mv; -DETACH VIEW mv; -ATTACH MATERIALIZED VIEW mv ENGINE = Null AS SELECT * FROM log; -SHOW CREATE mv; -DETACH VIEW mv; -ATTACH MATERIALIZED VIEW mv ENGINE = Null AS SELECT * FROM mt; -- It works (with Ordinary database), but probably it shouldn't -SHOW CREATE mv; - -SET allow_experimental_live_view = 1; -CREATE LIVE VIEW lv AS SELECT 1; -SHOW CREATE lv; -DETACH VIEW lv; -ATTACH LIVE VIEW lv AS SELECT 1; -SHOW CREATE lv; - -DROP DATABASE test_01190; - - diff --git a/tests/queries/0_stateless/01193_metadata_loading.sh b/tests/queries/0_stateless/01193_metadata_loading.sh index 1604de6004a..d0f3001fceb 100755 --- a/tests/queries/0_stateless/01193_metadata_loading.sh +++ b/tests/queries/0_stateless/01193_metadata_loading.sh @@ -13,6 +13,11 @@ threads=10 count_multiplier=1 max_time_ms=1000 +# In case of s390x, the query execution time seems to be approximately ~1.1 to ~1.2 secs. 
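The 01193_metadata_loading change beginning above bumps max_time_ms on s390x; the probe itself continues just below. A sketch of an equivalent check: uname -m prints the machine type directly, which is a slightly tighter match than the test's grep over uname -a, though both work.

```bash
max_time_ms=1000
# Raise the budget on s390x, where the measured query time is a bit higher.
if [[ $(uname -m) == s390x ]]; then
    max_time_ms=1500
fi
echo "time budget: ${max_time_ms} ms"
```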
So, to match the query execution time, set max_time_ms=1500 +if [[ $(uname -a | grep s390x) ]]; then + max_time_ms=1500 +fi + debug_or_sanitizer_build=$($CLICKHOUSE_CLIENT -q "WITH ((SELECT value FROM system.build_options WHERE name='BUILD_TYPE') AS build, (SELECT value FROM system.build_options WHERE name='CXX_FLAGS') as flags) SELECT build='Debug' OR flags LIKE '%fsanitize%' OR hasThreadFuzzer()") if [[ debug_or_sanitizer_build -eq 1 ]]; then tables=100; count_multiplier=10; max_time_ms=1500; fi diff --git a/tests/queries/0_stateless/01195_formats_diagnostic_info.sh b/tests/queries/0_stateless/01195_formats_diagnostic_info.sh index e75780a4520..b146d65fc58 100755 --- a/tests/queries/0_stateless/01195_formats_diagnostic_info.sh +++ b/tests/queries/0_stateless/01195_formats_diagnostic_info.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-fasttest # shellcheck disable=SC2206 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/01201_read_single_thread_in_order.sql b/tests/queries/0_stateless/01201_read_single_thread_in_order.sql index 33ccbbbe84d..24ed935a125 100644 --- a/tests/queries/0_stateless/01201_read_single_thread_in_order.sql +++ b/tests/queries/0_stateless/01201_read_single_thread_in_order.sql @@ -6,7 +6,7 @@ CREATE TABLE t ) ENGINE = MergeTree ORDER BY number -SETTINGS index_granularity = 128, index_granularity_bytes = '10Mi'; +SETTINGS index_granularity = 128, ratio_of_defaults_for_sparse_serialization = 1.0, index_granularity_bytes = '10Mi'; SET min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0; INSERT INTO t SELECT number FROM numbers(10000000); diff --git a/tests/queries/0_stateless/01211_optimize_skip_unused_shards_type_mismatch.sql b/tests/queries/0_stateless/01211_optimize_skip_unused_shards_type_mismatch.sql index 65adaf3ad71..de41132df62 100644 --- a/tests/queries/0_stateless/01211_optimize_skip_unused_shards_type_mismatch.sql +++ b/tests/queries/0_stateless/01211_optimize_skip_unused_shards_type_mismatch.sql @@ -9,7 +9,7 @@ create table data_02000 (key Int) Engine=Null(); create table dist_02000 as data_02000 Engine=Distributed(test_cluster_two_shards, currentDatabase(), data_02000, key); select * from data_02000 where key = 0xdeadbeafdeadbeaf; -select * from dist_02000 where key = 0xdeadbeafdeadbeaf settings force_optimize_skip_unused_shards=2; -- { serverError 507; } +select * from dist_02000 where key = 0xdeadbeafdeadbeaf settings force_optimize_skip_unused_shards=2; -- { serverError 507 } select * from dist_02000 where key = 0xdeadbeafdeadbeaf; drop table data_02000; diff --git a/tests/queries/0_stateless/01223_dist_on_dist.sql b/tests/queries/0_stateless/01223_dist_on_dist.sql index 65a240fd48b..4cd8ffd7a03 100644 --- a/tests/queries/0_stateless/01223_dist_on_dist.sql +++ b/tests/queries/0_stateless/01223_dist_on_dist.sql @@ -33,9 +33,9 @@ select 'cluster() GROUP BY ORDER BY'; select * from cluster(test_cluster_two_shards, currentDatabase(), dist_01223) group by key order by key; select 'LEFT JOIN'; -select toInt32(number) key, b.key from numbers(2) a left join (select distinct * from dist_01223) b using key order by b.key; +select a.key, b.key from (SELECT toInt32(number) key from numbers(2)) a left join (select distinct * from dist_01223) b using key order by b.key; select 'RIGHT JOIN'; -select toInt32(number) key, b.key from numbers(2) a right join (select distinct * from dist_01223) b using key order by b.key; +select a.key, b.key from (SELECT toInt32(number) key from numbers(2)) a right join (select 
distinct * from dist_01223) b using key order by b.key; -- more data for GROUP BY insert into data_01223 select number%3 from numbers(30); diff --git a/tests/queries/0_stateless/01225_drop_dictionary_as_table.sql b/tests/queries/0_stateless/01225_drop_dictionary_as_table.sql index 513ecbd4ed4..be2f7b2a9bf 100644 --- a/tests/queries/0_stateless/01225_drop_dictionary_as_table.sql +++ b/tests/queries/0_stateless/01225_drop_dictionary_as_table.sql @@ -16,7 +16,7 @@ LAYOUT(FLAT()); SYSTEM RELOAD DICTIONARY dict_db_01225.dict; -DROP TABLE dict_db_01225.dict; -- { serverError 520; } +DROP TABLE dict_db_01225.dict; -- { serverError 520 } DROP DICTIONARY dict_db_01225.dict; DROP DATABASE dict_db_01225; diff --git a/tests/queries/0_stateless/01225_show_create_table_from_dictionary.sql b/tests/queries/0_stateless/01225_show_create_table_from_dictionary.sql index 09cde642ed2..bc733a0c546 100644 --- a/tests/queries/0_stateless/01225_show_create_table_from_dictionary.sql +++ b/tests/queries/0_stateless/01225_show_create_table_from_dictionary.sql @@ -18,7 +18,7 @@ LIFETIME(MIN 0 MAX 0) LAYOUT(FLAT()); SHOW CREATE TABLE dict_db_01225_dictionary.`dict_db_01225.dict` FORMAT TSVRaw; -SHOW CREATE TABLE dict_db_01225_dictionary.`dict_db_01225.no_such_dict`; -- { serverError 487; } +SHOW CREATE TABLE dict_db_01225_dictionary.`dict_db_01225.no_such_dict`; -- { serverError 487 } DROP DATABASE dict_db_01225; DROP DATABASE dict_db_01225_dictionary; diff --git a/tests/queries/0_stateless/01231_log_queries_min_type.sql b/tests/queries/0_stateless/01231_log_queries_min_type.sql index c2470bb9a56..0ed5e3e605c 100644 --- a/tests/queries/0_stateless/01231_log_queries_min_type.sql +++ b/tests/queries/0_stateless/01231_log_queries_min_type.sql @@ -15,7 +15,7 @@ select count() from system.query_log where current_database = currentDatabase() set max_rows_to_read='100K'; set log_queries_min_type='EXCEPTION_WHILE_PROCESSING'; -select '01231_log_queries_min_type/EXCEPTION_WHILE_PROCESSING', max(number) from system.numbers limit 1e6; -- { serverError 158; } +select '01231_log_queries_min_type/EXCEPTION_WHILE_PROCESSING', max(number) from system.numbers limit 1e6; -- { serverError 158 } set max_rows_to_read=0; system flush logs; select count() from system.query_log where current_database = currentDatabase() @@ -23,7 +23,7 @@ select count() from system.query_log where current_database = currentDatabase() and event_date >= yesterday() and type = 'ExceptionWhileProcessing'; set max_rows_to_read='100K'; -select '01231_log_queries_min_type w/ Settings/EXCEPTION_WHILE_PROCESSING', max(number) from system.numbers limit 1e6; -- { serverError 158; } +select '01231_log_queries_min_type w/ Settings/EXCEPTION_WHILE_PROCESSING', max(number) from system.numbers limit 1e6; -- { serverError 158 } system flush logs; set max_rows_to_read=0; select count() from system.query_log where diff --git a/tests/queries/0_stateless/01232_untuple.reference b/tests/queries/0_stateless/01232_untuple.reference index 8e1f97d2585..7bb80bf618f 100644 --- a/tests/queries/0_stateless/01232_untuple.reference +++ b/tests/queries/0_stateless/01232_untuple.reference @@ -2,7 +2,7 @@ hello 1 3 world 9 9 (0,1) -key tupleElement(argMax(tuple(v1, v2, v3, v4, v5), v1), 1) tupleElement(argMax(tuple(v1, v2, v3, v4, v5), v1), 2) tupleElement(argMax(tuple(v1, v2, v3, v4, v5), v1), 3) tupleElement(argMax(tuple(v1, v2, v3, v4, v5), v1), 4) tupleElement(argMax(tuple(v1, v2, v3, v4, v5), v1), 5) +key tupleElement(argMax(tuple(v1, v2, v3, v4, v5), v1), \'1\') 
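Several hunks above (01211, 01225, 01231) drop a stray semicolon from inside the error hint: `{ serverError 507; }` becomes `{ serverError 507 }`. The statement's own semicolon sits before the comment; inside the braces only the code list belongs, as in the `{ serverError 36, 440 }` form seen earlier. Printed as plain text, since the hint is consumed by the test harness:

```bash
# 60 should be UNKNOWN_TABLE (name given from memory; the number is what the
# harness actually compares).
cat <<'SQL'
SELECT * FROM no_such_table; -- { serverError 60 }
SQL
```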
tupleElement(argMax(tuple(v1, v2, v3, v4, v5), v1), \'2\') tupleElement(argMax(tuple(v1, v2, v3, v4, v5), v1), \'3\') tupleElement(argMax(tuple(v1, v2, v3, v4, v5), v1), \'4\') tupleElement(argMax(tuple(v1, v2, v3, v4, v5), v1), \'5\') 1 20 20 10 20 30 2 11 20 10 20 30 3 70 20 10 20 30 diff --git a/tests/queries/0_stateless/01232_untuple.sql b/tests/queries/0_stateless/01232_untuple.sql index 92150e92b29..ccefd13a772 100644 --- a/tests/queries/0_stateless/01232_untuple.sql +++ b/tests/queries/0_stateless/01232_untuple.sql @@ -1,3 +1,5 @@ +SET allow_experimental_analyzer = 1; + select untuple((* except (b),)) from (select 1 a, 2 b, 3 c); select 'hello', untuple((* except (b),)), 'world' from (select 1 a, 2 b, 3 c); select argMax(untuple(x)) from (select (number, number + 1) as x from numbers(10)); diff --git a/tests/queries/0_stateless/01235_live_view_over_distributed.reference b/tests/queries/0_stateless/01235_live_view_over_distributed.reference deleted file mode 100644 index 00fc99d96ba..00000000000 --- a/tests/queries/0_stateless/01235_live_view_over_distributed.reference +++ /dev/null @@ -1,4 +0,0 @@ -2020-01-01 -2020-01-01 -2020-01-02 -2020-01-02 diff --git a/tests/queries/0_stateless/01235_live_view_over_distributed.sql b/tests/queries/0_stateless/01235_live_view_over_distributed.sql deleted file mode 100644 index 32ec0a6127f..00000000000 --- a/tests/queries/0_stateless/01235_live_view_over_distributed.sql +++ /dev/null @@ -1,23 +0,0 @@ --- Tags: distributed, no-replicated-database, no-parallel, no-fasttest - -set insert_distributed_sync = 1; -SET allow_experimental_live_view = 1; - -DROP TABLE IF EXISTS lv; -DROP TABLE IF EXISTS visits; -DROP TABLE IF EXISTS visits_layer; - -CREATE TABLE visits(StartDate Date) ENGINE MergeTree ORDER BY(StartDate); -CREATE TABLE visits_layer(StartDate Date) ENGINE Distributed(test_cluster_two_shards_localhost, currentDatabase(), 'visits', rand()); - -CREATE LIVE VIEW lv AS SELECT * FROM visits_layer ORDER BY StartDate; - -INSERT INTO visits_layer (StartDate) VALUES ('2020-01-01'); -INSERT INTO visits_layer (StartDate) VALUES ('2020-01-02'); - -SELECT * FROM lv; - -DROP TABLE visits; -DROP TABLE visits_layer; - -DROP TABLE lv; diff --git a/tests/queries/0_stateless/01236_distributed_over_live_view_over_distributed.reference b/tests/queries/0_stateless/01236_distributed_over_live_view_over_distributed.reference deleted file mode 100644 index dfb4d0552f5..00000000000 --- a/tests/queries/0_stateless/01236_distributed_over_live_view_over_distributed.reference +++ /dev/null @@ -1,8 +0,0 @@ -2020-01-01 -2020-01-01 -2020-01-02 -2020-01-02 -2020-01-01 -2020-01-01 -2020-01-02 -2020-01-02 diff --git a/tests/queries/0_stateless/01236_distributed_over_live_view_over_distributed.sql b/tests/queries/0_stateless/01236_distributed_over_live_view_over_distributed.sql deleted file mode 100644 index 3822f22d353..00000000000 --- a/tests/queries/0_stateless/01236_distributed_over_live_view_over_distributed.sql +++ /dev/null @@ -1,25 +0,0 @@ --- Tags: distributed, no-replicated-database, no-parallel, no-fasttest, no-random-settings - -SET allow_experimental_live_view = 1; - -DROP TABLE IF EXISTS lv; -DROP TABLE IF EXISTS visits; -DROP TABLE IF EXISTS visits_layer; - -CREATE TABLE visits (StartDate Date) ENGINE MergeTree ORDER BY(StartDate); -CREATE TABLE visits_layer (StartDate Date) ENGINE Distributed(test_cluster_two_shards_localhost, currentDatabase(), 'visits', rand()); - -CREATE LIVE VIEW lv AS SELECT * FROM visits_layer ORDER BY StartDate; - -CREATE TABLE 
visits_layer_lv (StartDate Date) ENGINE Distributed(test_cluster_two_shards_localhost, currentDatabase(), 'lv', rand()); - -INSERT INTO visits_layer (StartDate) VALUES ('2020-01-01'); -INSERT INTO visits_layer (StartDate) VALUES ('2020-01-02'); - -SELECT * FROM visits_layer_lv; - -DROP TABLE visits; -DROP TABLE visits_layer; - -DROP TABLE lv; -DROP TABLE visits_layer_lv; diff --git a/tests/queries/0_stateless/01237_live_view_over_distributed_with_subquery_select_table_alias.reference b/tests/queries/0_stateless/01237_live_view_over_distributed_with_subquery_select_table_alias.reference deleted file mode 100644 index 00fc99d96ba..00000000000 --- a/tests/queries/0_stateless/01237_live_view_over_distributed_with_subquery_select_table_alias.reference +++ /dev/null @@ -1,4 +0,0 @@ -2020-01-01 -2020-01-01 -2020-01-02 -2020-01-02 diff --git a/tests/queries/0_stateless/01237_live_view_over_distributed_with_subquery_select_table_alias.sql b/tests/queries/0_stateless/01237_live_view_over_distributed_with_subquery_select_table_alias.sql deleted file mode 100644 index b7c1c6eea76..00000000000 --- a/tests/queries/0_stateless/01237_live_view_over_distributed_with_subquery_select_table_alias.sql +++ /dev/null @@ -1,23 +0,0 @@ --- Tags: distributed, no-replicated-database, no-parallel, no-fasttest - -SET allow_experimental_live_view = 1; -SET insert_distributed_sync = 1; - -DROP TABLE IF EXISTS lv; -DROP TABLE IF EXISTS visits; -DROP TABLE IF EXISTS visits_layer; - -CREATE TABLE visits(StartDate Date) ENGINE MergeTree ORDER BY(StartDate); -CREATE TABLE visits_layer(StartDate Date) ENGINE Distributed(test_cluster_two_shards_localhost, currentDatabase(), 'visits', rand()); - -CREATE LIVE VIEW lv AS SELECT foo.x FROM (SELECT StartDate AS x FROM visits_layer) AS foo ORDER BY foo.x; - -INSERT INTO visits_layer (StartDate) VALUES ('2020-01-01'); -INSERT INTO visits_layer (StartDate) VALUES ('2020-01-02'); - -SELECT * FROM lv; - -DROP TABLE visits; -DROP TABLE visits_layer; - -DROP TABLE lv; diff --git a/tests/queries/0_stateless/01246_buffer_flush.sql b/tests/queries/0_stateless/01246_buffer_flush.sql index 47891a7f00e..ac507d94b69 100644 --- a/tests/queries/0_stateless/01246_buffer_flush.sql +++ b/tests/queries/0_stateless/01246_buffer_flush.sql @@ -1,3 +1,5 @@ +-- Tags: no-fasttest + drop table if exists data_01256; drop table if exists buffer_01256; diff --git a/tests/queries/0_stateless/01246_insert_into_watch_live_view.py b/tests/queries/0_stateless/01246_insert_into_watch_live_view.py deleted file mode 100755 index 02d03abc8c3..00000000000 --- a/tests/queries/0_stateless/01246_insert_into_watch_live_view.py +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env python3 -# Tags: no-replicated-database, no-parallel, no-fasttest - -import os -import sys -import time -import signal - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, "helpers")) - -from client import client, prompt, end_of_block - -log = None -# uncomment the line below for debugging -# log=sys.stdout - -with client(name="client1>", log=log) as client1, client( - name="client2>", log=log -) as client2, client(name="client3>", log=log) as client3: - client1.expect(prompt) - client2.expect(prompt) - client3.expect(prompt) - - client1.send("SET allow_experimental_live_view = 1") - client1.expect(prompt) - client3.send("SET allow_experimental_live_view = 1") - client3.expect(prompt) - - client1.send("DROP TABLE IF EXISTS test.lv") - client1.expect(prompt) - client1.send("DROP TABLE IF EXISTS test.lv_sums") - 
client1.expect(prompt) - client1.send("DROP TABLE IF EXISTS test.mt") - client1.expect(prompt) - client1.send("DROP TABLE IF EXISTS test.sums") - client1.expect(prompt) - client1.send("CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()") - client1.expect(prompt) - client1.send("CREATE LIVE VIEW test.lv AS SELECT sum(a) AS s FROM test.mt") - client1.expect(prompt) - client1.send( - "CREATE TABLE test.sums (s Int32, version Int32) Engine=MergeTree ORDER BY tuple()" - ) - client1.expect(prompt) - client3.send( - "CREATE LIVE VIEW test.lv_sums AS SELECT * FROM test.sums ORDER BY version" - ) - client3.expect(prompt) - - client3.send("WATCH test.lv_sums FORMAT CSVWithNames") - - client1.send("INSERT INTO test.sums WATCH test.lv") - client1.expect(r"INSERT INTO") - - client3.expect("0,1.*\n") - - client2.send("INSERT INTO test.mt VALUES (1),(2),(3)") - client2.expect(prompt) - client3.expect("6,2.*\n") - - client2.send("INSERT INTO test.mt VALUES (4),(5),(6)") - client2.expect(prompt) - client3.expect("21,3.*\n") - - # send Ctrl-C - client3.send("\x03", eol="") - match = client3.expect("(%s)|([#\$] )" % prompt) - if match.groups()[1]: - client3.send(client3.command) - client3.expect(prompt) - - # send Ctrl-C - client1.send("\x03", eol="") - match = client1.expect("(%s)|([#\$] )" % prompt) - if match.groups()[1]: - client1.send(client1.command) - client1.expect(prompt) - - client2.send("DROP TABLE test.lv") - client2.expect(prompt) - client2.send("DROP TABLE test.lv_sums") - client2.expect(prompt) - client2.send("DROP TABLE test.sums") - client2.expect(prompt) - client2.send("DROP TABLE test.mt") - client2.expect(prompt) diff --git a/tests/queries/0_stateless/01268_dictionary_direct_layout.sql b/tests/queries/0_stateless/01268_dictionary_direct_layout.sql index 914d24a740a..45b5c580561 100644 --- a/tests/queries/0_stateless/01268_dictionary_direct_layout.sql +++ b/tests/queries/0_stateless/01268_dictionary_direct_layout.sql @@ -75,7 +75,7 @@ CREATE DICTIONARY db_01268.dict2 ) PRIMARY KEY region_id SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict2' PASSWORD '' DB 'database_for_dict_01268')) -LAYOUT(DIRECT()); +LAYOUT(DIRECT()) SETTINGS(dictionary_use_async_executor=1, max_threads=8); CREATE DICTIONARY db_01268.dict3 ( diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index c061eb95a65..ec245d8b9e0 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -39,7 +39,7 @@ ALTER MOVE PARTITION ['ALTER MOVE PART','MOVE PARTITION','MOVE PART'] TABLE ALTE ALTER FETCH PARTITION ['ALTER FETCH PART','FETCH PARTITION'] TABLE ALTER TABLE ALTER FREEZE PARTITION ['FREEZE PARTITION','UNFREEZE'] TABLE ALTER TABLE ALTER DATABASE SETTINGS ['ALTER DATABASE SETTING','ALTER MODIFY DATABASE SETTING','MODIFY DATABASE SETTING'] DATABASE ALTER DATABASE -ALTER NAMED COLLECTION [] \N ALTER +ALTER NAMED COLLECTION [] NAMED_COLLECTION NAMED COLLECTION CONTROL ALTER TABLE [] \N ALTER ALTER DATABASE [] \N ALTER ALTER VIEW REFRESH ['ALTER LIVE VIEW REFRESH','REFRESH VIEW'] VIEW ALTER VIEW @@ -50,17 +50,19 @@ CREATE DATABASE [] DATABASE CREATE CREATE TABLE [] TABLE CREATE CREATE VIEW [] VIEW CREATE CREATE DICTIONARY [] DICTIONARY CREATE -CREATE TEMPORARY TABLE [] GLOBAL CREATE +CREATE TEMPORARY TABLE [] GLOBAL CREATE ARBITRARY TEMPORARY TABLE +CREATE ARBITRARY TEMPORARY TABLE [] GLOBAL CREATE CREATE FUNCTION [] GLOBAL 
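The 01268 hunk above attaches execution settings directly to a dictionary definition. A minimal sketch, assuming a source table table_for_dict2 exists in the current database; both settings are the ones added in the test:

```bash
${CLICKHOUSE_CLIENT} --multiquery <<'SQL'
CREATE DICTIONARY dict2 (region_id UInt64, name String DEFAULT '')
PRIMARY KEY region_id
SOURCE(CLICKHOUSE(TABLE 'table_for_dict2'))
LAYOUT(DIRECT())  -- DIRECT queries the source on each lookup, so no LIFETIME
SETTINGS(dictionary_use_async_executor = 1, max_threads = 8);
SQL
```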
CREATE -CREATE NAMED COLLECTION [] GLOBAL CREATE +CREATE NAMED COLLECTION [] NAMED_COLLECTION NAMED COLLECTION CONTROL CREATE [] \N ALL DROP DATABASE [] DATABASE DROP DROP TABLE [] TABLE DROP DROP VIEW [] VIEW DROP DROP DICTIONARY [] DICTIONARY DROP DROP FUNCTION [] GLOBAL DROP -DROP NAMED COLLECTION [] GLOBAL DROP +DROP NAMED COLLECTION [] NAMED_COLLECTION NAMED COLLECTION CONTROL DROP [] \N ALL +UNDROP TABLE [] TABLE ALL TRUNCATE ['TRUNCATE TABLE'] TABLE ALL OPTIMIZE ['OPTIMIZE TABLE'] TABLE ALL BACKUP [] TABLE ALL @@ -89,9 +91,10 @@ SHOW ROW POLICIES ['SHOW POLICIES','SHOW CREATE ROW POLICY','SHOW CREATE POLICY' SHOW QUOTAS ['SHOW CREATE QUOTA'] GLOBAL SHOW ACCESS SHOW SETTINGS PROFILES ['SHOW PROFILES','SHOW CREATE SETTINGS PROFILE','SHOW CREATE PROFILE'] GLOBAL SHOW ACCESS SHOW ACCESS [] \N ACCESS MANAGEMENT -SHOW NAMED COLLECTIONS ['SHOW NAMED COLLECTIONS'] GLOBAL ACCESS MANAGEMENT -SHOW NAMED COLLECTIONS SECRETS ['SHOW NAMED COLLECTIONS SECRETS'] GLOBAL ACCESS MANAGEMENT ACCESS MANAGEMENT [] \N ALL +SHOW NAMED COLLECTIONS ['SHOW NAMED COLLECTIONS'] NAMED_COLLECTION NAMED COLLECTION CONTROL +SHOW NAMED COLLECTIONS SECRETS ['SHOW NAMED COLLECTIONS SECRETS'] NAMED_COLLECTION NAMED COLLECTION CONTROL +NAMED COLLECTION CONTROL [] NAMED_COLLECTION ALL SYSTEM SHUTDOWN ['SYSTEM KILL','SHUTDOWN'] GLOBAL SYSTEM SYSTEM DROP DNS CACHE ['SYSTEM DROP DNS','DROP DNS CACHE','DROP DNS'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP MARK CACHE ['SYSTEM DROP MARK','DROP MARK CACHE','DROP MARKS'] GLOBAL SYSTEM DROP CACHE @@ -133,8 +136,10 @@ SYSTEM FLUSH LOGS ['FLUSH LOGS'] GLOBAL SYSTEM FLUSH SYSTEM FLUSH [] \N SYSTEM SYSTEM THREAD FUZZER ['SYSTEM START THREAD FUZZER','SYSTEM STOP THREAD FUZZER','START THREAD FUZZER','STOP THREAD FUZZER'] GLOBAL SYSTEM SYSTEM UNFREEZE ['SYSTEM UNFREEZE'] GLOBAL SYSTEM +SYSTEM FAILPOINT ['SYSTEM ENABLE FAILPOINT','SYSTEM DISABLE FAILPOINT'] GLOBAL SYSTEM SYSTEM [] \N ALL dictGet ['dictHas','dictGetHierarchy','dictIsIn'] DICTIONARY ALL +displaySecretsInShowAndSelect [] GLOBAL ALL addressToLine [] GLOBAL INTROSPECTION addressToLineWithInlines [] GLOBAL INTROSPECTION addressToSymbol [] GLOBAL INTROSPECTION diff --git a/tests/queries/0_stateless/01278_alter_rename_combination.reference b/tests/queries/0_stateless/01278_alter_rename_combination.reference index cc912e9b265..e70c2d2e6f8 100644 --- a/tests/queries/0_stateless/01278_alter_rename_combination.reference +++ b/tests/queries/0_stateless/01278_alter_rename_combination.reference @@ -1,7 +1,7 @@ -CREATE TABLE default.rename_table\n(\n `key` Int32,\n `old_value1` Int32,\n `value1` Int32\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.rename_table\n(\n `key` Int32,\n `old_value1` Int32,\n `value1` Int32\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 key old_value1 value1 1 2 3 -CREATE TABLE default.rename_table\n(\n `k` Int32,\n `v1` Int32,\n `v2` Int32\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.rename_table\n(\n `k` Int32,\n `v1` Int32,\n `v2` Int32\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 k v1 v2 1 2 3 4 5 6 diff --git a/tests/queries/0_stateless/01278_alter_rename_combination.sql b/tests/queries/0_stateless/01278_alter_rename_combination.sql index fa73362622c..51322f5d86f 100644 --- a/tests/queries/0_stateless/01278_alter_rename_combination.sql +++ 
b/tests/queries/0_stateless/01278_alter_rename_combination.sql @@ -1,6 +1,6 @@ DROP TABLE IF EXISTS rename_table; -CREATE TABLE rename_table (key Int32, value1 Int32, value2 Int32) ENGINE = MergeTree ORDER BY tuple(); +CREATE TABLE rename_table (key Int32, value1 Int32, value2 Int32) ENGINE = MergeTree ORDER BY tuple() SETTINGS min_bytes_for_wide_part=0; INSERT INTO rename_table VALUES (1, 2, 3); diff --git a/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh b/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh index 0e258bbbb09..08cc97c84bf 100755 --- a/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh +++ b/tests/queries/0_stateless/01278_min_insert_block_size_rows_for_materialized_views.sh @@ -76,7 +76,7 @@ insert into data_01278 select reinterpretAsString(number), // s6 reinterpretAsString(number), // s7 reinterpretAsString(number) // s8 -from numbers(100000); -- { serverError 241; }" > /dev/null 2>&1 +from numbers(100000); -- { serverError 241 }" > /dev/null 2>&1 local ret_code=$? if [[ $ret_code -eq 0 ]]; then diff --git a/tests/queries/0_stateless/01278_random_string_utf8.reference b/tests/queries/0_stateless/01278_random_string_utf8.reference index 36ae0ace76a..45efb26db75 100644 --- a/tests/queries/0_stateless/01278_random_string_utf8.reference +++ b/tests/queries/0_stateless/01278_random_string_utf8.reference @@ -2,3 +2,4 @@ String 1 +99 diff --git a/tests/queries/0_stateless/01278_random_string_utf8.sql b/tests/queries/0_stateless/01278_random_string_utf8.sql index f2c0a6c41c7..76349d9d814 100644 --- a/tests/queries/0_stateless/01278_random_string_utf8.sql +++ b/tests/queries/0_stateless/01278_random_string_utf8.sql @@ -3,3 +3,4 @@ SELECT lengthUTF8(randomStringUTF8(100)); SELECT toTypeName(randomStringUTF8(10)); SELECT isValidUTF8(randomStringUTF8(100000)); SELECT randomStringUTF8(0); +SELECT lengthUTF8(lowerUTF8(randomStringUTF8(99))); -- bug #49672: msan assert diff --git a/tests/queries/0_stateless/01281_alter_rename_and_other_renames.reference b/tests/queries/0_stateless/01281_alter_rename_and_other_renames.reference index bf3358aea60..532b8ce8712 100644 --- a/tests/queries/0_stateless/01281_alter_rename_and_other_renames.reference +++ b/tests/queries/0_stateless/01281_alter_rename_and_other_renames.reference @@ -1,11 +1,11 @@ -CREATE TABLE default.rename_table_multiple\n(\n `key` Int32,\n `value1_string` String,\n `value2` Int32\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.rename_table_multiple\n(\n `key` Int32,\n `value1_string` String,\n `value2` Int32\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 key value1_string value2 1 2 3 -CREATE TABLE default.rename_table_multiple\n(\n `key` Int32,\n `value1_string` String,\n `value2_old` Int32,\n `value2` Int64 DEFAULT 7\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.rename_table_multiple\n(\n `key` Int32,\n `value1_string` String,\n `value2_old` Int32,\n `value2` Int64 DEFAULT 7\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 key value1_string value2_old value2 1 2 3 7 4 5 6 7 -CREATE TABLE default.rename_table_multiple\n(\n `key` Int32,\n `value1_string` String,\n `value2_old` Int64 DEFAULT 7\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE 
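The 01278/01281 hunks above add min_bytes_for_wide_part = 0 so the rename tests always run against Wide parts, with the SHOW CREATE references updated to match. A sketch of the effect:

```bash
${CLICKHOUSE_CLIENT} --multiquery <<'SQL'
CREATE TABLE rename_table (key Int32, value1 Int32)
    ENGINE = MergeTree ORDER BY tuple()
    SETTINGS min_bytes_for_wide_part = 0;  -- every part is written as Wide
INSERT INTO rename_table VALUES (1, 2);
SELECT part_type FROM system.parts
WHERE database = currentDatabase() AND table = 'rename_table' AND active;
DROP TABLE rename_table;
SQL
```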
default.rename_table_multiple\n(\n `key` Int32,\n `value1_string` String,\n `value2_old` Int64 DEFAULT 7\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 key value1_string value2_old 1 2 7 4 5 7 diff --git a/tests/queries/0_stateless/01281_alter_rename_and_other_renames.sql b/tests/queries/0_stateless/01281_alter_rename_and_other_renames.sql index f9462f0478e..b0ccd7751ab 100644 --- a/tests/queries/0_stateless/01281_alter_rename_and_other_renames.sql +++ b/tests/queries/0_stateless/01281_alter_rename_and_other_renames.sql @@ -1,6 +1,6 @@ DROP TABLE IF EXISTS rename_table_multiple; -CREATE TABLE rename_table_multiple (key Int32, value1 String, value2 Int32) ENGINE = MergeTree ORDER BY tuple(); +CREATE TABLE rename_table_multiple (key Int32, value1 String, value2 Int32) ENGINE = MergeTree ORDER BY tuple() SETTINGS min_bytes_for_wide_part=0; INSERT INTO rename_table_multiple VALUES (1, 2, 3); diff --git a/tests/queries/0_stateless/01284_port.sql.j2 b/tests/queries/0_stateless/01284_port.sql.j2 index 6f78b3b8e3b..50e096c6deb 100644 --- a/tests/queries/0_stateless/01284_port.sql.j2 +++ b/tests/queries/0_stateless/01284_port.sql.j2 @@ -19,9 +19,9 @@ select port{{ suffix }}('http://127.0.0.1/', toUInt16(80)); select port{{ suffix }}('http://foobar.com/', toUInt16(80)); -- unsupported -/* ILLEGAL_TYPE_OF_ARGUMENT */ select port(toFixedString('', 1)); -- { serverError 43; } -/* ILLEGAL_TYPE_OF_ARGUMENT */ select port{{ suffix }}('', 1); -- { serverError 43; } -/* NUMBER_OF_ARGUMENTS_DOESNT_MATCH */ select port{{ suffix }}('', 1, 1); -- { serverError 42; } +/* ILLEGAL_TYPE_OF_ARGUMENT */ select port(toFixedString('', 1)); -- { serverError 43 } +/* ILLEGAL_TYPE_OF_ARGUMENT */ select port{{ suffix }}('', 1); -- { serverError 43 } +/* NUMBER_OF_ARGUMENTS_DOESNT_MATCH */ select port{{ suffix }}('', 1, 1); -- { serverError 42 } -- -- Known limitations of domain() (getURLHost()) diff --git a/tests/queries/0_stateless/01291_aggregation_in_order.reference b/tests/queries/0_stateless/01291_aggregation_in_order.reference index c072a8aed3e..cf058b9f2f5 100644 --- a/tests/queries/0_stateless/01291_aggregation_in_order.reference +++ b/tests/queries/0_stateless/01291_aggregation_in_order.reference @@ -22,8 +22,8 @@ 2 4 109 2 1 619 1 2 537 2 -1 619 1 -2 537 2 +-2 537 2 +-1 619 1 2019-05-05 00:00:00 -45363190 2019-05-05 00:00:00 -1249512288 2019-05-05 00:00:00 345522721 diff --git a/tests/queries/0_stateless/01291_aggregation_in_order.sql b/tests/queries/0_stateless/01291_aggregation_in_order.sql index c4357811520..e93eadc3329 100644 --- a/tests/queries/0_stateless/01291_aggregation_in_order.sql +++ b/tests/queries/0_stateless/01291_aggregation_in_order.sql @@ -14,7 +14,7 @@ SELECT a FROM pk_order GROUP BY a ORDER BY a; SELECT a, b, sum(c), avg(d) FROM pk_order GROUP BY a, b ORDER BY a, b; SELECT a, sum(c), avg(d) FROM pk_order GROUP BY a ORDER BY a; -SELECT a, sum(c), avg(d) FROM pk_order GROUP BY -a ORDER BY a; +SELECT -a, sum(c), avg(d) FROM pk_order GROUP BY -a ORDER BY -a; DROP TABLE IF EXISTS pk_order; @@ -27,7 +27,7 @@ INSERT INTO pk_order set max_block_size = 1; SELECT d, max(b) FROM pk_order GROUP BY d, a ORDER BY d, a LIMIT 5; -SELECT d, avg(a) FROM pk_order GROUP BY toString(d) ORDER BY toString(d) LIMIT 5; +SELECT toString(d), avg(a) FROM pk_order GROUP BY toString(d) ORDER BY toString(d) LIMIT 5; SELECT toStartOfHour(d) as d1, min(a), max(b) FROM pk_order GROUP BY d1 ORDER BY d1 LIMIT 5; DROP TABLE pk_order; diff --git 
a/tests/queries/0_stateless/01291_geo_types.sql b/tests/queries/0_stateless/01291_geo_types.sql index 6b686ddf520..4038c545679 100644 --- a/tests/queries/0_stateless/01291_geo_types.sql +++ b/tests/queries/0_stateless/01291_geo_types.sql @@ -1,7 +1,5 @@ DROP TABLE IF EXISTS geo; -SET allow_experimental_geo_types = 1; - CREATE TABLE geo (a Point, b Ring, c Polygon, d MultiPolygon) ENGINE=Memory(); INSERT INTO geo VALUES((0, 0), [(0, 0), (10, 0), (10, 10), (0, 10)], [[(20, 20), (50, 20), (50, 50), (20, 50)], [(30, 30), (50, 50), (50, 30)]], [[[(0, 0), (10, 0), (10, 10), (0, 10)]], [[(20, 20), (50, 20), (50, 50), (20, 50)],[(30, 30), (50, 50), (50, 30)]]]); diff --git a/tests/queries/0_stateless/01292_create_user.reference b/tests/queries/0_stateless/01292_create_user.reference index f723412c636..eb89a5ed38c 100644 --- a/tests/queries/0_stateless/01292_create_user.reference +++ b/tests/queries/0_stateless/01292_create_user.reference @@ -13,6 +13,8 @@ CREATE USER u4_01292 IDENTIFIED WITH sha256_password CREATE USER u5_01292 IDENTIFIED WITH sha256_password CREATE USER u6_01292 IDENTIFIED WITH double_sha1_password CREATE USER u7_01292 IDENTIFIED WITH double_sha1_password +CREATE USER u8_01292 IDENTIFIED WITH bcrypt_password +CREATE USER u9_01292 IDENTIFIED WITH bcrypt_password CREATE USER u1_01292 IDENTIFIED WITH sha256_password CREATE USER u2_01292 IDENTIFIED WITH sha256_password CREATE USER u3_01292 IDENTIFIED WITH sha256_password diff --git a/tests/queries/0_stateless/01292_create_user.sql b/tests/queries/0_stateless/01292_create_user.sql index d0f157d36b0..a283ce687e6 100644 --- a/tests/queries/0_stateless/01292_create_user.sql +++ b/tests/queries/0_stateless/01292_create_user.sql @@ -1,4 +1,4 @@ --- Tags: no-fasttest +-- Tags: no-fasttest, no-parallel DROP USER IF EXISTS u1_01292, u2_01292, u3_01292, u4_01292, u5_01292, u6_01292, u7_01292, u8_01292, u9_01292; DROP USER IF EXISTS u10_01292, u11_01292, u12_01292, u13_01292, u14_01292, u15_01292, u16_01292; @@ -31,6 +31,8 @@ CREATE USER u4_01292 IDENTIFIED WITH sha256_password BY 'qwe123'; CREATE USER u5_01292 IDENTIFIED WITH sha256_hash BY '18138372FAD4B94533CD4881F03DC6C69296DD897234E0CEE83F727E2E6B1F63'; CREATE USER u6_01292 IDENTIFIED WITH double_sha1_password BY 'qwe123'; CREATE USER u7_01292 IDENTIFIED WITH double_sha1_hash BY '8DCDD69CE7D121DE8013062AEAEB2A148910D50E'; +CREATE USER u8_01292 IDENTIFIED WITH bcrypt_password BY 'qwe123'; +CREATE USER u9_01292 IDENTIFIED WITH bcrypt_hash BY '$2a$12$rz5iy2LhuwBezsM88ZzWiemOVUeJ94xHTzwAlLMDhTzwUxOHaY64q'; SHOW CREATE USER u1_01292; SHOW CREATE USER u2_01292; SHOW CREATE USER u3_01292; @@ -38,6 +40,8 @@ SHOW CREATE USER u4_01292; SHOW CREATE USER u5_01292; SHOW CREATE USER u6_01292; SHOW CREATE USER u7_01292; +SHOW CREATE USER u8_01292; +SHOW CREATE USER u9_01292; ALTER USER u1_01292 IDENTIFIED BY '123qwe'; ALTER USER u2_01292 IDENTIFIED BY '123qwe'; ALTER USER u3_01292 IDENTIFIED BY '123qwe'; @@ -48,7 +52,7 @@ SHOW CREATE USER u2_01292; SHOW CREATE USER u3_01292; SHOW CREATE USER u4_01292; SHOW CREATE USER u5_01292; -DROP USER u1_01292, u2_01292, u3_01292, u4_01292, u5_01292, u6_01292, u7_01292; +DROP USER u1_01292, u2_01292, u3_01292, u4_01292, u5_01292, u6_01292, u7_01292, u8_01292, u9_01292; SELECT '-- host'; CREATE USER u1_01292 HOST ANY; diff --git a/tests/queries/0_stateless/01293_optimize_final_force.sh b/tests/queries/0_stateless/01293_optimize_final_force.sh index 60d45f87385..994d5952dbc 100755 --- a/tests/queries/0_stateless/01293_optimize_final_force.sh +++ 
b/tests/queries/0_stateless/01293_optimize_final_force.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01293_show_clusters.reference b/tests/queries/0_stateless/01293_show_clusters.reference index c62f8cdfa2d..9f8a44ebd0a 100644 --- a/tests/queries/0_stateless/01293_show_clusters.reference +++ b/tests/queries/0_stateless/01293_show_clusters.reference @@ -1,2 +1,3 @@ test_shard_localhost -test_shard_localhost 1 1 1 localhost ::1 9000 1 default +test_cluster_one_shard_two_replicas 1 1 1 127.0.0.1 127.0.0.1 9000 1 default +test_cluster_one_shard_two_replicas 1 1 2 127.0.0.2 127.0.0.2 9000 0 default diff --git a/tests/queries/0_stateless/01293_show_clusters.sh b/tests/queries/0_stateless/01293_show_clusters.sh index 2fdf17ec25e..ae027210383 100755 --- a/tests/queries/0_stateless/01293_show_clusters.sh +++ b/tests/queries/0_stateless/01293_show_clusters.sh @@ -6,4 +6,5 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -q "show clusters like 'test_shard%' limit 1" # cluster,shard_num,shard_weight,replica_num,host_name,host_address,port,is_local,user,default_database[,errors_count,slowdowns_count,estimated_recovery_time] -$CLICKHOUSE_CLIENT -q "show cluster 'test_shard_localhost'" | cut -f-10 +# use a cluster with static IPv4 +$CLICKHOUSE_CLIENT -q "show cluster 'test_cluster_one_shard_two_replicas'" | cut -f-10 diff --git a/tests/queries/0_stateless/01293_show_settings.reference b/tests/queries/0_stateless/01293_show_settings.reference index f11956e1893..f053387d1c5 100644 --- a/tests/queries/0_stateless/01293_show_settings.reference +++ b/tests/queries/0_stateless/01293_show_settings.reference @@ -3,6 +3,6 @@ connect_timeout Seconds 10 connect_timeout_with_failover_ms Milliseconds 2000 connect_timeout_with_failover_secure_ms Milliseconds 3000 external_storage_connect_timeout_sec UInt64 10 +filesystem_prefetch_max_memory_usage UInt64 1073741824 max_untracked_memory UInt64 1048576 memory_profiler_step UInt64 1048576 -filesystem_prefetch_max_memory_usage UInt64 1073741824 diff --git a/tests/queries/0_stateless/01297_create_quota.reference b/tests/queries/0_stateless/01297_create_quota.reference index f3f833d7619..6b844e0d2f6 100644 --- a/tests/queries/0_stateless/01297_create_quota.reference +++ b/tests/queries/0_stateless/01297_create_quota.reference @@ -64,3 +64,40 @@ q4_01297 604800 0 \N \N \N \N \N \N \N \N \N \N -- query_selects query_inserts CREATE QUOTA q1_01297 KEYED BY user_name FOR INTERVAL 1 minute MAX query_selects = 1 TO r1_01297 CREATE QUOTA q2_01297 KEYED BY user_name FOR INTERVAL 1 minute MAX query_inserts = 1 TO r1_01297 +-- size suffix +-- functional test +CREATE QUOTA q1_01297 FOR INTERVAL 1 minute MAX query_selects = 12000 +CREATE QUOTA q2_01297 FOR INTERVAL 1 minute MAX query_selects = 12288 +CREATE QUOTA q3_01297 FOR INTERVAL 1 minute MAX query_selects = 12000000 +CREATE QUOTA q4_01297 FOR INTERVAL 1 minute MAX query_selects = 12582912 +CREATE QUOTA q5_01297 FOR INTERVAL 1 minute MAX query_selects = 12000000000 +CREATE QUOTA q6_01297 FOR INTERVAL 1 minute MAX query_selects = 12884901888 +CREATE QUOTA q7_01297 FOR INTERVAL 1 minute MAX query_selects = 12000000000000 +CREATE QUOTA q8_01297 FOR INTERVAL 1 minute MAX query_selects = 13194139533312 +CREATE QUOTA q9_01297 FOR INTERVAL 1 minute MAX execution_time = 12000 +CREATE QUOTA q10_01297 FOR INTERVAL 1 minute MAX execution_time = 12288 +CREATE QUOTA q11_01297 
FOR INTERVAL 1 minute MAX execution_time = 12000000 +CREATE QUOTA q12_01297 FOR INTERVAL 1 minute MAX execution_time = 12582912 +CREATE QUOTA q13_01297 FOR INTERVAL 1 minute MAX execution_time = 12000000000 +CREATE QUOTA q14_01297 FOR INTERVAL 1 minute MAX execution_time = 12884901888 +CREATE QUOTA q15_01297 FOR INTERVAL 1 minute MAX query_selects = 1 +CREATE QUOTA q16_01297 FOR INTERVAL 1 minute MAX execution_time = 1.5 +-- overflow test +CREATE QUOTA q1_01297 FOR INTERVAL 1 minute MAX query_selects = 18446744073709551615 +CREATE QUOTA q2_01297 FOR INTERVAL 1 minute MAX execution_time = 18446744073 +-- zero test +CREATE QUOTA q1_01297 FOR INTERVAL 1 minute MAX query_selects = 0 +CREATE QUOTA q2_01297 FOR INTERVAL 1 minute MAX execution_time = 0 +-- underflow test +-- syntax test +CREATE QUOTA q1_01297 FOR INTERVAL 1 minute MAX query_selects = 12 +CREATE QUOTA q2_01297 FOR INTERVAL 1 minute MAX execution_time = 12 +CREATE QUOTA q3_01297 FOR INTERVAL 1 minute MAX query_selects = 12000 +CREATE QUOTA q4_01297 FOR INTERVAL 1 minute MAX execution_time = 12000 +CREATE QUOTA q5_01297 FOR INTERVAL 1 minute MAX execution_time = 0 +CREATE QUOTA q6_01297 FOR INTERVAL 1 minute MAX execution_time = 0 +CREATE QUOTA q7_01297 FOR INTERVAL 1 minute MAX execution_time = 0 +CREATE QUOTA q8_01297 FOR INTERVAL 1 minute MAX execution_time = 0 +CREATE QUOTA q9_01297 FOR INTERVAL 1 minute MAX execution_time = 123000 +CREATE QUOTA q10_01297 FOR INTERVAL 1 minute MAX execution_time = 123000 +-- bad syntax test diff --git a/tests/queries/0_stateless/01297_create_quota.sql b/tests/queries/0_stateless/01297_create_quota.sql index b30d453257a..a0ecb6bd2d0 100644 --- a/tests/queries/0_stateless/01297_create_quota.sql +++ b/tests/queries/0_stateless/01297_create_quota.sql @@ -1,7 +1,8 @@ -- Tags: no-parallel -DROP QUOTA IF EXISTS q1_01297, q2_01297, q3_01297, q4_01297, q5_01297, q6_01297, q7_01297, q8_01297, q9_01297, q10_01297; -DROP QUOTA IF EXISTS q11_01297, q12_01297; +DROP QUOTA IF EXISTS q1_01297, q2_01297, q3_01297, q4_01297, q5_01297, q6_01297, q7_01297, q8_01297, q9_01297, +q10_01297, q11_01297, q12_01297, q13_01297, q14_01297, q15_01297, q16_01297, q17_01297,q18_01297; + DROP QUOTA IF EXISTS q2_01297_renamed; DROP USER IF EXISTS u1_01297; DROP ROLE IF EXISTS r1_01297; @@ -137,3 +138,112 @@ DROP QUOTA q1_01297, q2_01297; DROP ROLE r1_01297; DROP USER u1_01297; +SELECT '-- size suffix'; +SELECT '-- functional test'; +CREATE QUOTA q1_01297 FOR INTERVAL 1 MINUTE MAX query_selects = '12K'; +CREATE QUOTA q2_01297 FOR INTERVAL 1 MINUTE MAX query_selects = '12Ki'; +CREATE QUOTA q3_01297 FOR INTERVAL 1 MINUTE MAX query_selects = '12M'; +CREATE QUOTA q4_01297 FOR INTERVAL 1 MINUTE MAX query_selects = '12Mi'; +CREATE QUOTA q5_01297 FOR INTERVAL 1 MINUTE MAX query_selects = '12G'; +CREATE QUOTA q6_01297 FOR INTERVAL 1 MINUTE MAX query_selects = '12Gi'; +CREATE QUOTA q7_01297 FOR INTERVAL 1 MINUTE MAX query_selects = '12T'; +CREATE QUOTA q8_01297 FOR INTERVAL 1 MINUTE MAX query_selects = '12Ti'; +CREATE QUOTA q9_01297 FOR INTERVAL 1 MINUTE MAX execution_time = '12K'; +CREATE QUOTA q10_01297 FOR INTERVAL 1 MINUTE MAX execution_time = '12Ki'; +CREATE QUOTA q11_01297 FOR INTERVAL 1 MINUTE MAX execution_time = '12M'; +CREATE QUOTA q12_01297 FOR INTERVAL 1 MINUTE MAX execution_time = '12Mi'; +CREATE QUOTA q13_01297 FOR INTERVAL 1 MINUTE MAX execution_time = '12G'; +CREATE QUOTA q14_01297 FOR INTERVAL 1 MINUTE MAX execution_time = '12Gi'; +CREATE QUOTA q15_01297 FOR INTERVAL 1 MINUTE MAX query_selects = 1.5; +CREATE QUOTA 
q16_01297 FOR INTERVAL 1 MINUTE MAX execution_time = 1.5; +CREATE QUOTA q17_01297 FOR INTERVAL 1 MINUTE MAX query_selects = '1.5'; -- { clientError 27 } +CREATE QUOTA q18_01297 FOR INTERVAL 1 MINUTE MAX execution_time = '1.5'; -- { clientError 27 } +SHOW CREATE QUOTA q1_01297; +SHOW CREATE QUOTA q2_01297; +SHOW CREATE QUOTA q3_01297; +SHOW CREATE QUOTA q4_01297; +SHOW CREATE QUOTA q5_01297; +SHOW CREATE QUOTA q6_01297; +SHOW CREATE QUOTA q7_01297; +SHOW CREATE QUOTA q8_01297; +SHOW CREATE QUOTA q9_01297; +SHOW CREATE QUOTA q10_01297; +SHOW CREATE QUOTA q11_01297; +SHOW CREATE QUOTA q12_01297; +SHOW CREATE QUOTA q13_01297; +SHOW CREATE QUOTA q14_01297; +SHOW CREATE QUOTA q15_01297; +SHOW CREATE QUOTA q16_01297; +DROP QUOTA IF EXISTS q1_01297; +DROP QUOTA IF EXISTS q2_01297; +DROP QUOTA IF EXISTS q3_01297; +DROP QUOTA IF EXISTS q4_01297; +DROP QUOTA IF EXISTS q5_01297; +DROP QUOTA IF EXISTS q6_01297; +DROP QUOTA IF EXISTS q7_01297; +DROP QUOTA IF EXISTS q8_01297; +DROP QUOTA IF EXISTS q9_01297; +DROP QUOTA IF EXISTS q10_01297; +DROP QUOTA IF EXISTS q11_01297; +DROP QUOTA IF EXISTS q12_01297; +DROP QUOTA IF EXISTS q13_01297; +DROP QUOTA IF EXISTS q14_01297; +DROP QUOTA IF EXISTS q15_01297; +DROP QUOTA IF EXISTS q16_01297; +SELECT '-- overflow test'; +CREATE QUOTA q1_01297 FOR INTERVAL 1 MINUTE MAX query_selects = '18446744073709551615'; +CREATE QUOTA q2_01297 FOR INTERVAL 1 MINUTE MAX execution_time = '18446744073'; +SHOW CREATE QUOTA q1_01297; +SHOW CREATE QUOTA q2_01297; +DROP QUOTA IF EXISTS q1_01297; +DROP QUOTA IF EXISTS q2_01297; +SELECT '-- zero test'; +CREATE QUOTA q1_01297 FOR INTERVAL 1 MINUTE MAX query_selects = '0'; +CREATE QUOTA q2_01297 FOR INTERVAL 1 MINUTE MAX execution_time = '0'; +SHOW CREATE QUOTA q1_01297; +SHOW CREATE QUOTA q2_01297; +DROP QUOTA IF EXISTS q1_01297; +DROP QUOTA IF EXISTS q2_01297; +SELECT '-- underflow test'; +CREATE QUOTA q1_01297 FOR INTERVAL 1 MINUTE MAX query_selects = '-1'; -- { clientError 72 } +CREATE QUOTA q2_01297 FOR INTERVAL 1 MINUTE MAX execution_time = '-1'; -- { clientError 72 } +SELECT '-- syntax test'; +CREATE QUOTA q1_01297 FOR INTERVAL 1 MINUTE MAX query_selects = ' 12 '; +CREATE QUOTA q2_01297 FOR INTERVAL 1 MINUTE MAX execution_time = ' 12 '; +CREATE QUOTA q3_01297 FOR INTERVAL 1 MINUTE MAX query_selects = ' 12k '; +CREATE QUOTA q4_01297 FOR INTERVAL 1 MINUTE MAX execution_time = ' 12k '; +CREATE QUOTA q5_01297 FOR INTERVAL 1 MINUTE MAX execution_time = ' 00 '; +CREATE QUOTA q6_01297 FOR INTERVAL 1 MINUTE MAX execution_time = ' 00 '; +CREATE QUOTA q7_01297 FOR INTERVAL 1 MINUTE MAX execution_time = ' 00k '; +CREATE QUOTA q8_01297 FOR INTERVAL 1 MINUTE MAX execution_time = ' 00k '; +CREATE QUOTA q9_01297 FOR INTERVAL 1 MINUTE MAX execution_time = ' 00123k '; +CREATE QUOTA q10_01297 FOR INTERVAL 1 MINUTE MAX execution_time = ' 00123k '; +SHOW CREATE QUOTA q1_01297; +SHOW CREATE QUOTA q2_01297; +SHOW CREATE QUOTA q3_01297; +SHOW CREATE QUOTA q4_01297; +SHOW CREATE QUOTA q5_01297; +SHOW CREATE QUOTA q6_01297; +SHOW CREATE QUOTA q7_01297; +SHOW CREATE QUOTA q8_01297; +SHOW CREATE QUOTA q9_01297; +SHOW CREATE QUOTA q10_01297; +DROP QUOTA IF EXISTS q1_01297; +DROP QUOTA IF EXISTS q2_01297; +DROP QUOTA IF EXISTS q3_01297; +DROP QUOTA IF EXISTS q4_01297; +DROP QUOTA IF EXISTS q5_01297; +DROP QUOTA IF EXISTS q6_01297; +DROP QUOTA IF EXISTS q7_01297; +DROP QUOTA IF EXISTS q8_01297; +DROP QUOTA IF EXISTS q9_01297; +DROP QUOTA IF EXISTS q10_01297; +SELECT '-- bad syntax test'; +CREATE QUOTA q1_01297 FOR INTERVAL 1 MINUTE MAX query_selects = '1 1'; 
-- { clientError 27 } +CREATE QUOTA q2_01297 FOR INTERVAL 1 MINUTE MAX execution_time = '1 1'; -- { clientError 27 } +CREATE QUOTA q3_01297 FOR INTERVAL 1 MINUTE MAX query_selects = '1K 1'; -- { clientError 27 } +CREATE QUOTA q4_01297 FOR INTERVAL 1 MINUTE MAX execution_time = '1K 1'; -- { clientError 27 } +CREATE QUOTA q5_01297 FOR INTERVAL 1 MINUTE MAX query_selects = '1K1'; -- { clientError 27 } +CREATE QUOTA q6_01297 FOR INTERVAL 1 MINUTE MAX execution_time = '1K1'; -- { clientError 27 } +CREATE QUOTA q7_01297 FOR INTERVAL 1 MINUTE MAX query_selects = 'foo'; -- { clientError 27 } +CREATE QUOTA q8_01297 FOR INTERVAL 1 MINUTE MAX execution_time = 'bar'; -- { clientError 27 } diff --git a/tests/queries/0_stateless/01300_client_save_history_when_terminated_long.expect b/tests/queries/0_stateless/01300_client_save_history_when_terminated_long.expect index a593075bb9a..c897d7e9772 100755 --- a/tests/queries/0_stateless/01300_client_save_history_when_terminated_long.expect +++ b/tests/queries/0_stateless/01300_client_save_history_when_terminated_long.expect @@ -28,7 +28,7 @@ exec kill -9 [exp_pid] close # Run client one more time and press "up" to see the last recorded query -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file" expect ":) " send -- "\[A" expect "for the history" diff --git a/tests/queries/0_stateless/01300_group_by_other_keys_having.sql b/tests/queries/0_stateless/01300_group_by_other_keys_having.sql index d209c5f24e3..911f61a62e2 100644 --- a/tests/queries/0_stateless/01300_group_by_other_keys_having.sql +++ b/tests/queries/0_stateless/01300_group_by_other_keys_having.sql @@ -1,5 +1,5 @@ set optimize_group_by_function_keys = 1; - +set allow_experimental_analyzer = 1; -- { echoOn } SELECT round(avg(log(2) * number), 6) AS k FROM numbers(10000000) GROUP BY (number % 2) * (number % 3), number % 3, number % 2 HAVING avg(log(2) * number) > 3465735.3 ORDER BY k; diff --git a/tests/queries/0_stateless/01308_orc_output_format_arrays.reference b/tests/queries/0_stateless/01308_orc_output_format_arrays.reference index 1f9646ac112..7feea7cec35 100644 Binary files a/tests/queries/0_stateless/01308_orc_output_format_arrays.reference and b/tests/queries/0_stateless/01308_orc_output_format_arrays.reference differ diff --git a/tests/queries/0_stateless/01308_orc_output_format_arrays.sh b/tests/queries/0_stateless/01308_orc_output_format_arrays.sh index 1d9aea353b6..498854874cf 100755 --- a/tests/queries/0_stateless/01308_orc_output_format_arrays.sh +++ b/tests/queries/0_stateless/01308_orc_output_format_arrays.sh @@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT --query="CREATE TABLE orc (array1 Array(Int32), array2 Array( $CLICKHOUSE_CLIENT --query="INSERT INTO orc VALUES ([1,2,3,4,5], [[1,2], [3,4], [5]]), ([42], [[42, 42], [42]])"; -$CLICKHOUSE_CLIENT --query="SELECT * FROM orc FORMAT ORC"; +$CLICKHOUSE_CLIENT --query="SELECT * FROM orc FORMAT ORC SETTINGS output_format_orc_compression_method='none'" | md5sum; $CLICKHOUSE_CLIENT --query="DROP TABLE orc"; diff --git a/tests/queries/0_stateless/01310_enum_comparison.sql b/tests/queries/0_stateless/01310_enum_comparison.sql index 26901a61b2b..ed63911e698 100644 --- a/tests/queries/0_stateless/01310_enum_comparison.sql +++ b/tests/queries/0_stateless/01310_enum_comparison.sql @@ -3,4 +3,4 @@ INSERT INTO enum VALUES 
('hello'); SELECT count() FROM enum WHERE x = 'hello'; SELECT count() FROM enum WHERE x = 'world'; -SELECT count() FROM enum WHERE x = 'xyz'; -- { serverError 36 } +SELECT count() FROM enum WHERE x = 'xyz'; -- { serverError 691 } diff --git a/tests/queries/0_stateless/01318_alter_add_constraint_format.reference b/tests/queries/0_stateless/01318_alter_add_constraint_format.reference index 7a3b41536e0..9f58d161539 100644 --- a/tests/queries/0_stateless/01318_alter_add_constraint_format.reference +++ b/tests/queries/0_stateless/01318_alter_add_constraint_format.reference @@ -1,2 +1 @@ -ALTER TABLE replicated_constraints1 - ADD CONSTRAINT IF NOT EXISTS b_constraint CHECK b > 10 +ALTER TABLE replicated_constraints1 ADD CONSTRAINT IF NOT EXISTS b_constraint CHECK b > 10 diff --git a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference index 91a96eb68a3..bf184d142ec 100644 --- a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference +++ b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference @@ -65,6 +65,7 @@ QUERY id: 0 SORT id: 12, sort_direction: ASCENDING, with_fill: 0 EXPRESSION COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8 + SETTINGS allow_experimental_analyzer=1 SELECT groupArray(x) FROM ( @@ -98,6 +99,7 @@ QUERY id: 0 SORT id: 12, sort_direction: ASCENDING, with_fill: 0 EXPRESSION COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8 + SETTINGS allow_experimental_analyzer=1 SELECT groupArray(x) FROM ( @@ -139,6 +141,7 @@ QUERY id: 0 SORT id: 15, sort_direction: ASCENDING, with_fill: 0 EXPRESSION COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8 + SETTINGS allow_experimental_analyzer=1 SELECT key, a, @@ -200,6 +203,7 @@ QUERY id: 0 SORT id: 25, sort_direction: ASCENDING, with_fill: 0 EXPRESSION COLUMN id: 26, column_name: key, result_type: UInt64, source_id: 5 + SETTINGS allow_experimental_analyzer=1 SELECT key, a @@ -225,6 +229,7 @@ QUERY id: 0 SORT id: 7, sort_direction: ASCENDING, with_fill: 0 EXPRESSION COLUMN id: 4, column_name: a, result_type: UInt8, source_id: 3 + SETTINGS allow_experimental_analyzer=1 SELECT key, a @@ -257,6 +262,7 @@ QUERY id: 0 LIST id: 11, nodes: 2 COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 COLUMN id: 4, column_name: a, result_type: UInt8, source_id: 3 + SETTINGS allow_experimental_analyzer=1 QUERY id: 0 PROJECTION COLUMNS key UInt64 @@ -279,6 +285,7 @@ QUERY id: 0 SORT id: 10, sort_direction: ASCENDING, with_fill: 0 EXPRESSION COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + SETTINGS allow_experimental_analyzer=1 QUERY id: 0 PROJECTION COLUMNS t1.id UInt64 @@ -307,6 +314,7 @@ QUERY id: 0 SORT id: 14, sort_direction: ASCENDING, with_fill: 0 EXPRESSION COLUMN id: 15, column_name: id, result_type: UInt64, source_id: 5 + SETTINGS allow_experimental_analyzer=1 [0,1,2] [0,1,2] [0,1,2] diff --git a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql index 338c1345052..738ad581e3d 100644 --- a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql +++ b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql @@ -20,25 +20,25 @@ SELECT key, a FROM test ORDER BY key, a, exp(key + a) SETTINGS allow_experimenta SELECT key, a FROM test ORDER BY key, exp(key + a); SELECT key, a FROM test ORDER BY key, exp(key + a) SETTINGS 
allow_experimental_analyzer=1; EXPLAIN SYNTAX SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(x)); -EXPLAIN QUERY TREE run_passes=1 SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(x)); +EXPLAIN QUERY TREE run_passes=1 SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(x)) settings allow_experimental_analyzer=1; EXPLAIN SYNTAX SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(exp(x))); -EXPLAIN QUERY TREE run_passes=1 SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(exp(x))); +EXPLAIN QUERY TREE run_passes=1 SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(exp(x))) settings allow_experimental_analyzer=1; EXPLAIN SYNTAX SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY exp(x), x); -EXPLAIN QUERY TREE run_passes=1 SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY exp(x), x); +EXPLAIN QUERY TREE run_passes=1 SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY exp(x), x) settings allow_experimental_analyzer=1; EXPLAIN SYNTAX SELECT * FROM (SELECT number + 2 AS key FROM numbers(4)) s FULL JOIN test t USING(key) ORDER BY s.key, t.key; -EXPLAIN QUERY TREE run_passes=1 SELECT * FROM (SELECT number + 2 AS key FROM numbers(4)) s FULL JOIN test t USING(key) ORDER BY s.key, t.key; +EXPLAIN QUERY TREE run_passes=1 SELECT * FROM (SELECT number + 2 AS key FROM numbers(4)) s FULL JOIN test t USING(key) ORDER BY s.key, t.key settings allow_experimental_analyzer=1; EXPLAIN SYNTAX SELECT key, a FROM test ORDER BY key, a, exp(key + a); -EXPLAIN QUERY TREE run_passes=1 SELECT key, a FROM test ORDER BY key, a, exp(key + a); +EXPLAIN QUERY TREE run_passes=1 SELECT key, a FROM test ORDER BY key, a, exp(key + a) settings allow_experimental_analyzer=1; EXPLAIN SYNTAX SELECT key, a FROM test ORDER BY key, exp(key + a); -EXPLAIN QUERY TREE run_passes=1 SELECT key, a FROM test ORDER BY key, exp(key + a); -EXPLAIN QUERY TREE run_passes=1 SELECT key FROM test GROUP BY key ORDER BY avg(a), key; +EXPLAIN QUERY TREE run_passes=1 SELECT key, a FROM test ORDER BY key, exp(key + a) settings allow_experimental_analyzer=1; +EXPLAIN QUERY TREE run_passes=1 SELECT key FROM test GROUP BY key ORDER BY avg(a), key settings allow_experimental_analyzer=1; DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; CREATE TABLE t1 (id UInt64) ENGINE = MergeTree() ORDER BY id; CREATE TABLE t2 (id UInt64) ENGINE = MergeTree() ORDER BY id; -EXPLAIN QUERY TREE run_passes=1 SELECT * FROM t1 INNER JOIN t2 ON t1.id = t2.id ORDER BY t1.id, t2.id; +EXPLAIN QUERY TREE run_passes=1 SELECT * FROM t1 INNER JOIN t2 ON t1.id = t2.id ORDER BY t1.id, t2.id settings allow_experimental_analyzer=1; set optimize_redundant_functions_in_order_by = 0; diff --git a/tests/queries/0_stateless/01346_alter_enum_partition_key_replicated_zookeeper_long.reference b/tests/queries/0_stateless/01346_alter_enum_partition_key_replicated_zookeeper_long.reference index 60c6076aef0..a905ea97ae5 100644 --- a/tests/queries/0_stateless/01346_alter_enum_partition_key_replicated_zookeeper_long.reference +++ b/tests/queries/0_stateless/01346_alter_enum_partition_key_replicated_zookeeper_long.reference @@ -1,24 +1,24 @@ hello test hello test -1_0_0_0 hello 1 -1_0_0_0 hello 1 +0 0 hello 1 +0 0 hello 1 hello test goodbye test hello test goodbye test -3_0_0_1 goodbye 3 -1_0_0_1 hello 1 -3_0_0_1 goodbye 3 -1_0_0_1 hello 1 +0 0 goodbye 3 +0 0 hello 1 +0 0 goodbye 3 +0 
0 hello 1 1 test 3 test 111 abc 1 test 3 test 111 abc -1_0_0_2 1 1 -111_0_0_1 111 111 -3_0_0_2 3 3 -1_0_0_2 1 1 -111_0_0_1 111 111 -3_0_0_2 3 3 +0 0 1 1 +0 0 111 111 +0 0 3 3 +0 0 1 1 +0 0 111 111 +0 0 3 3 diff --git a/tests/queries/0_stateless/01346_alter_enum_partition_key_replicated_zookeeper_long.sql b/tests/queries/0_stateless/01346_alter_enum_partition_key_replicated_zookeeper_long.sql index f20156fd9e3..d40bcc15e55 100644 --- a/tests/queries/0_stateless/01346_alter_enum_partition_key_replicated_zookeeper_long.sql +++ b/tests/queries/0_stateless/01346_alter_enum_partition_key_replicated_zookeeper_long.sql @@ -13,16 +13,17 @@ INSERT INTO test VALUES ('hello', 'test'); SELECT * FROM test; SYSTEM SYNC REPLICA test2; SELECT * FROM test2; -SELECT name, partition, partition_id FROM system.parts WHERE database = currentDatabase() AND table = 'test' AND active ORDER BY partition; -SELECT name, partition, partition_id FROM system.parts WHERE database = currentDatabase() AND table = 'test2' AND active ORDER BY partition; +SELECT min_block_number, max_block_number, partition, partition_id FROM system.parts WHERE database = currentDatabase() AND table = 'test' AND active ORDER BY partition; +SELECT min_block_number, max_block_number, partition, partition_id FROM system.parts WHERE database = currentDatabase() AND table = 'test2' AND active ORDER BY partition; ALTER TABLE test MODIFY COLUMN x Enum('hello' = 1, 'world' = 2, 'goodbye' = 3); INSERT INTO test VALUES ('goodbye', 'test'); OPTIMIZE TABLE test FINAL; SELECT * FROM test ORDER BY x; +SYSTEM SYNC REPLICA test2; SELECT * FROM test2 ORDER BY x; -SELECT name, partition, partition_id FROM system.parts WHERE database = currentDatabase() AND table = 'test' AND active ORDER BY partition; -SELECT name, partition, partition_id FROM system.parts WHERE database = currentDatabase() AND table = 'test2' AND active ORDER BY partition; +SELECT min_block_number, max_block_number, partition, partition_id FROM system.parts WHERE database = currentDatabase() AND table = 'test' AND active ORDER BY partition; +SELECT min_block_number, max_block_number, partition, partition_id FROM system.parts WHERE database = currentDatabase() AND table = 'test2' AND active ORDER BY partition; ALTER TABLE test MODIFY COLUMN x Enum('hello' = 1, 'world' = 2); -- { serverError 524 } ALTER TABLE test MODIFY COLUMN x Enum('hello' = 1, 'world' = 2, 'test' = 3); @@ -33,9 +34,10 @@ ALTER TABLE test MODIFY COLUMN x Int8; INSERT INTO test VALUES (111, 'abc'); OPTIMIZE TABLE test FINAL; SELECT * FROM test ORDER BY x; +SYSTEM SYNC REPLICA test2; SELECT * FROM test2 ORDER BY x; -SELECT name, partition, partition_id FROM system.parts WHERE database = currentDatabase() AND table = 'test' AND active ORDER BY partition; -SELECT name, partition, partition_id FROM system.parts WHERE database = currentDatabase() AND table = 'test2' AND active ORDER BY partition; +SELECT min_block_number, max_block_number, partition, partition_id FROM system.parts WHERE database = currentDatabase() AND table = 'test' AND active ORDER BY partition; +SELECT min_block_number, max_block_number, partition, partition_id FROM system.parts WHERE database = currentDatabase() AND table = 'test2' AND active ORDER BY partition; ALTER TABLE test MODIFY COLUMN x Enum8('' = 1); -- { serverError 524 } ALTER TABLE test MODIFY COLUMN x Enum16('' = 1); -- { serverError 524 } diff --git a/tests/queries/0_stateless/01361_fover_remote_num_tries.sh b/tests/queries/0_stateless/01361_fover_remote_num_tries.sh index 12a65a96c9b..2ee2ec1bc76 
100755 --- a/tests/queries/0_stateless/01361_fover_remote_num_tries.sh +++ b/tests/queries/0_stateless/01361_fover_remote_num_tries.sh @@ -5,4 +5,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --connections_with_failover_max_tries 10 --query "SELECT hostName() FROM remote('128.1.2.3', default.tmp)" 2>&1 | grep -o -P 'connect timed out|Network is unreachable' | wc -l +$CLICKHOUSE_CLIENT --connections_with_failover_max_tries 10 --query "SELECT hostName() FROM remote('128.1.2.3', default.tmp)" 2>&1 | grep -o -P 'Timeout exceeded while connecting to socket|Network is unreachable' | wc -l diff --git a/tests/queries/0_stateless/01375_GROUP_BY_injective_elimination_dictGet_BAD_ARGUMENTS.sql b/tests/queries/0_stateless/01375_GROUP_BY_injective_elimination_dictGet_BAD_ARGUMENTS.sql index 88a2b25c2db..8ff9cd2b9f2 100644 --- a/tests/queries/0_stateless/01375_GROUP_BY_injective_elimination_dictGet_BAD_ARGUMENTS.sql +++ b/tests/queries/0_stateless/01375_GROUP_BY_injective_elimination_dictGet_BAD_ARGUMENTS.sql @@ -1 +1 @@ -SELECT dictGetString(concat('default', '.countryId'), 'country', toUInt64(number)) AS country FROM numbers(2) GROUP BY country; -- { serverError 36; } +SELECT dictGetString(concat('default', '.countryId'), 'country', toUInt64(number)) AS country FROM numbers(2) GROUP BY country; -- { serverError 36 } diff --git a/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.sql b/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.sql index 258d96829a5..29ffcb46fbf 100644 --- a/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.sql +++ b/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.sql @@ -1,7 +1,7 @@ -- Tags: no-parallel -- https://github.com/ClickHouse/ClickHouse/issues/11469 -SELECT dictGet('default.countryId', 'country', toUInt64(number)) AS country FROM numbers(2) GROUP BY country; -- { serverError 36; } +SELECT dictGet('default.countryId', 'country', toUInt64(number)) AS country FROM numbers(2) GROUP BY country; -- { serverError 36 } -- with real dictionary diff --git a/tests/queries/0_stateless/01380_coded_delta_exception_code.sql b/tests/queries/0_stateless/01380_coded_delta_exception_code.sql index 587fac958cd..f4b88a93904 100644 --- a/tests/queries/0_stateless/01380_coded_delta_exception_code.sql +++ b/tests/queries/0_stateless/01380_coded_delta_exception_code.sql @@ -2,5 +2,5 @@ CREATE TABLE delta_codec_synthetic (`id` Decimal(38, 10) CODEC(Delta, ZSTD(22))) CREATE TABLE delta_codec_synthetic (`id` Decimal(38, 10) CODEC(DoubleDelta, ZSTD(22))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 36 } CREATE TABLE delta_codec_synthetic (`id` Decimal(38, 10) CODEC(Gorilla, ZSTD(22))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 36 } -CREATE TABLE delta_codec_synthetic (`id` UInt64 CODEC(DoubleDelta(3), ZSTD(22))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 36 } -CREATE TABLE delta_codec_synthetic (`id` UInt64 CODEC(Gorilla('hello, world'), ZSTD(22))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError 36 } +CREATE TABLE delta_codec_synthetic (`id` UInt64 CODEC(DoubleDelta(3), ZSTD(22))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_CODEC_PARAMETER } +CREATE TABLE delta_codec_synthetic (`id` UInt64 CODEC(Gorilla('hello, world'), ZSTD(22))) ENGINE = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_CODEC_PARAMETER } diff --git 
a/tests/queries/0_stateless/01395_limit_more_cases.sh b/tests/queries/0_stateless/01395_limit_more_cases.sh index 32c854e53fb..177147d2142 100755 --- a/tests/queries/0_stateless/01395_limit_more_cases.sh +++ b/tests/queries/0_stateless/01395_limit_more_cases.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01402_cast_nullable_string_to_enum.sql b/tests/queries/0_stateless/01402_cast_nullable_string_to_enum.sql index 3b53e593095..1d445412381 100644 --- a/tests/queries/0_stateless/01402_cast_nullable_string_to_enum.sql +++ b/tests/queries/0_stateless/01402_cast_nullable_string_to_enum.sql @@ -5,9 +5,9 @@ SELECT CAST(CAST(NULL AS Nullable(String)) AS Nullable(Enum8('Hello' = 1))); SELECT CAST(CAST(NULL AS Nullable(FixedString(1))) AS Nullable(Enum8('Hello' = 1))); -- empty string still not acceptable -SELECT CAST(CAST('' AS Nullable(String)) AS Nullable(Enum8('Hello' = 1))); -- { serverError 36; } -SELECT CAST(CAST('' AS Nullable(FixedString(1))) AS Nullable(Enum8('Hello' = 1))); -- { serverError 36; } +SELECT CAST(CAST('' AS Nullable(String)) AS Nullable(Enum8('Hello' = 1))); -- { serverError 691 } +SELECT CAST(CAST('' AS Nullable(FixedString(1))) AS Nullable(Enum8('Hello' = 1))); -- { serverError 691 } -- non-Nullable Enum() still not acceptable -SELECT CAST(CAST(NULL AS Nullable(String)) AS Enum8('Hello' = 1)); -- { serverError 349; } -SELECT CAST(CAST(NULL AS Nullable(FixedString(1))) AS Enum8('Hello' = 1)); -- { serverError 349; } +SELECT CAST(CAST(NULL AS Nullable(String)) AS Enum8('Hello' = 1)); -- { serverError 349 } +SELECT CAST(CAST(NULL AS Nullable(FixedString(1))) AS Enum8('Hello' = 1)); -- { serverError 349 } diff --git a/tests/queries/0_stateless/01404_roundUpToPowerOfTwoOrZero_safety.sql b/tests/queries/0_stateless/01404_roundUpToPowerOfTwoOrZero_safety.sql index 4ee6e1fa5e4..d61a35c9999 100644 --- a/tests/queries/0_stateless/01404_roundUpToPowerOfTwoOrZero_safety.sql +++ b/tests/queries/0_stateless/01404_roundUpToPowerOfTwoOrZero_safety.sql @@ -1,4 +1,4 @@ -- repeat() with this length and this number of rows will allocate a huge enough region (MSB set), -- which will cause roundUpToPowerOfTwoOrZero() to return 0 for such an allocation (before the fix), -- and later repeat() will try to use this memory and will get a SIGSEGV.
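-- For scale, a sketch of the size the query below asks for (same constants as the test: a 12-byte pattern and numbers(65535), so the largest multiplier is number = 65534):
SELECT length('0.0001048576') AS bytes_per_repetition, 65534 * (65534 * (65534 * 255)) AS max_repetitions, length('0.0001048576') * (65534 * (65534 * (65534 * 255))) AS requested_bytes; -- roughly 8.6e17 bytes, far beyond any allocation the server could satisfy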
-SELECT repeat('0.0001048576', number * (number * (number * 255))) FROM numbers(65535); -- { serverError 131; } +SELECT repeat('0.0001048576', number * (number * (number * 255))) FROM numbers(65535); -- { serverError 131 } diff --git a/tests/queries/0_stateless/01407_lambda_arrayJoin.sql b/tests/queries/0_stateless/01407_lambda_arrayJoin.sql index 363b1d92dbb..e1b8c1d5a76 100644 --- a/tests/queries/0_stateless/01407_lambda_arrayJoin.sql +++ b/tests/queries/0_stateless/01407_lambda_arrayJoin.sql @@ -1,5 +1,5 @@ SELECT arrayFilter((a) -> ((a, arrayJoin([])) IN (Null, [Null])), []); SELECT arrayFilter((a) -> ((a, arrayJoin([[]])) IN (Null, [Null])), []); -SELECT * FROM system.one ARRAY JOIN arrayFilter((a) -> ((a, arrayJoin([])) IN (NULL)), []) AS arr_x; -- { serverError 43; } +SELECT * FROM system.one ARRAY JOIN arrayFilter((a) -> ((a, arrayJoin([])) IN (NULL)), []) AS arr_x; -- { serverError 43 } SELECT * FROM numbers(1) LEFT ARRAY JOIN arrayFilter((x_0, x_1) -> (arrayJoin([]) IN (NULL)), [], []) AS arr_x; diff --git a/tests/queries/0_stateless/01408_range_overflow.sql b/tests/queries/0_stateless/01408_range_overflow.sql index 2107e8c3f36..d26507f8358 100644 --- a/tests/queries/0_stateless/01408_range_overflow.sql +++ b/tests/queries/0_stateless/01408_range_overflow.sql @@ -1,7 +1,7 @@ -- executeGeneric() SELECT range(1025, 1048576 + 9223372036854775807, 9223372036854775807); SELECT range(1025, 1048576 + (9223372036854775807 AS i), i); -SELECT range(1025, 18446744073709551615, 1); -- { serverError 69; } +SELECT range(1025, 18446744073709551615, 1); -- { serverError 69 } -- executeConstStep() SELECT range(number, 1048576 + 9223372036854775807, 9223372036854775807) FROM system.numbers LIMIT 1 OFFSET 1025; diff --git a/tests/queries/0_stateless/01411_from_unixtime.reference b/tests/queries/0_stateless/01411_from_unixtime.reference index 4114c9b98d3..f5b3be44e00 100644 --- a/tests/queries/0_stateless/01411_from_unixtime.reference +++ b/tests/queries/0_stateless/01411_from_unixtime.reference @@ -24,7 +24,7 @@ Jan Jan 366 366 00 00 01 01 -33 00 +January January \n \n AM AM AM diff --git a/tests/queries/0_stateless/01418_custom_settings.reference b/tests/queries/0_stateless/01418_custom_settings.reference index cf0cb35c72a..8484a5d0e6f 100644 --- a/tests/queries/0_stateless/01418_custom_settings.reference +++ b/tests/queries/0_stateless/01418_custom_settings.reference @@ -1,3 +1,4 @@ +--- assigning --- 5 UInt8 -177 Int16 98.11 Float64 @@ -6,7 +7,7 @@ custom_a UInt64_5 custom_b Int64_-177 custom_c Float64_98.11 custom_d \'abc def\' - +--- modifying --- changed String \N Nullable(Nothing) 50000 UInt16 @@ -15,9 +16,10 @@ custom_a \'changed\' custom_b NULL custom_c UInt64_50000 custom_d Float64_1.11 - +--- undefined setting --- 404 UInt16 - +--- wrong prefix --- +--- using query context --- -0.333 Float64 custom_e Float64_-0.333 404 UInt16 @@ -25,7 +27,13 @@ custom_e UInt64_404 word String custom_f \'word\' 0 - +--- compound identifier --- test String custom_compound.identifier.v1 \'test\' CREATE SETTINGS PROFILE s1_01418 SETTINGS custom_compound.identifier.v2 = 100 +--- null type --- +\N Nullable(Nothing) +custom_null NULL +\N Nullable(Nothing) +custom_null NULL +CREATE SETTINGS PROFILE s2_01418 SETTINGS custom_null = NULL diff --git a/tests/queries/0_stateless/01418_custom_settings.sql b/tests/queries/0_stateless/01418_custom_settings.sql index 95051db3a34..be18f553589 100644 --- a/tests/queries/0_stateless/01418_custom_settings.sql +++ b/tests/queries/0_stateless/01418_custom_settings.sql @@ -1,3 
+1,6 @@ +DROP SETTINGS PROFILE IF EXISTS s1_01418, s2_01418; + +SELECT '--- assigning ---'; SET custom_a = 5; SET custom_b = -177; SET custom_c = 98.11; @@ -8,7 +11,7 @@ SELECT getSetting('custom_c') as v, toTypeName(v); SELECT getSetting('custom_d') as v, toTypeName(v); SELECT name, value FROM system.settings WHERE name LIKE 'custom_%' ORDER BY name; -SELECT ''; +SELECT '--- modifying ---'; SET custom_a = 'changed'; SET custom_b = NULL; SET custom_c = 50000; @@ -19,14 +22,15 @@ SELECT getSetting('custom_c') as v, toTypeName(v); SELECT getSetting('custom_d') as v, toTypeName(v); SELECT name, value FROM system.settings WHERE name LIKE 'custom_%' ORDER BY name; -SELECT ''; +SELECT '--- undefined setting ---'; SELECT getSetting('custom_e') as v, toTypeName(v); -- { serverError 115 } -- Setting not found. SET custom_e = 404; SELECT getSetting('custom_e') as v, toTypeName(v); +SELECT '--- wrong prefix ---'; SET invalid_custom = 8; -- { serverError 115 } -- Setting is neither a builtin nor started with one of the registered prefixes for user-defined settings. -SELECT ''; +SELECT '--- using query context ---'; SELECT getSetting('custom_e') as v, toTypeName(v) SETTINGS custom_e = -0.333; SELECT name, value FROM system.settings WHERE name = 'custom_e' SETTINGS custom_e = -0.333; SELECT getSetting('custom_e') as v, toTypeName(v); @@ -37,7 +41,7 @@ SELECT name, value FROM system.settings WHERE name = 'custom_f' SETTINGS custom_ SELECT getSetting('custom_f') as v, toTypeName(v); -- { serverError 115 } -- Setting not found. SELECT COUNT() FROM system.settings WHERE name = 'custom_f'; -SELECT ''; +SELECT '--- compound identifier ---'; SET custom_compound.identifier.v1 = 'test'; SELECT getSetting('custom_compound.identifier.v1') as v, toTypeName(v); SELECT name, value FROM system.settings WHERE name = 'custom_compound.identifier.v1'; @@ -45,3 +49,15 @@ SELECT name, value FROM system.settings WHERE name = 'custom_compound.identifier CREATE SETTINGS PROFILE s1_01418 SETTINGS custom_compound.identifier.v2 = 100; SHOW CREATE SETTINGS PROFILE s1_01418; DROP SETTINGS PROFILE s1_01418; + +SELECT '--- null type ---'; +SELECT getSetting('custom_null') as v, toTypeName(v) SETTINGS custom_null = NULL; +SELECT name, value FROM system.settings WHERE name = 'custom_null' SETTINGS custom_null = NULL; + +SET custom_null = NULL; +SELECT getSetting('custom_null') as v, toTypeName(v); +SELECT name, value FROM system.settings WHERE name = 'custom_null'; + +CREATE SETTINGS PROFILE s2_01418 SETTINGS custom_null = NULL; +SHOW CREATE SETTINGS PROFILE s2_01418; +DROP SETTINGS PROFILE s2_01418; diff --git a/tests/queries/0_stateless/01428_nullable_asof_join.reference b/tests/queries/0_stateless/01428_nullable_asof_join.reference index f04655fefaa..73825dce725 100644 --- a/tests/queries/0_stateless/01428_nullable_asof_join.reference +++ b/tests/queries/0_stateless/01428_nullable_asof_join.reference @@ -5,6 +5,15 @@ left asof using 0 \N 0 \N UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) 1 \N 1 \N UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) 1 1 2 2 UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) +0 \N 0 \N UInt8 Nullable(UInt8) UInt8 Nullable(UInt8) +1 1 1 0 UInt8 Nullable(UInt8) UInt8 Nullable(UInt8) +1 1 2 0 UInt8 Nullable(UInt8) UInt8 Nullable(UInt8) +0 \N 0 \N UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) +1 1 1 0 UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) +1 1 2 0 UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) +0 \N 0 \N UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) +1 1 
1 0 UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) +1 1 2 0 UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) left asof on 0 \N 0 \N UInt8 Nullable(UInt8) UInt8 Nullable(UInt8) 1 \N 1 \N UInt8 Nullable(UInt8) UInt8 Nullable(UInt8) @@ -12,9 +21,28 @@ left asof on 0 \N 0 \N UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) 1 \N 1 \N UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) 1 1 2 2 UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) +0 \N 0 \N UInt8 Nullable(UInt8) UInt8 Nullable(UInt8) +1 1 1 0 UInt8 Nullable(UInt8) UInt8 Nullable(UInt8) +1 1 2 0 UInt8 Nullable(UInt8) UInt8 Nullable(UInt8) +0 \N 0 \N UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) +1 1 1 0 UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) +1 1 2 0 UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) asof using 1 1 2 2 UInt8 UInt8 UInt8 UInt8 1 1 2 2 UInt8 UInt8 Nullable(UInt8) UInt8 +1 1 2 2 UInt8 UInt8 Nullable(UInt8) Nullable(UInt8) +1 1 1 0 UInt8 UInt8 UInt8 Nullable(UInt8) +1 1 2 0 UInt8 UInt8 UInt8 Nullable(UInt8) +1 1 1 0 UInt8 UInt8 Nullable(UInt8) Nullable(UInt8) +1 1 2 0 UInt8 UInt8 Nullable(UInt8) Nullable(UInt8) +1 1 1 0 UInt8 UInt8 Nullable(UInt8) Nullable(UInt8) +1 1 2 0 UInt8 UInt8 Nullable(UInt8) Nullable(UInt8) asof on 1 1 2 2 UInt8 UInt8 UInt8 UInt8 1 1 2 2 UInt8 UInt8 Nullable(UInt8) UInt8 +1 1 1 0 UInt8 UInt8 UInt8 Nullable(UInt8) +1 1 2 0 UInt8 UInt8 UInt8 Nullable(UInt8) +1 1 1 0 UInt8 UInt8 Nullable(UInt8) Nullable(UInt8) +1 1 2 0 UInt8 UInt8 Nullable(UInt8) Nullable(UInt8) +1 1 1 0 UInt8 UInt8 Nullable(UInt8) Nullable(UInt8) +1 1 2 0 UInt8 UInt8 Nullable(UInt8) Nullable(UInt8) diff --git a/tests/queries/0_stateless/01428_nullable_asof_join.sql b/tests/queries/0_stateless/01428_nullable_asof_join.sql index e1b00158d68..f07a26edd97 100644 --- a/tests/queries/0_stateless/01428_nullable_asof_join.sql +++ b/tests/queries/0_stateless/01428_nullable_asof_join.sql @@ -18,13 +18,19 @@ SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(ma FROM (SELECT toUInt8(number) > 0 as pk, toUInt8(number) as dt FROM numbers(3)) a ASOF LEFT JOIN (SELECT 1 as pk, toNullable(0) as dt) b USING(pk, dt) -ORDER BY a.dt; -- { serverError 48 } +ORDER BY a.dt SETTINGS allow_experimental_analyzer = 0; + +SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(materialize(a.dt)), toTypeName(materialize(b.dt)) +FROM (SELECT toUInt8(number) > 0 as pk, toUInt8(number) as dt FROM numbers(3)) a +ASOF LEFT JOIN (SELECT 1 as pk, toNullable(0) as dt) b +USING(pk, dt) +ORDER BY a.dt SETTINGS allow_experimental_analyzer = 1; SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(materialize(a.dt)), toTypeName(materialize(b.dt)) FROM (SELECT toUInt8(number) > 0 as pk, toNullable(toUInt8(number)) as dt FROM numbers(3)) a ASOF LEFT JOIN (SELECT 1 as pk, toNullable(0) as dt) b USING(pk, dt) -ORDER BY a.dt; -- { serverError 48 } +ORDER BY a.dt; select 'left asof on'; @@ -44,13 +50,13 @@ SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(ma FROM (SELECT toUInt8(number) > 0 as pk, toUInt8(number) as dt FROM numbers(3)) a ASOF LEFT JOIN (SELECT 1 as pk, toNullable(0) as dt) b ON a.pk = b.pk AND a.dt >= b.dt -ORDER BY a.dt; -- { serverError 48 } +ORDER BY a.dt; SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(materialize(a.dt)), toTypeName(materialize(b.dt)) FROM (SELECT toUInt8(number) > 0 as pk, toNullable(toUInt8(number)) as dt FROM numbers(3)) a ASOF LEFT JOIN (SELECT 1 as pk, 
toNullable(0) as dt) b ON a.dt >= b.dt AND a.pk = b.pk -ORDER BY a.dt; -- { serverError 48 } +ORDER BY a.dt; select 'asof using'; @@ -64,19 +70,31 @@ SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(ma FROM (SELECT toUInt8(number) > 0 as pk, toNullable(toUInt8(number)) as dt FROM numbers(3)) a ASOF JOIN (SELECT 1 as pk, 2 as dt) b USING(pk, dt) -ORDER BY a.dt; +ORDER BY a.dt SETTINGS allow_experimental_analyzer = 0; + +SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(materialize(a.dt)), toTypeName(materialize(b.dt)) +FROM (SELECT toUInt8(number) > 0 as pk, toNullable(toUInt8(number)) as dt FROM numbers(3)) a +ASOF JOIN (SELECT 1 as pk, 2 as dt) b +USING(pk, dt) +ORDER BY a.dt SETTINGS allow_experimental_analyzer = 1; SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(materialize(a.dt)), toTypeName(materialize(b.dt)) FROM (SELECT toUInt8(number) > 0 as pk, toUInt8(number) as dt FROM numbers(3)) a ASOF JOIN (SELECT 1 as pk, toNullable(0) as dt) b USING(pk, dt) -ORDER BY a.dt; -- { serverError 48 } +ORDER BY a.dt SETTINGS allow_experimental_analyzer = 0; + +SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(materialize(a.dt)), toTypeName(materialize(b.dt)) +FROM (SELECT toUInt8(number) > 0 as pk, toUInt8(number) as dt FROM numbers(3)) a +ASOF JOIN (SELECT 1 as pk, toNullable(0) as dt) b +USING(pk, dt) +ORDER BY a.dt SETTINGS allow_experimental_analyzer = 1; SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(materialize(a.dt)), toTypeName(materialize(b.dt)) FROM (SELECT toUInt8(number) > 0 as pk, toNullable(toUInt8(number)) as dt FROM numbers(3)) a ASOF JOIN (SELECT 1 as pk, toNullable(0) as dt) b USING(pk, dt) -ORDER BY a.dt; -- { serverError 48 } +ORDER BY a.dt; select 'asof on'; @@ -96,19 +114,19 @@ SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(ma FROM (SELECT toUInt8(number) > 0 as pk, toUInt8(number) as dt FROM numbers(3)) a ASOF JOIN (SELECT 1 as pk, toNullable(0) as dt) b ON a.pk = b.pk AND a.dt >= b.dt -ORDER BY a.dt; -- { serverError 48 } +ORDER BY a.dt; SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(materialize(a.dt)), toTypeName(materialize(b.dt)) FROM (SELECT toUInt8(number) > 0 as pk, toNullable(toUInt8(number)) as dt FROM numbers(3)) a ASOF JOIN (SELECT 1 as pk, toNullable(0) as dt) b ON a.pk = b.pk AND a.dt >= b.dt -ORDER BY a.dt; -- { serverError 48 } +ORDER BY a.dt; SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(materialize(a.dt)), toTypeName(materialize(b.dt)) FROM (SELECT toUInt8(number) > 0 as pk, toNullable(toUInt8(number)) as dt FROM numbers(3)) a ASOF JOIN (SELECT 1 as pk, toNullable(0) as dt) b ON a.dt >= b.dt AND a.pk = b.pk -ORDER BY a.dt; -- { serverError 48 } +ORDER BY a.dt; SELECT * FROM (SELECT NULL AS y, 1 AS x, '2020-01-01 10:10:10' :: DateTime64 AS t) AS t1 diff --git a/tests/queries/0_stateless/01429_join_on_error_messages.sql b/tests/queries/0_stateless/01429_join_on_error_messages.sql index cf9aac0e4da..b22d5259136 100644 --- a/tests/queries/0_stateless/01429_join_on_error_messages.sql +++ b/tests/queries/0_stateless/01429_join_on_error_messages.sql @@ -1,8 +1,8 @@ SELECT 1 FROM (select 1 a) A JOIN (select 1 b) B ON (arrayJoin([1]) = B.b); -- { serverError 403 } SELECT 1 FROM (select 1 a) A JOIN (select 1 b) B ON (A.a = arrayJoin([1])); -- { serverError 403 } -SELECT 1 FROM (select 1 a) A JOIN (select 1 b) B ON equals(a); -- { 
serverError 62 } -SELECT 1 FROM (select 1 a) A JOIN (select 1 b) B ON less(a); -- { serverError 62 } +SELECT 1 FROM (select 1 a) A JOIN (select 1 b) B ON equals(a); -- { serverError 42, 62 } +SELECT 1 FROM (select 1 a) A JOIN (select 1 b) B ON less(a); -- { serverError 42, 62 } SELECT 1 FROM (select 1 a) A JOIN (select 1 b) B ON a = b AND a > b; -- { serverError 403 } SELECT 1 FROM (select 1 a) A JOIN (select 1 b) B ON a = b AND a < b; -- { serverError 403 } @@ -13,7 +13,7 @@ SET join_algorithm = 'partial_merge'; SELECT 1 FROM (select 1 a) A JOIN (select 1 b, 1 c) B ON a = b OR a = c; -- { serverError 48 } -- works for a = b OR a = b because of equivalent disjunct optimization -SET join_algorithm = 'auto'; +SET join_algorithm = 'grace_hash'; SELECT 1 FROM (select 1 a) A JOIN (select 1 b, 1 c) B ON a = b OR a = c; -- { serverError 48 } -- works for a = b OR a = b because of equivalent disjunct optimization diff --git a/tests/queries/0_stateless/01442_merge_detach_attach_long.sh b/tests/queries/0_stateless/01442_merge_detach_attach_long.sh index b3f9fbb42dd..c080dded1c8 100755 --- a/tests/queries/0_stateless/01442_merge_detach_attach_long.sh +++ b/tests/queries/0_stateless/01442_merge_detach_attach_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-parallel +# Tags: long, no-parallel, no-debug set -e diff --git a/tests/queries/0_stateless/01455_opentelemetry_distributed.reference b/tests/queries/0_stateless/01455_opentelemetry_distributed.reference index d48b3738bc2..0729dd7b881 100644 --- a/tests/queries/0_stateless/01455_opentelemetry_distributed.reference +++ b/tests/queries/0_stateless/01455_opentelemetry_distributed.reference @@ -1,10 +1,10 @@ ===http=== -{"query":"select 1 from remote('127.0.0.2', system, one) format Null\n","status":"QueryFinish","tracestate":"some custom state","sorted_by_start_time":1} +{"query":"select 1 from remote('127.0.0.2', system, one) settings allow_experimental_analyzer = 1 format Null\n","status":"QueryFinish","tracestate":"some custom state","sorted_by_start_time":1} {"query":"DESC TABLE system.one","status":"QueryFinish","tracestate":"some custom state","sorted_by_start_time":1} -{"query":"SELECT 1 FROM `system`.`one`","status":"QueryFinish","tracestate":"some custom state","sorted_by_start_time":1} +{"query":"SELECT 1 AS `1` FROM `system`.`one`","status":"QueryFinish","tracestate":"some custom state","sorted_by_start_time":1} {"query":"DESC TABLE system.one","query_status":"QueryFinish","tracestate":"some custom state","sorted_by_finish_time":1} -{"query":"SELECT 1 FROM `system`.`one`","query_status":"QueryFinish","tracestate":"some custom state","sorted_by_finish_time":1} -{"query":"select 1 from remote('127.0.0.2', system, one) format Null\n","query_status":"QueryFinish","tracestate":"some custom state","sorted_by_finish_time":1} +{"query":"SELECT 1 AS `1` FROM `system`.`one`","query_status":"QueryFinish","tracestate":"some custom state","sorted_by_finish_time":1} +{"query":"select 1 from remote('127.0.0.2', system, one) settings allow_experimental_analyzer = 1 format Null\n","query_status":"QueryFinish","tracestate":"some custom state","sorted_by_finish_time":1} {"total spans":"3","unique spans":"3","unique non-zero parent spans":"3"} {"initial query spans with proper parent":"1"} {"unique non-empty tracestate values":"1"} diff --git a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh index b2b5ae89105..0dfec6097db 100755 --- 
a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh +++ b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh @@ -12,6 +12,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function check_log { ${CLICKHOUSE_CLIENT} --format=JSONEachRow -nq " +set allow_experimental_analyzer = 1; system flush logs; -- Show queries sorted by start time. @@ -55,7 +56,7 @@ select count(*) "'"'"initial query spans with proper parent"'"'" where trace_id = UUIDNumToString(toFixedString(unhex('$trace_id'), 16)) and operation_name = 'query' - and parent_span_id in ( + and parent_span_id in ( select span_id from system.opentelemetry_span_log where trace_id = UUIDNumToString(toFixedString(unhex('$trace_id'), 16)) and parent_span_id = reinterpretAsUInt64(unhex('73')) ) ; @@ -76,7 +77,7 @@ select uniqExact(value) "'"'"unique non-empty tracestate values"'"'" # Generate some random trace id so that the previous runs of the test do not interfere. echo "===http===" -trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(reverse(reinterpretAsString(generateUUIDv4()))))") +trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(reverse(reinterpretAsString(generateUUIDv4())))) settings allow_experimental_analyzer = 1") # Check that the HTTP traceparent is read, and then passed through `remote` # table function. We expect 4 queries -- one initial, one SELECT and two @@ -86,7 +87,7 @@ ${CLICKHOUSE_CURL} \ --header "traceparent: 00-$trace_id-0000000000000073-01" \ --header "tracestate: some custom state" "$CLICKHOUSE_URL" \ --get \ - --data-urlencode "query=select 1 from remote('127.0.0.2', system, one) format Null" + --data-urlencode "query=select 1 from remote('127.0.0.2', system, one) settings allow_experimental_analyzer = 1 format Null" check_log diff --git a/tests/queries/0_stateless/01457_int256_hashing.sql b/tests/queries/0_stateless/01457_int256_hashing.sql index 861e567950a..510d25f6b8c 100644 --- a/tests/queries/0_stateless/01457_int256_hashing.sql +++ b/tests/queries/0_stateless/01457_int256_hashing.sql @@ -1,13 +1,15 @@ -- Tags: no-fasttest +SET joined_subquery_requires_alias = 0; + SELECT toUInt256(123) IN (NULL); SELECT toUInt256(123) AS k GROUP BY k; -SELECT toUInt256(123) AS k FROM system.one INNER JOIN (SELECT toUInt256(123) AS k) t USING k; +SELECT k FROM (SELECT toUInt256(123) AS k FROM system.one) INNER JOIN (SELECT toUInt256(123) AS k) t USING k; SELECT arrayEnumerateUniq([toUInt256(123), toUInt256(456), toUInt256(123)]); SELECT toInt256(123) IN (NULL); SELECT toInt256(123) AS k GROUP BY k; -SELECT toInt256(123) AS k FROM system.one INNER JOIN (SELECT toInt256(123) AS k) t USING k; +SELECT k FROM (SELECT toInt256(123) AS k FROM system.one) INNER JOIN (SELECT toInt256(123) AS k) t USING k; SELECT arrayEnumerateUniq([toInt256(123), toInt256(456), toInt256(123)]); -- SELECT toUInt128(123) IN (NULL); @@ -17,17 +19,17 @@ SELECT arrayEnumerateUniq([toInt256(123), toInt256(456), toInt256(123)]); SELECT toInt128(123) IN (NULL); SELECT toInt128(123) AS k GROUP BY k; -SELECT toInt128(123) AS k FROM system.one INNER JOIN (SELECT toInt128(123) AS k) t USING k; +SELECT k FROM (SELECT toInt128(123) AS k FROM system.one) INNER JOIN (SELECT toInt128(123) AS k) t USING k; SELECT arrayEnumerateUniq([toInt128(123), toInt128(456), toInt128(123)]); SELECT toNullable(toUInt256(321)) IN (NULL); SELECT toNullable(toUInt256(321)) AS k GROUP BY k; -SELECT toNullable(toUInt256(321)) AS k FROM system.one INNER JOIN (SELECT toUInt256(321) AS k) t USING k; +SELECT k FROM (SELECT toNullable(toUInt256(321)) AS k FROM
system.one) INNER JOIN (SELECT toUInt256(321) AS k) t USING k; SELECT arrayEnumerateUniq([toNullable(toUInt256(321)), toNullable(toUInt256(456)), toNullable(toUInt256(321))]); SELECT toNullable(toInt256(321)) IN (NULL); SELECT toNullable(toInt256(321)) AS k GROUP BY k; -SELECT toNullable(toInt256(321)) AS k FROM system.one INNER JOIN (SELECT toInt256(321) AS k) t USING k; +SELECT k FROM (SELECT toNullable(toInt256(321)) AS k FROM system.one) INNER JOIN (SELECT toInt256(321) AS k) t USING k; SELECT arrayEnumerateUniq([toNullable(toInt256(321)), toNullable(toInt256(456)), toNullable(toInt256(321))]); -- SELECT toNullable(toUInt128(321)) IN (NULL); @@ -37,5 +39,5 @@ SELECT arrayEnumerateUniq([toNullable(toInt256(321)), toNullable(toInt256(456)), SELECT toNullable(toInt128(321)) IN (NULL); SELECT toNullable(toInt128(321)) AS k GROUP BY k; -SELECT toNullable(toInt128(321)) AS k FROM system.one INNER JOIN (SELECT toInt128(321) AS k) t USING k; +SELECT k FROM (SELECT toNullable(toInt128(321)) AS k FROM system.one) INNER JOIN (SELECT toInt128(321) AS k) t USING k; SELECT arrayEnumerateUniq([toNullable(toInt128(321)), toNullable(toInt128(456)), toNullable(toInt128(321))]); diff --git a/tests/queries/0_stateless/01463_test_alter_live_view_refresh.reference b/tests/queries/0_stateless/01463_test_alter_live_view_refresh.reference deleted file mode 100644 index 4d98c7b6838..00000000000 --- a/tests/queries/0_stateless/01463_test_alter_live_view_refresh.reference +++ /dev/null @@ -1 +0,0 @@ -ALTER LIVE VIEW live1 REFRESH diff --git a/tests/queries/0_stateless/01463_test_alter_live_view_refresh.sql b/tests/queries/0_stateless/01463_test_alter_live_view_refresh.sql deleted file mode 100644 index 3c855c984b3..00000000000 --- a/tests/queries/0_stateless/01463_test_alter_live_view_refresh.sql +++ /dev/null @@ -1,15 +0,0 @@ --- Tags: no-replicated-database, no-parallel, no-fasttest - -CREATE TABLE test0 ( - c0 UInt64 - ) ENGINE = MergeTree() PARTITION BY c0 ORDER BY c0; - -SET allow_experimental_live_view=1; - -CREATE LIVE VIEW live1 AS SELECT * FROM test0; - -select 'ALTER LIVE VIEW live1 REFRESH'; -ALTER LIVE VIEW live1 REFRESH; -- success - -DROP TABLE test0; -DROP VIEW live1; diff --git a/tests/queries/0_stateless/01470_columns_transformers.sql b/tests/queries/0_stateless/01470_columns_transformers.sql index 2da2f6e9c67..8840ce3f3b5 100644 --- a/tests/queries/0_stateless/01470_columns_transformers.sql +++ b/tests/queries/0_stateless/01470_columns_transformers.sql @@ -17,15 +17,15 @@ SELECT a.* APPLY(toDate) EXCEPT(i, j) APPLY(any) from columns_transformers a; SELECT * EXCEPT STRICT i from columns_transformers; SELECT * EXCEPT STRICT (i, j) from columns_transformers; SELECT * EXCEPT STRICT i, j1 from columns_transformers; -- { serverError 47 } -SELECT * EXCEPT STRICT(i, j1) from columns_transformers; -- { serverError 16 } +SELECT * EXCEPT STRICT(i, j1) from columns_transformers; -- { serverError NO_SUCH_COLUMN_IN_TABLE, BAD_ARGUMENTS } SELECT * REPLACE STRICT i + 1 AS i from columns_transformers; -SELECT * REPLACE STRICT(i + 1 AS col) from columns_transformers; -- { serverError 16 } +SELECT * REPLACE STRICT(i + 1 AS col) from columns_transformers; -- { serverError NO_SUCH_COLUMN_IN_TABLE, BAD_ARGUMENTS } SELECT * REPLACE(i + 1 AS i) APPLY(sum) from columns_transformers; SELECT columns_transformers.* REPLACE(j + 2 AS j, i + 1 AS i) APPLY(avg) from columns_transformers; SELECT columns_transformers.* REPLACE(j + 1 AS j, j + 2 AS j) APPLY(avg) from columns_transformers; -- { serverError 43 } -- REPLACE after
APPLY will not match anything SELECT a.* APPLY(toDate) REPLACE(i + 1 AS i) APPLY(any) from columns_transformers a; -SELECT a.* APPLY(toDate) REPLACE STRICT(i + 1 AS i) APPLY(any) from columns_transformers a; -- { serverError 16 } +SELECT a.* APPLY(toDate) REPLACE STRICT(i + 1 AS i) APPLY(any) from columns_transformers a; -- { serverError NO_SUCH_COLUMN_IN_TABLE, BAD_ARGUMENTS } EXPLAIN SYNTAX SELECT * APPLY(sum) from columns_transformers; EXPLAIN SYNTAX SELECT columns_transformers.* APPLY(avg) from columns_transformers; diff --git a/tests/queries/0_stateless/01474_bad_global_join.sql b/tests/queries/0_stateless/01474_bad_global_join.sql index b65b4e804a8..622e14e6f22 100644 --- a/tests/queries/0_stateless/01474_bad_global_join.sql +++ b/tests/queries/0_stateless/01474_bad_global_join.sql @@ -10,7 +10,7 @@ INSERT INTO local_table SELECT number AS id, toString(number) AS val FROM number CREATE TABLE dist_table AS local_table ENGINE = Distributed('test_cluster_two_shards_localhost', currentDatabase(), local_table); -SELECT uniq(d.val) FROM dist_table AS d GLOBAL LEFT JOIN numbers(100) AS t USING id; -- { serverError 284 } +SELECT uniq(d.val) FROM dist_table AS d GLOBAL LEFT JOIN numbers(100) AS t USING id; -- { serverError 47, 284 } SELECT uniq(d.val) FROM dist_table AS d GLOBAL LEFT JOIN local_table AS t USING id; DROP TABLE local_table; diff --git a/tests/queries/0_stateless/01475_read_subcolumns_storages.reference b/tests/queries/0_stateless/01475_read_subcolumns_storages.reference index 4e37b751d5a..ce0b13eabd2 100644 --- a/tests/queries/0_stateless/01475_read_subcolumns_storages.reference +++ b/tests/queries/0_stateless/01475_read_subcolumns_storages.reference @@ -7,9 +7,6 @@ TinyLog Memory 100 [1,2,3] [[[1,2],[],[4]],[[5,6],[7,8]],[[]]] [1,NULL,2] ('foo',200) {'foo':1,'bar':42} 100 0 [1,2,3] 3 [[[1,2],[],[4]],[[5,6],[7,8]],[[]]] 3 [3,2,1] [[2,0,1],[2,2],[0]] [1,NULL,2] 3 [0,1,0] ('foo',200) foo 200 {'foo':1,'bar':42} ['foo','bar'] [1,42] -MergeTree ORDER BY tuple() SETTINGS min_bytes_for_compact_part='10M' -100 [1,2,3] [[[1,2],[],[4]],[[5,6],[7,8]],[[]]] [1,NULL,2] ('foo',200) {'foo':1,'bar':42} -100 0 [1,2,3] 3 [[[1,2],[],[4]],[[5,6],[7,8]],[[]]] 3 [3,2,1] [[2,0,1],[2,2],[0]] [1,NULL,2] 3 [0,1,0] ('foo',200) foo 200 {'foo':1,'bar':42} ['foo','bar'] [1,42] MergeTree ORDER BY tuple() SETTINGS min_bytes_for_wide_part='10M' 100 [1,2,3] [[[1,2],[],[4]],[[5,6],[7,8]],[[]]] [1,NULL,2] ('foo',200) {'foo':1,'bar':42} 100 0 [1,2,3] 3 [[[1,2],[],[4]],[[5,6],[7,8]],[[]]] 3 [3,2,1] [[2,0,1],[2,2],[0]] [1,NULL,2] 3 [0,1,0] ('foo',200) foo 200 {'foo':1,'bar':42} ['foo','bar'] [1,42] diff --git a/tests/queries/0_stateless/01475_read_subcolumns_storages.sh b/tests/queries/0_stateless/01475_read_subcolumns_storages.sh index d770d5118ac..4a9b9639f78 100755 --- a/tests/queries/0_stateless/01475_read_subcolumns_storages.sh +++ b/tests/queries/0_stateless/01475_read_subcolumns_storages.sh @@ -12,7 +12,6 @@ create_query="CREATE TABLE subcolumns(n Nullable(UInt32), a1 Array(UInt32),\ # "StripeLog" declare -a ENGINES=("Log" "TinyLog" "Memory" \ - "MergeTree ORDER BY tuple() SETTINGS min_bytes_for_compact_part='10M'" \ "MergeTree ORDER BY tuple() SETTINGS min_bytes_for_wide_part='10M'" \ "MergeTree ORDER BY tuple() SETTINGS min_bytes_for_wide_part=0") diff --git a/tests/queries/0_stateless/01476_right_full_join_switch.reference b/tests/queries/0_stateless/01476_right_full_join_switch.reference index 1f839b86013..54f9909762f 100644 --- a/tests/queries/0_stateless/01476_right_full_join_switch.reference +++ 
b/tests/queries/0_stateless/01476_right_full_join_switch.reference @@ -3,6 +3,16 @@ 1 l \N LowCardinality(String) Nullable(String) 2 \N LowCardinality(String) Nullable(String) - +\N \N Nullable(String) LowCardinality(String) +1 \N l Nullable(String) LowCardinality(String) +1 \N l Nullable(String) LowCardinality(String) +\N \N Nullable(String) LowCardinality(String) +- +1 l \N LowCardinality(String) Nullable(String) +2 \N LowCardinality(String) Nullable(String) +1 l \N LowCardinality(String) Nullable(String) +2 \N LowCardinality(String) Nullable(String) +- 0 \N Nullable(String) LowCardinality(String) 1 \N l Nullable(String) LowCardinality(String) 0 \N Nullable(String) LowCardinality(String) diff --git a/tests/queries/0_stateless/01476_right_full_join_switch.sql b/tests/queries/0_stateless/01476_right_full_join_switch.sql index 5d041843ee2..dfbdec47e1f 100644 --- a/tests/queries/0_stateless/01476_right_full_join_switch.sql +++ b/tests/queries/0_stateless/01476_right_full_join_switch.sql @@ -10,8 +10,27 @@ CREATE TABLE nr (`x` Nullable(UInt32), `s` Nullable(String)) ENGINE = Memory; INSERT INTO t VALUES (1, 'l'); INSERT INTO nr VALUES (2, NULL); + SET join_use_nulls = 0; +SET allow_experimental_analyzer = 1; + +-- t.x is the supertype of `x` from left and right since `x` is inside `USING`. +SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l LEFT JOIN nr AS r USING (x) ORDER BY t.x; +SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l RIGHT JOIN nr AS r USING (x) ORDER BY t.x; +SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l FULL JOIN nr AS r USING (x) ORDER BY t.x; + +SELECT '-'; + +SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l LEFT JOIN t AS r USING (x) ORDER BY t.x; +SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l RIGHT JOIN t AS r USING (x) ORDER BY t.x; +SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l FULL JOIN t AS r USING (x) ORDER BY t.x; + +SELECT '-'; + +SET allow_experimental_analyzer = 0; + +-- t.x is the supertype of `x` from left and right since `x` is inside `USING`.
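A minimal sketch of that supertype rule, assuming a recent ClickHouse session with allow_experimental_analyzer = 1 (the lhs/rhs tables here are hypothetical illustrations, not part of the test):

CREATE TABLE lhs (x UInt32) ENGINE = Memory;
CREATE TABLE rhs (x Nullable(UInt32)) ENGINE = Memory;
INSERT INTO lhs VALUES (1);
INSERT INTO rhs VALUES (2);
-- the USING column is resolved to the common supertype of both sides, here Nullable(UInt32)
SELECT x, toTypeName(x) FROM lhs FULL JOIN rhs USING (x) ORDER BY x;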
SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l LEFT JOIN nr AS r USING (x) ORDER BY t.x; SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l RIGHT JOIN nr AS r USING (x) ORDER BY t.x; SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l FULL JOIN nr AS r USING (x) ORDER BY t.x; diff --git a/tests/queries/0_stateless/01477_lc_in_merge_join_left_key.reference b/tests/queries/0_stateless/01477_lc_in_merge_join_left_key.reference index ac4d0a3d21a..9b6890c01ee 100644 --- a/tests/queries/0_stateless/01477_lc_in_merge_join_left_key.reference +++ b/tests/queries/0_stateless/01477_lc_in_merge_join_left_key.reference @@ -17,7 +17,7 @@ 1 \N l Nullable(String) LowCardinality(String) 0 \N Nullable(String) LowCardinality(String) 1 \N l Nullable(String) LowCardinality(String) -- +- join_use_nulls - 1 l \N LowCardinality(String) Nullable(String) 2 \N \N LowCardinality(Nullable(String)) Nullable(String) 1 l \N LowCardinality(Nullable(String)) Nullable(String) @@ -33,3 +33,47 @@ 1 l \N LowCardinality(Nullable(String)) Nullable(String) \N \N \N LowCardinality(Nullable(String)) Nullable(String) - +\N \N \N Nullable(String) LowCardinality(Nullable(String)) +1 \N l Nullable(String) LowCardinality(String) +1 \N l Nullable(String) LowCardinality(Nullable(String)) +\N \N \N Nullable(String) LowCardinality(Nullable(String)) +- analyzer - +1 l \N LowCardinality(String) Nullable(String) +2 \N LowCardinality(String) Nullable(String) +1 l \N LowCardinality(String) Nullable(String) +2 \N LowCardinality(String) Nullable(String) +- +\N \N Nullable(String) LowCardinality(String) +1 \N l Nullable(String) LowCardinality(String) +1 \N l Nullable(String) LowCardinality(String) +\N \N Nullable(String) LowCardinality(String) +- +1 l \N Nullable(String) Nullable(String) +0 \N \N Nullable(String) Nullable(String) +0 \N \N Nullable(String) Nullable(String) +1 l \N Nullable(String) Nullable(String) +- +0 \N \N Nullable(String) Nullable(String) +1 \N l Nullable(String) Nullable(String) +0 \N \N Nullable(String) Nullable(String) +1 \N l Nullable(String) Nullable(String) +- join_use_nulls - +1 l \N LowCardinality(String) Nullable(String) +2 \N \N LowCardinality(Nullable(String)) Nullable(String) +1 l \N LowCardinality(Nullable(String)) Nullable(String) +2 \N \N LowCardinality(Nullable(String)) Nullable(String) +- +\N \N \N Nullable(String) LowCardinality(Nullable(String)) +1 \N l Nullable(String) LowCardinality(String) +1 \N l Nullable(String) LowCardinality(Nullable(String)) +\N \N \N Nullable(String) LowCardinality(Nullable(String)) +- +1 l \N Nullable(String) Nullable(String) +\N \N \N Nullable(String) Nullable(String) +1 l \N Nullable(String) Nullable(String) +\N \N \N Nullable(String) Nullable(String) +- +\N \N \N Nullable(String) Nullable(String) +1 \N l Nullable(String) Nullable(String) +1 \N l Nullable(String) Nullable(String) +\N \N \N Nullable(String) Nullable(String) diff --git a/tests/queries/0_stateless/01477_lc_in_merge_join_left_key.sql b/tests/queries/0_stateless/01477_lc_in_merge_join_left_key.sql.j2 similarity index 83% rename from tests/queries/0_stateless/01477_lc_in_merge_join_left_key.sql rename to tests/queries/0_stateless/01477_lc_in_merge_join_left_key.sql.j2 index 2507613f051..6eafd41b411 100644 --- a/tests/queries/0_stateless/01477_lc_in_merge_join_left_key.sql +++ b/tests/queries/0_stateless/01477_lc_in_merge_join_left_key.sql.j2 @@ -10,6 +10,14 @@ CREATE TABLE nr (`x` Nullable(UInt32), `s` Nullable(String)) ENGINE = Memory; INSERT INTO t VALUES (1, 'l'); 
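The rename to .sql.j2 just above hands this test to the runner's Jinja2 templating, so a single source file exercises both analyzer modes. Assuming standard Jinja2 semantics, the {% for %} block that follows renders roughly to this SQL:

-- first iteration
SET allow_experimental_analyzer = 0;
SET join_use_nulls = 0;
-- ...test body...
-- second iteration
SET allow_experimental_analyzer = 1;
SELECT '- analyzer -';
SET join_use_nulls = 0;
-- ...test body...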
INSERT INTO nr VALUES (2, NULL); +{% for allow_experimental_analyzer in [0, 1] -%} + +SET allow_experimental_analyzer = {{ allow_experimental_analyzer }}; + +{% if allow_experimental_analyzer -%} +SELECT '- analyzer -'; +{% endif -%} + SET join_use_nulls = 0; SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l LEFT JOIN nr AS r USING (x) ORDER BY t.x; @@ -36,7 +44,7 @@ SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l FULL JOIN t SET join_use_nulls = 1; -SELECT '-'; +SELECT '- join_use_nulls -'; SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l LEFT JOIN nr AS r USING (x) ORDER BY t.x; SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l RIGHT JOIN nr AS r USING (x) ORDER BY t.x; @@ -56,10 +64,11 @@ SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l FULL JOIN nr SELECT '-'; --- TODO --- SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l LEFT JOIN t AS r USING (s) ORDER BY t.x; --- SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l RIGHT JOIN t AS r USING (s) ORDER BY t.x; --- SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l FULL JOIN t AS r USING (s) ORDER BY t.x; +SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l LEFT JOIN t AS r USING (s) ORDER BY t.x; +SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l RIGHT JOIN t AS r USING (s) ORDER BY t.x; +SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l FULL JOIN t AS r USING (s) ORDER BY t.x; + +{% endfor %} DROP TABLE t; DROP TABLE nr; diff --git a/tests/queries/0_stateless/01495_subqueries_in_with_statement_2.sql b/tests/queries/0_stateless/01495_subqueries_in_with_statement_2.sql index 7ec4eeaa778..43dff687d84 100644 --- a/tests/queries/0_stateless/01495_subqueries_in_with_statement_2.sql +++ b/tests/queries/0_stateless/01495_subqueries_in_with_statement_2.sql @@ -1,29 +1,29 @@ -WITH +WITH x AS (SELECT number AS a FROM numbers(10)), y AS (SELECT number AS a FROM numbers(5)) SELECT * FROM x WHERE a in (SELECT a FROM y) ORDER BY a; -WITH +WITH x AS (SELECT number AS a FROM numbers(10)), y AS (SELECT number AS a FROM numbers(5)) SELECT * FROM x left JOIN y USING a ORDER BY a; -WITH +WITH x AS (SELECT number AS a FROM numbers(10)), y AS (SELECT number AS a FROM numbers(5)) SELECT * FROM x JOIN y USING a ORDER BY x.a; -WITH +WITH x AS (SELECT number AS a FROM numbers(10)), y AS (SELECT number AS a FROM numbers(5)), z AS (SELECT toUInt64(1) b) SELECT * FROM x JOIN y USING a WHERE a in (SELECT * FROM z); -WITH +WITH x AS (SELECT number AS a FROM numbers(10)), y AS (SELECT number AS a FROM numbers(5)), z AS (SELECT * FROM x WHERE a % 2), @@ -31,19 +31,17 @@ w AS (SELECT * FROM y WHERE a > 0) SELECT * FROM x JOIN y USING a WHERE a in (SELECT * FROM z) ORDER BY x.a; -WITH +WITH x AS (SELECT number AS a FROM numbers(10)), y AS (SELECT number AS a FROM numbers(5)), z AS (SELECT * FROM x WHERE a % 2), w AS (SELECT * FROM y WHERE a > 0) -SELECT max(a) FROM x JOIN y USING a WHERE a in (SELECT * FROM z) -HAVING a > (SELECT min(a) FROM w); +SELECT max(a) FROM x JOIN y USING a WHERE a in (SELECT * FROM z) AND a > (SELECT min(a) FROM w); -WITH +WITH x AS (SELECT number AS a FROM numbers(10)), y AS (SELECT number AS a FROM numbers(5)), z AS (SELECT * FROM x WHERE a % 2), w AS (SELECT * FROM y WHERE a > 0) -SELECT a FROM x JOIN y USING a WHERE a in (SELECT * FROM z) -HAVING a <= (SELECT max(a) FROM w) +SELECT a FROM x JOIN y USING a WHERE a in (SELECT * FROM z) AND a <= (SELECT 
max(a) FROM w) ORDER BY x.a; diff --git a/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.reference b/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.reference index 5abc312652d..b434c50b070 100644 --- a/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.reference +++ b/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.reference @@ -1,3 +1,4 @@ +3 0 0 2 diff --git a/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.sql b/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.sql index e4e2e3dd76a..030db421683 100644 --- a/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.sql +++ b/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.sql @@ -7,16 +7,16 @@ insert into test1 values ('2020-09-01 00:01:02', 1), ('2020-09-01 20:01:03', 2), set max_rows_to_read = 1; -- non-optimized -select count() from test1 settings max_parallel_replicas = 3; -- { serverError 158; } +select count() from test1 settings max_parallel_replicas = 3; -- optimized (toYear is monotonic and we provide the partition expr as is) select count() from test1 where toYear(toDate(p)) = 1999; -- non-optimized (toDate(DateTime) is always monotonic, but we cannot relax the predicates to do trivial count()) -select count() from test1 where p > toDateTime('2020-09-01 10:00:00'); -- { serverError 158; } +select count() from test1 where p > toDateTime('2020-09-01 10:00:00'); -- { serverError 158 } -- optimized (partition expr wrapped with non-monotonic functions) select count() FROM test1 where toDate(p) = '2020-09-01' and sipHash64(toString(toDate(p))) % 2 = 1; select count() FROM test1 where toDate(p) = '2020-09-01' and sipHash64(toString(toDate(p))) % 2 = 0; -- non-optimized (some predicate depends on non-partition_expr columns) -select count() FROM test1 where toDate(p) = '2020-09-01' and k = 2; -- { serverError 158; } +select count() FROM test1 where toDate(p) = '2020-09-01' and k = 2; -- { serverError 158 } -- optimized select count() from test1 where toDate(p) > '2020-09-01'; -- non-optimized @@ -35,10 +35,10 @@ select count() from test_tuple where i > 2; -- optimized select count() from test_tuple where i < 1; -- non-optimized -select count() from test_tuple array join [p,p] as c where toDate(p) = '2020-09-01'; -- { serverError 158; } +select count() from test_tuple array join [p,p] as c where toDate(p) = '2020-09-01'; -- { serverError 158 } select count() from test_tuple array join [1,2] as c where toDate(p) = '2020-09-01' settings max_rows_to_read = 4; -- non-optimized -select count() from test_tuple array join [1,2,3] as c where toDate(p) = '2020-09-01'; -- { serverError 158; } +select count() from test_tuple array join [1,2,3] as c where toDate(p) = '2020-09-01'; -- { serverError 158 } select count() from test_tuple array join [1,2,3] as c where toDate(p) = '2020-09-01' settings max_rows_to_read = 6; create table test_two_args(i int, j int, k int) engine MergeTree partition by i + j order by k settings index_granularity = 1; @@ -48,7 +48,7 @@ insert into test_two_args values (1, 2, 3), (2, 1, 3), (0, 3, 4); -- optimized select count() from test_two_args where i + j = 3; -- non-optimized -select count() from test_two_args where i = 1; -- { serverError 158; } +select count() from test_two_args where i = 1; -- { serverError 158 } drop table test1; drop table test_tuple; diff --git a/tests/queries/0_stateless/01508_explain_header.reference
b/tests/queries/0_stateless/01508_explain_header.reference index 7510e67c643..206bd4afdab 100644 --- a/tests/queries/0_stateless/01508_explain_header.reference +++ b/tests/queries/0_stateless/01508_explain_header.reference @@ -1,4 +1,4 @@ -Expression ((Projection + Before ORDER BY)) +Expression ((Project names + (Projection + Change column names to column identifiers))) Header: x UInt8 ReadFromStorage (SystemOne) Header: dummy UInt8 diff --git a/tests/queries/0_stateless/01508_explain_header.sql b/tests/queries/0_stateless/01508_explain_header.sql index fb274c84d8a..a9f876068aa 100644 --- a/tests/queries/0_stateless/01508_explain_header.sql +++ b/tests/queries/0_stateless/01508_explain_header.sql @@ -1 +1,3 @@ +SET allow_experimental_analyzer = 1; + explain header = 1 select 1 as x; diff --git a/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh b/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh index efe24aa3a88..80318ba67fb 100755 --- a/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh +++ b/tests/queries/0_stateless/01508_race_condition_rename_clear_zookeeper_long.sh @@ -8,9 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS table_for_renames0" $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS table_for_renames50" - -$CLICKHOUSE_CLIENT --query "CREATE TABLE table_for_renames0 (value UInt64, data String) ENGINE ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/concurrent_rename', '1') ORDER BY tuple() SETTINGS cleanup_delay_period = 1, cleanup_delay_period_random_add = 0, min_rows_for_compact_part = 100000, min_rows_for_compact_part = 10000000, write_ahead_log_max_bytes = 1" - +$CLICKHOUSE_CLIENT --query "CREATE TABLE table_for_renames0 (value UInt64, data String) ENGINE ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/concurrent_rename', '1') ORDER BY tuple() SETTINGS cleanup_delay_period = 1, cleanup_delay_period_random_add = 0" $CLICKHOUSE_CLIENT --query "INSERT INTO table_for_renames0 SELECT number, toString(number) FROM numbers(1000)" diff --git a/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql b/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql index 228e4d73167..3d57518d0f4 100644 --- a/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql +++ b/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql @@ -13,9 +13,9 @@ set max_memory_usage='500M'; set max_threads=1; set max_block_size=500; -select key, groupArray(repeat('a', 200)), count() from data_01513 group by key format Null settings optimize_aggregation_in_order=0; -- { serverError 241; } +select key, groupArray(repeat('a', 200)), count() from data_01513 group by key format Null settings optimize_aggregation_in_order=0; -- { serverError 241 } select key, groupArray(repeat('a', 200)), count() from data_01513 group by key format Null settings optimize_aggregation_in_order=1; -- for WITH TOTALS previous groups should be kept. 
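An aside on the recurring `{ serverError N; }` → `{ serverError N }` hunks in this patch: the stateless-test runner reads the trailing comment as the failure the statement must produce, and the annotation accepts a numeric code, a symbolic error name, or a comma-separated list of acceptable codes (all three forms appear elsewhere in this diff); the stray semicolon inside the braces is what the hunks strip out. A minimal sketch of the convention, with throwIf as a hypothetical stand-in query:

SELECT throwIf(1); -- { serverError 395 }
SELECT throwIf(1); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }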
-select key, groupArray(repeat('a', 200)), count() from data_01513 group by key with totals format Null settings optimize_aggregation_in_order=1; -- { serverError 241; } +select key, groupArray(repeat('a', 200)), count() from data_01513 group by key with totals format Null settings optimize_aggregation_in_order=1; -- { serverError 241 } drop table data_01513; diff --git a/tests/queries/0_stateless/01516_create_table_primary_key.sql b/tests/queries/0_stateless/01516_create_table_primary_key.sql index b2b9f288eab..630c573c2cc 100644 --- a/tests/queries/0_stateless/01516_create_table_primary_key.sql +++ b/tests/queries/0_stateless/01516_create_table_primary_key.sql @@ -35,7 +35,7 @@ ATTACH TABLE primary_key_test(v1 Int32, v2 Int32) ENGINE=ReplacingMergeTree ORDE SELECT * FROM primary_key_test FINAL; DROP TABLE primary_key_test; -CREATE TABLE primary_key_test(v1 Int64, v2 Int32, v3 String, PRIMARY KEY(v1, gcd(v1, v2))) ENGINE=ReplacingMergeTree ORDER BY v1; -- { serverError 36; } +CREATE TABLE primary_key_test(v1 Int64, v2 Int32, v3 String, PRIMARY KEY(v1, gcd(v1, v2))) ENGINE=ReplacingMergeTree ORDER BY v1; -- { serverError 36 } CREATE TABLE primary_key_test(v1 Int64, v2 Int32, v3 String, PRIMARY KEY(v1, gcd(v1, v2))) ENGINE=ReplacingMergeTree ORDER BY (v1, gcd(v1, v2)); diff --git a/tests/queries/0_stateless/01527_bad_aggregation_in_lambda.sql b/tests/queries/0_stateless/01527_bad_aggregation_in_lambda.sql index 10ffca22bd1..3be73ba56e7 100644 --- a/tests/queries/0_stateless/01527_bad_aggregation_in_lambda.sql +++ b/tests/queries/0_stateless/01527_bad_aggregation_in_lambda.sql @@ -1 +1 @@ -SELECT arrayMap(x -> x * sum(x), range(10)); -- { serverError 47 } +SELECT arrayMap(x -> x * sum(x), range(10)); -- { serverError 10, 47 } diff --git a/tests/queries/0_stateless/01528_allow_nondeterministic_optimize_skip_unused_shards.sql b/tests/queries/0_stateless/01528_allow_nondeterministic_optimize_skip_unused_shards.sql index 08fba7480d1..ac04178e585 100644 --- a/tests/queries/0_stateless/01528_allow_nondeterministic_optimize_skip_unused_shards.sql +++ b/tests/queries/0_stateless/01528_allow_nondeterministic_optimize_skip_unused_shards.sql @@ -5,7 +5,7 @@ create table dist_01528 as system.one engine=Distributed('test_cluster_two_shard set optimize_skip_unused_shards=1; set force_optimize_skip_unused_shards=1; -select * from dist_01528 where dummy = 2; -- { serverError 507; } +select * from dist_01528 where dummy = 2; -- { serverError 507 } select * from dist_01528 where dummy = 2 settings allow_nondeterministic_optimize_skip_unused_shards=1; drop table dist_01528; diff --git a/tests/queries/0_stateless/01530_drop_database_atomic_sync.sql b/tests/queries/0_stateless/01530_drop_database_atomic_sync.sql index 7a2e64742cf..13b4a4e331b 100644 --- a/tests/queries/0_stateless/01530_drop_database_atomic_sync.sql +++ b/tests/queries/0_stateless/01530_drop_database_atomic_sync.sql @@ -30,7 +30,7 @@ create table db_01530_atomic.data (key Int) Engine=ReplicatedMergeTree('/clickho drop database db_01530_atomic; create database db_01530_atomic Engine=Atomic; -create table db_01530_atomic.data (key Int) Engine=ReplicatedMergeTree('/clickhouse/tables/{database}/db_01530_atomic/data', 'test') order by key; -- { serverError 253; } +create table db_01530_atomic.data (key Int) Engine=ReplicatedMergeTree('/clickhouse/tables/{database}/db_01530_atomic/data', 'test') order by key; -- { serverError 253 } set database_atomic_wait_for_drop_and_detach_synchronously=1; diff --git 
a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql index f217b6094b2..4b907d5ebb6 100644 --- a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql +++ b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql @@ -4,8 +4,8 @@ SET insert_keeper_fault_injection_probability=0; -- disable fault injection; part ids are non-deterministic in case of insert retries -DROP TABLE IF EXISTS execute_on_single_replica_r1 NO DELAY; -DROP TABLE IF EXISTS execute_on_single_replica_r2 NO DELAY; +DROP TABLE IF EXISTS execute_on_single_replica_r1 SYNC; +DROP TABLE IF EXISTS execute_on_single_replica_r2 SYNC; /* this test requires a fixed zookeeper path, so we cannot use ReplicatedMergeTree({database}) */ CREATE TABLE execute_on_single_replica_r1 (x UInt64) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test_01532/execute_on_single_replica', 'r1') ORDER BY tuple() SETTINGS execute_merges_on_single_replica_time_threshold=10; @@ -130,5 +130,5 @@ GROUP BY part_name ORDER BY part_name FORMAT Vertical; -DROP TABLE execute_on_single_replica_r1 NO DELAY; -DROP TABLE execute_on_single_replica_r2 NO DELAY; +DROP TABLE execute_on_single_replica_r1 SYNC; +DROP TABLE execute_on_single_replica_r2 SYNC; diff --git a/tests/queries/0_stateless/01533_multiple_nested.sql b/tests/queries/0_stateless/01533_multiple_nested.sql index f39b56bbaa2..1a6f0ec395e 100644 --- a/tests/queries/0_stateless/01533_multiple_nested.sql +++ b/tests/queries/0_stateless/01533_multiple_nested.sql @@ -37,7 +37,7 @@ SELECT col1.a FROM nested FORMAT Null; -- 4 files: (col1.size0, col1.a) x2 SYSTEM FLUSH LOGS; -SELECT ProfileEvents['FileOpen'] +SELECT ProfileEvents['FileOpen'] - ProfileEvents['CreatedReadBufferDirectIOFailed'] FROM system.query_log WHERE (type = 'QueryFinish') AND (lower(query) LIKE lower('SELECT col1.a FROM %nested%')) AND event_date >= yesterday() AND current_database = currentDatabase(); @@ -47,7 +47,7 @@ SELECT col3.n2.s FROM nested FORMAT Null; -- 6 files: (col3.size0, col3.n2.size1, col3.n2.s) x2 SYSTEM FLUSH LOGS; -SELECT ProfileEvents['FileOpen'] +SELECT ProfileEvents['FileOpen'] - ProfileEvents['CreatedReadBufferDirectIOFailed'] FROM system.query_log WHERE (type = 'QueryFinish') AND (lower(query) LIKE lower('SELECT col3.n2.s FROM %nested%')) AND event_date >= yesterday() AND current_database = currentDatabase(); diff --git a/tests/queries/0_stateless/01548_parallel_parsing_max_memory.sh b/tests/queries/0_stateless/01548_parallel_parsing_max_memory.sh index 8c4900043d0..308b90dcd55 100755 --- a/tests/queries/0_stateless/01548_parallel_parsing_max_memory.sh +++ b/tests/queries/0_stateless/01548_parallel_parsing_max_memory.sh @@ -6,4 +6,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) yes http://foobarfoobarfoobarfoobarfoobarfoobarfoobar.com | head -c1G > ${CLICKHOUSE_TMP}/1g.csv -$CLICKHOUSE_LOCAL --stacktrace --input_format_parallel_parsing=1 --max_memory_usage=100Mi -q "select count() from file('${CLICKHOUSE_TMP}/1g.csv', 'TSV', 'URL String')" +$CLICKHOUSE_LOCAL --stacktrace --input_format_parallel_parsing=1 --max_memory_usage=50Mi -q "select count() from file('${CLICKHOUSE_TMP}/1g.csv', 'TSV', 'URL String') settings max_threads=1" diff --git a/tests/queries/0_stateless/01548_with_totals_having.sql b/tests/queries/0_stateless/01548_with_totals_having.sql index 669d989aa5f..2562ea3f3e5 100644 --- a/tests/queries/0_stateless/01548_with_totals_having.sql +++
b/tests/queries/0_stateless/01548_with_totals_having.sql @@ -1,2 +1,2 @@ -SELECT * FROM numbers(4) GROUP BY number WITH TOTALS HAVING sum(number) <= arrayJoin([]); -- { serverError 44 } +SELECT * FROM numbers(4) GROUP BY number WITH TOTALS HAVING sum(number) <= arrayJoin([]); -- { serverError 44, 59 } SELECT * FROM numbers(4) GROUP BY number WITH TOTALS HAVING sum(number) <= arrayJoin([3, 2, 1, 0]) ORDER BY number; -- { serverError 44 } diff --git a/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql index e374e012238..c202ad349d6 100644 --- a/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql +++ b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-s3-storage, no-random-merge-tree-settings DROP TABLE IF EXISTS data_01551; diff --git a/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference b/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference index 745160a517e..26aea1555a5 100644 --- a/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference +++ b/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference @@ -1,3 +1,5 @@ +masked flush only +3,"default:*@127%2E0%2E0%2E1:9000,default:*@127%2E0%2E0%2E2:9000","AUTHENTICATION_FAILED",1 masked 3,"default:*@127%2E0%2E0%2E1:9000,default:*@127%2E0%2E0%2E2:9000","AUTHENTICATION_FAILED",1 no masking diff --git a/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql b/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql index fea75e1439f..61083c3ae14 100644 --- a/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql +++ b/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql @@ -9,6 +9,20 @@ drop table if exists dist_01555; drop table if exists data_01555; create table data_01555 (key Int) Engine=Null(); +-- +-- masked flush only +-- +SELECT 'masked flush only'; +create table dist_01555 (key Int) Engine=Distributed(test_cluster_with_incorrect_pw, currentDatabase(), data_01555, key); +system stop distributed sends dist_01555; + +insert into dist_01555 values (1)(2); +-- since test_cluster_with_incorrect_pw contains an incorrect password, ignore the error +system flush distributed dist_01555; -- { serverError 516 } +select length(splitByChar('*', data_path)), replaceRegexpOne(data_path, '^.*/([^/]*)/' , '\\1'), extract(last_exception, 'AUTHENTICATION_FAILED'), dateDiff('s', last_exception_time, now()) < 5 from system.distribution_queue where database = currentDatabase() and table = 'dist_01555' format CSV; + +drop table dist_01555; + -- -- masked -- @@ -17,7 +31,7 @@ create table dist_01555 (key Int) Engine=Distributed(test_cluster_with_incorrect insert into dist_01555 values (1)(2); -- since test_cluster_with_incorrect_pw contains an incorrect password, ignore the error -system flush distributed dist_01555; -- { serverError 516; } +system flush distributed dist_01555; -- { serverError 516 } select length(splitByChar('*', data_path)), replaceRegexpOne(data_path, '^.*/([^/]*)/' , '\\1'), extract(last_exception, 'AUTHENTICATION_FAILED'), dateDiff('s', last_exception_time, now()) < 5 from system.distribution_queue where database = currentDatabase() and table = 'dist_01555' format CSV; drop table dist_01555; @@ -29,7 +43,6 @@ SELECT 'no masking'; create table dist_01555 (key Int) Engine=Distributed(test_shard_localhost, currentDatabase(), data_01555, key); insert into dist_01555 values
(1)(2); --- since test_cluster_with_incorrect_pw contains incorrect password ignore error system flush distributed dist_01555; select length(splitByChar('*', data_path)), replaceRegexpOne(data_path, '^.*/([^/]*)/' , '\\1') from system.distribution_queue where database = currentDatabase() and table = 'dist_01555' format CSV; diff --git a/tests/queries/0_stateless/01556_explain_select_with_union_query.reference b/tests/queries/0_stateless/01556_explain_select_with_union_query.reference index c18e6b70b0d..27cf4c129b1 100644 --- a/tests/queries/0_stateless/01556_explain_select_with_union_query.reference +++ b/tests/queries/0_stateless/01556_explain_select_with_union_query.reference @@ -1,180 +1,180 @@ Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Distinct Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Distinct Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Distinct Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Distinct Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage 
(SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Distinct Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Distinct Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Distinct Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Distinct Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Distinct Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - 
Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Distinct Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Distinct Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Distinct Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + 
Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Distinct Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) Union - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) - Expression ((Projection + Before ORDER BY)) + Expression ((Project names + (Projection + Change column names to column identifiers))) ReadFromStorage (SystemOne) diff --git a/tests/queries/0_stateless/01556_explain_select_with_union_query.sql b/tests/queries/0_stateless/01556_explain_select_with_union_query.sql index dcd9bbe7347..bbd96ef5c69 100644 --- a/tests/queries/0_stateless/01556_explain_select_with_union_query.sql +++ b/tests/queries/0_stateless/01556_explain_select_with_union_query.sql @@ -1,3 +1,4 @@ +SET allow_experimental_analyzer = 1; SET union_default_mode = 'DISTINCT'; set enable_global_with_statement = 1; diff --git a/tests/queries/0_stateless/01558_ttest_scipy.python b/tests/queries/0_stateless/01558_ttest_scipy.python index 4d913d4292f..75e1c2701b2 100644 --- a/tests/queries/0_stateless/01558_ttest_scipy.python +++ b/tests/queries/0_stateless/01558_ttest_scipy.python @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python3 import os import sys from scipy import stats @@ -6,70 +6,86 @@ import pandas as pd import numpy as np CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient + def test_and_check(name, a, b, t_stat, p_value, precision=1e-2): client = ClickHouseClient() client.query("DROP TABLE IF EXISTS ttest;") - client.query("CREATE TABLE ttest (left Float64, right UInt8) ENGINE = Memory;"); - client.query("INSERT INTO ttest VALUES {};".format(", ".join(['({},{})'.format(i, 0) for i in a]))) - 
client.query("INSERT INTO ttest VALUES {};".format(", ".join(['({},{})'.format(j, 1) for j in b]))) + client.query("CREATE TABLE ttest (left Float64, right UInt8) ENGINE = Memory;") + client.query( + "INSERT INTO ttest VALUES {};".format( + ", ".join(["({},{})".format(i, 0) for i in a]) + ) + ) + client.query( + "INSERT INTO ttest VALUES {};".format( + ", ".join(["({},{})".format(j, 1) for j in b]) + ) + ) real = client.query_return_df( - "SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name) + - "roundBankers({}(left, right).2, 16) as p_value ".format(name) + - "FROM ttest FORMAT TabSeparatedWithNames;") - real_t_stat = real['t_stat'][0] - real_p_value = real['p_value'][0] - assert(abs(real_t_stat - np.float64(t_stat)) < precision), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat) - assert(abs(real_p_value - np.float64(p_value)) < precision), "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value) + "SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name) + + "roundBankers({}(left, right).2, 16) as p_value ".format(name) + + "FROM ttest FORMAT TabSeparatedWithNames;" + ) + real_t_stat = real["t_stat"][0] + real_p_value = real["p_value"][0] + assert ( + abs(real_t_stat - np.float64(t_stat)) < precision + ), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat) + assert ( + abs(real_p_value - np.float64(p_value)) < precision + ), "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value) client.query("DROP TABLE IF EXISTS ttest;") def test_student(): - rvs1 = np.round(stats.norm.rvs(loc=1, scale=5,size=500), 2) - rvs2 = np.round(stats.norm.rvs(loc=10, scale=5,size=500), 2) - s, p = stats.ttest_ind(rvs1, rvs2, equal_var = True) + rvs1 = np.round(stats.norm.rvs(loc=1, scale=5, size=500), 2) + rvs2 = np.round(stats.norm.rvs(loc=10, scale=5, size=500), 2) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var=True) test_and_check("studentTTest", rvs1, rvs2, s, p) - rvs1 = np.round(stats.norm.rvs(loc=0, scale=5,size=500), 2) - rvs2 = np.round(stats.norm.rvs(loc=0, scale=5,size=500), 2) - s, p = stats.ttest_ind(rvs1, rvs2, equal_var = True) + rvs1 = np.round(stats.norm.rvs(loc=0, scale=5, size=500), 2) + rvs2 = np.round(stats.norm.rvs(loc=0, scale=5, size=500), 2) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var=True) test_and_check("studentTTest", rvs1, rvs2, s, p) - rvs1 = np.round(stats.norm.rvs(loc=2, scale=10,size=512), 2) - rvs2 = np.round(stats.norm.rvs(loc=5, scale=20,size=1024), 2) - s, p = stats.ttest_ind(rvs1, rvs2, equal_var = True) + rvs1 = np.round(stats.norm.rvs(loc=2, scale=10, size=512), 2) + rvs2 = np.round(stats.norm.rvs(loc=5, scale=20, size=1024), 2) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var=True) test_and_check("studentTTest", rvs1, rvs2, s, p) - rvs1 = np.round(stats.norm.rvs(loc=0, scale=10,size=1024), 2) - rvs2 = np.round(stats.norm.rvs(loc=0, scale=10,size=512), 2) - s, p = stats.ttest_ind(rvs1, rvs2, equal_var = True) + rvs1 = np.round(stats.norm.rvs(loc=0, scale=10, size=1024), 2) + rvs2 = np.round(stats.norm.rvs(loc=0, scale=10, size=512), 2) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var=True) test_and_check("studentTTest", rvs1, rvs2, s, p) + def test_welch(): - rvs1 = np.round(stats.norm.rvs(loc=1, scale=15,size=500), 2) - rvs2 = np.round(stats.norm.rvs(loc=10, scale=5,size=500), 2) - s, p = stats.ttest_ind(rvs1, rvs2, equal_var = False) + rvs1 = np.round(stats.norm.rvs(loc=1, scale=15, size=500), 2) + rvs2 = np.round(stats.norm.rvs(loc=10, scale=5, size=500), 2) + s, p = 
stats.ttest_ind(rvs1, rvs2, equal_var=False) test_and_check("welchTTest", rvs1, rvs2, s, p) - rvs1 = np.round(stats.norm.rvs(loc=0, scale=7,size=500), 2) - rvs2 = np.round(stats.norm.rvs(loc=0, scale=3,size=500), 2) - s, p = stats.ttest_ind(rvs1, rvs2, equal_var = False) + rvs1 = np.round(stats.norm.rvs(loc=0, scale=7, size=500), 2) + rvs2 = np.round(stats.norm.rvs(loc=0, scale=3, size=500), 2) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var=False) test_and_check("welchTTest", rvs1, rvs2, s, p) - rvs1 = np.round(stats.norm.rvs(loc=0, scale=10,size=1024), 2) - rvs2 = np.round(stats.norm.rvs(loc=5, scale=1,size=512), 2) - s, p = stats.ttest_ind(rvs1, rvs2, equal_var = False) + rvs1 = np.round(stats.norm.rvs(loc=0, scale=10, size=1024), 2) + rvs2 = np.round(stats.norm.rvs(loc=5, scale=1, size=512), 2) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var=False) test_and_check("welchTTest", rvs1, rvs2, s, p) - rvs1 = np.round(stats.norm.rvs(loc=5, scale=10,size=512), 2) - rvs2 = np.round(stats.norm.rvs(loc=5, scale=10,size=1024), 2) - s, p = stats.ttest_ind(rvs1, rvs2, equal_var = False) + rvs1 = np.round(stats.norm.rvs(loc=5, scale=10, size=512), 2) + rvs2 = np.round(stats.norm.rvs(loc=5, scale=10, size=1024), 2) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var=False) test_and_check("welchTTest", rvs1, rvs2, s, p) + if __name__ == "__main__": test_student() test_welch() - print("Ok.") \ No newline at end of file + print("Ok.") diff --git a/tests/queries/0_stateless/01561_clickhouse_client_stage.reference b/tests/queries/0_stateless/01561_clickhouse_client_stage.reference index 44c39f2a444..00e0f4ddb2e 100644 --- a/tests/queries/0_stateless/01561_clickhouse_client_stage.reference +++ b/tests/queries/0_stateless/01561_clickhouse_client_stage.reference @@ -1,15 +1,15 @@ -execute: default +execute: --allow_experimental_analyzer=1 "foo" 1 -execute: --stage fetch_columns -"dummy" +execute: --allow_experimental_analyzer=1 --stage fetch_columns +"system.one.dummy_0" 0 -execute: --stage with_mergeable_state -"1" +execute: --allow_experimental_analyzer=1 --stage with_mergeable_state +"1_UInt8" 1 -execute: --stage with_mergeable_state_after_aggregation -"1" +execute: --allow_experimental_analyzer=1 --stage with_mergeable_state_after_aggregation +"1_UInt8" 1 -execute: --stage complete +execute: --allow_experimental_analyzer=1 --stage complete "foo" 1 diff --git a/tests/queries/0_stateless/01561_clickhouse_client_stage.sh b/tests/queries/0_stateless/01561_clickhouse_client_stage.sh index 72161333812..99267458421 100755 --- a/tests/queries/0_stateless/01561_clickhouse_client_stage.sh +++ b/tests/queries/0_stateless/01561_clickhouse_client_stage.sh @@ -5,6 +5,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh +opts=( + "--allow_experimental_analyzer=1" +) + function execute_query() { if [ $# -eq 0 ]; then @@ -15,8 +19,8 @@ function execute_query() ${CLICKHOUSE_CLIENT} "$@" --format CSVWithNames -q "SELECT 1 AS foo" } -execute_query # default -- complete -execute_query --stage fetch_columns -execute_query --stage with_mergeable_state -execute_query --stage with_mergeable_state_after_aggregation -execute_query --stage complete +execute_query "${opts[@]}" # default -- complete +execute_query "${opts[@]}" --stage fetch_columns +execute_query "${opts[@]}" --stage with_mergeable_state +execute_query "${opts[@]}" --stage with_mergeable_state_after_aggregation +execute_query "${opts[@]}" --stage complete diff --git a/tests/queries/0_stateless/01561_mann_whitney_scipy.python b/tests/queries/0_stateless/01561_mann_whitney_scipy.python index 7958e8bbaf1..4713120287d 100644 --- a/tests/queries/0_stateless/01561_mann_whitney_scipy.python +++ b/tests/queries/0_stateless/01561_mann_whitney_scipy.python @@ -6,7 +6,7 @@ import pandas as pd import numpy as np CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient @@ -14,40 +14,51 @@ from pure_http_client import ClickHouseClient def test_and_check(name, a, b, t_stat, p_value): client = ClickHouseClient() client.query("DROP TABLE IF EXISTS mann_whitney;") - client.query("CREATE TABLE mann_whitney (left Float64, right UInt8) ENGINE = Memory;"); - client.query("INSERT INTO mann_whitney VALUES {};".format(", ".join(['({},{}), ({},{})'.format(i, 0, j, 1) for i,j in zip(a, b)]))) + client.query( + "CREATE TABLE mann_whitney (left Float64, right UInt8) ENGINE = Memory;" + ) + client.query( + "INSERT INTO mann_whitney VALUES {};".format( + ", ".join(["({},{}), ({},{})".format(i, 0, j, 1) for i, j in zip(a, b)]) + ) + ) real = client.query_return_df( - "SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name) + - "roundBankers({}(left, right).2, 16) as p_value ".format(name) + - "FROM mann_whitney FORMAT TabSeparatedWithNames;") - real_t_stat = real['t_stat'][0] - real_p_value = real['p_value'][0] - assert(abs(real_t_stat - np.float64(t_stat) < 1e-2)), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat) - assert(abs(real_p_value - np.float64(p_value)) < 1e-2), "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value) + "SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name) + + "roundBankers({}(left, right).2, 16) as p_value ".format(name) + + "FROM mann_whitney FORMAT TabSeparatedWithNames;" + ) + real_t_stat = real["t_stat"][0] + real_p_value = real["p_value"][0] + assert abs( + real_t_stat - np.float64(t_stat) < 1e-2 + ), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat) + assert ( + abs(real_p_value - np.float64(p_value)) < 1e-2 + ), "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value) client.query("DROP TABLE IF EXISTS mann_whitney;") def test_mann_whitney(): - rvs1 = np.round(stats.norm.rvs(loc=1, scale=5,size=500), 5) - rvs2 = np.round(stats.expon.rvs(scale=0.2,size=500), 5) - s, p = stats.mannwhitneyu(rvs1, rvs2, alternative='two-sided') + rvs1 = np.round(stats.norm.rvs(loc=1, scale=5, size=500), 5) + rvs2 = np.round(stats.expon.rvs(scale=0.2, size=500), 5) + s, p = stats.mannwhitneyu(rvs1, rvs2, alternative="two-sided") test_and_check("mannWhitneyUTest", rvs1, rvs2, s, p) 
test_and_check("mannWhitneyUTest('two-sided')", rvs1, rvs2, s, p) equal = np.round(stats.cauchy.rvs(scale=5, size=500), 5) - s, p = stats.mannwhitneyu(equal, equal, alternative='two-sided') + s, p = stats.mannwhitneyu(equal, equal, alternative="two-sided") test_and_check("mannWhitneyUTest('two-sided')", equal, equal, s, p) - s, p = stats.mannwhitneyu(equal, equal, alternative='less', use_continuity=False) + s, p = stats.mannwhitneyu(equal, equal, alternative="less", use_continuity=False) test_and_check("mannWhitneyUTest('less', 0)", equal, equal, s, p) - - rvs1 = np.round(stats.cauchy.rvs(scale=10,size=65536), 5) - rvs2 = np.round(stats.norm.rvs(loc=0, scale=10,size=65536), 5) - s, p = stats.mannwhitneyu(rvs1, rvs2, alternative='greater') + rvs1 = np.round(stats.cauchy.rvs(scale=10, size=65536), 5) + rvs2 = np.round(stats.norm.rvs(loc=0, scale=10, size=65536), 5) + s, p = stats.mannwhitneyu(rvs1, rvs2, alternative="greater") test_and_check("mannWhitneyUTest('greater')", rvs1, rvs2, s, p) + if __name__ == "__main__": test_mann_whitney() - print("Ok.") \ No newline at end of file + print("Ok.") diff --git a/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference b/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference index 8c8bb73b801..49b4d22ea71 100644 --- a/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference +++ b/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference @@ -4,10 +4,10 @@ SELECT FROM test_order_by ORDER BY timestamp ASC LIMIT 10 -Expression (Projection) +Expression (Project names) Limit (preliminary LIMIT (without OFFSET)) Sorting (Sorting for ORDER BY) - Expression (Before ORDER BY) + Expression ((Before ORDER BY + (Projection + Change column names to column identifiers))) ReadFromMergeTree (default.test_order_by) SELECT timestamp, @@ -15,10 +15,10 @@ SELECT FROM test_order_by ORDER BY toDate(timestamp) ASC LIMIT 10 -Expression (Projection) +Expression (Project names) Limit (preliminary LIMIT (without OFFSET)) Sorting (Sorting for ORDER BY) - Expression (Before ORDER BY) + Expression ((Before ORDER BY + (Projection + Change column names to column identifiers))) ReadFromMergeTree (default.test_order_by) SELECT timestamp, @@ -28,10 +28,10 @@ ORDER BY toDate(timestamp) ASC, timestamp ASC LIMIT 10 -Expression (Projection) +Expression (Project names) Limit (preliminary LIMIT (without OFFSET)) Sorting (Sorting for ORDER BY) - Expression (Before ORDER BY) + Expression ((Before ORDER BY + (Projection + Change column names to column identifiers))) ReadFromMergeTree (default.test_order_by) SELECT timestamp, diff --git a/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.sql b/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.sql index 15ddb5a848f..2fe24d1662d 100644 --- a/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.sql +++ b/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.sql @@ -1,3 +1,4 @@ +SET allow_experimental_analyzer = 1; SET optimize_monotonous_functions_in_order_by = 1; SET optimize_read_in_order = 1; diff --git a/tests/queries/0_stateless/01576_alias_column_rewrite.sql b/tests/queries/0_stateless/01576_alias_column_rewrite.sql index 1f28225bef8..d59889c1af8 100644 --- a/tests/queries/0_stateless/01576_alias_column_rewrite.sql +++ b/tests/queries/0_stateless/01576_alias_column_rewrite.sql @@ -122,7 +122,7 @@ create table pl (dt DateTime, i int, projection p 
(select sum(i) group by toStar insert into pl values ('2020-10-24', 1); set max_rows_to_read = 2; -select sum(i) from pd group by dt_m settings allow_experimental_projection_optimization = 1, force_optimize_projection = 1; +select sum(i) from pd group by dt_m settings optimize_use_projections = 1, force_optimize_projection = 1; drop table pd; drop table pl; diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference index aaa88d66ca0..b981a46b4fd 100644 --- a/tests/queries/0_stateless/01591_window_functions.reference +++ b/tests/queries/0_stateless/01591_window_functions.reference @@ -915,12 +915,12 @@ from (select number, intDiv(number, 3) p, mod(number, 5) o from numbers(16)) t ; -Expression ((Projection + Before ORDER BY)) +Expression ((Project names + Projection)) Window (Window step for window \'\') - Window (Window step for window \'PARTITION BY p\') - Window (Window step for window \'PARTITION BY p ORDER BY o ASC\') - Sorting (Sorting for window \'PARTITION BY p ORDER BY o ASC\') - Expression ((Before window functions + (Projection + Before ORDER BY))) + Window (Window step for window \'PARTITION BY t.p_0\') + Window (Window step for window \'PARTITION BY t.p_0 ORDER BY t.o_1 ASC\') + Sorting (Sorting for window \'PARTITION BY t.p_0 ORDER BY t.o_1 ASC\') + Expression ((Before WINDOW + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))) ReadFromStorage (SystemNumbers) explain select count(*) over (order by o, number), @@ -929,13 +929,13 @@ from (select number, intDiv(number, 3) p, mod(number, 5) o from numbers(16)) t ; -Expression ((Projection + Before ORDER BY)) - Window (Window step for window \'ORDER BY o ASC, number ASC\') - Sorting (Sorting for window \'ORDER BY o ASC, number ASC\') - Window (Window step for window \'ORDER BY number ASC\') - Expression ((Before window functions + (Projection + Before ORDER BY)) [lifted up part]) - Sorting (Sorting for window \'ORDER BY number ASC\') - Expression ((Before window functions + (Projection + Before ORDER BY))) +Expression ((Project names + Projection)) + Window (Window step for window \'ORDER BY t.o_0 ASC, t.number_1 ASC\') + Sorting (Sorting for window \'ORDER BY t.o_0 ASC, t.number_1 ASC\') + Window (Window step for window \'ORDER BY t.number_1 ASC\') + Expression ((Before WINDOW + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))) [lifted up part]) + Sorting (Sorting for window \'ORDER BY t.number_1 ASC\') + Expression ((Before WINDOW + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))) ReadFromStorage (SystemNumbers) -- A test case for the sort comparator found by fuzzer. 
SELECT diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql index 3f4a028eac2..3c9c1f9cea7 100644 --- a/tests/queries/0_stateless/01591_window_functions.sql +++ b/tests/queries/0_stateless/01591_window_functions.sql @@ -1,5 +1,7 @@ -- Tags: long +SET allow_experimental_analyzer = 1; + -- { echo } -- just something basic diff --git a/tests/queries/0_stateless/01592_long_window_functions1.sql b/tests/queries/0_stateless/01592_long_window_functions1.sql index 022d8071ffa..4911b7aa792 100644 --- a/tests/queries/0_stateless/01592_long_window_functions1.sql +++ b/tests/queries/0_stateless/01592_long_window_functions1.sql @@ -4,10 +4,6 @@ drop table if exists stack; set max_insert_threads = 4; --- Temporary disable aggregation in order, --- because it may fail with UBSan. -set optimize_aggregation_in_order = 0; - create table stack(item_id Int64, brand_id Int64, rack_id Int64, dt DateTime, expiration_dt DateTime, quantity UInt64) Engine = MergeTree partition by toYYYYMM(dt) diff --git a/tests/queries/0_stateless/01592_toUnixTimestamp_Date.reference b/tests/queries/0_stateless/01592_toUnixTimestamp_Date.reference index e69de29bb2d..f7d86529347 100644 --- a/tests/queries/0_stateless/01592_toUnixTimestamp_Date.reference +++ b/tests/queries/0_stateless/01592_toUnixTimestamp_Date.reference @@ -0,0 +1,4 @@ +1683676800 +1683676800 +1683676800 +1683676800 diff --git a/tests/queries/0_stateless/01592_toUnixTimestamp_Date.sql b/tests/queries/0_stateless/01592_toUnixTimestamp_Date.sql index 5dc87e31f75..f2ba18a3b55 100644 --- a/tests/queries/0_stateless/01592_toUnixTimestamp_Date.sql +++ b/tests/queries/0_stateless/01592_toUnixTimestamp_Date.sql @@ -1 +1,4 @@ -select toUnixTimestamp(today()); -- { serverError 44; } +select toUnixTimestamp(makeDate(2023, 5, 10)); +select toUnixTimestamp(makeDate32(2023, 5, 10)); +select toUnixTimestamp(makeDate(2023, 5, 10), 'Pacific/Auckland'); +select toUnixTimestamp(makeDate32(2023, 5, 10), 'Pacific/Auckland'); \ No newline at end of file diff --git a/tests/queries/0_stateless/01595_countMatches.sql b/tests/queries/0_stateless/01595_countMatches.sql index 6374fe7bc5b..0b170945d44 100644 --- a/tests/queries/0_stateless/01595_countMatches.sql +++ b/tests/queries/0_stateless/01595_countMatches.sql @@ -25,5 +25,5 @@ select countMatchesCaseInsensitive('foo.com BAR.COM baz.com bam.com', '([^. ]+)\ select countMatchesCaseInsensitive('foo.com@foo.com bar.com@foo.com BAZ.com@foo.com bam.com@foo.com', '([^. ]+)\.([^. ]+)@([^. ]+)\.([^. 
]+)'); select 'errors'; -select countMatches(1, 'foo') from numbers(1); -- { serverError 43; } -select countMatches('foobarfoo', toString(number)) from numbers(1); -- { serverError 44; } +select countMatches(1, 'foo') from numbers(1); -- { serverError 43 } +select countMatches('foobarfoo', toString(number)) from numbers(1); -- { serverError 44 } diff --git a/tests/queries/0_stateless/01596_full_join_chertus.sql b/tests/queries/0_stateless/01596_full_join_chertus.sql index 162b9f7be8f..32911abb792 100644 --- a/tests/queries/0_stateless/01596_full_join_chertus.sql +++ b/tests/queries/0_stateless/01596_full_join_chertus.sql @@ -1,9 +1,9 @@ select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) from (select number k, toLowCardinality(toString(number)) s from numbers(2)) as js1 full join (select toLowCardinality(number+1) k, toString(number+1) s from numbers(2)) as js2 -using k order by js1.k, js2.k; +ON js1.k = js2.k order by js1.k, js2.k; select toTypeName(js1.k), toTypeName(js2.k), toTypeName(js1.s), toTypeName(js2.s) from (select number k, toLowCardinality(toString(number)) s from numbers(2)) as js1 full join (select toLowCardinality(number+1) k, toString(number+1) s from numbers(2)) as js2 -using k order by js1.k, js2.k; +ON js1.k = js2.k order by js1.k, js2.k; diff --git a/tests/queries/0_stateless/01596_setting_limit_offset.reference b/tests/queries/0_stateless/01596_setting_limit_offset.reference index 96483268d43..fe6390b172c 100644 --- a/tests/queries/0_stateless/01596_setting_limit_offset.reference +++ b/tests/queries/0_stateless/01596_setting_limit_offset.reference @@ -22,6 +22,10 @@ 107 108 109 +102 +103 +104 +105 105 106 107 @@ -38,6 +42,26 @@ 64 64 60 +60 +60 +61 +61 +62 +62 +63 +63 +64 +64 +60 +60 +61 +61 +62 +62 +63 +63 +64 +64 35 35 36 diff --git a/tests/queries/0_stateless/01596_setting_limit_offset.sql b/tests/queries/0_stateless/01596_setting_limit_offset.sql index 3c91e3542bb..0c2ab5fb4dc 100644 --- a/tests/queries/0_stateless/01596_setting_limit_offset.sql +++ b/tests/queries/0_stateless/01596_setting_limit_offset.sql @@ -11,6 +11,7 @@ SELECT * FROM test OFFSET 20; -- 5 rows SELECT * FROM (SELECT i FROM test LIMIT 10 OFFSET 50) TMP; -- 5 rows SELECT * FROM test LIMIT 4 OFFSET 192; -- 4 rows SELECT * FROM test LIMIT 10 OFFSET 195; -- 5 rows +SELECT * FROM test LIMIT 2*2 OFFSET 192; -- Only set offset SET limit = 0; @@ -21,6 +22,8 @@ SELECT * FROM test LIMIT 100; -- no result SET offset = 10; SELECT * FROM test LIMIT 20 OFFSET 100; -- 10 rows SELECT * FROM test LIMIT 11 OFFSET 100; -- 1 rows +SELECT * FROM test LIMIT 20 OFFSET 10*10; +SELECT * FROM test LIMIT 4*5 OFFSET 10*10; -- offset and limit together SET limit = 10; diff --git a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh index f47d0863e69..89ce84f6dbc 100755 --- a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh +++ b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: long CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -8,7 +7,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # NOTE: database = $CLICKHOUSE_DATABASE is unwanted verify_sql="SELECT (SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics) - = (SELECT sum(active), sum(NOT active) FROM system.parts)" + = (SELECT sum(active), sum(NOT 
active) FROM + (SELECT active FROM system.parts UNION ALL SELECT active FROM system.projection_parts))" # The query is not atomic - it can compare states between system.parts and system.metrics from different points in time. # So, there is inherent race condition. But it should get expected result eventually. diff --git a/tests/queries/0_stateless/01600_parts_types_metrics_long.sh b/tests/queries/0_stateless/01600_parts_types_metrics_long.sh index 05edf02f7ed..5f724e81042 100755 --- a/tests/queries/0_stateless/01600_parts_types_metrics_long.sh +++ b/tests/queries/0_stateless/01600_parts_types_metrics_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-s3-storage +# Tags: no-s3-storage CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -11,7 +11,8 @@ set -o pipefail # NOTE: database = $CLICKHOUSE_DATABASE is unwanted verify_sql="SELECT (SELECT sumIf(value, metric = 'PartsInMemory'), sumIf(value, metric = 'PartsCompact'), sumIf(value, metric = 'PartsWide') FROM system.metrics) = - (SELECT countIf(part_type == 'InMemory'), countIf(part_type == 'Compact'), countIf(part_type == 'Wide') FROM system.parts)" + (SELECT countIf(part_type == 'InMemory'), countIf(part_type == 'Compact'), countIf(part_type == 'Wide') + FROM (SELECT part_type FROM system.parts UNION ALL SELECT part_type FROM system.projection_parts))" # The query is not atomic - it can compare states between system.parts and system.metrics from different points in time. # So, there is inherent race condition (especially in fasttest that runs tests in parallel). @@ -34,7 +35,7 @@ $CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=1 -- # InMemory - [0..5] # Compact - (5..10] # Wide - >10 -$CLICKHOUSE_CLIENT --query="CREATE TABLE data_01600 (part_type String, key Int) ENGINE = MergeTree PARTITION BY part_type ORDER BY key SETTINGS min_bytes_for_wide_part=0, min_bytes_for_compact_part=0, min_rows_for_wide_part=10, min_rows_for_compact_part=5" +$CLICKHOUSE_CLIENT --query="CREATE TABLE data_01600 (part_type String, key Int) ENGINE = MergeTree PARTITION BY part_type ORDER BY key SETTINGS min_bytes_for_wide_part=0, min_rows_for_wide_part=10" # InMemory $CLICKHOUSE_CLIENT --query="INSERT INTO data_01600 SELECT 'InMemory', number FROM system.numbers LIMIT 1" diff --git a/tests/queries/0_stateless/01602_array_aggregation.reference b/tests/queries/0_stateless/01602_array_aggregation.reference index bc21fae692e..ec8a0838401 100644 --- a/tests/queries/0_stateless/01602_array_aggregation.reference +++ b/tests/queries/0_stateless/01602_array_aggregation.reference @@ -34,6 +34,10 @@ Table array decimal avg 3.5 0 2 +2023-04-05 00:25:24 2023-04-05 00:25:23 [0,1] +2023-04-05 00:25:24.124 2023-04-05 00:25:23.123 [0,1.001] +2023-04-06 2023-04-05 [0,1] +2023-04-06 2023-04-05 [0,1] Types of aggregation result array min Int8 Int16 Int32 Int64 UInt8 UInt16 UInt32 UInt64 diff --git a/tests/queries/0_stateless/01602_array_aggregation.sql b/tests/queries/0_stateless/01602_array_aggregation.sql index 7159eb1442b..7c0f6eb8267 100644 --- a/tests/queries/0_stateless/01602_array_aggregation.sql +++ b/tests/queries/0_stateless/01602_array_aggregation.sql @@ -34,6 +34,11 @@ SELECT arrayAvg(x) FROM test_aggregation; DROP TABLE test_aggregation; +WITH ['2023-04-05 00:25:23', '2023-04-05 00:25:24']::Array(DateTime) AS dt SELECT arrayMax(dt), arrayMin(dt), arrayDifference(dt); +WITH ['2023-04-05 00:25:23.123', '2023-04-05 00:25:24.124']::Array(DateTime64(3)) AS dt SELECT arrayMax(dt), arrayMin(dt), 
arrayDifference(dt); +WITH ['2023-04-05', '2023-04-06']::Array(Date) AS d SELECT arrayMax(d), arrayMin(d), arrayDifference(d); +WITH ['2023-04-05', '2023-04-06']::Array(Date32) AS d SELECT arrayMax(d), arrayMin(d), arrayDifference(d); + SELECT 'Types of aggregation result array min'; SELECT toTypeName(arrayMin([toInt8(0)])), toTypeName(arrayMin([toInt16(0)])), toTypeName(arrayMin([toInt32(0)])), toTypeName(arrayMin([toInt64(0)])); SELECT toTypeName(arrayMin([toUInt8(0)])), toTypeName(arrayMin([toUInt16(0)])), toTypeName(arrayMin([toUInt32(0)])), toTypeName(arrayMin([toUInt64(0)])); diff --git a/tests/queries/0_stateless/01602_show_create_view.reference b/tests/queries/0_stateless/01602_show_create_view.reference index 5d4bd2cd972..5fe11a38db3 100644 --- a/tests/queries/0_stateless/01602_show_create_view.reference +++ b/tests/queries/0_stateless/01602_show_create_view.reference @@ -1,6 +1,5 @@ CREATE VIEW test_1602.v\n(\n `EventDate` DateTime,\n `CounterID` UInt32,\n `UserID` UInt32\n) AS\nSELECT *\nFROM test_1602.tbl CREATE MATERIALIZED VIEW test_1602.vv\n(\n `EventDate` DateTime,\n `CounterID` UInt32,\n `UserID` UInt32\n)\nENGINE = MergeTree\nPARTITION BY toYYYYMM(EventDate)\nORDER BY (CounterID, EventDate, intHash32(UserID))\nSETTINGS index_granularity = 8192 AS\nSELECT *\nFROM test_1602.tbl -CREATE LIVE VIEW test_1602.vvv\n(\n `EventDate` DateTime,\n `CounterID` UInt32,\n `UserID` UInt32\n) AS\nSELECT *\nFROM test_1602.tbl CREATE VIEW test_1602.VIEW\n(\n `EventDate` DateTime,\n `CounterID` UInt32,\n `UserID` UInt32\n) AS\nSELECT *\nFROM test_1602.tbl CREATE VIEW test_1602.DATABASE\n(\n `EventDate` DateTime,\n `CounterID` UInt32,\n `UserID` UInt32\n) AS\nSELECT *\nFROM test_1602.tbl CREATE VIEW test_1602.DICTIONARY\n(\n `EventDate` DateTime,\n `CounterID` UInt32,\n `UserID` UInt32\n) AS\nSELECT *\nFROM test_1602.tbl diff --git a/tests/queries/0_stateless/01602_show_create_view.sql b/tests/queries/0_stateless/01602_show_create_view.sql index 11b9c235290..1d4dd54b1c1 100644 --- a/tests/queries/0_stateless/01602_show_create_view.sql +++ b/tests/queries/0_stateless/01602_show_create_view.sql @@ -18,16 +18,10 @@ CREATE MATERIALIZED VIEW test_1602.vv (`EventDate` DateTime, `CounterID` UInt32, CREATE VIEW test_1602.VIEW AS SELECT * FROM test_1602.tbl; -SET allow_experimental_live_view=1; - -CREATE LIVE VIEW test_1602.vvv AS SELECT * FROM test_1602.tbl; - SHOW CREATE VIEW test_1602.v; SHOW CREATE VIEW test_1602.vv; -SHOW CREATE VIEW test_1602.vvv; - SHOW CREATE VIEW test_1602.not_exist_view; -- { serverError 390 } SHOW CREATE VIEW test_1602.tbl; -- { serverError 36 } diff --git a/tests/queries/0_stateless/01603_decimal_mult_float.reference b/tests/queries/0_stateless/01603_decimal_mult_float.reference index c2917516e99..4c9d45423ee 100644 --- a/tests/queries/0_stateless/01603_decimal_mult_float.reference +++ b/tests/queries/0_stateless/01603_decimal_mult_float.reference @@ -3,7 +3,7 @@ 0.00012000000000000002 150.16500000000002 7.775900000000001 -56.62269 +56.622689999999984 598.8376688440277 299.41883695311844 0.7485470860550345 diff --git a/tests/queries/0_stateless/01603_decimal_mult_float.sql b/tests/queries/0_stateless/01603_decimal_mult_float.sql index 99e4b775da7..799ab91d332 100644 --- a/tests/queries/0_stateless/01603_decimal_mult_float.sql +++ b/tests/queries/0_stateless/01603_decimal_mult_float.sql @@ -1,3 +1,5 @@ +SET optimize_arithmetic_operations_in_aggregate_functions = 0; + SELECT toDecimal32(2, 2) * 1.2; SELECT toDecimal64(0.5, 2) * 20.33; SELECT 0.00001 * toDecimal32(12, 2); 
diff --git a/tests/queries/0_stateless/01622_constraints_simple_optimization.reference b/tests/queries/0_stateless/01622_constraints_simple_optimization.reference index 7e012e1a17b..a375c35ca3e 100644 --- a/tests/queries/0_stateless/01622_constraints_simple_optimization.reference +++ b/tests/queries/0_stateless/01622_constraints_simple_optimization.reference @@ -38,8 +38,47 @@ WHERE (c > 100) OR (b > 100) SELECT count() AS `count()` FROM constraint_test_constants WHERE c > 100 +QUERY id: 0 + PROJECTION COLUMNS + count() UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + TABLE id: 3, table_name: default.constraint_test_constants + WHERE + FUNCTION id: 4, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 6, column_name: c, result_type: Int64, source_id: 3 + CONSTANT id: 7, constant_value: UInt64_100, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 SELECT count() AS `count()` FROM constraint_test_constants WHERE c > 100 +QUERY id: 0 + PROJECTION COLUMNS + count() UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + TABLE id: 3, table_name: default.constraint_test_constants + WHERE + FUNCTION id: 4, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 6, column_name: c, result_type: Int64, source_id: 3 + CONSTANT id: 7, constant_value: UInt64_100, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 SELECT count() AS `count()` FROM constraint_test_constants +QUERY id: 0 + PROJECTION COLUMNS + count() UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + TABLE id: 3, table_name: default.constraint_test_constants + SETTINGS allow_experimental_analyzer=1 diff --git a/tests/queries/0_stateless/01622_constraints_simple_optimization.sql b/tests/queries/0_stateless/01622_constraints_simple_optimization.sql index 7ec9e1a3158..a4d0035c590 100644 --- a/tests/queries/0_stateless/01622_constraints_simple_optimization.sql +++ b/tests/queries/0_stateless/01622_constraints_simple_optimization.sql @@ -98,8 +98,12 @@ SELECT count() FROM constraint_test_constants WHERE 11 <= a; ---> assumption -> -- A AND NOT A EXPLAIN SYNTAX SELECT count() FROM constraint_test_constants WHERE (a > 100 OR b > 100 OR c > 100) AND (a <= 100 OR b > 100 OR c > 100); +-- EXPLAIN QUERY TREE SELECT count() FROM constraint_test_constants WHERE (a > 100 OR b > 100 OR c > 100) AND (a <= 100 OR b > 100 OR c > 100); ---> the order of the generated checks is not consistent EXPLAIN SYNTAX SELECT count() FROM constraint_test_constants WHERE (a > 100 OR b > 100 OR c > 100) AND (a <= 100 OR b > 100 OR c > 100) AND (NOT b > 100 OR c > 100); +EXPLAIN QUERY TREE SELECT count() FROM constraint_test_constants WHERE (a > 100 OR b > 100 OR c > 100) AND (a <= 100 OR b > 100 OR c > 100) AND (NOT b > 100 OR c > 100) SETTINGS allow_experimental_analyzer = 1; EXPLAIN SYNTAX SELECT count() FROM constraint_test_constants WHERE (a > 100 OR b > 100 OR c > 100) AND (a <= 100 OR b > 100 OR c > 100) AND (NOT b > 100 OR c > 100) AND (c > 100); +EXPLAIN QUERY TREE SELECT count() FROM constraint_test_constants WHERE (a > 100 OR b > 100 OR c > 100) AND (a <= 100 OR b > 100 OR c > 100) AND (NOT b > 100 OR c > 100) AND (c > 100) SETTINGS 
allow_experimental_analyzer = 1; EXPLAIN SYNTAX SELECT count() FROM constraint_test_constants WHERE (a > 100 OR b > 100 OR c > 100) AND (a <= 100 OR b > 100 OR c > 100) AND (NOT b > 100 OR c > 100) AND (c <= 100); +EXPLAIN QUERY TREE SELECT count() FROM constraint_test_constants WHERE (a > 100 OR b > 100 OR c > 100) AND (a <= 100 OR b > 100 OR c > 100) AND (NOT b > 100 OR c > 100) AND (c <= 100) SETTINGS allow_experimental_analyzer = 1; DROP TABLE constraint_test_constants; diff --git a/tests/queries/0_stateless/01622_constraints_where_optimization.reference b/tests/queries/0_stateless/01622_constraints_where_optimization.reference index c7c516025f2..b5520d75b0e 100644 --- a/tests/queries/0_stateless/01622_constraints_where_optimization.reference +++ b/tests/queries/0_stateless/01622_constraints_where_optimization.reference @@ -1,14 +1,89 @@ SELECT count() FROM t_constraints_where WHERE 0 +QUERY id: 0 + PROJECTION COLUMNS + count() UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + TABLE id: 3, table_name: default.t_constraints_where + WHERE + CONSTANT id: 4, constant_value: UInt64_0, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 SELECT count() FROM t_constraints_where WHERE 0 +QUERY id: 0 + PROJECTION COLUMNS + count() UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + TABLE id: 3, table_name: default.t_constraints_where + WHERE + CONSTANT id: 4, constant_value: UInt64_0, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 SELECT count() FROM t_constraints_where WHERE 0 +QUERY id: 0 + PROJECTION COLUMNS + count() UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + TABLE id: 3, table_name: default.t_constraints_where + WHERE + CONSTANT id: 4, constant_value: UInt64_0, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 SELECT count() FROM t_constraints_where WHERE b < 8 +QUERY id: 0 + PROJECTION COLUMNS + count() UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + TABLE id: 3, table_name: default.t_constraints_where + WHERE + FUNCTION id: 4, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 6, column_name: b, result_type: UInt32, source_id: 3 + CONSTANT id: 7, constant_value: UInt64_8, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 SELECT count() FROM t_constraints_where +PREWHERE (b > 20) OR (b < 8) +QUERY id: 0 + PROJECTION COLUMNS + count() UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + TABLE id: 3, table_name: default.t_constraints_where + PREWHERE + FUNCTION id: 4, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 6, column_name: b, result_type: UInt32, source_id: 3 + CONSTANT id: 7, constant_value: UInt64_8, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT count() +FROM t_constraints_where +QUERY id: 0 + PROJECTION COLUMNS + count() UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + TABLE id: 3, table_name: 
default.t_constraints_where + SETTINGS allow_experimental_analyzer=1 diff --git a/tests/queries/0_stateless/01622_constraints_where_optimization.sql b/tests/queries/0_stateless/01622_constraints_where_optimization.sql index 6a9d1ba9f6b..2818351a120 100644 --- a/tests/queries/0_stateless/01622_constraints_where_optimization.sql +++ b/tests/queries/0_stateless/01622_constraints_where_optimization.sql @@ -8,9 +8,15 @@ CREATE TABLE t_constraints_where(a UInt32, b UInt32, CONSTRAINT c1 ASSUME b >= 5 INSERT INTO t_constraints_where VALUES (1, 7); EXPLAIN SYNTAX SELECT count() FROM t_constraints_where WHERE b > 15; -- assumption -> 0 +EXPLAIN QUERY TREE SELECT count() FROM t_constraints_where WHERE b > 15 SETTINGS allow_experimental_analyzer = 1; -- assumption -> 0 EXPLAIN SYNTAX SELECT count() FROM t_constraints_where WHERE b = 20; -- assumption -> 0 +EXPLAIN QUERY TREE SELECT count() FROM t_constraints_where WHERE b = 20 SETTINGS allow_experimental_analyzer = 1; -- assumption -> 0 EXPLAIN SYNTAX SELECT count() FROM t_constraints_where WHERE b < 2; -- assumption -> 0 +EXPLAIN QUERY TREE SELECT count() FROM t_constraints_where WHERE b < 2 SETTINGS allow_experimental_analyzer = 1; -- assumption -> 0 EXPLAIN SYNTAX SELECT count() FROM t_constraints_where WHERE b > 20 OR b < 8; -- assumption -> remove (b < 20) +EXPLAIN QUERY TREE SELECT count() FROM t_constraints_where WHERE b > 20 OR b < 8 SETTINGS allow_experimental_analyzer = 1; -- assumption -> remove (b < 20) +EXPLAIN SYNTAX SELECT count() FROM t_constraints_where PREWHERE b > 20 OR b < 8; -- assumption -> remove (b < 20) +EXPLAIN QUERY TREE SELECT count() FROM t_constraints_where PREWHERE b > 20 OR b < 8 SETTINGS allow_experimental_analyzer = 1; -- assumption -> remove (b < 20) DROP TABLE t_constraints_where; @@ -18,6 +24,7 @@ CREATE TABLE t_constraints_where(a UInt32, b UInt32, CONSTRAINT c1 ASSUME b < 10 INSERT INTO t_constraints_where VALUES (1, 7); -EXPLAIN SYNTAX SELECT count() FROM t_constraints_where WHERE b = 1 OR b < 18 OR b > 5; -- assumtion -> (b < 20) -> 0; +EXPLAIN SYNTAX SELECT count() FROM t_constraints_where WHERE b = 1 OR b < 18 OR b > 5; -- assumption -> (b < 20) -> 0; +EXPLAIN QUERY TREE SELECT count() FROM t_constraints_where WHERE b = 1 OR b < 18 OR b > 5 SETTINGS allow_experimental_analyzer = 1; -- assumption -> (b < 20) -> 0; DROP TABLE t_constraints_where; diff --git a/tests/queries/0_stateless/01623_constraints_column_swap.reference b/tests/queries/0_stateless/01623_constraints_column_swap.reference index 7ae4516fe9e..3639ad47228 100644 --- a/tests/queries/0_stateless/01623_constraints_column_swap.reference +++ b/tests/queries/0_stateless/01623_constraints_column_swap.reference @@ -3,51 +3,329 @@ SELECT (b AS b) + 3 AS `plus(b, 3)` FROM column_swap_test_test WHERE b = 1 +QUERY id: 0 + PROJECTION COLUMNS + plus(cityHash64(a), 10) UInt64 + plus(b, 3) UInt64 + PROJECTION + LIST id: 1, nodes: 2 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + COLUMN id: 4, column_name: b, result_type: UInt64, source_id: 5 + CONSTANT id: 6, constant_value: UInt64_10, constant_value_type: UInt8 + FUNCTION id: 7, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 2 + COLUMN id: 9, column_name: b, result_type: UInt64, source_id: 5 + CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8 + JOIN TREE + TABLE id: 5, table_name: default.column_swap_test_test + WHERE + FUNCTION id: 11, function_name: equals, 
function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 12, nodes: 2 + COLUMN id: 13, column_name: b, result_type: UInt64, source_id: 5 + CONSTANT id: 14, constant_value: UInt64_1, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 +SELECT + cityHash64(a) + 10, + b + 3 +FROM column_swap_test_test +PREWHERE cityHash64(a) = 1 +QUERY id: 0 + PROJECTION COLUMNS + plus(cityHash64(a), 10) UInt64 + plus(b, 3) UInt64 + PROJECTION + LIST id: 1, nodes: 2 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + COLUMN id: 4, column_name: b, result_type: UInt64, source_id: 5 + CONSTANT id: 6, constant_value: UInt64_10, constant_value_type: UInt8 + FUNCTION id: 7, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 2 + COLUMN id: 9, column_name: b, result_type: UInt64, source_id: 5 + CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8 + JOIN TREE + TABLE id: 5, table_name: default.column_swap_test_test + PREWHERE + FUNCTION id: 11, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 12, nodes: 2 + COLUMN id: 13, column_name: b, result_type: UInt64, source_id: 5 + CONSTANT id: 14, constant_value: UInt64_1, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 SELECT (b AS `cityHash64(a)`) + 10 AS `plus(cityHash64(a), 10)`, (b AS b) + 3 AS `plus(b, 3)` FROM column_swap_test_test WHERE b = 0 +QUERY id: 0 + PROJECTION COLUMNS + plus(cityHash64(a), 10) UInt64 + plus(b, 3) UInt64 + PROJECTION + LIST id: 1, nodes: 2 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + COLUMN id: 4, column_name: b, result_type: UInt64, source_id: 5 + CONSTANT id: 6, constant_value: UInt64_10, constant_value_type: UInt8 + FUNCTION id: 7, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 2 + COLUMN id: 9, column_name: b, result_type: UInt64, source_id: 5 + CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8 + JOIN TREE + TABLE id: 5, table_name: default.column_swap_test_test + WHERE + FUNCTION id: 11, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 12, nodes: 2 + COLUMN id: 13, column_name: b, result_type: UInt64, source_id: 5 + CONSTANT id: 14, constant_value: UInt64_0, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 SELECT (b AS `cityHash64(a)`) + 10 AS `plus(cityHash64(a), 10)`, (b AS b) + 3 AS `plus(b, 3)` FROM column_swap_test_test WHERE b = 0 +QUERY id: 0 + PROJECTION COLUMNS + plus(cityHash64(a), 10) UInt64 + plus(b, 3) UInt64 + PROJECTION + LIST id: 1, nodes: 2 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + COLUMN id: 4, column_name: b, result_type: UInt64, source_id: 5 + CONSTANT id: 6, constant_value: UInt64_10, constant_value_type: UInt8 + FUNCTION id: 7, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 2 + COLUMN id: 9, column_name: b, result_type: UInt64, source_id: 5 + CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8 + JOIN TREE + TABLE id: 5, table_name: default.column_swap_test_test + WHERE + FUNCTION id: 11, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 12, nodes: 2 + COLUMN id: 13, column_name: b, result_type: UInt64, 
source_id: 5 + CONSTANT id: 14, constant_value: UInt64_0, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 SELECT (b AS `cityHash64(a)`) + 10 AS `plus(cityHash64(a), 10)`, (b AS b) + 3 AS `plus(b, 3)` FROM column_swap_test_test WHERE b = 1 +QUERY id: 0 + PROJECTION COLUMNS + plus(cityHash64(a), 10) UInt64 + plus(b, 3) UInt64 + PROJECTION + LIST id: 1, nodes: 2 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + COLUMN id: 4, column_name: b, result_type: UInt64, source_id: 5 + CONSTANT id: 6, constant_value: UInt64_10, constant_value_type: UInt8 + FUNCTION id: 7, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 2 + COLUMN id: 9, column_name: b, result_type: UInt64, source_id: 5 + CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8 + JOIN TREE + TABLE id: 5, table_name: default.column_swap_test_test + WHERE + FUNCTION id: 11, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 12, nodes: 2 + COLUMN id: 13, column_name: b, result_type: UInt64, source_id: 5 + CONSTANT id: 14, constant_value: UInt64_1, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 SELECT (b AS `cityHash64(a)`) + 10 AS `plus(cityHash64(a), 10)` FROM column_swap_test_test WHERE b = 0 +QUERY id: 0 + PROJECTION COLUMNS + plus(cityHash64(a), 10) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + COLUMN id: 4, column_name: b, result_type: UInt64, source_id: 5 + CONSTANT id: 6, constant_value: UInt64_10, constant_value_type: UInt8 + JOIN TREE + TABLE id: 5, table_name: default.column_swap_test_test + WHERE + FUNCTION id: 7, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 8, nodes: 2 + COLUMN id: 9, column_name: b, result_type: UInt64, source_id: 5 + CONSTANT id: 10, constant_value: UInt64_0, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 SELECT (cityHash64(a) AS `cityHash64(a)`) + 10 AS `plus(cityHash64(a), 10)`, a AS a FROM column_swap_test_test WHERE cityHash64(a) = 0 +QUERY id: 0 + PROJECTION COLUMNS + plus(cityHash64(a), 10) UInt64 + a String + PROJECTION + LIST id: 1, nodes: 2 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: cityHash64, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: a, result_type: String, source_id: 7 + CONSTANT id: 8, constant_value: UInt64_10, constant_value_type: UInt8 + COLUMN id: 9, column_name: a, result_type: String, source_id: 7 + JOIN TREE + TABLE id: 7, table_name: default.column_swap_test_test + WHERE + FUNCTION id: 10, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + FUNCTION id: 12, function_name: cityHash64, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 13, nodes: 1 + COLUMN id: 14, column_name: a, result_type: String, source_id: 7 + CONSTANT id: 15, constant_value: UInt64_0, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 SELECT (cityHash64(a) AS b) + 10 AS `plus(b, 10)`, a AS a FROM column_swap_test_test WHERE cityHash64(a) = 0 +QUERY id: 0 + PROJECTION COLUMNS + plus(b, 10) UInt64 + a String + PROJECTION + LIST id: 1, nodes: 2 + FUNCTION id: 2, 
function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: cityHash64, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: a, result_type: String, source_id: 7 + CONSTANT id: 8, constant_value: UInt64_10, constant_value_type: UInt8 + COLUMN id: 9, column_name: a, result_type: String, source_id: 7 + JOIN TREE + TABLE id: 7, table_name: default.column_swap_test_test + WHERE + FUNCTION id: 10, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + FUNCTION id: 12, function_name: cityHash64, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 13, nodes: 1 + COLUMN id: 14, column_name: a, result_type: String, source_id: 7 + CONSTANT id: 15, constant_value: UInt64_0, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 SELECT a AS `substring(reverse(b), 1, 1)`, a AS a FROM column_swap_test_test WHERE a = \'c\' +QUERY id: 0 + PROJECTION COLUMNS + substring(reverse(b), 1, 1) String + a String + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: a, result_type: String, source_id: 3 + COLUMN id: 4, column_name: a, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.column_swap_test_test + WHERE + FUNCTION id: 5, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 6, nodes: 2 + COLUMN id: 7, column_name: a, result_type: String, source_id: 3 + CONSTANT id: 8, constant_value: \'c\', constant_value_type: String + SETTINGS allow_experimental_analyzer=1 SELECT a AS `substring(reverse(b), 1, 1)`, a AS a FROM column_swap_test_test WHERE a = \'c\' +QUERY id: 0 + PROJECTION COLUMNS + substring(reverse(b), 1, 1) String + a String + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: a, result_type: String, source_id: 3 + COLUMN id: 4, column_name: a, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.column_swap_test_test + WHERE + FUNCTION id: 5, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 6, nodes: 2 + COLUMN id: 7, column_name: a, result_type: String, source_id: 3 + CONSTANT id: 8, constant_value: \'c\', constant_value_type: String + SETTINGS allow_experimental_analyzer=1 SELECT a AS t1, a AS t2 FROM column_swap_test_test WHERE a = \'c\' +QUERY id: 0 + PROJECTION COLUMNS + t1 String + t2 String + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: a, result_type: String, source_id: 3 + COLUMN id: 4, column_name: a, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.column_swap_test_test + WHERE + FUNCTION id: 5, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 6, nodes: 2 + COLUMN id: 7, column_name: a, result_type: String, source_id: 3 + CONSTANT id: 8, constant_value: \'c\', constant_value_type: String + SETTINGS allow_experimental_analyzer=1 SELECT a AS `substring(reverse(b), 1, 1)` FROM column_swap_test_test WHERE a = \'c\' +QUERY id: 0 + PROJECTION COLUMNS + substring(reverse(b), 1, 1) String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: a, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.column_swap_test_test + WHERE + FUNCTION id: 4, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 6, column_name: a, result_type: String, source_id: 3 
+ CONSTANT id: 7, constant_value: \'c\', constant_value_type: String + SETTINGS allow_experimental_analyzer=1 SELECT a FROM t_bad_constraint +QUERY id: 0 + PROJECTION COLUMNS + a UInt32 + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: a, result_type: UInt32, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.t_bad_constraint + SETTINGS allow_experimental_analyzer=1 diff --git a/tests/queries/0_stateless/01623_constraints_column_swap.sql b/tests/queries/0_stateless/01623_constraints_column_swap.sql index c81b37c8428..3219ee3cda7 100644 --- a/tests/queries/0_stateless/01623_constraints_column_swap.sql +++ b/tests/queries/0_stateless/01623_constraints_column_swap.sql @@ -13,13 +13,22 @@ INSERT INTO column_swap_test_test VALUES (1, 'cat', 1), (2, 'dog', 2); INSERT INTO column_swap_test_test SELECT number AS i, format('test {} kek {}', toString(number), toString(number + 10)) AS a, 1 AS b FROM system.numbers LIMIT 1000000; EXPLAIN SYNTAX SELECT cityHash64(a) + 10, b + 3 FROM column_swap_test_test WHERE cityHash64(a) = 1; +EXPLAIN QUERY TREE SELECT cityHash64(a) + 10, b + 3 FROM column_swap_test_test WHERE cityHash64(a) = 1 SETTINGS allow_experimental_analyzer = 1; +EXPLAIN SYNTAX SELECT cityHash64(a) + 10, b + 3 FROM column_swap_test_test PREWHERE cityHash64(a) = 1; +EXPLAIN QUERY TREE SELECT cityHash64(a) + 10, b + 3 FROM column_swap_test_test PREWHERE cityHash64(a) = 1 SETTINGS allow_experimental_analyzer = 1; EXPLAIN SYNTAX SELECT cityHash64(a) + 10, b + 3 FROM column_swap_test_test WHERE cityHash64(a) = 0; +EXPLAIN QUERY TREE SELECT cityHash64(a) + 10, b + 3 FROM column_swap_test_test WHERE cityHash64(a) = 0 SETTINGS allow_experimental_analyzer = 1; EXPLAIN SYNTAX SELECT cityHash64(a) + 10, b + 3 FROM column_swap_test_test WHERE b = 0; +EXPLAIN QUERY TREE SELECT cityHash64(a) + 10, b + 3 FROM column_swap_test_test WHERE b = 0 SETTINGS allow_experimental_analyzer = 1; EXPLAIN SYNTAX SELECT cityHash64(a) + 10, b + 3 FROM column_swap_test_test WHERE b = 1; +EXPLAIN QUERY TREE SELECT cityHash64(a) + 10, b + 3 FROM column_swap_test_test WHERE b = 1 SETTINGS allow_experimental_analyzer = 1; EXPLAIN SYNTAX SELECT cityHash64(a) + 10 FROM column_swap_test_test WHERE cityHash64(a) = 0; +EXPLAIN QUERY TREE SELECT cityHash64(a) + 10 FROM column_swap_test_test WHERE cityHash64(a) = 0 SETTINGS allow_experimental_analyzer = 1; EXPLAIN SYNTAX SELECT cityHash64(a) + 10, a FROM column_swap_test_test WHERE cityHash64(a) = 0; +EXPLAIN QUERY TREE SELECT cityHash64(a) + 10, a FROM column_swap_test_test WHERE cityHash64(a) = 0 SETTINGS allow_experimental_analyzer = 1; EXPLAIN SYNTAX SELECT b + 10, a FROM column_swap_test_test WHERE b = 0; +EXPLAIN QUERY TREE SELECT b + 10, a FROM column_swap_test_test WHERE b = 0 SETTINGS allow_experimental_analyzer = 1; DROP TABLE column_swap_test_test; @@ -27,9 +36,13 @@ CREATE TABLE column_swap_test_test (i Int64, a String, b String, CONSTRAINT c1 A INSERT INTO column_swap_test_test SELECT number AS i, toString(number) AS a, format('test {} kek {}', toString(number), toString(number + 10)) b FROM system.numbers LIMIT 1000000; EXPLAIN SYNTAX SELECT substring(reverse(b), 1, 1), a FROM column_swap_test_test WHERE a = 'c'; +EXPLAIN QUERY TREE SELECT substring(reverse(b), 1, 1), a FROM column_swap_test_test WHERE a = 'c' SETTINGS allow_experimental_analyzer = 1; EXPLAIN SYNTAX SELECT substring(reverse(b), 1, 1), a FROM column_swap_test_test WHERE substring(reverse(b), 1, 1) = 'c'; +EXPLAIN QUERY TREE SELECT substring(reverse(b), 1, 1), a FROM 
column_swap_test_test WHERE substring(reverse(b), 1, 1) = 'c' SETTINGS allow_experimental_analyzer = 1; EXPLAIN SYNTAX SELECT substring(reverse(b), 1, 1) AS t1, a AS t2 FROM column_swap_test_test WHERE substring(reverse(b), 1, 1) = 'c'; +EXPLAIN QUERY TREE SELECT substring(reverse(b), 1, 1) AS t1, a AS t2 FROM column_swap_test_test WHERE substring(reverse(b), 1, 1) = 'c' SETTINGS allow_experimental_analyzer = 1; EXPLAIN SYNTAX SELECT substring(reverse(b), 1, 1) FROM column_swap_test_test WHERE substring(reverse(b), 1, 1) = 'c'; +EXPLAIN QUERY TREE SELECT substring(reverse(b), 1, 1) FROM column_swap_test_test WHERE substring(reverse(b), 1, 1) = 'c' SETTINGS allow_experimental_analyzer = 1; DROP TABLE column_swap_test_test; @@ -40,5 +53,6 @@ CREATE TABLE t_bad_constraint(a UInt32, s String, CONSTRAINT c1 ASSUME a = toUIn INSERT INTO t_bad_constraint SELECT number, randomPrintableASCII(100) FROM numbers(10000); EXPLAIN SYNTAX SELECT a FROM t_bad_constraint; +EXPLAIN QUERY TREE SELECT a FROM t_bad_constraint SETTINGS allow_experimental_analyzer = 1; DROP TABLE t_bad_constraint; diff --git a/tests/queries/0_stateless/01625_constraints_index_append.reference b/tests/queries/0_stateless/01625_constraints_index_append.reference index 0df5c429d9e..591d8a85897 100644 --- a/tests/queries/0_stateless/01625_constraints_index_append.reference +++ b/tests/queries/0_stateless/01625_constraints_index_append.reference @@ -2,14 +2,22 @@ SELECT i AS i FROM index_append_test_test PREWHERE a = 0 WHERE (a = 0) AND indexHint((i + 40) > 0) +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, optimize_move_to_prewhere = 1, optimize_substitute_columns = 1, optimize_append_index = 1 +1 SELECT i AS i FROM index_append_test_test PREWHERE a < 0 +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, optimize_move_to_prewhere = 1, optimize_substitute_columns = 1, optimize_append_index = 1 +0 SELECT i AS i FROM index_append_test_test PREWHERE a >= 0 WHERE (a >= 0) AND indexHint((i + 40) > 0) +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, optimize_move_to_prewhere = 1, optimize_substitute_columns = 1, optimize_append_index = 1 +1 SELECT i AS i FROM index_append_test_test PREWHERE (2 * b) < 100 WHERE ((2 * b) < 100) AND indexHint(i < 100) +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, optimize_move_to_prewhere = 1, optimize_substitute_columns = 1, optimize_append_index = 1 +1 diff --git a/tests/queries/0_stateless/01625_constraints_index_append.sh b/tests/queries/0_stateless/01625_constraints_index_append.sh new file mode 100755 index 00000000000..acceedbb1d1 --- /dev/null +++ b/tests/queries/0_stateless/01625_constraints_index_append.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +# We should have correct env vars from shell_config.sh to run this test + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS index_append_test_test;" + +$CLICKHOUSE_CLIENT --query "CREATE TABLE index_append_test_test (i Int64, a UInt32, b UInt64, CONSTRAINT c1 ASSUME i <= 2 * b AND i + 40 > a) ENGINE = MergeTree() ORDER BY i;" +$CLICKHOUSE_CLIENT --query "INSERT INTO index_append_test_test VALUES (1, 10, 1), (2, 20, 2);" + +function run_with_settings() +{ + query="$1 SETTINGS convert_query_to_cnf = 1\ + , optimize_using_constraints = 1\ + , optimize_move_to_prewhere = 1\ + , optimize_substitute_columns = 1\ + , optimize_append_index = 1" + + if [[ $query =~ "EXPLAIN QUERY TREE" ]]; then query="${query}, allow_experimental_analyzer = 1"; fi + + $CLICKHOUSE_CLIENT --query="$query" + +} + +run_with_settings "EXPLAIN SYNTAX SELECT i FROM index_append_test_test WHERE a = 0" +run_with_settings "EXPLAIN QUERY TREE SELECT i FROM index_append_test_test WHERE a = 0" | grep -Fac "indexHint" +run_with_settings "EXPLAIN SYNTAX SELECT i FROM index_append_test_test WHERE a < 0" +run_with_settings "EXPLAIN QUERY TREE SELECT i FROM index_append_test_test WHERE a < 0" | grep -Fac "indexHint" +run_with_settings "EXPLAIN SYNTAX SELECT i FROM index_append_test_test WHERE a >= 0" +run_with_settings "EXPLAIN QUERY TREE SELECT i FROM index_append_test_test WHERE a >= 0" | grep -Fac "indexHint" +run_with_settings "EXPLAIN SYNTAX SELECT i FROM index_append_test_test WHERE 2 * b < 100" +run_with_settings "EXPLAIN QUERY TREE SELECT i FROM index_append_test_test WHERE 2 * b < 100" | grep -Fac "indexHint" + +$CLICKHOUSE_CLIENT --query "DROP TABLE index_append_test_test;" diff --git a/tests/queries/0_stateless/01625_constraints_index_append.sql b/tests/queries/0_stateless/01625_constraints_index_append.sql deleted file mode 100644 index fbffc9c7f10..00000000000 --- a/tests/queries/0_stateless/01625_constraints_index_append.sql +++ /dev/null @@ -1,17 +0,0 @@ -SET convert_query_to_cnf = 1; -SET optimize_using_constraints = 1; -SET optimize_move_to_prewhere = 1; -SET optimize_substitute_columns = 1; -SET optimize_append_index = 1; - -DROP TABLE IF EXISTS index_append_test_test; - -CREATE TABLE index_append_test_test (i Int64, a UInt32, b UInt64, CONSTRAINT c1 ASSUME i <= 2 * b AND i + 40 > a) ENGINE = MergeTree() ORDER BY i; -INSERT INTO index_append_test_test VALUES (1, 10, 1), (2, 20, 2); - -EXPLAIN SYNTAX SELECT i FROM index_append_test_test WHERE a = 0; -EXPLAIN SYNTAX SELECT i FROM index_append_test_test WHERE a < 0; -EXPLAIN SYNTAX SELECT i FROM index_append_test_test WHERE a >= 0; -EXPLAIN SYNTAX SELECT i FROM index_append_test_test WHERE 2 * b < 100; - -DROP TABLE index_append_test_test; diff --git a/tests/queries/0_stateless/01626_cnf_fuzz_long.python b/tests/queries/0_stateless/01626_cnf_fuzz_long.python index 10c12d14182..de9e4a21dbb 100644 --- a/tests/queries/0_stateless/01626_cnf_fuzz_long.python +++ b/tests/queries/0_stateless/01626_cnf_fuzz_long.python @@ -4,14 +4,18 @@ from random import randint, choices import sys CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient client = ClickHouseClient() N = 10 -create_query = "CREATE TABLE t_cnf_fuzz(" + ", ".join([f"c{i} UInt8" for i in range(N)]) + ") ENGINE = Memory" +create_query = ( + "CREATE TABLE t_cnf_fuzz(" + + ", ".join([f"c{i} UInt8" for i in range(N)]) + + ") ENGINE = 
Memory" +) client.query("DROP TABLE IF EXISTS t_cnf_fuzz") client.query(create_query) @@ -35,6 +39,7 @@ client.query(insert_query) MAX_CLAUSES = 10 MAX_ATOMS = 5 + def generate_dnf(): clauses = [] num_clauses = randint(1, MAX_CLAUSES) @@ -42,12 +47,17 @@ def generate_dnf(): num_atoms = randint(1, MAX_ATOMS) atom_ids = choices(range(N), k=num_atoms) negates = choices([0, 1], k=num_atoms) - atoms = [f"(NOT c{i})" if neg else f"c{i}" for (i, neg) in zip(atom_ids, negates)] + atoms = [ + f"(NOT c{i})" if neg else f"c{i}" for (i, neg) in zip(atom_ids, negates) + ] clauses.append("(" + " AND ".join(atoms) + ")") return " OR ".join(clauses) -select_query = "SELECT count() FROM t_cnf_fuzz WHERE {} SETTINGS convert_query_to_cnf = {}" + +select_query = ( + "SELECT count() FROM t_cnf_fuzz WHERE {} SETTINGS convert_query_to_cnf = {}" +) fail_report = """ Failed query: '{}'. diff --git a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql index 363f88c5ec9..aeabc05fdd3 100644 --- a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql +++ b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql @@ -13,9 +13,9 @@ create table data_01641 (key Int, value String) engine=MergeTree order by (key, SET max_block_size = 1000, min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0; insert into data_01641 select number, toString(number) from numbers(120000); --- Definitely should fail and it proves that memory is tracked in OPTIMIZE query. set max_memory_usage='10Mi', max_untracked_memory=0; -optimize table data_01641 final; -- { serverError 241 } +-- It fails iff memory is tracked in OPTIMIZE query, but it doesn't. OPTIMIZE query doesn't rely on query context. 
+optimize table data_01641 final; drop table data_01641; diff --git a/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.reference b/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.reference index 613c455fc59..98bb953263a 100644 --- a/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.reference +++ b/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.reference @@ -8,8 +8,6 @@ wide fsync_after_insert 1 wide fsync_after_insert,fsync_part_directory 1 -memory in_memory_parts_insert_sync -1 wide fsync_part_directory,vertical 1 2 diff --git a/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.sql b/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.sql index ad0dfca0db2..dfc761e1764 100644 --- a/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.sql +++ b/tests/queries/0_stateless/01643_merge_tree_fsync_smoke.sql @@ -37,13 +37,6 @@ select * from data_01643; optimize table data_01643 final; drop table data_01643; -select 'memory in_memory_parts_insert_sync'; -create table data_01643 (key Int) engine=MergeTree() order by key settings min_rows_for_compact_part=2, in_memory_parts_insert_sync=1, fsync_after_insert=1, fsync_part_directory=1; -insert into data_01643 values (1); -select * from data_01643; -optimize table data_01643 final; -drop table data_01643; - select 'wide fsync_part_directory,vertical'; create table data_01643 (key Int) engine=MergeTree() order by key settings min_bytes_for_wide_part=0, fsync_part_directory=1, enable_vertical_merge_algorithm=1, vertical_merge_algorithm_min_rows_to_activate=0, vertical_merge_algorithm_min_columns_to_activate=0; insert into data_01643 values (1); diff --git a/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.reference b/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.reference index 613c455fc59..98bb953263a 100644 --- a/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.reference +++ b/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.reference @@ -8,8 +8,6 @@ wide fsync_after_insert 1 wide fsync_after_insert,fsync_part_directory 1 -memory in_memory_parts_insert_sync -1 wide fsync_part_directory,vertical 1 2 diff --git a/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.sql b/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.sql index bcce87e11db..54c30fa2b1a 100644 --- a/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.sql +++ b/tests/queries/0_stateless/01643_replicated_merge_tree_fsync_smoke.sql @@ -63,17 +63,6 @@ system sync replica rep_fsync_r2; drop table rep_fsync_r1; drop table rep_fsync_r2; -select 'memory in_memory_parts_insert_sync'; -create table rep_fsync_r1 (key Int) engine=ReplicatedMergeTree('/clickhouse/tables/{database}/rep_fsync', 'r1') order by key settings min_rows_for_compact_part=2, in_memory_parts_insert_sync=1, fsync_after_insert=1, fsync_part_directory=1; -create table rep_fsync_r2 (key Int) engine=ReplicatedMergeTree('/clickhouse/tables/{database}/rep_fsync', 'r2') order by key settings min_rows_for_compact_part=2, in_memory_parts_insert_sync=1, fsync_after_insert=1, fsync_part_directory=1; -insert into rep_fsync_r1 values (1); -system sync replica rep_fsync_r2; -select * from rep_fsync_r2; -optimize table rep_fsync_r1 final; -system sync replica rep_fsync_r2; -drop table rep_fsync_r1; -drop table rep_fsync_r2; - select 'wide fsync_part_directory,vertical'; create table rep_fsync_r1 (key Int) engine=ReplicatedMergeTree('/clickhouse/tables/{database}/rep_fsync', 'r1') order by key 
settings min_bytes_for_wide_part=0, fsync_part_directory=1, enable_vertical_merge_algorithm=1, vertical_merge_algorithm_min_rows_to_activate=0, vertical_merge_algorithm_min_columns_to_activate=0; create table rep_fsync_r2 (key Int) engine=ReplicatedMergeTree('/clickhouse/tables/{database}/rep_fsync', 'r2') order by key settings min_bytes_for_wide_part=0, fsync_part_directory=1, enable_vertical_merge_algorithm=1, vertical_merge_algorithm_min_rows_to_activate=0, vertical_merge_algorithm_min_columns_to_activate=0; diff --git a/tests/queries/0_stateless/01647_clickhouse_local_hung.sh b/tests/queries/0_stateless/01647_clickhouse_local_hung.sh index 04f32055ab6..4789db18b2e 100755 --- a/tests/queries/0_stateless/01647_clickhouse_local_hung.sh +++ b/tests/queries/0_stateless/01647_clickhouse_local_hung.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-fasttest set -e diff --git a/tests/queries/0_stateless/01651_map_functions.reference b/tests/queries/0_stateless/01651_map_functions.reference index 06adaf48cfd..60f1b6e0d0c 100644 --- a/tests/queries/0_stateless/01651_map_functions.reference +++ b/tests/queries/0_stateless/01651_map_functions.reference @@ -8,6 +8,8 @@ 0 ['name','age'] ['name','gender'] +{'name':'zhangsan','age':'10'} +{'name':'lisi','gender':'female'} 1 0 0 1 0 1 1 0 0 @@ -17,7 +19,20 @@ [1000] [1001] [1002] +{'1000':'2000','1000':'3000','1000':'4000'} +{'1001':'2002','1001':'3003','1001':'4004'} +{'1002':'2004','1002':'3006','1002':'4008'} {'aa':4,'bb':5} ['aa','bb'] [4,5] {'aa':4,'bb':5} 1 0 {0:0} 1 {0:0} 0 +{'aa':4,'bb':5} +{'aa':4,'bb':5} +{'aa':4,'bb':5} +{'aa':4,'bb':5} +{'aa':4,'bb':5} +{'aa':4,'bb':5} +{'aa':4,'bb':5} +{'aa':('a',4),'bb':('b',5)} +{'aa':('a',4),'bb':('b',5)} +{'aa':('a',4),'bb':('b',5)} diff --git a/tests/queries/0_stateless/01651_map_functions.sql b/tests/queries/0_stateless/01651_map_functions.sql index bbaaf9bee84..5942bf8b2c2 100644 --- a/tests/queries/0_stateless/01651_map_functions.sql +++ b/tests/queries/0_stateless/01651_map_functions.sql @@ -2,23 +2,25 @@ set allow_experimental_map_type = 1; -- String type drop table if exists table_map; -create table table_map (a Map(String, String), b String) engine = Memory; -insert into table_map values ({'name':'zhangsan', 'age':'10'}, 'name'), ({'name':'lisi', 'gender':'female'},'age'); +create table table_map (a Map(String, String), b String, c Array(String), d Array(String)) engine = Memory; +insert into table_map values ({'name':'zhangsan', 'age':'10'}, 'name', ['name', 'age'], ['zhangsan', '10']), ({'name':'lisi', 'gender':'female'},'age',['name', 'gender'], ['lisi', 'female']); select mapContains(a, 'name') from table_map; select mapContains(a, 'gender') from table_map; select mapContains(a, 'abc') from table_map; select mapContains(a, b) from table_map; -select mapContains(a, 10) from table_map; -- { serverError 386 } +select mapContains(a, 10) from table_map; -- { serverError NO_COMMON_TYPE } select mapKeys(a) from table_map; +select mapFromArrays(c, d) from table_map; drop table if exists table_map; -CREATE TABLE table_map (a Map(UInt8, Int), b UInt8, c UInt32) engine = MergeTree order by tuple(); -insert into table_map select map(number, number), number, number from numbers(1000, 3); +CREATE TABLE table_map (a Map(UInt8, Int), b UInt8, c UInt32, d Array(String), e Array(String)) engine = MergeTree order by tuple(); +insert into table_map select map(number, number), number, number, [number, number, number], [number*2, number*3, number*4] from numbers(1000, 3); select mapContains(a, b), mapContains(a, 
c), mapContains(a, 233) from table_map; -select mapContains(a, 'aaa') from table_map; -- { serverError 386 } -select mapContains(b, 'aaa') from table_map; -- { serverError 43 } +select mapContains(a, 'aaa') from table_map; -- { serverError NO_COMMON_TYPE } +select mapContains(b, 'aaa') from table_map; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } select mapKeys(a) from table_map; select mapValues(a) from table_map; +select mapFromArrays(d, e) from table_map; drop table if exists table_map; @@ -27,3 +29,18 @@ select map( 'aa', 4, 'bb' , 5) as m, mapKeys(m), mapValues(m); select map( 'aa', 4, 'bb' , 5) as m, mapContains(m, 'aa'), mapContains(m, 'k'); select map(0, 0) as m, mapContains(m, number % 2) from numbers(2); + +select mapFromArrays(['aa', 'bb'], [4, 5]); +select mapFromArrays(['aa', 'bb'], materialize([4, 5])) from numbers(2); +select mapFromArrays(materialize(['aa', 'bb']), [4, 5]) from numbers(2); +select mapFromArrays(materialize(['aa', 'bb']), materialize([4, 5])) from numbers(2); +select mapFromArrays('aa', [4, 5]); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select mapFromArrays(['aa', 'bb'], 5); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select mapFromArrays(['aa', 'bb'], [4, 5], [6, 7]); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +select mapFromArrays(['aa', 'bb'], [4, 5, 6]); -- { serverError SIZES_OF_ARRAYS_DONT_MATCH } +select mapFromArrays([[1,2], [3,4]], [4, 5, 6]); -- { serverError BAD_ARGUMENTS } + +select mapFromArrays(['aa', 'bb'], map('a', 4, 'b', 5)); +select mapFromArrays(['aa', 'bb'], materialize(map('a', 4, 'b', 5))) from numbers(2); +select mapFromArrays(map('a', 4, 'b', 4), ['aa', 'bb']) from numbers(2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select mapFromArrays(['aa', 'bb'], map('a', 4)); -- { serverError SIZES_OF_ARRAYS_DONT_MATCH } diff --git a/tests/queries/0_stateless/01654_test_writer_block_sequence.python b/tests/queries/0_stateless/01654_test_writer_block_sequence.python index e80cc273076..bc4e3da9ed5 100644 --- a/tests/queries/0_stateless/01654_test_writer_block_sequence.python +++ b/tests/queries/0_stateless/01654_test_writer_block_sequence.python @@ -5,15 +5,20 @@ import random import string CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient + def get_random_string(length): - return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(length)) + return "".join( + random.choice(string.ascii_uppercase + string.digits) for _ in range(length) + ) + client = ClickHouseClient() + def insert_block(table_name, block_granularity_rows, block_rows): global client block_data = [] @@ -25,9 +30,12 @@ def insert_block(table_name, block_granularity_rows, block_rows): values_row = ", ".join("(1, '" + row + "')" for row in block_data) client.query("INSERT INTO {} VALUES {}".format(table_name, values_row)) + try: client.query("DROP TABLE IF EXISTS t") - client.query("CREATE TABLE t (v UInt8, data String) ENGINE = MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0") + client.query( + "CREATE TABLE t (v UInt8, data String) ENGINE = MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0" + ) client.query("SYSTEM STOP MERGES t") @@ -53,6 +61,10 @@ try: client.query("SYSTEM START MERGES t") client.query("OPTIMIZE TABLE t FINAL") - print(client.query_return_df("SELECT COUNT() as C FROM t FORMAT TabSeparatedWithNames")['C'][0]) + print( + 
client.query_return_df( + "SELECT COUNT() as C FROM t FORMAT TabSeparatedWithNames" + )["C"][0] + ) finally: client.query("DROP TABLE IF EXISTS t") diff --git a/tests/queries/0_stateless/01655_plan_optimizations.reference b/tests/queries/0_stateless/01655_plan_optimizations.reference index f870a52284c..48d99647b43 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations.reference +++ b/tests/queries/0_stateless/01655_plan_optimizations.reference @@ -53,7 +53,7 @@ Filter column: notEquals(y, 0) 9 10 > one condition of filter should be pushed down after aggregating, other condition is casted Filter column -FUNCTION _CAST(minus(s, 4) :: 1, UInt8 :: 3) -> and(notEquals(y, 0), minus(s, 4)) +FUNCTION and(minus(s, 4) :: 1, 1 :: 3) -> and(notEquals(y, 0), minus(s, 4)) UInt8 : 2 Aggregating Filter column: notEquals(y, 0) 0 1 diff --git a/tests/queries/0_stateless/01655_plan_optimizations.sh b/tests/queries/0_stateless/01655_plan_optimizations.sh index aaecdc390cb..ec856c9bf27 100755 --- a/tests/queries/0_stateless/01655_plan_optimizations.sh +++ b/tests/queries/0_stateless/01655_plan_optimizations.sh @@ -56,7 +56,7 @@ $CLICKHOUSE_CLIENT -q " select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y ) where y != 0 and s - 4 settings enable_optimize_predicate_expression=0" | - grep -o "Aggregating\|Filter column\|Filter column: notEquals(y, 0)\|FUNCTION _CAST(minus(s, 4) :: 1, UInt8 :: 3) -> and(notEquals(y, 0), minus(s, 4))" + grep -o "Aggregating\|Filter column\|Filter column: notEquals(y, 0)\|FUNCTION and(minus(s, 4) :: 1, 1 :: 3) -> and(notEquals(y, 0), minus(s, 4)) UInt8 : 2" $CLICKHOUSE_CLIENT -q " select s, y from ( select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y diff --git a/tests/queries/0_stateless/01655_plan_optimizations_optimize_read_in_window_order.reference b/tests/queries/0_stateless/01655_plan_optimizations_optimize_read_in_window_order.reference index 00eb03bd5f0..f23cf03913b 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations_optimize_read_in_window_order.reference +++ b/tests/queries/0_stateless/01655_plan_optimizations_optimize_read_in_window_order.reference @@ -1,15 +1,25 @@ Partial sorting plan optimize_read_in_window_order=0 Sort description: n ASC, x ASC + optimize_read_in_window_order=0, allow_experimental_analyzer=1 + Sort description: default.test_01655_plan_optimizations_optimize_read_in_window_order_n.n_0 ASC, default.test_01655_plan_optimizations_optimize_read_in_window_order_n.x_1 ASC optimize_read_in_window_order=1 Prefix sort description: n ASC Result sort description: n ASC, x ASC + optimize_read_in_window_order=1, allow_experimental_analyzer=1 + Prefix sort description: default.test_01655_plan_optimizations_optimize_read_in_window_order_n.n_0 ASC + Result sort description: default.test_01655_plan_optimizations_optimize_read_in_window_order_n.n_0 ASC, default.test_01655_plan_optimizations_optimize_read_in_window_order_n.x_1 ASC No sorting plan optimize_read_in_window_order=0 Sort description: n ASC, x ASC + optimize_read_in_window_order=0, allow_experimental_analyzer=1 + Sort description: default.test_01655_plan_optimizations_optimize_read_in_window_order_n_x.n_0 ASC, default.test_01655_plan_optimizations_optimize_read_in_window_order_n_x.x_1 ASC optimize_read_in_window_order=1 Prefix sort description: n ASC, x ASC Result sort description: n ASC, x ASC + optimize_read_in_window_order=1, allow_experimental_analyzer=1 + Prefix sort description: default.test_01655_plan_optimizations_optimize_read_in_window_order_n_x.n_0 ASC, default.test_01655_plan_optimizations_optimize_read_in_window_order_n_x.x_1 ASC + Result
sort description: default.test_01655_plan_optimizations_optimize_read_in_window_order_n_x.n_0 ASC, default.test_01655_plan_optimizations_optimize_read_in_window_order_n_x.x_1 ASC Complex ORDER BY optimize_read_in_window_order=0 3 3 1 diff --git a/tests/queries/0_stateless/01655_plan_optimizations_optimize_read_in_window_order.sh b/tests/queries/0_stateless/01655_plan_optimizations_optimize_read_in_window_order.sh index a606f1a2f9e..24c8cf5052e 100755 --- a/tests/queries/0_stateless/01655_plan_optimizations_optimize_read_in_window_order.sh +++ b/tests/queries/0_stateless/01655_plan_optimizations_optimize_read_in_window_order.sh @@ -19,17 +19,25 @@ $CLICKHOUSE_CLIENT -q "optimize table ${name}_n_x final" echo 'Partial sorting plan' echo ' optimize_read_in_window_order=0' -$CLICKHOUSE_CLIENT -q "explain plan actions=1, description=1 select n, sum(x) OVER (ORDER BY n, x ROWS BETWEEN 100 PRECEDING AND CURRENT ROW) from ${name}_n SETTINGS optimize_read_in_order=0,optimize_read_in_window_order=0" | grep -i "sort description" +$CLICKHOUSE_CLIENT -q "explain plan actions=1, description=1 select n, sum(x) OVER (ORDER BY n, x ROWS BETWEEN 100 PRECEDING AND CURRENT ROW) from ${name}_n SETTINGS optimize_read_in_order=0,optimize_read_in_window_order=0,allow_experimental_analyzer=0" | grep -i "sort description" +echo ' optimize_read_in_window_order=0, allow_experimental_analyzer=1' +$CLICKHOUSE_CLIENT -q "explain plan actions=1, description=1 select n, sum(x) OVER (ORDER BY n, x ROWS BETWEEN 100 PRECEDING AND CURRENT ROW) from ${name}_n SETTINGS optimize_read_in_order=0,optimize_read_in_window_order=0,allow_experimental_analyzer=1" | grep -i "sort description" echo ' optimize_read_in_window_order=1' -$CLICKHOUSE_CLIENT -q "explain plan actions=1, description=1 select n, sum(x) OVER (ORDER BY n, x ROWS BETWEEN 100 PRECEDING AND CURRENT ROW) from ${name}_n SETTINGS optimize_read_in_order=1" | grep -i "sort description" +$CLICKHOUSE_CLIENT -q "explain plan actions=1, description=1 select n, sum(x) OVER (ORDER BY n, x ROWS BETWEEN 100 PRECEDING AND CURRENT ROW) from ${name}_n SETTINGS optimize_read_in_order=1,allow_experimental_analyzer=0" | grep -i "sort description" +echo ' optimize_read_in_window_order=1, allow_experimental_analyzer=1' +$CLICKHOUSE_CLIENT -q "explain plan actions=1, description=1 select n, sum(x) OVER (ORDER BY n, x ROWS BETWEEN 100 PRECEDING AND CURRENT ROW) from ${name}_n SETTINGS optimize_read_in_order=1,allow_experimental_analyzer=1" | grep -i "sort description" echo 'No sorting plan' echo ' optimize_read_in_window_order=0' -$CLICKHOUSE_CLIENT -q "explain plan actions=1, description=1 select n, sum(x) OVER (ORDER BY n, x ROWS BETWEEN 100 PRECEDING AND CURRENT ROW) from ${name}_n_x SETTINGS optimize_read_in_order=0,optimize_read_in_window_order=0" | grep -i "sort description" +$CLICKHOUSE_CLIENT -q "explain plan actions=1, description=1 select n, sum(x) OVER (ORDER BY n, x ROWS BETWEEN 100 PRECEDING AND CURRENT ROW) from ${name}_n_x SETTINGS optimize_read_in_order=0,optimize_read_in_window_order=0,allow_experimental_analyzer=0" | grep -i "sort description" +echo ' optimize_read_in_window_order=0, allow_experimental_analyzer=1' +$CLICKHOUSE_CLIENT -q "explain plan actions=1, description=1 select n, sum(x) OVER (ORDER BY n, x ROWS BETWEEN 100 PRECEDING AND CURRENT ROW) from ${name}_n_x SETTINGS optimize_read_in_order=0,optimize_read_in_window_order=0,allow_experimental_analyzer=1" | grep -i "sort description" echo ' optimize_read_in_window_order=1' -$CLICKHOUSE_CLIENT -q "explain plan
actions=1, description=1 select n, sum(x) OVER (ORDER BY n, x ROWS BETWEEN 100 PRECEDING AND CURRENT ROW) from ${name}_n_x SETTINGS optimize_read_in_order=1" | grep -i "sort description" +$CLICKHOUSE_CLIENT -q "explain plan actions=1, description=1 select n, sum(x) OVER (ORDER BY n, x ROWS BETWEEN 100 PRECEDING AND CURRENT ROW) from ${name}_n_x SETTINGS optimize_read_in_order=1,allow_experimental_analyzer=0" | grep -i "sort description" +echo ' optimize_read_in_window_order=1, allow_experimental_analyzer=1' +$CLICKHOUSE_CLIENT -q "explain plan actions=1, description=1 select n, sum(x) OVER (ORDER BY n, x ROWS BETWEEN 100 PRECEDING AND CURRENT ROW) from ${name}_n_x SETTINGS optimize_read_in_order=1,allow_experimental_analyzer=1" | grep -i "sort description" echo 'Complex ORDER BY' $CLICKHOUSE_CLIENT -q "CREATE TABLE ${name}_complex (unique1 Int32, unique2 Int32, ten Int32) ENGINE=MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192" diff --git a/tests/queries/0_stateless/01656_join_defaul_enum.sql b/tests/queries/0_stateless/01656_join_defaul_enum.sql index 8a0fc089b16..878936da3b5 100644 --- a/tests/queries/0_stateless/01656_join_defaul_enum.sql +++ b/tests/queries/0_stateless/01656_join_defaul_enum.sql @@ -1,18 +1,9 @@ -DROP DATABASE IF EXISTS test_01656; - -CREATE DATABASE test_01656; -USE test_01656; - -DROP TABLE IF EXISTS table_key; -DROP TABLE IF EXISTS table_with_enum; - -CREATE TABLE table_key (keycol UInt16) ENGINE = MergeTree() ORDER BY (keycol) PARTITION BY tuple(); +CREATE TABLE table_key (keycol UInt16) ENGINE = MergeTree() ORDER BY (keycol) PARTITION BY tuple() +as SELECT * FROM VALUES ( (1), (2), (3) ); CREATE TABLE table_with_enum (keycol UInt16, enum_col Enum8('First' = 1,'Second' = 2)) - ENGINE = MergeTree() ORDER BY (keycol) PARTITION BY tuple(); - -INSERT INTO table_key VALUES (1), (2), (3); -INSERT INTO table_with_enum VALUES (2, 'Second'), (4, 'Second'); + ENGINE = MergeTree() ORDER BY (keycol) PARTITION BY tuple() +as SELECT * FROM VALUES ( (2, 'Second'), (4, 'Second') ); SET join_algorithm = 'hash'; @@ -34,7 +25,6 @@ SELECT keycol, enum_col FROM table_with_enum LEFT JOIN table_key USING (keycol) SELECT keycol, enum_col FROM table_with_enum RIGHT JOIN table_key USING (keycol) ORDER BY keycol; SELECT keycol, enum_col FROM table_with_enum FULL JOIN table_key USING (keycol) ORDER BY keycol; -DROP TABLE IF EXISTS table_key; -DROP TABLE IF EXISTS table_with_enum; +DROP TABLE table_key; +DROP TABLE table_with_enum; -DROP DATABASE IF EXISTS test_01656; diff --git a/tests/queries/0_stateless/01660_system_parts_smoke.sql b/tests/queries/0_stateless/01660_system_parts_smoke.sql index 64cba86b8f6..ad73e33eee1 100644 --- a/tests/queries/0_stateless/01660_system_parts_smoke.sql +++ b/tests/queries/0_stateless/01660_system_parts_smoke.sql @@ -21,8 +21,8 @@ SELECT '# two parts'; INSERT INTO data_01660 VALUES (0); INSERT INTO data_01660 VALUES (1); SELECT _state FROM system.parts WHERE database = currentDatabase() AND table = 'data_01660'; -SELECT name, _state FROM system.parts WHERE database = currentDatabase() AND table = 'data_01660'; -SELECT name, active FROM system.parts WHERE database = currentDatabase() AND table = 'data_01660'; +SELECT name, _state FROM system.parts WHERE database = currentDatabase() AND table = 'data_01660' ORDER BY name; +SELECT name, active FROM system.parts WHERE database = currentDatabase() AND table = 'data_01660' ORDER BY name; -- OPTIMIZE to create Outdated parts SELECT '# optimize'; @@ -35,7 +35,7 @@ SELECT count(), _state FROM system.parts 
WHERE database = currentDatabase() AND -- Inactive parts are clearing by async process also SELECT '# truncate'; TRUNCATE data_01660; -SELECT if (count() > 0, 'HAVE PARTS', 'NO PARTS'), _state FROM system.parts WHERE database = currentDatabase() AND table = 'data_01660' GROUP BY _state; +SELECT if (count() > 0, 'HAVE PARTS', 'NO PARTS'), _state FROM system.parts WHERE database = currentDatabase() AND table = 'data_01660' GROUP BY _state ORDER BY _state; -- But DROP does SELECT '# drop'; diff --git a/tests/queries/0_stateless/01671_merge_join_and_constants.reference b/tests/queries/0_stateless/01671_merge_join_and_constants.reference index efd814df893..f82033ab3c7 100644 --- a/tests/queries/0_stateless/01671_merge_join_and_constants.reference +++ b/tests/queries/0_stateless/01671_merge_join_and_constants.reference @@ -1,6 +1,6 @@ -┌─a─┬──────────b─┬─c─┬──────────d─┬─t2.'0.10'─┐ -│ a │ 2018-01-01 │ │ 1970-01-01 │ │ -│ b │ 2018-01-01 │ B │ 2018-01-01 │ 0.10 │ -│ c │ 2018-01-01 │ C │ 2018-01-01 │ 0.10 │ -└───┴────────────┴───┴────────────┴───────────┘ +┌─a─┬──────────b─┬─c─┬──────────d─┬─'0.10'─┬─c─┐ +│ a │ 2018-01-01 │ │ 1970-01-01 │ │ │ +│ b │ 2018-01-01 │ B │ 2018-01-01 │ 0.10 │ B │ +│ c │ 2018-01-01 │ C │ 2018-01-01 │ 0.10 │ C │ +└───┴────────────┴───┴────────────┴────────┴───┘ \N \N \N \N 0 0 diff --git a/tests/queries/0_stateless/01671_merge_join_and_constants.sql b/tests/queries/0_stateless/01671_merge_join_and_constants.sql index e9a60f11875..5cabd6f7f06 100644 --- a/tests/queries/0_stateless/01671_merge_join_and_constants.sql +++ b/tests/queries/0_stateless/01671_merge_join_and_constants.sql @@ -1,3 +1,5 @@ +SET allow_experimental_analyzer = 1; + DROP TABLE IF EXISTS table1; DROP TABLE IF EXISTS table2; diff --git a/tests/queries/0_stateless/01674_where_prewhere_array_crash.sql b/tests/queries/0_stateless/01674_where_prewhere_array_crash.sql index 7a2466c70d7..478e0039177 100644 --- a/tests/queries/0_stateless/01674_where_prewhere_array_crash.sql +++ b/tests/queries/0_stateless/01674_where_prewhere_array_crash.sql @@ -1,7 +1,5 @@ -SET allow_experimental_analyzer = 1; - drop table if exists tab; create table tab (x UInt64, `arr.a` Array(UInt64), `arr.b` Array(UInt64)) engine = MergeTree order by x; -select x from tab array join arr prewhere x != 0 where arr; -- { serverError 43 } -select x from tab array join arr prewhere arr where x != 0; -- { serverError 43 } +select x from tab array join arr prewhere x != 0 where arr; -- { serverError 47, 59 } +select x from tab array join arr prewhere arr where x != 0; -- { serverError 47, 59 } drop table if exists tab; diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.reference b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.reference index c66682ca038..cf3e942adfe 100644 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.reference +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.reference @@ -4,7 +4,6 @@ ReplacingMergeTree: OK JSONEachRow: OK clusterAllReplicas: OK SimpleAggregateFunction: OK -write_ahead_log_interval_ms_to_fsync: OK max_concurrent_queries_for_all_users: OK test_shard_localhost: OK default_path_test: OK diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh index 617148de5a3..42ae5e84f44 100755 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh @@ -89,8 +89,6 @@ 
client_compwords_positive=( clusterAllReplicas # system.data_type_families SimpleAggregateFunction - # system.merge_tree_settings - write_ahead_log_interval_ms_to_fsync # system.settings max_concurrent_queries_for_all_users # system.clusters diff --git a/tests/queries/0_stateless/01686_rocksdb.sql b/tests/queries/0_stateless/01686_rocksdb.sql index ad6f56772b0..f3177ce140e 100644 --- a/tests/queries/0_stateless/01686_rocksdb.sql +++ b/tests/queries/0_stateless/01686_rocksdb.sql @@ -24,7 +24,7 @@ SELECT * FROM 01686_test WHERE key IN (123, 456, -123) ORDER BY key; SELECT '--'; SELECT * FROM 01686_test WHERE key = 'Hello'; -- { serverError 53 } -DETACH TABLE 01686_test NO DELAY; +DETACH TABLE 01686_test SYNC; ATTACH TABLE 01686_test; SELECT * FROM 01686_test WHERE key IN (99, 999, 9999, -123) ORDER BY key; diff --git a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.reference b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.reference index b6f5fe99ca1..b5b93c34c00 100644 --- a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.reference +++ b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.reference @@ -1,2 +1,2 @@ -CREATE TABLE default.x\n(\n `i` Int32,\n INDEX mm rand() TYPE minmax GRANULARITY 1,\n INDEX nn rand() TYPE minmax GRANULARITY 1,\n PROJECTION p\n (\n SELECT max(i)\n ),\n PROJECTION p2\n (\n SELECT min(i)\n )\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/x\', \'r\')\nORDER BY i\nSETTINGS index_granularity = 8192 -metadata format version: 1\ndate column: \nsampling expression: \nindex granularity: 8192\nmode: 0\nsign column: \nprimary key: i\ndata format version: 1\npartition key: \nindices: mm rand() TYPE minmax GRANULARITY 1, nn rand() TYPE minmax GRANULARITY 1\nprojections: p (SELECT max(i)), p2 (SELECT min(i))\ngranularity bytes: 10485760\n +CREATE TABLE default.x\n(\n `i` Int32,\n INDEX mm log2(i) TYPE minmax GRANULARITY 1,\n INDEX nn log2(i) TYPE minmax GRANULARITY 1,\n PROJECTION p\n (\n SELECT max(i)\n ),\n PROJECTION p2\n (\n SELECT min(i)\n )\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/x\', \'r\')\nORDER BY i\nSETTINGS index_granularity = 8192 +metadata format version: 1\ndate column: \nsampling expression: \nindex granularity: 8192\nmode: 0\nsign column: \nprimary key: i\ndata format version: 1\npartition key: \nindices: mm log2(i) TYPE minmax GRANULARITY 1, nn log2(i) TYPE minmax GRANULARITY 1\nprojections: p (SELECT max(i)), p2 (SELECT min(i))\ngranularity bytes: 10485760\n diff --git a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql index 683bd271405..be0f7e8b710 100644 --- a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql +++ b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.sql @@ -2,9 +2,9 @@ drop table if exists x; -create table x(i int, index mm RAND() type minmax granularity 1, projection p (select MAX(i))) engine ReplicatedMergeTree('/clickhouse/tables/{database}/x', 'r') order by i; +create table x(i int, index mm LOG2(i) type minmax granularity 1, projection p (select MAX(i))) engine ReplicatedMergeTree('/clickhouse/tables/{database}/x', 'r') order by i; -alter table x add index nn RAND() type minmax granularity 1, add projection p2 (select MIN(i)); +alter table x add index nn LOG2(i) type minmax granularity 1, add projection p2 (select MIN(i)); show create x; diff --git 
a/tests/queries/0_stateless/01709_inactive_parts_to_throw_insert.sql b/tests/queries/0_stateless/01709_inactive_parts_to_throw_insert.sql index 6de0d4f4e0c..2bb92aec713 100644 --- a/tests/queries/0_stateless/01709_inactive_parts_to_throw_insert.sql +++ b/tests/queries/0_stateless/01709_inactive_parts_to_throw_insert.sql @@ -7,6 +7,6 @@ insert into data_01709 values (2); optimize table data_01709 final; -insert into data_01709 values (3); -- { serverError 252; } +insert into data_01709 values (3); -- { serverError 252 } drop table data_01709; diff --git a/tests/queries/0_stateless/01710_aggregate_projection_with_hashing.sql b/tests/queries/0_stateless/01710_aggregate_projection_with_hashing.sql index d5eaa2617a6..0429865e7b1 100644 --- a/tests/queries/0_stateless/01710_aggregate_projection_with_hashing.sql +++ b/tests/queries/0_stateless/01710_aggregate_projection_with_hashing.sql @@ -1,4 +1,4 @@ -set allow_experimental_projection_optimization = 1, force_optimize_projection = 1; +set optimize_use_projections = 1, force_optimize_projection = 1; drop table if exists tp; diff --git a/tests/queries/0_stateless/01710_aggregate_projections.sh b/tests/queries/0_stateless/01710_aggregate_projections.sh index 561b8927579..326a564a208 100755 --- a/tests/queries/0_stateless/01710_aggregate_projections.sh +++ b/tests/queries/0_stateless/01710_aggregate_projections.sh @@ -8,22 +8,22 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -q "CREATE TABLE test_agg_proj (x Int32, y Int32, PROJECTION x_plus_y (SELECT sum(x - y), argMax(x, y) group by x + y)) ENGINE = MergeTree ORDER BY tuple() settings index_granularity = 1" $CLICKHOUSE_CLIENT -q "insert into test_agg_proj select intDiv(number, 2), -intDiv(number,3) - 1 from numbers(100)" -$CLICKHOUSE_CLIENT -q "select x + y, sum(x - y) as s from test_agg_proj group by x + y order by s desc limit 5 settings allow_experimental_projection_optimization=1" -$CLICKHOUSE_CLIENT -q "select x + y, sum(x - y) as s from test_agg_proj group by x + y order by s desc limit 5 settings allow_experimental_projection_optimization=1 format JSON" | grep "rows_read" +$CLICKHOUSE_CLIENT -q "select x + y, sum(x - y) as s from test_agg_proj group by x + y order by s desc limit 5 settings optimize_use_projections=1" +$CLICKHOUSE_CLIENT -q "select x + y, sum(x - y) as s from test_agg_proj group by x + y order by s desc limit 5 settings optimize_use_projections=1 format JSON" | grep "rows_read" -$CLICKHOUSE_CLIENT -q "select (x + y) * 2, sum(x - y) * 2 as s from test_agg_proj group by x + y order by s desc limit 5 settings allow_experimental_projection_optimization=1" -$CLICKHOUSE_CLIENT -q "select (x + y) * 2, sum(x - y) * 2 as s from test_agg_proj group by x + y order by s desc limit 5 settings allow_experimental_projection_optimization=1 format JSON" | grep "rows_read" +$CLICKHOUSE_CLIENT -q "select (x + y) * 2, sum(x - y) * 2 as s from test_agg_proj group by x + y order by s desc limit 5 settings optimize_use_projections=1" +$CLICKHOUSE_CLIENT -q "select (x + y) * 2, sum(x - y) * 2 as s from test_agg_proj group by x + y order by s desc limit 5 settings optimize_use_projections=1 format JSON" | grep "rows_read" -$CLICKHOUSE_CLIENT -q "select intDiv(x + y, 2) as v, intDiv(x + y, 3), sum(x - y) as s from test_agg_proj group by intDiv(x + y, 2), intDiv(x + y, 3) order by s desc, v limit 5 settings allow_experimental_projection_optimization=1" -$CLICKHOUSE_CLIENT -q "select intDiv(x + y, 2) as v, intDiv(x + y, 3), sum(x - y) as s from test_agg_proj group by 
intDiv(x + y, 2), intDiv(x + y, 3) order by s desc, v limit 5 settings allow_experimental_projection_optimization=1 format JSON" | grep "rows_read" +$CLICKHOUSE_CLIENT -q "select intDiv(x + y, 2) as v, intDiv(x + y, 3), sum(x - y) as s from test_agg_proj group by intDiv(x + y, 2), intDiv(x + y, 3) order by s desc, v limit 5 settings optimize_use_projections=1" +$CLICKHOUSE_CLIENT -q "select intDiv(x + y, 2) as v, intDiv(x + y, 3), sum(x - y) as s from test_agg_proj group by intDiv(x + y, 2), intDiv(x + y, 3) order by s desc, v limit 5 settings optimize_use_projections=1 format JSON" | grep "rows_read" -$CLICKHOUSE_CLIENT -q "select x + y + 1, argMax(x, y) * sum(x - y) as s from test_agg_proj group by x + y + 1 order by s desc limit 5 settings allow_experimental_projection_optimization=1" -$CLICKHOUSE_CLIENT -q "select x + y + 1, argMax(x, y) * sum(x - y) as s from test_agg_proj group by x + y + 1 order by s desc limit 5 settings allow_experimental_projection_optimization=1 format JSON" | grep "rows_read" +$CLICKHOUSE_CLIENT -q "select x + y + 1, argMax(x, y) * sum(x - y) as s from test_agg_proj group by x + y + 1 order by s desc limit 5 settings optimize_use_projections=1" +$CLICKHOUSE_CLIENT -q "select x + y + 1, argMax(x, y) * sum(x - y) as s from test_agg_proj group by x + y + 1 order by s desc limit 5 settings optimize_use_projections=1 format JSON" | grep "rows_read" -$CLICKHOUSE_CLIENT -q "select x + y + 1, argMax(y, x), sum(x - y) as s from test_agg_proj group by x + y + 1 order by s desc limit 5 settings allow_experimental_projection_optimization=1" -$CLICKHOUSE_CLIENT -q "select x + y + 1, argMax(y, x), sum(x - y) as s from test_agg_proj group by x + y + 1 order by s desc limit 5 settings allow_experimental_projection_optimization=1 format JSON" | grep "rows_read" +$CLICKHOUSE_CLIENT -q "select x + y + 1, argMax(y, x), sum(x - y) as s from test_agg_proj group by x + y + 1 order by s desc limit 5 settings optimize_use_projections=1" +$CLICKHOUSE_CLIENT -q "select x + y + 1, argMax(y, x), sum(x - y) as s from test_agg_proj group by x + y + 1 order by s desc limit 5 settings optimize_use_projections=1 format JSON" | grep "rows_read" -$CLICKHOUSE_CLIENT -q "select x + y, sum(x - y) as s from test_agg_proj prewhere (x + y) % 2 = 1 group by x + y order by s desc limit 5 settings allow_experimental_projection_optimization=1" -$CLICKHOUSE_CLIENT -q "select x + y, sum(x - y) as s from test_agg_proj prewhere (x + y) % 2 = 1 group by x + y order by s desc limit 5 settings allow_experimental_projection_optimization=1 format JSON" | grep "rows_read" +$CLICKHOUSE_CLIENT -q "select x + y, sum(x - y) as s from test_agg_proj prewhere (x + y) % 2 = 1 group by x + y order by s desc limit 5 settings optimize_use_projections=1" +$CLICKHOUSE_CLIENT -q "select x + y, sum(x - y) as s from test_agg_proj prewhere (x + y) % 2 = 1 group by x + y order by s desc limit 5 settings optimize_use_projections=1 format JSON" | grep "rows_read" $CLICKHOUSE_CLIENT -q "drop table test_agg_proj" diff --git a/tests/queries/0_stateless/01710_force_use_projection.sql b/tests/queries/0_stateless/01710_force_use_projection.sql index 8931c65e34e..af6ca69c540 100644 --- a/tests/queries/0_stateless/01710_force_use_projection.sql +++ b/tests/queries/0_stateless/01710_force_use_projection.sql @@ -2,7 +2,7 @@ drop table if exists tp; create table tp (d1 Int32, d2 Int32, eventcnt Int64, projection p (select sum(eventcnt) group by d1)) engine = MergeTree order by (d1, d2); -set allow_experimental_projection_optimization = 1, 
force_optimize_projection = 1; +set optimize_use_projections = 1, force_optimize_projection = 1; select sum(eventcnt) eventcnt, d1 from tp group by d1; diff --git a/tests/queries/0_stateless/01710_minmax_count_projection.sql b/tests/queries/0_stateless/01710_minmax_count_projection.sql index f7645414187..c17f0e1e1fb 100644 --- a/tests/queries/0_stateless/01710_minmax_count_projection.sql +++ b/tests/queries/0_stateless/01710_minmax_count_projection.sql @@ -4,7 +4,7 @@ create table d (i int, j int) engine MergeTree partition by i % 2 order by tuple insert into d select number, number from numbers(10000); -set max_rows_to_read = 2, allow_experimental_projection_optimization = 1; +set max_rows_to_read = 2, optimize_use_projections = 1; select min(i), max(i), count() from d; select min(i), max(i), count() from d group by _partition_id order by _partition_id; diff --git a/tests/queries/0_stateless/01710_normal_projection_fix1.sql b/tests/queries/0_stateless/01710_normal_projection_fix1.sql index b4d7c6e8734..cce8309046a 100644 --- a/tests/queries/0_stateless/01710_normal_projection_fix1.sql +++ b/tests/queries/0_stateless/01710_normal_projection_fix1.sql @@ -7,11 +7,14 @@ insert into t values (1, 2); alter table t add projection x (select * order by j); insert into t values (1, 4); +insert into t values (1, 5); -set allow_experimental_projection_optimization = 1, force_optimize_projection = 1; +set optimize_use_projections = 1, force_optimize_projection = 1; select i from t prewhere j = 4; SELECT j = 2, i FROM t PREWHERE j = 2; +SELECT j = -1, j = NULL FROM t WHERE j = -1; + drop table t; diff --git a/tests/queries/0_stateless/01710_normal_projections.reference b/tests/queries/0_stateless/01710_normal_projections.reference index 22ee679ce53..ccb4359b0e9 100644 --- a/tests/queries/0_stateless/01710_normal_projections.reference +++ b/tests/queries/0_stateless/01710_normal_projections.reference @@ -1,5 +1,5 @@ select where x < 10 -optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 0 +optimize_move_to_prewhere = 0, optimize_use_projections = 0 0 4294967295 1 4294967294 2 4294967293 @@ -10,7 +10,7 @@ optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 0 7 4294967288 8 4294967287 9 4294967286 -optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 1 +optimize_move_to_prewhere = 0, optimize_use_projections = 1 0 4294967295 1 4294967294 2 4294967293 @@ -21,7 +21,7 @@ optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 1 7 4294967288 8 4294967287 9 4294967286 -optimize_move_to_prewhere = 1, allow_experimental_projection_optimization = 0 +optimize_move_to_prewhere = 1, optimize_use_projections = 0 0 4294967295 1 4294967294 2 4294967293 @@ -32,7 +32,7 @@ optimize_move_to_prewhere = 1, allow_experimental_projection_optimization = 0 7 4294967288 8 4294967287 9 4294967286 -optimize_move_to_prewhere = 1, allow_experimental_projection_optimization = 1 +optimize_move_to_prewhere = 1, optimize_use_projections = 1 0 4294967295 1 4294967294 2 4294967293 @@ -43,16 +43,16 @@ optimize_move_to_prewhere = 1, allow_experimental_projection_optimization = 1 7 4294967288 8 4294967287 9 4294967286 -optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 0 +optimize_move_to_prewhere = 0, optimize_use_projections = 0 "rows_read": 100, -optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 1 +optimize_move_to_prewhere = 0, optimize_use_projections = 1 "rows_read": 100, 
-optimize_move_to_prewhere = 1, allow_experimental_projection_optimization = 0 +optimize_move_to_prewhere = 1, optimize_use_projections = 0 "rows_read": 100, -optimize_move_to_prewhere = 1, allow_experimental_projection_optimization = 1 +optimize_move_to_prewhere = 1, optimize_use_projections = 1 "rows_read": 100, select where y > 4294967286 -optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 0 +optimize_move_to_prewhere = 0, optimize_use_projections = 0 0 4294967295 1 4294967294 2 4294967293 @@ -62,7 +62,7 @@ optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 0 6 4294967289 7 4294967288 8 4294967287 -optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 1 +optimize_move_to_prewhere = 0, optimize_use_projections = 1 0 4294967295 1 4294967294 2 4294967293 @@ -72,7 +72,7 @@ optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 1 6 4294967289 7 4294967288 8 4294967287 -optimize_move_to_prewhere = 1, allow_experimental_projection_optimization = 0 +optimize_move_to_prewhere = 1, optimize_use_projections = 0 0 4294967295 1 4294967294 2 4294967293 @@ -82,7 +82,7 @@ optimize_move_to_prewhere = 1, allow_experimental_projection_optimization = 0 6 4294967289 7 4294967288 8 4294967287 -optimize_move_to_prewhere = 1, allow_experimental_projection_optimization = 1 +optimize_move_to_prewhere = 1, optimize_use_projections = 1 0 4294967295 1 4294967294 2 4294967293 @@ -92,12 +92,12 @@ optimize_move_to_prewhere = 1, allow_experimental_projection_optimization = 1 6 4294967289 7 4294967288 8 4294967287 -optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 0 +optimize_move_to_prewhere = 0, optimize_use_projections = 0 "rows_read": 100, -optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 1 +optimize_move_to_prewhere = 0, optimize_use_projections = 1 "rows_read": 100, -optimize_move_to_prewhere = 1, allow_experimental_projection_optimization = 0 +optimize_move_to_prewhere = 1, optimize_use_projections = 0 "rows_read": 100, -optimize_move_to_prewhere = 1, allow_experimental_projection_optimization = 1 +optimize_move_to_prewhere = 1, optimize_use_projections = 1 "rows_read": 100, 50 diff --git a/tests/queries/0_stateless/01710_normal_projections.sh b/tests/queries/0_stateless/01710_normal_projections.sh index 3f2114b9a2b..5eed9b23573 100755 --- a/tests/queries/0_stateless/01710_normal_projections.sh +++ b/tests/queries/0_stateless/01710_normal_projections.sh @@ -9,73 +9,73 @@ $CLICKHOUSE_CLIENT -q "insert into test_sort_proj select number, toUInt32(-numbe echo "select where x < 10" -echo "optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 0" +echo "optimize_move_to_prewhere = 0, optimize_use_projections = 0" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_sort_proj WHERE x < 10 order by x - SETTINGS optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 0" + SETTINGS optimize_move_to_prewhere = 0, optimize_use_projections = 0" -echo "optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 1" +echo "optimize_move_to_prewhere = 0, optimize_use_projections = 1" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_sort_proj WHERE x < 10 order by x - SETTINGS optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 1" + SETTINGS optimize_move_to_prewhere = 0, optimize_use_projections = 1" -echo "optimize_move_to_prewhere = 1, allow_experimental_projection_optimization = 0" +echo "optimize_move_to_prewhere = 1, 
optimize_use_projections = 0" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_sort_proj WHERE x < 10 order by x - SETTINGS optimize_move_to_prewhere = 1, allow_experimental_projection_optimization = 0" + SETTINGS optimize_move_to_prewhere = 1, optimize_use_projections = 0" -echo "optimize_move_to_prewhere = 1, allow_experimental_projection_optimization = 1" +echo "optimize_move_to_prewhere = 1, optimize_use_projections = 1" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_sort_proj WHERE x < 10 order by x - SETTINGS optimize_move_to_prewhere = 1, allow_experimental_projection_optimization = 1" + SETTINGS optimize_move_to_prewhere = 1, optimize_use_projections = 1" -echo "optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 0" +echo "optimize_move_to_prewhere = 0, optimize_use_projections = 0" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_sort_proj WHERE x < 10 order by x FORMAT JSON - SETTINGS optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 0" | grep rows_read + SETTINGS optimize_move_to_prewhere = 0, optimize_use_projections = 0" | grep rows_read -echo "optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 1" +echo "optimize_move_to_prewhere = 0, optimize_use_projections = 1" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_sort_proj WHERE x < 10 order by x FORMAT JSON - SETTINGS optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 1" | grep rows_read + SETTINGS optimize_move_to_prewhere = 0, optimize_use_projections = 1" | grep rows_read -echo "optimize_move_to_prewhere = 1, allow_experimental_projection_optimization = 0" +echo "optimize_move_to_prewhere = 1, optimize_use_projections = 0" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_sort_proj WHERE x < 10 order by x FORMAT JSON - SETTINGS optimize_move_to_prewhere = 1, allow_experimental_projection_optimization = 0" | grep rows_read + SETTINGS optimize_move_to_prewhere = 1, optimize_use_projections = 0" | grep rows_read -echo "optimize_move_to_prewhere = 1, allow_experimental_projection_optimization = 1" +echo "optimize_move_to_prewhere = 1, optimize_use_projections = 1" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_sort_proj WHERE x < 10 order by x FORMAT JSON - SETTINGS optimize_move_to_prewhere = 1, allow_experimental_projection_optimization = 1" | grep rows_read + SETTINGS optimize_move_to_prewhere = 1, optimize_use_projections = 1" | grep rows_read echo "select where y > 4294967286" -echo "optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 0" +echo "optimize_move_to_prewhere = 0, optimize_use_projections = 0" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_sort_proj WHERE y > 4294967286 order by x - SETTINGS optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 0" + SETTINGS optimize_move_to_prewhere = 0, optimize_use_projections = 0" -echo "optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 1" +echo "optimize_move_to_prewhere = 0, optimize_use_projections = 1" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_sort_proj WHERE y > 4294967286 order by x - SETTINGS optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 1" + SETTINGS optimize_move_to_prewhere = 0, optimize_use_projections = 1" -echo "optimize_move_to_prewhere = 1, allow_experimental_projection_optimization = 0" +echo "optimize_move_to_prewhere = 1, optimize_use_projections = 0" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_sort_proj WHERE y > 4294967286 order by x - SETTINGS optimize_move_to_prewhere = 1, 
allow_experimental_projection_optimization = 0" + SETTINGS optimize_move_to_prewhere = 1, optimize_use_projections = 0" -echo "optimize_move_to_prewhere = 1, allow_experimental_projection_optimization = 1" +echo "optimize_move_to_prewhere = 1, optimize_use_projections = 1" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_sort_proj WHERE y > 4294967286 order by x - SETTINGS optimize_move_to_prewhere = 1, allow_experimental_projection_optimization = 1" + SETTINGS optimize_move_to_prewhere = 1, optimize_use_projections = 1" -echo "optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 0" +echo "optimize_move_to_prewhere = 0, optimize_use_projections = 0" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_sort_proj WHERE y > 4294967286 order by x FORMAT JSON - SETTINGS optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 0" | grep rows_read + SETTINGS optimize_move_to_prewhere = 0, optimize_use_projections = 0" | grep rows_read -echo "optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 1" +echo "optimize_move_to_prewhere = 0, optimize_use_projections = 1" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_sort_proj WHERE y > 4294967286 order by x FORMAT JSON - SETTINGS optimize_move_to_prewhere = 0, allow_experimental_projection_optimization = 1" | grep rows_read + SETTINGS optimize_move_to_prewhere = 0, optimize_use_projections = 1" | grep rows_read -echo "optimize_move_to_prewhere = 1, allow_experimental_projection_optimization = 0" +echo "optimize_move_to_prewhere = 1, optimize_use_projections = 0" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_sort_proj WHERE y > 4294967286 order by x FORMAT JSON - SETTINGS optimize_move_to_prewhere = 1, allow_experimental_projection_optimization = 0" | grep rows_read + SETTINGS optimize_move_to_prewhere = 1, optimize_use_projections = 0" | grep rows_read -echo "optimize_move_to_prewhere = 1, allow_experimental_projection_optimization = 1" +echo "optimize_move_to_prewhere = 1, optimize_use_projections = 1" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_sort_proj WHERE y > 4294967286 order by x FORMAT JSON - SETTINGS optimize_move_to_prewhere = 1, allow_experimental_projection_optimization = 1" | grep rows_read + SETTINGS optimize_move_to_prewhere = 1, optimize_use_projections = 1" | grep rows_read $CLICKHOUSE_CLIENT -q "ALTER TABLE test_sort_proj DELETE WHERE x % 2 = 0 SETTINGS mutations_sync=2;" $CLICKHOUSE_CLIENT -q "SELECT count() from test_sort_proj;" diff --git a/tests/queries/0_stateless/01710_projection_aggregate_functions_null_for_empty.reference b/tests/queries/0_stateless/01710_projection_aggregate_functions_null_for_empty.reference new file mode 100644 index 00000000000..f2a527c4d8d --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_aggregate_functions_null_for_empty.reference @@ -0,0 +1 @@ +1554690688 diff --git a/tests/queries/0_stateless/01710_projection_aggregate_functions_null_for_empty.sql b/tests/queries/0_stateless/01710_projection_aggregate_functions_null_for_empty.sql new file mode 100644 index 00000000000..a77720b6580 --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_aggregate_functions_null_for_empty.sql @@ -0,0 +1,8 @@ +DROP TABLE IF EXISTS t1; + +CREATE TABLE t1 (c0 Int32, PRIMARY KEY (c0)) ENGINE=MergeTree; +INSERT INTO t1 VALUES (1554690688); + +SELECT MIN(t1.c0) FROM t1 SETTINGS aggregate_functions_null_for_empty = 1; + +DROP TABLE IF EXISTS t1; diff --git a/tests/queries/0_stateless/01710_projection_aggregation_in_order.sql 
b/tests/queries/0_stateless/01710_projection_aggregation_in_order.sql index 31d32da0ed3..e4fb1816c89 100644 --- a/tests/queries/0_stateless/01710_projection_aggregation_in_order.sql +++ b/tests/queries/0_stateless/01710_projection_aggregation_in_order.sql @@ -1,3 +1,5 @@ +-- Tags: disabled +-- FIXME https://github.com/ClickHouse/ClickHouse/issues/49552 -- Test that check the correctness of the result for optimize_aggregation_in_order and projections, -- not that this optimization will take place. @@ -21,12 +23,12 @@ ENGINE = MergeTree ORDER BY (key, ts); INSERT INTO normal SELECT - 1, + number, toDateTime('2021-12-06 00:00:00') + number, number FROM numbers(100000); -SET allow_experimental_projection_optimization=1, optimize_aggregation_in_order=1, force_optimize_projection=1; +SET optimize_use_projections=1, optimize_aggregation_in_order=1, force_optimize_projection=1; WITH toStartOfHour(ts) AS a SELECT sum(value) v FROM normal WHERE ts > '2021-12-06 22:00:00' GROUP BY a ORDER BY v LIMIT 5; WITH toStartOfHour(ts) AS a SELECT sum(value) v FROM normal WHERE ts > '2021-12-06 22:00:00' GROUP BY toStartOfHour(ts), a ORDER BY v LIMIT 5; @@ -58,7 +60,7 @@ INSERT INTO agg SELECT number FROM numbers(100000); -SET allow_experimental_projection_optimization=1, optimize_aggregation_in_order=1, force_optimize_projection = 1; +SET optimize_use_projections=1, optimize_aggregation_in_order=1, force_optimize_projection = 1; WITH toStartOfHour(ts) AS a SELECT sum(value) v FROM agg WHERE ts > '2021-12-06 22:00:00' GROUP BY a ORDER BY v LIMIT 5; WITH toStartOfHour(ts) AS a SELECT sum(value) v FROM agg WHERE ts > '2021-12-06 22:00:00' GROUP BY toStartOfHour(ts), a ORDER BY v LIMIT 5; diff --git a/tests/queries/0_stateless/01710_projection_array_join.sql b/tests/queries/0_stateless/01710_projection_array_join.sql index cd18d9282b9..509e053e5bf 100644 --- a/tests/queries/0_stateless/01710_projection_array_join.sql +++ b/tests/queries/0_stateless/01710_projection_array_join.sql @@ -1,4 +1,4 @@ -set allow_experimental_projection_optimization = 1; +set optimize_use_projections = 1; drop table if exists x; diff --git a/tests/queries/0_stateless/01710_projection_detach_part.sql b/tests/queries/0_stateless/01710_projection_detach_part.sql index d28c0848d42..c77a2abfbf9 100644 --- a/tests/queries/0_stateless/01710_projection_detach_part.sql +++ b/tests/queries/0_stateless/01710_projection_detach_part.sql @@ -1,4 +1,4 @@ -set allow_experimental_projection_optimization = 1; +set optimize_use_projections = 1; drop table if exists t; diff --git a/tests/queries/0_stateless/01710_projection_fetch_long.reference b/tests/queries/0_stateless/01710_projection_fetch_long.reference index abce5410b26..c7834c75d02 100644 --- a/tests/queries/0_stateless/01710_projection_fetch_long.reference +++ b/tests/queries/0_stateless/01710_projection_fetch_long.reference @@ -10,8 +10,8 @@ 3 3 4 4 0 -CREATE TABLE default.tp_2\n(\n `x` Int32,\n `y` Int32,\n PROJECTION p\n (\n SELECT \n x,\n y\n ORDER BY x\n ),\n PROJECTION pp\n (\n SELECT \n x,\n count()\n GROUP BY x\n )\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/{shard}/01710_projection_fetch_default\', \'2_{replica}\')\nORDER BY y\nSETTINGS min_rows_for_compact_part = 2, min_rows_for_wide_part = 4, min_bytes_for_compact_part = 16, min_bytes_for_wide_part = 32, index_granularity = 8192 +CREATE TABLE default.tp_2\n(\n `x` Int32,\n `y` Int32,\n PROJECTION p\n (\n SELECT \n x,\n y\n ORDER BY x\n ),\n PROJECTION pp\n (\n SELECT \n x,\n count()\n GROUP BY x\n )\n)\nENGINE = 
ReplicatedMergeTree(\'/clickhouse/tables/{shard}/01710_projection_fetch_default\', \'2_{replica}\')\nORDER BY y\nSETTINGS min_rows_for_wide_part = 4, min_bytes_for_wide_part = 32, index_granularity = 8192 2 -CREATE TABLE default.tp_2\n(\n `x` Int32,\n `y` Int32,\n PROJECTION p\n (\n SELECT \n x,\n y\n ORDER BY x\n ),\n PROJECTION pp\n (\n SELECT \n x,\n count()\n GROUP BY x\n )\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/{shard}/01710_projection_fetch_default\', \'2_{replica}\')\nORDER BY y\nSETTINGS min_rows_for_compact_part = 2, min_rows_for_wide_part = 4, min_bytes_for_compact_part = 16, min_bytes_for_wide_part = 32, index_granularity = 8192 -CREATE TABLE default.tp_2\n(\n `x` Int32,\n `y` Int32,\n PROJECTION p\n (\n SELECT \n x,\n y\n ORDER BY x\n ),\n PROJECTION pp\n (\n SELECT \n x,\n count()\n GROUP BY x\n )\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/{shard}/01710_projection_fetch_default\', \'2_{replica}\')\nORDER BY y\nSETTINGS min_rows_for_compact_part = 2, min_rows_for_wide_part = 4, min_bytes_for_compact_part = 16, min_bytes_for_wide_part = 32, index_granularity = 8192 -CREATE TABLE default.tp_2\n(\n `x` Int32,\n `y` Int32,\n PROJECTION p\n (\n SELECT \n x,\n y\n ORDER BY x\n )\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/{shard}/01710_projection_fetch_default\', \'2_{replica}\')\nORDER BY y\nSETTINGS min_rows_for_compact_part = 2, min_rows_for_wide_part = 4, min_bytes_for_compact_part = 16, min_bytes_for_wide_part = 32, index_granularity = 8192 +CREATE TABLE default.tp_2\n(\n `x` Int32,\n `y` Int32,\n PROJECTION p\n (\n SELECT \n x,\n y\n ORDER BY x\n ),\n PROJECTION pp\n (\n SELECT \n x,\n count()\n GROUP BY x\n )\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/{shard}/01710_projection_fetch_default\', \'2_{replica}\')\nORDER BY y\nSETTINGS min_rows_for_wide_part = 4, min_bytes_for_wide_part = 32, index_granularity = 8192 +CREATE TABLE default.tp_2\n(\n `x` Int32,\n `y` Int32,\n PROJECTION p\n (\n SELECT \n x,\n y\n ORDER BY x\n ),\n PROJECTION pp\n (\n SELECT \n x,\n count()\n GROUP BY x\n )\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/{shard}/01710_projection_fetch_default\', \'2_{replica}\')\nORDER BY y\nSETTINGS min_rows_for_wide_part = 4, min_bytes_for_wide_part = 32, index_granularity = 8192 +CREATE TABLE default.tp_2\n(\n `x` Int32,\n `y` Int32,\n PROJECTION p\n (\n SELECT \n x,\n y\n ORDER BY x\n )\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/{shard}/01710_projection_fetch_default\', \'2_{replica}\')\nORDER BY y\nSETTINGS min_rows_for_wide_part = 4, min_bytes_for_wide_part = 32, index_granularity = 8192 diff --git a/tests/queries/0_stateless/01710_projection_fetch_long.sql b/tests/queries/0_stateless/01710_projection_fetch_long.sql index 6c41c69254e..13f7a293934 100644 --- a/tests/queries/0_stateless/01710_projection_fetch_long.sql +++ b/tests/queries/0_stateless/01710_projection_fetch_long.sql @@ -3,9 +3,9 @@ drop table if exists tp_1; drop table if exists tp_2; -create table tp_1 (x Int32, y Int32, projection p (select x, y order by x)) engine = ReplicatedMergeTree('/clickhouse/tables/{shard}/01710_projection_fetch_' || currentDatabase(), '1_{replica}') order by y settings min_rows_for_compact_part = 2, min_rows_for_wide_part = 4, min_bytes_for_compact_part = 16, min_bytes_for_wide_part = 32; +create table tp_1 (x Int32, y Int32, projection p (select x, y order by x)) engine = ReplicatedMergeTree('/clickhouse/tables/{shard}/01710_projection_fetch_' || currentDatabase(), '1_{replica}') order by y settings 
min_rows_for_wide_part = 4, min_bytes_for_wide_part = 32; -create table tp_2 (x Int32, y Int32, projection p (select x, y order by x)) engine = ReplicatedMergeTree('/clickhouse/tables/{shard}/01710_projection_fetch_' || currentDatabase(), '2_{replica}') order by y settings min_rows_for_compact_part = 2, min_rows_for_wide_part = 4, min_bytes_for_compact_part = 16, min_bytes_for_wide_part = 32; +create table tp_2 (x Int32, y Int32, projection p (select x, y order by x)) engine = ReplicatedMergeTree('/clickhouse/tables/{shard}/01710_projection_fetch_' || currentDatabase(), '2_{replica}') order by y settings min_rows_for_wide_part = 4, min_bytes_for_wide_part = 32; insert into tp_1 select number, number from numbers(3); diff --git a/tests/queries/0_stateless/01710_projection_in_index.sql b/tests/queries/0_stateless/01710_projection_in_index.sql index 87f5e79e37e..b858418584c 100644 --- a/tests/queries/0_stateless/01710_projection_in_index.sql +++ b/tests/queries/0_stateless/01710_projection_in_index.sql @@ -4,7 +4,7 @@ create table t (i int, j int, k int, projection p (select * order by j)) engine insert into t select number, number, number from numbers(10); -set allow_experimental_projection_optimization = 1, max_rows_to_read = 3; +set optimize_use_projections = 1, max_rows_to_read = 3; select * from t where i < 5 and j in (1, 2); diff --git a/tests/queries/0_stateless/01710_projection_in_set.sql b/tests/queries/0_stateless/01710_projection_in_set.sql index 5bc84645cec..3d1abd0c09f 100644 --- a/tests/queries/0_stateless/01710_projection_in_set.sql +++ b/tests/queries/0_stateless/01710_projection_in_set.sql @@ -3,7 +3,7 @@ create table x (i UInt64, j UInt64, k UInt64, projection agg (select sum(j), avg insert into x values (1, 2, 3); -set allow_experimental_projection_optimization = 1, use_index_for_in_with_subqueries = 0; +set optimize_use_projections = 1, use_index_for_in_with_subqueries = 0; select sum(j), avg(k) from x where i in (select number from numbers(4)); diff --git a/tests/queries/0_stateless/01710_projection_mutation.sql b/tests/queries/0_stateless/01710_projection_mutation.sql index 30e84f23a53..d963cde7409 100644 --- a/tests/queries/0_stateless/01710_projection_mutation.sql +++ b/tests/queries/0_stateless/01710_projection_mutation.sql @@ -4,6 +4,6 @@ CREATE TABLE t (`key` UInt32, `created_at` Date, `value` UInt32, PROJECTION xxx INSERT INTO t SELECT 1 AS key, today() + (number % 30), number FROM numbers(1000); -ALTER TABLE t UPDATE value = 0 WHERE (value > 0) AND (created_at >= '2021-12-21') SETTINGS allow_experimental_projection_optimization = 1; +ALTER TABLE t UPDATE value = 0 WHERE (value > 0) AND (created_at >= '2021-12-21') SETTINGS optimize_use_projections = 1; DROP TABLE IF EXISTS t; diff --git a/tests/queries/0_stateless/01710_projection_optimize_materialize.sql b/tests/queries/0_stateless/01710_projection_optimize_materialize.sql index d8251aabaf6..e704c3e5610 100644 --- a/tests/queries/0_stateless/01710_projection_optimize_materialize.sql +++ b/tests/queries/0_stateless/01710_projection_optimize_materialize.sql @@ -1,6 +1,7 @@ +-- Tags: no-random-merge-tree-settings drop table if exists z; -create table z (pk Int64, d Date, id UInt64, c UInt64) Engine MergeTree partition by d order by pk ; +create table z (pk Int64, d Date, id UInt64, c UInt64) Engine MergeTree partition by d order by pk settings ratio_of_defaults_for_sparse_serialization = 1.0; insert into z select number, '2021-10-24', intDiv (number, 10000), 1 from numbers(1000000); optimize table z final; diff 
--git a/tests/queries/0_stateless/01710_projection_part_check.sql b/tests/queries/0_stateless/01710_projection_part_check.sql index c889bd323a7..b15d9d7525e 100644 --- a/tests/queries/0_stateless/01710_projection_part_check.sql +++ b/tests/queries/0_stateless/01710_projection_part_check.sql @@ -1,6 +1,6 @@ drop table if exists tp; -create table tp (x Int32, y Int32, projection p (select x, y order by x)) engine = MergeTree order by y settings min_rows_for_compact_part = 2, min_rows_for_wide_part = 4, min_bytes_for_compact_part = 16, min_bytes_for_wide_part = 32; +create table tp (x Int32, y Int32, projection p (select x, y order by x)) engine = MergeTree order by y settings min_rows_for_wide_part = 4, min_bytes_for_wide_part = 32; insert into tp select number, number from numbers(3); insert into tp select number, number from numbers(5); diff --git a/tests/queries/0_stateless/01710_projection_vertical_merges.sql b/tests/queries/0_stateless/01710_projection_vertical_merges.sql index e8a4a384017..2c4378bb7a4 100644 --- a/tests/queries/0_stateless/01710_projection_vertical_merges.sql +++ b/tests/queries/0_stateless/01710_projection_vertical_merges.sql @@ -12,7 +12,7 @@ optimize table t final; alter table t materialize projection p_norm settings mutations_sync = 1; -set allow_experimental_projection_optimization = 1, max_rows_to_read = 3; +set optimize_use_projections = 1, max_rows_to_read = 3; select c18 from t where c1 < 0; diff --git a/tests/queries/0_stateless/01710_projection_with_joins.sql b/tests/queries/0_stateless/01710_projection_with_joins.sql index 5dac2f05da9..94c15d00e2a 100644 --- a/tests/queries/0_stateless/01710_projection_with_joins.sql +++ b/tests/queries/0_stateless/01710_projection_with_joins.sql @@ -2,20 +2,20 @@ drop table if exists t; create table t (s UInt16, l UInt16, projection p (select s, l order by l)) engine MergeTree order by s; -select s from t join (select toUInt16(1) as s) x using (s) order by s settings allow_experimental_projection_optimization = 1; -select s from t join (select toUInt16(1) as s) x using (s) order by s settings allow_experimental_projection_optimization = 0; +select s from t join (select toUInt16(1) as s) x using (s) order by s settings optimize_use_projections = 1; +select s from t join (select toUInt16(1) as s) x using (s) order by s settings optimize_use_projections = 0; drop table t; drop table if exists mt; create table mt (id1 Int8, id2 Int8) Engine=MergeTree order by tuple(); -select id1 as alias1 from mt all inner join (select id2 as alias1 from mt) as t using (alias1) order by id1 settings allow_experimental_projection_optimization = 1; -select id1 from mt all inner join (select id2 as id1 from mt) as t using (id1) order by id1 settings allow_experimental_projection_optimization = 1; -select id2 as id1 from mt all inner join (select id1 from mt) as t using (id1) order by id1 settings allow_experimental_projection_optimization = 1; +select alias1 from (select id1, id1 as alias1 from mt) as l all inner join (select id2 as alias1 from mt) as t using (alias1) order by l.id1 settings optimize_use_projections = 1; +select id1 from mt all inner join (select id2 as id1 from mt) as t using (id1) order by id1 settings optimize_use_projections = 1; +select id2 as id1 from mt all inner join (select id1 from mt) as t using (id1) order by id1 settings optimize_use_projections = 1; drop table mt; drop table if exists j; create table j (id1 Int8, id2 Int8, projection p (select id1, id2 order by id2)) Engine=MergeTree order by id1 settings 
index_granularity = 1; insert into j select number, number from numbers(10); -select id1 as alias1 from j all inner join (select id2 as alias1 from j where id2 in (1, 2, 3)) as t using (alias1) where id2 in (2, 3, 4) order by id1 settings allow_experimental_projection_optimization = 1; +select alias1 from (select id1, id1 as alias1 from j) as l all inner join (select id2, id2 as alias1 from j where id2 in (1, 2, 3)) as t using (alias1) where id2 in (2, 3, 4) order by id1 settings optimize_use_projections = 1; drop table j; diff --git a/tests/queries/0_stateless/01710_projection_with_mixed_pipeline.sql b/tests/queries/0_stateless/01710_projection_with_mixed_pipeline.sql index 734aa659146..877fca4590d 100644 --- a/tests/queries/0_stateless/01710_projection_with_mixed_pipeline.sql +++ b/tests/queries/0_stateless/01710_projection_with_mixed_pipeline.sql @@ -4,6 +4,6 @@ create table t (x UInt32) engine = MergeTree order by tuple() settings index_gra insert into t select number from numbers(100); alter table t add projection p (select uniqHLL12(x)); insert into t select number + 100 from numbers(100); -select uniqHLL12(x) from t settings allow_experimental_projection_optimization = 1, max_bytes_to_read=400, max_block_size=8; -- { serverError 307; } +select uniqHLL12(x) from t settings optimize_use_projections = 1, max_bytes_to_read=400, max_block_size=8; -- { serverError 307 } drop table if exists t; diff --git a/tests/queries/0_stateless/01710_projections.sql b/tests/queries/0_stateless/01710_projections.sql index cbabd3ec598..a96339e30fa 100644 --- a/tests/queries/0_stateless/01710_projections.sql +++ b/tests/queries/0_stateless/01710_projections.sql @@ -1,13 +1,13 @@ drop table if exists projection_test; -create table projection_test (`sum(block_count)` UInt64, domain_alias UInt64 alias length(domain), datetime DateTime, domain LowCardinality(String), x_id String, y_id String, block_count Int64, retry_count Int64, duration Int64, kbytes Int64, buffer_time Int64, first_time Int64, total_bytes Nullable(UInt64), valid_bytes Nullable(UInt64), completed_bytes Nullable(UInt64), fixed_bytes Nullable(UInt64), force_bytes Nullable(UInt64), projection p (select toStartOfMinute(datetime) dt_m, countIf(first_time = 0) / count(), avg((kbytes * 8) / duration), count(), sum(block_count) / sum(duration), avg(block_count / duration), sum(buffer_time) / sum(duration), avg(buffer_time / duration), sum(valid_bytes) / sum(total_bytes), sum(completed_bytes) / sum(total_bytes), sum(fixed_bytes) / sum(total_bytes), sum(force_bytes) / sum(total_bytes), sum(valid_bytes) / sum(total_bytes), sum(retry_count) / sum(duration), avg(retry_count / duration), countIf(block_count > 0) / count(), countIf(first_time = 0) / count(), uniqHLL12(x_id), uniqHLL12(y_id) group by dt_m, domain)) engine MergeTree partition by toDate(datetime) order by (toStartOfTenMinutes(datetime), domain); +create table projection_test (`sum(block_count)` UInt64, domain_alias UInt64 alias length(domain), datetime DateTime, domain LowCardinality(String), x_id String, y_id String, block_count Int64, retry_count Int64, duration Int64, kbytes Int64, buffer_time Int64, first_time Int64, total_bytes Nullable(UInt64), valid_bytes Nullable(UInt64), completed_bytes Nullable(UInt64), fixed_bytes Nullable(UInt64), force_bytes Nullable(UInt64), projection p (select toStartOfMinute(datetime) dt_m, countIf(first_time = 0) / count(), avg((kbytes * 8) / duration), count(), sum(block_count) / sum(duration), avg(block_count / duration), sum(buffer_time) / sum(duration), 
avg(buffer_time / duration), sum(valid_bytes) / sum(total_bytes), sum(completed_bytes) / sum(total_bytes), sum(fixed_bytes) / sum(total_bytes), sum(force_bytes) / sum(total_bytes), sum(valid_bytes) / sum(total_bytes), sum(retry_count) / sum(duration), avg(retry_count / duration), countIf(block_count > 0) / count(), countIf(first_time = 0) / count(), uniqHLL12(x_id), uniqHLL12(y_id) group by dt_m, domain)) engine MergeTree partition by toDate(datetime) order by (toStartOfTenMinutes(datetime), domain) settings index_granularity_bytes = 10000000; insert into projection_test with rowNumberInAllBlocks() as id select 1, toDateTime('2020-10-24 00:00:00') + (id / 20), toString(id % 100), * from generateRandom('x_id String, y_id String, block_count Int64, retry_count Int64, duration Int64, kbytes Int64, buffer_time Int64, first_time Int64, total_bytes Nullable(UInt64), valid_bytes Nullable(UInt64), completed_bytes Nullable(UInt64), fixed_bytes Nullable(UInt64), force_bytes Nullable(UInt64)', 10, 10, 1) limit 1000 settings max_threads = 1; -set allow_experimental_projection_optimization = 1, force_optimize_projection = 1; +set optimize_use_projections = 1, force_optimize_projection = 1; select * from projection_test; -- { serverError 584 } -select toStartOfMinute(datetime) dt_m, countIf(first_time = 0) from projection_test join (select 1) x using (1) where domain = '1' group by dt_m order by dt_m; -- { serverError 584 } +select toStartOfMinute(datetime) dt_m, countIf(first_time = 0) from projection_test join (select 1) x on 1 where domain = '1' group by dt_m order by dt_m; -- { serverError 584 } select toStartOfMinute(datetime) dt_m, countIf(first_time = 0) / count(), avg((kbytes * 8) / duration) from projection_test where domain = '1' group by dt_m order by dt_m; @@ -39,7 +39,7 @@ select toStartOfMinute(datetime) dt_m, domain, sum(retry_count) / sum(duration), select toStartOfHour(toStartOfMinute(datetime)) dt_h, uniqHLL12(x_id), uniqHLL12(y_id) from projection_test group by dt_h order by dt_h; -- found by fuzzer -SET enable_positional_arguments = 0; +SET enable_positional_arguments = 0, force_optimize_projection = 0; SELECT 2, -1 FROM projection_test PREWHERE domain_alias = 1. 
WHERE domain = NULL GROUP BY -9223372036854775808 ORDER BY countIf(first_time = 0) / count(-2147483649) DESC NULLS LAST, 1048576 DESC NULLS LAST; drop table if exists projection_test; @@ -47,6 +47,6 @@ drop table if exists projection_test; drop table if exists projection_without_key; create table projection_without_key (key UInt32, PROJECTION x (SELECT max(key))) engine MergeTree order by key; insert into projection_without_key select number from numbers(1000); -set force_optimize_projection = 1, allow_experimental_projection_optimization = 1; +set force_optimize_projection = 1, optimize_use_projections = 1; select max(key) from projection_without_key; drop table projection_without_key; diff --git a/tests/queries/0_stateless/00970_live_view_watch_events_http_heartbeat.reference b/tests/queries/0_stateless/01710_projections_and_duplicate_columms.reference similarity index 100% rename from tests/queries/0_stateless/00970_live_view_watch_events_http_heartbeat.reference rename to tests/queries/0_stateless/01710_projections_and_duplicate_columms.reference diff --git a/tests/queries/0_stateless/01710_projections_and_duplicate_columms.sql b/tests/queries/0_stateless/01710_projections_and_duplicate_columms.sql new file mode 100644 index 00000000000..74a7aea418a --- /dev/null +++ b/tests/queries/0_stateless/01710_projections_and_duplicate_columms.sql @@ -0,0 +1,8 @@ +drop table if exists projection_test__fuzz_0; +set allow_suspicious_low_cardinality_types=1; + +CREATE TABLE projection_test__fuzz_0 (`sum(block_count)` UInt64, `domain_alias` UInt64 ALIAS length(domain), `datetime` DateTime, `domain` LowCardinality(String), `x_id` String, `y_id` String, `block_count` Int64, `retry_count` Int64, `duration` Decimal(76, 13), `kbytes` LowCardinality(Int64), `buffer_time` Int64, `first_time` UInt256, `total_bytes` LowCardinality(Nullable(UInt64)), `valid_bytes` Nullable(UInt64), `completed_bytes` Nullable(UInt64), `fixed_bytes` LowCardinality(Nullable(UInt64)), `force_bytes` Int256, PROJECTION p (SELECT toStartOfMinute(datetime) AS dt_m, countIf(first_time = 0) / count(), avg((kbytes * 8) / duration), count(), sum(block_count) / sum(duration), avg(block_count / duration), sum(buffer_time) / sum(duration), avg(buffer_time / duration), sum(valid_bytes) / sum(total_bytes), sum(completed_bytes) / sum(total_bytes), sum(fixed_bytes) / sum(total_bytes), sum(force_bytes) / sum(total_bytes), sum(valid_bytes) / sum(total_bytes), sum(retry_count) / sum(duration), avg(retry_count / duration), countIf(block_count > 0) / count(), countIf(first_time = 0) / count(), uniqHLL12(x_id), uniqHLL12(y_id) GROUP BY dt_m, domain)) ENGINE = MergeTree PARTITION BY toDate(datetime) ORDER BY (toStartOfTenMinutes(datetime), domain) SETTINGS index_granularity_bytes = 10000000; +INSERT INTO projection_test__fuzz_0 SETTINGS max_threads = 1 WITH rowNumberInAllBlocks() AS id SELECT 1, toDateTime('2020-10-24 00:00:00') + (id / 20), toString(id % 100), * FROM generateRandom('x_id String, y_id String, block_count Int64, retry_count Int64, duration Int64, kbytes Int64, buffer_time Int64, first_time Int64, total_bytes Nullable(UInt64), valid_bytes Nullable(UInt64), completed_bytes Nullable(UInt64), fixed_bytes Nullable(UInt64), force_bytes Nullable(UInt64)', 10, 10, 1) LIMIT 1000 SETTINGS max_threads = 1; +SELECT '-21474836.48', 10000000000., '', count(kbytes), '', 10.0001, toStartOfMinute(datetime) AS dt_m, 10, NULL FROM projection_test__fuzz_0 GROUP BY dt_m WITH ROLLUP WITH TOTALS ORDER BY count(retry_count / duration) ASC NULLS LAST, 
100000000000000000000. ASC NULLS FIRST format Null; + +drop table projection_test__fuzz_0; diff --git a/tests/queries/0_stateless/01710_projections_group_by_no_key.sql b/tests/queries/0_stateless/01710_projections_group_by_no_key.sql index eefc03afb7a..98545bdd05a 100644 --- a/tests/queries/0_stateless/01710_projections_group_by_no_key.sql +++ b/tests/queries/0_stateless/01710_projections_group_by_no_key.sql @@ -2,7 +2,7 @@ drop table if exists projection_without_key; create table projection_without_key (key UInt32, PROJECTION x (SELECT sum(key) group by key % 3)) engine MergeTree order by key; insert into projection_without_key select number from numbers(1000); -select sum(key) from projection_without_key settings allow_experimental_projection_optimization = 1; -select sum(key) from projection_without_key settings allow_experimental_projection_optimization = 0; +select sum(key) from projection_without_key settings optimize_use_projections = 1; +select sum(key) from projection_without_key settings optimize_use_projections = 0; drop table projection_without_key; diff --git a/tests/queries/0_stateless/01710_projections_in_distributed_query.sql b/tests/queries/0_stateless/01710_projections_in_distributed_query.sql index fa734b605cd..616bf3b5505 100644 --- a/tests/queries/0_stateless/01710_projections_in_distributed_query.sql +++ b/tests/queries/0_stateless/01710_projections_in_distributed_query.sql @@ -1,12 +1,14 @@ -- Tags: distributed +set enable_memory_bound_merging_of_aggregation_results=0; + drop table if exists projection_test; create table projection_test (dt DateTime, cost Int64, projection p (select toStartOfMinute(dt) dt_m, sum(cost) group by dt_m)) engine MergeTree partition by toDate(dt) order by dt; insert into projection_test with rowNumberInAllBlocks() as id select toDateTime('2020-10-24 00:00:00') + (id / 20), * from generateRandom('cost Int64', 10, 10, 1) limit 1000 settings max_threads = 1; -set allow_experimental_projection_optimization = 1, force_optimize_projection = 1; +set optimize_use_projections = 1, force_optimize_projection = 1; select toStartOfMinute(dt) dt_m, sum(cost) from projection_test group by dt_m; select sum(cost) from projection_test; diff --git a/tests/queries/0_stateless/01710_projections_optimize_aggregation_in_order.sh b/tests/queries/0_stateless/01710_projections_optimize_aggregation_in_order.sh index 2c6a6ef35eb..a166837e01a 100755 --- a/tests/queries/0_stateless/01710_projections_optimize_aggregation_in_order.sh +++ b/tests/queries/0_stateless/01710_projections_optimize_aggregation_in_order.sh @@ -38,7 +38,7 @@ function run_query() echo "$query" local opts=( - --allow_experimental_projection_optimization 1 + --optimize_use_projections 1 --force_optimize_projection 1 --log_processors_profiles 1 --query_id "$query_id" diff --git a/tests/queries/0_stateless/01710_projections_partial_optimize_aggregation_in_order.sh b/tests/queries/0_stateless/01710_projections_partial_optimize_aggregation_in_order.sh index 5a9c480c78c..ee73974e8a4 100755 --- a/tests/queries/0_stateless/01710_projections_partial_optimize_aggregation_in_order.sh +++ b/tests/queries/0_stateless/01710_projections_partial_optimize_aggregation_in_order.sh @@ -44,7 +44,7 @@ function run_query() echo "$query" local opts=( - --allow_experimental_projection_optimization 1 + --optimize_use_projections 1 --force_optimize_projection 1 --log_processors_profiles 1 --query_id "$query_id" diff --git a/tests/queries/0_stateless/01715_table_function_view_fix.sql 
b/tests/queries/0_stateless/01715_table_function_view_fix.sql index b96609391b5..5c24131b438 100644 --- a/tests/queries/0_stateless/01715_table_function_view_fix.sql +++ b/tests/queries/0_stateless/01715_table_function_view_fix.sql @@ -1,3 +1,3 @@ SELECT view(SELECT 1); -- { clientError 62 } -SELECT sumIf(dummy, dummy) FROM remote('127.0.0.{1,2}', numbers(2, 100), view(SELECT CAST(NULL, 'Nullable(UInt8)') AS dummy FROM system.one)); -- { serverError 183 } +SELECT sumIf(dummy, dummy) FROM remote('127.0.0.{1,2}', numbers(2, 100), view(SELECT CAST(NULL, 'Nullable(UInt8)') AS dummy FROM system.one)); -- { serverError UNKNOWN_FUNCTION } diff --git a/tests/queries/0_stateless/01720_join_implicit_cast.sql.j2 b/tests/queries/0_stateless/01720_join_implicit_cast.sql.j2 index f7439c8c56c..b08bcd3330e 100644 --- a/tests/queries/0_stateless/01720_join_implicit_cast.sql.j2 +++ b/tests/queries/0_stateless/01720_join_implicit_cast.sql.j2 @@ -47,7 +47,6 @@ SELECT any(toTypeName(a)) == 'Int32' AND any(toTypeName(b)) == 'Nullable(Int64)' SELECT any(toTypeName(a)) == 'Int32' AND any(toTypeName(b)) == 'Nullable(Int64)' FROM t_ab1 RIGHT JOIN t_ab2 USING (a, b); SELECT any(toTypeName(a)) == 'Int32' AND any(toTypeName(b)) == 'Nullable(Int64)' FROM t_ab1 INNER JOIN t_ab2 USING (a, b); -SELECT * FROM ( SELECT a, b as "_CAST(a, Int32)" FROM t_ab1 ) t_ab1 FULL JOIN t_ab2 ON (t_ab1.a == t_ab2.a); -- { serverError 44 } SELECT * FROM ( SELECT a, b as "_CAST(a, Int32)" FROM t_ab1 ) t_ab1 FULL JOIN t_ab2 USING (a) FORMAT Null; {% endfor %} diff --git a/tests/queries/0_stateless/01721_join_implicit_cast_long.reference.j2 b/tests/queries/0_stateless/01721_join_implicit_cast_long.reference.j2 index e9f32087439..ae43aa7195c 100644 --- a/tests/queries/0_stateless/01721_join_implicit_cast_long.reference.j2 +++ b/tests/queries/0_stateless/01721_join_implicit_cast_long.reference.j2 @@ -1,7 +1,6 @@ {% for join_algorithm in ['hash', 'partial_merge', 'auto', 'full_sorting_merge', 'grace_hash'] -%} === {{ join_algorithm }} === = full = -{% if join_algorithm not in ['grace_hash'] -%} -4 0 196 -3 0 197 -2 0 198 @@ -17,7 +16,6 @@ 8 108 \N 9 109 \N 10 110 \N -{% endif -%} = left = 1 101 201 2 102 202 @@ -30,7 +28,6 @@ 9 109 \N 10 110 \N = right = -{% if join_algorithm not in ['grace_hash'] -%} -4 0 196 -3 0 197 -2 0 198 @@ -41,7 +38,6 @@ 3 103 203 4 104 204 5 105 205 -{% endif -%} = inner = 1 101 201 2 102 202 @@ -49,7 +45,6 @@ 4 104 204 5 105 205 = full = -{% if join_algorithm not in ['grace_hash'] -%} 0 0 -4 0 0 -3 0 0 -2 @@ -65,7 +60,6 @@ 8 8 0 9 9 0 10 10 0 -{% endif -%} = left = 1 1 1 2 2 2 @@ -78,7 +72,6 @@ 9 9 0 10 10 0 = right = -{% if join_algorithm not in ['grace_hash'] -%} 0 0 -4 0 0 -3 0 0 -2 @@ -89,7 +82,6 @@ 3 3 3 4 4 4 5 5 5 -{% endif -%} = inner = 1 1 1 2 2 2 @@ -98,7 +90,6 @@ 5 5 5 = join on = = full = -{% if join_algorithm not in ['grace_hash'] -%} 0 0 -4 196 0 0 -3 197 0 0 -2 198 @@ -114,7 +105,6 @@ 8 108 0 \N 9 109 0 \N 10 110 0 \N -{% endif -%} = left = 1 101 1 201 2 102 2 202 @@ -127,7 +117,6 @@ 9 109 0 \N 10 110 0 \N = right = -{% if join_algorithm not in ['grace_hash'] -%} 0 0 -4 196 0 0 -3 197 0 0 -2 198 @@ -138,7 +127,6 @@ 3 103 3 203 4 104 4 204 5 105 5 205 -{% endif -%} = inner = 1 101 1 201 2 102 2 202 @@ -146,7 +134,6 @@ 4 104 4 204 5 105 5 205 = full = -{% if join_algorithm not in ['grace_hash'] -%} 0 0 -4 196 0 0 -3 197 0 0 -2 198 @@ -162,7 +149,6 @@ 8 108 0 \N 9 109 0 \N 10 110 0 \N -{% endif -%} = left = 1 101 1 201 2 102 2 202 @@ -175,7 +161,6 @@ 9 109 0 \N 10 110 0 \N = right = -{% if join_algorithm 
not in ['grace_hash'] -%} 0 0 -4 196 0 0 -3 197 0 0 -2 198 @@ -186,7 +171,6 @@ 3 103 3 203 4 104 4 204 5 105 5 205 -{% endif -%} = inner = 1 101 1 201 2 102 2 202 @@ -196,7 +180,6 @@ = agg = 1 1 -{% if join_algorithm not in ['grace_hash'] -%} 1 1 1 @@ -205,13 +188,11 @@ 1 55 1055 0 0 -10 0 990 1 55 15 1055 1015 -{% endif -%} = types = 1 1 1 1 -{% if join_algorithm not in ['grace_hash'] -%} 1 1 1 @@ -219,11 +200,9 @@ 1 1 1 -{% endif -%} {% if join_algorithm not in ['full_sorting_merge'] -%} === join use nulls === = full = -{% if join_algorithm not in ['grace_hash'] -%} -4 \N 196 -3 \N 197 -2 \N 198 @@ -239,7 +218,6 @@ 8 108 \N 9 109 \N 10 110 \N -{% endif -%} = left = 1 101 201 2 102 202 @@ -252,7 +230,6 @@ 9 109 \N 10 110 \N = right = -{% if join_algorithm not in ['grace_hash'] -%} -4 \N 196 -3 \N 197 -2 \N 198 @@ -263,7 +240,6 @@ 3 103 203 4 104 204 5 105 205 -{% endif -%} = inner = 1 101 201 2 102 202 @@ -271,7 +247,6 @@ 4 104 204 5 105 205 = full = -{% if join_algorithm not in ['grace_hash'] -%} 1 1 1 2 2 2 3 3 3 @@ -287,7 +262,6 @@ \N \N -2 \N \N -1 \N \N 0 -{% endif -%} = left = 1 1 1 2 2 2 @@ -300,7 +274,6 @@ 9 9 \N 10 10 \N = right = -{% if join_algorithm not in ['grace_hash'] -%} 1 1 1 2 2 2 3 3 3 @@ -311,7 +284,6 @@ \N \N -2 \N \N -1 \N \N 0 -{% endif -%} = inner = 1 1 1 2 2 2 @@ -320,7 +292,6 @@ 5 5 5 = join on = = full = -{% if join_algorithm not in ['grace_hash'] -%} 1 101 1 201 2 102 2 202 3 103 3 203 @@ -336,7 +307,6 @@ \N \N -2 198 \N \N -1 199 \N \N 0 200 -{% endif -%} = left = 1 101 1 201 2 102 2 202 @@ -349,7 +319,6 @@ 9 109 \N \N 10 110 \N \N = right = -{% if join_algorithm not in ['grace_hash'] -%} 1 101 1 201 2 102 2 202 3 103 3 203 @@ -360,7 +329,6 @@ \N \N -2 198 \N \N -1 199 \N \N 0 200 -{% endif -%} = inner = 1 101 1 201 2 102 2 202 @@ -368,7 +336,6 @@ 4 104 4 204 5 105 5 205 = full = -{% if join_algorithm not in ['grace_hash'] -%} 1 101 1 201 2 102 2 202 3 103 3 203 @@ -384,7 +351,6 @@ \N \N -2 198 \N \N -1 199 \N \N 0 200 -{% endif -%} = left = 1 101 1 201 2 102 2 202 @@ -397,7 +363,6 @@ 9 109 \N \N 10 110 \N \N = right = -{% if join_algorithm not in ['grace_hash'] -%} 1 101 1 201 2 102 2 202 3 103 3 203 @@ -408,7 +373,6 @@ \N \N -2 198 \N \N -1 199 \N \N 0 200 -{% endif -%} = inner = 1 101 1 201 2 102 2 202 @@ -418,7 +382,6 @@ = agg = 1 1 -{% if join_algorithm not in ['grace_hash'] -%} 1 1 1 @@ -427,13 +390,11 @@ 1 55 1055 1 55 15 1055 1015 \N \N -10 \N 990 -{% endif -%} = types = 1 1 1 1 -{% if join_algorithm not in ['grace_hash'] -%} 1 1 1 @@ -442,5 +403,4 @@ 1 1 {% endif -%} -{% endif -%} {% endfor -%} diff --git a/tests/queries/0_stateless/01721_join_implicit_cast_long.sql.j2 b/tests/queries/0_stateless/01721_join_implicit_cast_long.sql.j2 index f5321939f28..38f71f4c5ec 100644 --- a/tests/queries/0_stateless/01721_join_implicit_cast_long.sql.j2 +++ b/tests/queries/0_stateless/01721_join_implicit_cast_long.sql.j2 @@ -10,7 +10,6 @@ INSERT INTO t1 SELECT number as a, 100 + number as b FROM system.numbers LIMIT 1 INSERT INTO t2 SELECT number - 5 as a, 200 + number - 5 as b FROM system.numbers LIMIT 1, 10; {% macro is_implemented(join_algorithm) -%} -{% if join_algorithm == 'grace_hash' %} -- { serverError NOT_IMPLEMENTED } {% endif %} {% endmacro -%} {% for join_algorithm in ['hash', 'partial_merge', 'auto', 'full_sorting_merge', 'grace_hash'] -%} diff --git a/tests/queries/0_stateless/01771_bloom_filter_not_has.sql b/tests/queries/0_stateless/01771_bloom_filter_not_has.sql index ab0e3d308f9..f945cbde56b 100644 --- 
a/tests/queries/0_stateless/01771_bloom_filter_not_has.sql +++ b/tests/queries/0_stateless/01771_bloom_filter_not_has.sql @@ -1,3 +1,4 @@ +-- Tags: no-parallel, long DROP TABLE IF EXISTS bloom_filter_null_array; CREATE TABLE bloom_filter_null_array (v Array(Int32), INDEX idx v TYPE bloom_filter GRANULARITY 3) ENGINE = MergeTree() ORDER BY v; INSERT INTO bloom_filter_null_array SELECT [number] FROM numbers(10000000); diff --git a/tests/queries/0_stateless/01774_tuple_null_in.reference b/tests/queries/0_stateless/01774_tuple_null_in.reference index aa47d0d46d4..573541ac970 100644 --- a/tests/queries/0_stateless/01774_tuple_null_in.reference +++ b/tests/queries/0_stateless/01774_tuple_null_in.reference @@ -1,2 +1 @@ 0 -0 diff --git a/tests/queries/0_stateless/01774_tuple_null_in.sql b/tests/queries/0_stateless/01774_tuple_null_in.sql index a9cc39e8840..f08e1a067e5 100644 --- a/tests/queries/0_stateless/01774_tuple_null_in.sql +++ b/tests/queries/0_stateless/01774_tuple_null_in.sql @@ -1,2 +1 @@ -SELECT (NULL, NULL) = (8, 0) OR (NULL, NULL) = (3, 2) OR (NULL, NULL) = (0, 0) OR (NULL, NULL) = (3, 1); SELECT (NULL, NULL) IN ((NULL, 0), (3, 1), (3, 2), (8, 0), (NULL, NULL)); diff --git a/tests/queries/0_stateless/01780_column_sparse_distinct.reference b/tests/queries/0_stateless/01780_column_sparse_distinct.reference index bb0cebc6540..beb45208e7b 100644 --- a/tests/queries/0_stateless/01780_column_sparse_distinct.reference +++ b/tests/queries/0_stateless/01780_column_sparse_distinct.reference @@ -5,3 +5,4 @@ all_2_2_0 v Sparse 2 3 4 +5 diff --git a/tests/queries/0_stateless/01780_column_sparse_distinct.sql b/tests/queries/0_stateless/01780_column_sparse_distinct.sql index 502ca7600d4..e98bada1aac 100644 --- a/tests/queries/0_stateless/01780_column_sparse_distinct.sql +++ b/tests/queries/0_stateless/01780_column_sparse_distinct.sql @@ -7,7 +7,7 @@ SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9; SYSTEM STOP MERGES t_sparse_distinct; -INSERT INTO t_sparse_distinct SELECT number, number % 5 FROM numbers(100000); +INSERT INTO t_sparse_distinct SELECT number, number % 6 FROM numbers(100000); INSERT INTO t_sparse_distinct SELECT number, number % 100 = 0 FROM numbers(100000); SELECT name, column, serialization_kind diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.reference b/tests/queries/0_stateless/01786_explain_merge_tree.reference index 4a3fe99710b..e6628813dbd 100644 --- a/tests/queries/0_stateless/01786_explain_merge_tree.reference +++ b/tests/queries/0_stateless/01786_explain_merge_tree.reference @@ -24,12 +24,12 @@ Name: t_minmax Description: minmax GRANULARITY 2 Parts: 1/2 - Granules: 2/6 + Granules: 4/6 Skip Name: t_set Description: set GRANULARITY 2 Parts: 1/1 - Granules: 1/2 + Granules: 2/4 ----------------- "Node Type": "ReadFromMergeTree", "Description": "default.test_index", @@ -68,7 +68,7 @@ "Initial Parts": 2, "Selected Parts": 1, "Initial Granules": 6, - "Selected Granules": 2 + "Selected Granules": 4 }, { "Type": "Skip", @@ -76,8 +76,8 @@ "Description": "set GRANULARITY 2", "Initial Parts": 1, "Selected Parts": 1, - "Initial Granules": 2, - "Selected Granules": 1 + "Initial Granules": 4, + "Selected Granules": 2 } ] } diff --git a/tests/queries/0_stateless/01791_dist_INSERT_block_structure_mismatch.reference b/tests/queries/0_stateless/01791_dist_INSERT_block_structure_mismatch.reference index f3be69d3279..b0d8284faa5 100644 --- a/tests/queries/0_stateless/01791_dist_INSERT_block_structure_mismatch.reference +++ 
b/tests/queries/0_stateless/01791_dist_INSERT_block_structure_mismatch.reference @@ -1,7 +1,7 @@ DistributedSink: Structure does not match (remote: n Int8 Int8(size = 0), local: n UInt64 UInt64(size = 1)), implicit conversion will be done. DistributedSink: Structure does not match (remote: n Int8 Int8(size = 0), local: n UInt64 UInt64(size = 1)), implicit conversion will be done. - default.dist_01683.DirectoryMonitor: Structure does not match (remote: n Int8 Int8(size = 0), local: n UInt64 UInt64(size = 0)), implicit conversion will be done - default.dist_01683.DirectoryMonitor: Structure does not match (remote: n Int8 Int8(size = 0), local: n UInt64 UInt64(size = 0)), implicit conversion will be done + default.dist_01683.DirectoryMonitor.default: Structure does not match (remote: n Int8 Int8(size = 0), local: n UInt64 UInt64(size = 0)), implicit conversion will be done + default.dist_01683.DirectoryMonitor.default: Structure does not match (remote: n Int8 Int8(size = 0), local: n UInt64 UInt64(size = 0)), implicit conversion will be done 1 1 2 diff --git a/tests/queries/0_stateless/01814_distributed_push_down_limit.sh b/tests/queries/0_stateless/01814_distributed_push_down_limit.sh index d995e3a1370..4b75102e9cf 100755 --- a/tests/queries/0_stateless/01814_distributed_push_down_limit.sh +++ b/tests/queries/0_stateless/01814_distributed_push_down_limit.sh @@ -60,6 +60,8 @@ function test_distributed_push_down_limit_with_query_log() # disable hedged requests to avoid excessive log entries --use_hedged_requests 0 + --async_query_sending_for_remote 0 + "$@" ) diff --git a/tests/queries/0_stateless/01823_explain_json.reference b/tests/queries/0_stateless/01823_explain_json.reference index 9df7c16e4f4..befbf82f4fb 100644 --- a/tests/queries/0_stateless/01823_explain_json.reference +++ b/tests/queries/0_stateless/01823_explain_json.reference @@ -37,63 +37,59 @@ "Node Type": "Aggregating", "Header": [ { - "Name": "number", + "Name": "number_0", "Type": "UInt64" }, { - "Name": "plus(number, 1)", - "Type": "UInt64" - }, - { - "Name": "quantile(0.2)(number)", + "Name": "quantile(0.2_Float64)(number_0)", "Type": "Float64" }, { - "Name": "sumIf(number, greater(number, 0))", + "Name": "sumIf(number_0, greater(number_0, 0_UInt8))", "Type": "UInt64" } ], - "Keys": ["number", "plus(number, 1)"], + "Keys": ["number_0"], "Aggregates": [ { - "Name": "quantile(0.2)(number)", + "Name": "quantile(0.2_Float64)(number_0)", "Function": { "Name": "quantile", "Parameters": ["0.2"], "Argument Types": ["UInt64"], "Result Type": "Float64" }, - "Arguments": ["number"] + "Arguments": ["number_0"] }, { - "Name": "sumIf(number, greater(number, 0))", + "Name": "sumIf(number_0, greater(number_0, 0_UInt8))", "Function": { "Name": "sumIf", "Argument Types": ["UInt64", "UInt8"], "Result Type": "UInt64" }, - "Arguments": ["number", "greater(number, 0)"] + "Arguments": ["number_0", "greater(number_0, 0_UInt8)"] } ], -------- "Node Type": "ArrayJoin", "Left": false, - "Columns": ["x", "y"], + "Columns": ["x_0", "y_1"], -------- "Node Type": "Distinct", - "Columns": ["intDiv(number, 3)", "intDiv(number, 2)"], + "Columns": ["intDiv(number_0, 2_UInt8)", "intDiv(number_0, 3_UInt8)"], -- "Node Type": "Distinct", - "Columns": ["intDiv(number, 3)", "intDiv(number, 2)"], + "Columns": ["intDiv(number_0, 2_UInt8)", "intDiv(number_0, 3_UInt8)"], -------- "Sort Description": [ { - "Column": "number", + "Column": "number_0", "Ascending": false, "With Fill": false }, { - "Column": "plus(number, 1)", + "Column": "plus(number_0, 1_UInt8)", 
"Ascending": true, "With Fill": false } diff --git a/tests/queries/0_stateless/01823_explain_json.sh b/tests/queries/0_stateless/01823_explain_json.sh index 7868bc0cc78..39128773069 100755 --- a/tests/queries/0_stateless/01823_explain_json.sh +++ b/tests/queries/0_stateless/01823_explain_json.sh @@ -5,26 +5,29 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -q "EXPLAIN json = 1, description = 0 SELECT 1 UNION ALL SELECT 2 FORMAT TSVRaw" +opts=( + "--allow_experimental_analyzer=1" +) +$CLICKHOUSE_CLIENT "${opts[@]}" -q "EXPLAIN json = 1, description = 0 SELECT 1 UNION ALL SELECT 2 FORMAT TSVRaw" echo "--------" -$CLICKHOUSE_CLIENT -q "explain json = 1, description = 0, header = 1 select 1, 2 + dummy FORMAT TSVRaw" 2> /dev/null | grep Header -m 1 -A 8 +$CLICKHOUSE_CLIENT "${opts[@]}" -q "explain json = 1, description = 0, header = 1 select 1, 2 + dummy FORMAT TSVRaw" 2> /dev/null | grep Header -m 1 -A 8 echo "--------" -$CLICKHOUSE_CLIENT -q "EXPLAIN json = 1, actions = 1, header = 1, description = 0 +$CLICKHOUSE_CLIENT "${opts[@]}" -q "EXPLAIN json = 1, actions = 1, header = 1, description = 0 SELECT quantile(0.2)(number), sumIf(number, number > 0) from numbers(2) group by number, number + 1 FORMAT TSVRaw - " | grep Aggregating -A 40 + " | grep Aggregating -A 36 echo "--------" -$CLICKHOUSE_CLIENT -q "EXPLAIN json = 1, actions = 1, description = 0 +$CLICKHOUSE_CLIENT "${opts[@]}" -q "EXPLAIN json = 1, actions = 1, description = 0 SELECT x, y from numbers(2) array join [number, 1] as x, [number + 1] as y FORMAT TSVRaw " | grep ArrayJoin -A 2 echo "--------" -$CLICKHOUSE_CLIENT -q "EXPLAIN json = 1, actions = 1, description = 0 +$CLICKHOUSE_CLIENT "${opts[@]}" -q "EXPLAIN json = 1, actions = 1, description = 0 SELECT distinct intDiv(number, 2), intDiv(number, 3) from numbers(10) FORMAT TSVRaw " | grep Distinct -A 1 echo "--------" -$CLICKHOUSE_CLIENT -q "EXPLAIN json = 1, actions = 1, description = 0 +$CLICKHOUSE_CLIENT "${opts[@]}" -q "EXPLAIN json = 1, actions = 1, description = 0 SELECT number + 1 from numbers(10) order by number desc, number + 1 limit 3 FORMAT TSVRaw " | grep "Sort Description" -A 12 diff --git a/tests/queries/0_stateless/01852_multiple_joins_with_union_join.reference b/tests/queries/0_stateless/01852_multiple_joins_with_union_join.reference index eb37a198435..85e6138dc5d 100644 --- a/tests/queries/0_stateless/01852_multiple_joins_with_union_join.reference +++ b/tests/queries/0_stateless/01852_multiple_joins_with_union_join.reference @@ -1,2 +1,2 @@ -1 1 1 1 -1 1 1 1 +1 1 1 +1 1 1 diff --git a/tests/queries/0_stateless/01852_multiple_joins_with_union_join.sql b/tests/queries/0_stateless/01852_multiple_joins_with_union_join.sql index 39818fa51c2..8c6937eb581 100644 --- a/tests/queries/0_stateless/01852_multiple_joins_with_union_join.sql +++ b/tests/queries/0_stateless/01852_multiple_joins_with_union_join.sql @@ -1,3 +1,5 @@ +SET allow_experimental_analyzer = 1; + DROP TABLE IF EXISTS v1; DROP TABLE IF EXISTS v2; @@ -17,7 +19,7 @@ JOIN v1 AS t2 USING (id) CROSS JOIN v2 AS n1 CROSS JOIN v2 AS n2; -SELECT * FROM v1 AS t1 JOIN v1 AS t2 USING (id) JOIN v1 AS t3 USING (value); -- { serverError NOT_IMPLEMENTED } +SELECT * FROM v1 AS t1 JOIN v1 AS t2 USING (id) JOIN v1 AS t3 USING (value); -- { serverError UNKNOWN_IDENTIFIER } DROP TABLE IF EXISTS v1; DROP TABLE IF EXISTS v2; diff --git a/tests/queries/0_stateless/01854_HTTP_dict_decompression.python 
b/tests/queries/0_stateless/01854_HTTP_dict_decompression.python index 4f6878665aa..7d98a24e83e 100644 --- a/tests/queries/0_stateless/01854_HTTP_dict_decompression.python +++ b/tests/queries/0_stateless/01854_HTTP_dict_decompression.python @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -from http.server import SimpleHTTPRequestHandler,HTTPServer +from http.server import SimpleHTTPRequestHandler, HTTPServer import socket import csv import sys @@ -21,6 +21,7 @@ def is_ipv6(host): except: return True + def get_local_port(host, ipv6): if ipv6: family = socket.AF_INET6 @@ -31,8 +32,9 @@ def get_local_port(host, ipv6): fd.bind((host, 0)) return fd.getsockname()[1] -CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', 'localhost') -CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123') + +CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "localhost") +CLICKHOUSE_PORT_HTTP = os.environ.get("CLICKHOUSE_PORT_HTTP", "8123") ##################################################################################### # This test starts an HTTP server and serves data to a ClickHouse url-engine based table. @@ -42,16 +44,24 @@ CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123') ##################################################################################### # IP address of this host, accessible from the outside world. Get the first one. -HTTP_SERVER_HOST = subprocess.check_output(['hostname', '-i']).decode('utf-8').strip().split()[0] +HTTP_SERVER_HOST = ( + subprocess.check_output(["hostname", "-i"]).decode("utf-8").strip().split()[0] +) IS_IPV6 = is_ipv6(HTTP_SERVER_HOST) HTTP_SERVER_PORT = get_local_port(HTTP_SERVER_HOST, IS_IPV6) # IP address and port of the HTTP server started from this script. HTTP_SERVER_ADDRESS = (HTTP_SERVER_HOST, HTTP_SERVER_PORT) if IS_IPV6: - HTTP_SERVER_URL_STR = 'http://' + f'[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}' + "/" + HTTP_SERVER_URL_STR = ( + "http://" + + f"[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}" + + "/" + ) else: - HTTP_SERVER_URL_STR = 'http://' + f'{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}' + "/" + HTTP_SERVER_URL_STR = ( + "http://" + f"{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}" + "/" + ) # Because we need to check the content of file.csv, we can create this content and avoid reading the csv. CSV_DATA = "Hello, 1\nWorld, 2\nThis, 152\nis, 9283\ntesting, 2313213\ndata, 555\n" @@ -59,19 +69,24 @@ CSV_DATA = "Hello, 1\nWorld, 2\nThis, 152\nis, 9283\ntesting, 2313213\ndata, 555 # Choose the compression method # (it will change during the test; we need to check standard data sending to make sure that nothing broke) -COMPRESS_METHOD = 'none' -ADDING_ENDING = '' -ENDINGS = ['.gz', '.xz'] +COMPRESS_METHOD = "none" +ADDING_ENDING = "" +ENDINGS = [".gz", ".xz"] SEND_ENCODING = True + def get_ch_answer(query): host = CLICKHOUSE_HOST if IS_IPV6: - host = f'[{host}]' + host = f"[{host}]" - url = os.environ.get('CLICKHOUSE_URL', 'http://{host}:{port}'.format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP)) + url = os.environ.get( + "CLICKHOUSE_URL", + "http://{host}:{port}".format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP), + ) return urllib.request.urlopen(url, data=query.encode()).read().decode() + def check_answers(query, answer): ch_answer = get_ch_answer(query) if ch_answer.strip() != answer.strip(): @@ -80,18 +95,19 @@ def check_answers(query, answer): print("Fetched answer :", ch_answer, file=sys.stderr) raise Exception("Fail on query") + # Server with a HEAD method, which is useful
for debugging by hand class HttpProcessor(SimpleHTTPRequestHandler): def _set_headers(self): self.send_response(200) if SEND_ENCODING: - self.send_header('Content-Encoding', COMPRESS_METHOD) - if COMPRESS_METHOD == 'none': - self.send_header('Content-Length', len(CSV_DATA.encode())) + self.send_header("Content-Encoding", COMPRESS_METHOD) + if COMPRESS_METHOD == "none": + self.send_header("Content-Length", len(CSV_DATA.encode())) else: self.compress_data() - self.send_header('Content-Length', len(self.data)) - self.send_header('Content-Type', 'text/csv') + self.send_header("Content-Length", len(self.data)) + self.send_header("Content-Type", "text/csv") self.end_headers() def do_HEAD(self): @@ -99,18 +115,17 @@ class HttpProcessor(SimpleHTTPRequestHandler): return def compress_data(self): - if COMPRESS_METHOD == 'gzip': + if COMPRESS_METHOD == "gzip": self.data = gzip.compress((CSV_DATA).encode()) - elif COMPRESS_METHOD == 'lzma': + elif COMPRESS_METHOD == "lzma": self.data = lzma.compress((CSV_DATA).encode()) else: - self.data = 'WRONG CONVERSATION'.encode() - + self.data = "WRONG CONVERSATION".encode() def do_GET(self): self._set_headers() - if COMPRESS_METHOD == 'none': + if COMPRESS_METHOD == "none": self.wfile.write(CSV_DATA.encode()) else: self.wfile.write(self.data) @@ -119,9 +134,11 @@ def log_message(self, format, *args): return + class HTTPServerV6(HTTPServer): address_family = socket.AF_INET6 + def start_server(requests_amount): if IS_IPV6: httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, HttpProcessor) @@ -135,52 +152,60 @@ def start_server(requests_amount): t = threading.Thread(target=real_func) return t + ##################################################################### # Testing area. ##################################################################### -def test_select(dict_name="", schema="word String, counter UInt32", requests=[], answers=[], test_data=""): + +def test_select( + dict_name="", + schema="word String, counter UInt32", + requests=[], + answers=[], + test_data="", +): global ADDING_ENDING global SEND_ENCODING global COMPRESS_METHOD for i in range(len(requests)): if i > 2: - ADDING_ENDING = ENDINGS[i-3] + ADDING_ENDING = ENDINGS[i - 3] SEND_ENCODING = False if dict_name: get_ch_answer("drop dictionary if exists {}".format(dict_name)) - get_ch_answer('''CREATE DICTIONARY {} ({}) + get_ch_answer( + """CREATE DICTIONARY {} ({}) PRIMARY KEY word SOURCE(HTTP(url '{}' format 'CSV')) LAYOUT(complex_key_hashed()) - LIFETIME(0)'''.format(dict_name, schema, HTTP_SERVER_URL_STR + '/test.csv' + ADDING_ENDING)) + LIFETIME(0)""".format( + dict_name, schema, HTTP_SERVER_URL_STR + "/test.csv" + ADDING_ENDING + ) + ) COMPRESS_METHOD = requests[i] print(i, COMPRESS_METHOD, ADDING_ENDING, SEND_ENCODING) check_answers("SELECT * FROM {} ORDER BY word".format(dict_name), answers[i]) + def main(): # first three for encoding, second three for url - insert_requests = [ 'none', 'gzip', 'lzma', 'gzip', 'lzma' ] + insert_requests = ["none", "gzip", "lzma", "gzip", "lzma"] # These answers were obtained experimentally in non-compressed mode and they are correct - answers = ['''Hello 1\nThis 152\nWorld 2\ndata 555\nis 9283\ntesting 2313213'''] * 5 + answers = ["""Hello 1\nThis 152\nWorld 2\ndata 555\nis 9283\ntesting 2313213"""] * 5 t = start_server(len(insert_requests)) t.start() - test_select(dict_name="test_table_select", requests=insert_requests, answers=answers) + test_select( + dict_name="test_table_select", requests=insert_requests,
answers=answers + ) t.join() print("PASSED") - if __name__ == "__main__": try: main() @@ -191,5 +216,3 @@ if __name__ == "__main__": sys.stderr.flush() os._exit(1) - - diff --git a/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2 b/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2 index d2cc066a1b1..fafefd72cb8 100644 --- a/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2 +++ b/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2 @@ -31,7 +31,6 @@ SELECT t1.key, t1.key2 FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.key == SELECT '--'; SELECT t1.key FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2; SELECT t1.key FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2 AND 0; -- { serverError INVALID_JOIN_ON_EXPRESSION } -SELECT t1.key FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2 AND 1; -- { serverError INVALID_JOIN_ON_EXPRESSION } SELECT '--'; SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2 AND t2.id > 2; @@ -71,8 +70,8 @@ SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.id; -- { serverError SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.id; -- { serverError 403 } SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.id + 2; -- { serverError 403 } SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.id + 2; -- { serverError 403 } -SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.key; -- { serverError 403 } -SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.key; -- { serverError 403 } +SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.key; -- { serverError 43, 403 } +SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.key; -- { serverError 43, 403 } SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 AND (t1.id == t2.id OR isNull(t2.key2)); -- { serverError 403 } SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 OR t1.id == t2.id; -- { serverError 403 } SELECT * FROM t1 JOIN t2 ON (t2.key == t2.key2 AND (t1.key == t1.key2 AND t1.key != 'XXX' OR t1.id == t2.id)) AND t1.id == t2.id; -- { serverError 403 } diff --git a/tests/queries/0_stateless/01881_join_on_conditions_merge.sql.j2 b/tests/queries/0_stateless/01881_join_on_conditions_merge.sql.j2 index 1704fedb92b..e4b704247b2 100644 --- a/tests/queries/0_stateless/01881_join_on_conditions_merge.sql.j2 +++ b/tests/queries/0_stateless/01881_join_on_conditions_merge.sql.j2 @@ -68,8 +68,8 @@ SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.id; -- { serverError SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.id; -- { serverError 403 } SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.id + 2; -- { serverError 403 } SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.id + 2; -- { serverError 403 } -SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.key; -- { serverError 403 } -SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.key; -- { serverError 403 } +SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.key; -- { serverError 43, 403 } +SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.key; -- { serverError 43, 403 } SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 AND (t1.id == t2.id OR isNull(t2.key2)); -- { serverError 403 } SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 OR t1.id == t2.id; -- { serverError 403 } SELECT * FROM t1 JOIN t2 ON (t2.key == t2.key2 AND (t1.key == t1.key2 
AND t1.key != 'XXX' OR t1.id == t2.id)) AND t1.id == t2.id; -- { serverError 403 } diff --git a/tests/queries/0_stateless/01883_with_grouping_sets.reference b/tests/queries/0_stateless/01883_with_grouping_sets.reference index 8fae10a05a4..499e930541f 100644 --- a/tests/queries/0_stateless/01883_with_grouping_sets.reference +++ b/tests/queries/0_stateless/01883_with_grouping_sets.reference @@ -13,8 +13,7 @@ ExpressionTransform Copy 1 → 2 (Expression) ExpressionTransform - (ReadFromStorage) - Memory 0 → 1 + (ReadFromMemoryStorage) 1 0 1 4500 1 0 3 4700 1 0 5 4900 diff --git a/tests/queries/0_stateless/01888_read_int_safe.sql b/tests/queries/0_stateless/01888_read_int_safe.sql index 3aea8e38ab0..197338775c4 100644 --- a/tests/queries/0_stateless/01888_read_int_safe.sql +++ b/tests/queries/0_stateless/01888_read_int_safe.sql @@ -1,10 +1,10 @@ -select toInt64('--1'); -- { serverError 72; } -select toInt64('+-1'); -- { serverError 72; } -select toInt64('++1'); -- { serverError 72; } -select toInt64('++'); -- { serverError 72; } -select toInt64('+'); -- { serverError 72; } -select toInt64('1+1'); -- { serverError 6; } -select toInt64('1-1'); -- { serverError 6; } -select toInt64(''); -- { serverError 32; } +select toInt64('--1'); -- { serverError 72 } +select toInt64('+-1'); -- { serverError 72 } +select toInt64('++1'); -- { serverError 72 } +select toInt64('++'); -- { serverError 72 } +select toInt64('+'); -- { serverError 72 } +select toInt64('1+1'); -- { serverError 6 } +select toInt64('1-1'); -- { serverError 6 } +select toInt64(''); -- { serverError 32 } select toInt64('1'); select toInt64('-1'); diff --git a/tests/queries/0_stateless/01889_sql_json_functions.reference b/tests/queries/0_stateless/01889_sql_json_functions.reference index c2c106e8632..cb8e19ea2a0 100644 --- a/tests/queries/0_stateless/01889_sql_json_functions.reference +++ b/tests/queries/0_stateless/01889_sql_json_functions.reference @@ -31,6 +31,26 @@ select JSON_VALUE('{"a":"\\n\\u0000"}', '$.a'); \n\0 select JSON_VALUE('{"a":"\\u263a"}', '$.a'); ☺ +select JSON_VALUE('{"hello":"world"}', '$.b') settings function_json_value_return_type_allow_nullable=true; +\N +select JSON_VALUE('{"hello":{"world":"!"}}', '$.hello') settings function_json_value_return_type_allow_complex=true; +{"world":"!"} +SELECT JSON_VALUE('{"hello":["world","world2"]}', '$.hello') settings function_json_value_return_type_allow_complex=true; +["world","world2"] +SELECT JSON_VALUE('{"1key":1}', '$.1key'); +1 +SELECT JSON_VALUE('{"hello":1}', '$[hello]'); +1 +SELECT JSON_VALUE('{"hello":1}', '$["hello"]'); +1 +SELECT JSON_VALUE('{"hello":1}', '$[\'hello\']'); +1 +SELECT JSON_VALUE('{"hello 1":1}', '$["hello 1"]'); +1 +SELECT JSON_VALUE('{"1key":1}', '$..1key'); -- { serverError 36 } +SELECT JSON_VALUE('{"1key":1}', '$1key'); -- { serverError 36 } +SELECT JSON_VALUE('{"1key":1}', '$key'); -- { serverError 36 } +SELECT JSON_VALUE('{"1key":1}', '$.[key]'); -- { serverError 36 } SELECT '--JSON_QUERY--'; --JSON_QUERY-- SELECT JSON_QUERY('{"hello":1}', '$'); @@ -55,6 +75,20 @@ SELECT JSON_QUERY('', '$.hello'); SELECT JSON_QUERY('{"array":[[0, 1, 2, 3, 4, 5], [0, -1, -2, -3, -4, -5]]}', '$.array[*][0 to 2, 4]'); [0, 1, 4, 0, -1, -4] +SELECT JSON_QUERY('{"1key":1}', '$.1key'); +[1] +SELECT JSON_QUERY('{"hello":1}', '$[hello]'); +[1] +SELECT JSON_QUERY('{"hello":1}', '$["hello"]'); +[1] +SELECT JSON_QUERY('{"hello":1}', '$[\'hello\']'); +[1] +SELECT JSON_QUERY('{"hello 1":1}', '$["hello 1"]'); +[1] +SELECT JSON_QUERY('{"1key":1}', '$..1key'); -- { serverError 36 } 
+SELECT JSON_QUERY('{"1key":1}', '$1key'); -- { serverError 36 } +SELECT JSON_QUERY('{"1key":1}', '$key'); -- { serverError 36 } +SELECT JSON_QUERY('{"1key":1}', '$.[key]'); -- { serverError 36 } SELECT '--JSON_EXISTS--'; --JSON_EXISTS-- SELECT JSON_EXISTS('{"hello":1}', '$'); diff --git a/tests/queries/0_stateless/01889_sql_json_functions.sql b/tests/queries/0_stateless/01889_sql_json_functions.sql index e816443382c..947b0171ec6 100644 --- a/tests/queries/0_stateless/01889_sql_json_functions.sql +++ b/tests/queries/0_stateless/01889_sql_json_functions.sql @@ -17,6 +17,18 @@ SELECT JSON_VALUE('{"hello":"\\uD83C\\uDF3A \\uD83C\\uDF38 \\uD83C\\uDF37 Hello, SELECT JSON_VALUE('{"a":"Hello \\"World\\" \\\\"}', '$.a'); select JSON_VALUE('{"a":"\\n\\u0000"}', '$.a'); select JSON_VALUE('{"a":"\\u263a"}', '$.a'); +select JSON_VALUE('{"hello":"world"}', '$.b') settings function_json_value_return_type_allow_nullable=true; +select JSON_VALUE('{"hello":{"world":"!"}}', '$.hello') settings function_json_value_return_type_allow_complex=true; +SELECT JSON_VALUE('{"hello":["world","world2"]}', '$.hello') settings function_json_value_return_type_allow_complex=true; +SELECT JSON_VALUE('{"1key":1}', '$.1key'); +SELECT JSON_VALUE('{"hello":1}', '$[hello]'); +SELECT JSON_VALUE('{"hello":1}', '$["hello"]'); +SELECT JSON_VALUE('{"hello":1}', '$[\'hello\']'); +SELECT JSON_VALUE('{"hello 1":1}', '$["hello 1"]'); +SELECT JSON_VALUE('{"1key":1}', '$..1key'); -- { serverError 36 } +SELECT JSON_VALUE('{"1key":1}', '$1key'); -- { serverError 36 } +SELECT JSON_VALUE('{"1key":1}', '$key'); -- { serverError 36 } +SELECT JSON_VALUE('{"1key":1}', '$.[key]'); -- { serverError 36 } SELECT '--JSON_QUERY--'; SELECT JSON_QUERY('{"hello":1}', '$'); @@ -30,6 +42,15 @@ SELECT JSON_QUERY('{"hello":{"world":"!"}}', '$.hello'); SELECT JSON_QUERY( '{hello:{"world":"!"}}}', '$.hello'); -- invalid json => default value (empty string) SELECT JSON_QUERY('', '$.hello'); SELECT JSON_QUERY('{"array":[[0, 1, 2, 3, 4, 5], [0, -1, -2, -3, -4, -5]]}', '$.array[*][0 to 2, 4]'); +SELECT JSON_QUERY('{"1key":1}', '$.1key'); +SELECT JSON_QUERY('{"hello":1}', '$[hello]'); +SELECT JSON_QUERY('{"hello":1}', '$["hello"]'); +SELECT JSON_QUERY('{"hello":1}', '$[\'hello\']'); +SELECT JSON_QUERY('{"hello 1":1}', '$["hello 1"]'); +SELECT JSON_QUERY('{"1key":1}', '$..1key'); -- { serverError 36 } +SELECT JSON_QUERY('{"1key":1}', '$1key'); -- { serverError 36 } +SELECT JSON_QUERY('{"1key":1}', '$key'); -- { serverError 36 } +SELECT JSON_QUERY('{"1key":1}', '$.[key]'); -- { serverError 36 } SELECT '--JSON_EXISTS--'; SELECT JSON_EXISTS('{"hello":1}', '$'); diff --git a/tests/queries/0_stateless/01889_sqlite_read_write.reference b/tests/queries/0_stateless/01889_sqlite_read_write.reference index d6b1e6f41c1..9f2b382e41e 100644 --- a/tests/queries/0_stateless/01889_sqlite_read_write.reference +++ b/tests/queries/0_stateless/01889_sqlite_read_write.reference @@ -36,6 +36,11 @@ line1 1 line2 2 line3 3 line4 4 +test schema inference +col1 Nullable(String) +col2 Nullable(Int32) +col1 Nullable(String) +col2 Nullable(Int32) test path in clickhouse-local line1 1 line2 2 diff --git a/tests/queries/0_stateless/01889_sqlite_read_write.sh b/tests/queries/0_stateless/01889_sqlite_read_write.sh index e732155dbcc..7691d2e3c2c 100755 --- a/tests/queries/0_stateless/01889_sqlite_read_write.sh +++ b/tests/queries/0_stateless/01889_sqlite_read_write.sh @@ -87,6 +87,14 @@ ${CLICKHOUSE_CLIENT} --query="INSERT INTO TABLE FUNCTION sqlite('${DB_PATH}', 't ${CLICKHOUSE_CLIENT} 
--query="SELECT * FROM sqlite('${DB_PATH}', 'table1') ORDER BY col2" +${CLICKHOUSE_CLIENT} --query="select 'test schema inference'"; +${CLICKHOUSE_CLIENT} --query="CREATE TABLE sqlite_table3_inferred_engine ENGINE = SQLite('${DB_PATH}', 'table3')" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE sqlite_table3_inferred_function AS sqlite('${DB_PATH}', 'table3')" +${CLICKHOUSE_CLIENT} --query="DESCRIBE TABLE sqlite_table3_inferred_engine;" +${CLICKHOUSE_CLIENT} --query="DESCRIBE TABLE sqlite_table3_inferred_function;" +${CLICKHOUSE_CLIENT} --query="DROP TABLE sqlite_table3_inferred_engine;" +${CLICKHOUSE_CLIENT} --query="DROP TABLE sqlite_table3_inferred_function;" + sqlite3 "${DB_PATH2}" 'DROP TABLE IF EXISTS table1' sqlite3 "${DB_PATH2}" 'CREATE TABLE table1 (col1 text, col2 smallint);' sqlite3 "${DB_PATH2}" "INSERT INTO table1 VALUES ('line1', 1), ('line2', 2), ('line3', 3)" diff --git a/tests/queries/0_stateless/01890_cross_join_explain_crash.reference b/tests/queries/0_stateless/01890_cross_join_explain_crash.reference index 76315843adb..c08ba9c71b3 100644 --- a/tests/queries/0_stateless/01890_cross_join_explain_crash.reference +++ b/tests/queries/0_stateless/01890_cross_join_explain_crash.reference @@ -1,3 +1,4 @@ +1 1 1 2 1 1 1 2 1 1 1 2 diff --git a/tests/queries/0_stateless/01890_cross_join_explain_crash.sql b/tests/queries/0_stateless/01890_cross_join_explain_crash.sql index 20a1956ea6b..bb2bc606870 100644 --- a/tests/queries/0_stateless/01890_cross_join_explain_crash.sql +++ b/tests/queries/0_stateless/01890_cross_join_explain_crash.sql @@ -1,8 +1,7 @@ +SET allow_experimental_analyzer = 1; SET joined_subquery_requires_alias = 0; -select * FROM (SELECT 1), (SELECT 1), (SELECT 1); -- { serverError 352 } --- This queries work by luck. --- Feel free to remove then if it is the only failed test. 
+select * FROM (SELECT 1), (SELECT 1), (SELECT 1); select * from (select 2), (select 1) as a, (select 1) as b; select * from (select 1) as a, (select 2), (select 1) as b; select * from (select 1) as a, (select 1) as b, (select 2); diff --git a/tests/queries/0_stateless/01891_partition_hash.reference b/tests/queries/0_stateless/01891_partition_hash.reference index 56d11075e50..c5814777dfe 100644 --- a/tests/queries/0_stateless/01891_partition_hash.reference +++ b/tests/queries/0_stateless/01891_partition_hash.reference @@ -1 +1,2 @@ 6ba51fa36c625adab5d58007c96e32bf +ebc1c2f37455caea601feeb840757dd3 diff --git a/tests/queries/0_stateless/01891_partition_hash.sql b/tests/queries/0_stateless/01891_partition_hash.sql index f56ed6a4ff4..894594dd465 100644 --- a/tests/queries/0_stateless/01891_partition_hash.sql +++ b/tests/queries/0_stateless/01891_partition_hash.sql @@ -1,7 +1,32 @@ -drop table if exists tab; -create table tab (i8 Int8, i16 Int16, i32 Int32, i64 Int64, i128 Int128, i256 Int256, u8 UInt8, u16 UInt16, u32 UInt32, u64 UInt64, u128 UInt128, u256 UInt256, id UUID, s String, fs FixedString(33), a Array(UInt8), t Tuple(UInt16, UInt32), d Date, dt DateTime('Asia/Istanbul'), dt64 DateTime64(3, 'Asia/Istanbul'), dec128 Decimal128(3), dec256 Decimal256(4), lc LowCardinality(String)) engine = MergeTree PARTITION BY (i8, i16, i32, i64, i128, i256, u8, u16, u32, u64, u128, u256, id, s, fs, a, t, d, dt, dt64, dec128, dec256, lc) order by tuple(); -insert into tab values (-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, '61f0c404-5cb3-11e7-907b-a6006ad3dba0', 'a', 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', [1, 2, 3], (-1, -2), '2020-01-01', '2020-01-01 01:01:01', '2020-01-01 01:01:01', '123.456', '78.9101', 'a'); +DROP TABLE IF EXISTS tab; +CREATE TABLE tab ( + i8 Int8, + i16 Int16, + i32 Int32, + i64 Int64, + i128 Int128, + i256 Int256, + u8 UInt8, + u16 UInt16, + u32 UInt32, + u64 UInt64, + u128 UInt128, + u256 UInt256, + id UUID, + s String, + fs FixedString(33), + a Array(UInt8), + t Tuple(UInt16, UInt32), + d Date, + dt DateTime('Asia/Istanbul'), + dt64 DateTime64(3, 'Asia/Istanbul'), + dec128 Decimal128(3), + dec256 Decimal256(4), + lc LowCardinality(String)) +engine = MergeTree PARTITION BY (i8, i16, i32, i64, i128, i256, u8, u16, u32, u64, u128, u256, id, s, fs, a, t, d, dt, dt64, dec128, dec256, lc) ORDER BY tuple(); +INSERT INTO tab VALUES (-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, '61f0c404-5cb3-11e7-907b-a6006ad3dba0', 'a', 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', [1, 2, 3], (-1, -2), '2020-01-01', '2020-01-01 01:01:01', '2020-01-01 01:01:01', '123.456', '78.9101', 'a'); +INSERT INTO tab VALUES (123, 12345, 1234567890, 1234567890000000000, 123456789000000000000000000000000000000, 123456789000000000000000000000000000000000000000000000000000000000000000000000, 123, 12345, 1234567890, 1234567890000000000, 123456789000000000000000000000000000000, 123456789000000000000000000000000000000000000000000000000000000000000000000000, '61f0c404-5cb3-11e7-907b-a6006ad3dba0', 'a', 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', [1, 2, 3], (-1, -2), '2020-01-01', '2020-01-01 01:01:01', '2020-01-01 01:01:01', '123.456', '78.9101', 'a'); -- Here we check that partition id did not change. -- Different result means Backward Incompatible Change. Old partitions will not be accepted by new server. 
-select partition_id from system.parts where table = 'tab' and database = currentDatabase(); -drop table if exists tab; +SELECT partition_id FROM system.parts WHERE table = 'tab' AND database = currentDatabase(); +DROP TABLE IF EXISTS tab; diff --git a/tests/queries/0_stateless/01905_to_json_string.reference b/tests/queries/0_stateless/01905_to_json_string.reference index 33d435f8e1a..ec4f4e48bde 100644 --- a/tests/queries/0_stateless/01905_to_json_string.reference +++ b/tests/queries/0_stateless/01905_to_json_string.reference @@ -1,3 +1,3 @@ -[] 2947817982 "&" -69802.9769 "w" -1.9158530982937093e25 ["2003-05-15","1988-03-19 06:13:49","2090-04-14 03:58:26.029","91943d2e-480d-66b5-ee4c-1b5bb8eb7256"] "O" [] -[-115] 481807067 ",{MM" -170235.0663 "o" 3.3808659558052087e155 ["2055-01-12","2070-08-09 03:49:21","2068-11-30 09:36:49.672","20b0e7b5-ad0e-177b-3054-c779b2a8ebe0"] "I\\u001C" ["e57178f9-4d10-2fa1-7c2d-53c5a65c3463"] +[] 2947817982 "&" -69802.9769 "o" 3.3808659558052087e155 ["2142-01-24","2076-06-05 14:54:21","2068-11-30 09:36:49.672","ee4c1b5b-b8eb-7256-20b0-e7b5ad0e177b"] "" ["7c2d53c5-a65c-3463-a76e-e26583aca234"] +[-115] 481807067 ",{MM" -45534.1174 "w" 1.711178201812925e-166 ["1994-01-04","1971-12-29 08:41:23","2012-03-25 07:11:39.573","3054c779-b2a8-ebe0-ec50-64cb1c494fbd"] "\\u0000" ["055300b2-b400-653c-1ea0-2413e3a3af76"] {"1234":"5678"} diff --git a/tests/queries/0_stateless/01911_logical_error_minus.sql b/tests/queries/0_stateless/01911_logical_error_minus.sql index 3dcdedd38f5..7f371a463f8 100644 --- a/tests/queries/0_stateless/01911_logical_error_minus.sql +++ b/tests/queries/0_stateless/01911_logical_error_minus.sql @@ -26,7 +26,7 @@ INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32 INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32) SELECT number AS n, 'sin(n*n*n)*n', sin(n * n * n * n* n) AS v, v, v, v FROM system.numbers LIMIT 301, 100; -SELECT IF(-2, NULL, 0.00009999999747378752), IF(1048577, 1048576, NULL), c1.key, IF(1, NULL, NULL), c2.key FROM codecTest AS c1 , codecTest AS c2 WHERE ignore(IF(257, -2, NULL), arrayJoin([65537]), IF(3, 1024, 9223372036854775807)) AND IF(NULL, 256, NULL) AND (IF(NULL, '1048576', NULL) = (c1.key - NULL)) LIMIT 65535; +SELECT IF(2, NULL, 0.00009999999747378752), IF(104, 1048576, NULL), c1.key, IF(1, NULL, NULL), c2.key FROM codecTest AS c1 , codecTest AS c2 WHERE ignore(IF(255, -2, NULL), arrayJoin([65537]), IF(3, 1024, 9223372036854775807)) AND IF(NULL, 256, NULL) AND (IF(NULL, '1048576', NULL) = (c1.key - NULL)) LIMIT 65535; SELECT c1.key, c1.name, c1.ref_valueF64, c1.valueF64, c1.ref_valueF64 - c1.valueF64 AS dF64, '', c2.key, c2.ref_valueF64 FROM codecTest AS c1 , codecTest AS c2 WHERE (dF64 != 3) AND c1.valueF64 != 0 AND (c2.key = (c1.key - 1048576)) LIMIT 0; @@ -72,7 +72,7 @@ INSERT INTO codecTest (key, ref_valueU64, valueU64, ref_valueU32, valueU32, ref_ SELECT number as n, n + (rand64() - 9223372036854775807)/1000 as v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, toDateTime(v), toDateTime(v), toDate(v), toDate(v) FROM system.numbers LIMIT 3001, 1000; -SELECT IF(-2, NULL, 0.00009999999747378752), IF(1048577, 1048576, NULL), c1.key, IF(1, NULL, NULL), c2.key FROM codecTest AS c1 , codecTest AS c2 WHERE ignore(IF(257, -2, NULL), arrayJoin([65537]), IF(3, 1024, 9223372036854775807)) AND IF(NULL, 256, NULL) AND (IF(NULL, '1048576', NULL) = (c1.key - NULL)) LIMIT 65535; +SELECT IF(2, NULL, 0.00009999999747378752), IF(104, 1048576, NULL), c1.key, IF(1, NULL, NULL), c2.key 
FROM codecTest AS c1 , codecTest AS c2 WHERE ignore(IF(255, -2, NULL), arrayJoin([65537]), IF(3, 1024, 9223372036854775807)) AND IF(NULL, 256, NULL) AND (IF(NULL, '1048576', NULL) = (c1.key - NULL)) LIMIT 65535; DROP TABLE codecTest; diff --git a/tests/queries/0_stateless/01913_exact_rows_before_limit_full.reference b/tests/queries/0_stateless/01913_exact_rows_before_limit_full.reference new file mode 100644 index 00000000000..a0f4560ca1c --- /dev/null +++ b/tests/queries/0_stateless/01913_exact_rows_before_limit_full.reference @@ -0,0 +1,162 @@ +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [0] + ], + + "rows": 1, + + "rows_before_limit_at_least": 10000 +} +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [0] + ], + + "rows": 1, + + "rows_before_limit_at_least": 10 +} +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [12] + ], + + "rows": 1, + + "rows_before_limit_at_least": 3 +} +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [0] + ], + + "rows": 1, + + "rows_before_limit_at_least": 20 +} +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [0] + ], + + "rows": 1, + + "rows_before_limit_at_least": 60 +} +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [0] + ], + + "rows": 1, + + "rows_before_limit_at_least": 40 +} +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [0] + ], + + "rows": 1, + + "rows_before_limit_at_least": 60 +} +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [0] + ], + + "rows": 1, + + "rows_before_limit_at_least": 40 +} +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [0] + ], + + "rows": 1, + + "rows_before_limit_at_least": 20 +} diff --git a/tests/queries/0_stateless/01913_exact_rows_before_limit_full.sql b/tests/queries/0_stateless/01913_exact_rows_before_limit_full.sql new file mode 100644 index 00000000000..84f97090169 --- /dev/null +++ b/tests/queries/0_stateless/01913_exact_rows_before_limit_full.sql @@ -0,0 +1,29 @@ +-- Tags: no-parallel, no-random-merge-tree-settings + +drop table if exists test; + +create table test (i int) engine MergeTree order by tuple(); + +insert into test select arrayJoin(range(10000)); + +set exact_rows_before_limit = 1, output_format_write_statistics = 0, max_block_size = 100; + +select * from test limit 1 FORMAT JSONCompact; + +select * from test where i < 10 group by i limit 1 FORMAT JSONCompact; + +select * from test group by i having i in (10, 11, 12) limit 1 FORMAT JSONCompact; + +select * from test where i < 20 order by i limit 1 FORMAT JSONCompact; + +set prefer_localhost_replica = 0; +select * from cluster(test_cluster_two_shards, currentDatabase(), test) where i < 30 limit 1 FORMAT JSONCompact; +select * from cluster(test_cluster_two_shards, currentDatabase(), test) where i < 20 order by i limit 1 FORMAT JSONCompact; + +set prefer_localhost_replica = 1; +select * from cluster(test_cluster_two_shards, currentDatabase(), test) where i < 30 limit 1 FORMAT JSONCompact; +select * from cluster(test_cluster_two_shards, currentDatabase(), test) where i < 20 order by i limit 1 FORMAT JSONCompact; + +select * from (select * from cluster(test_cluster_two_shards, currentDatabase(), test) where i < 10) limit 1 FORMAT JSONCompact; + +drop table if exists test; diff --git a/tests/queries/0_stateless/01913_names_of_tuple_literal.sql 
b/tests/queries/0_stateless/01913_names_of_tuple_literal.sql index 09de9e8cf37..879f4c91587 100644 --- a/tests/queries/0_stateless/01913_names_of_tuple_literal.sql +++ b/tests/queries/0_stateless/01913_names_of_tuple_literal.sql @@ -1,2 +1,4 @@ +SET allow_experimental_analyzer = 0; + SELECT ((1, 2), (2, 3), (3, 4)) FORMAT TSVWithNames; SELECT ((1, 2), (2, 3), (3, 4)) FORMAT TSVWithNames SETTINGS legacy_column_name_of_tuple_literal = 1; diff --git a/tests/queries/0_stateless/01920_async_drain_connections.sql b/tests/queries/0_stateless/01920_async_drain_connections.sql deleted file mode 100644 index 827ca13fc1a..00000000000 --- a/tests/queries/0_stateless/01920_async_drain_connections.sql +++ /dev/null @@ -1,6 +0,0 @@ -drop table if exists t; - -create table t (number UInt64) engine = Distributed(test_cluster_two_shards, system, numbers); -select * from t where number = 0 limit 2 settings sleep_in_receive_cancel_ms = 10000, max_execution_time = 5; - -drop table t; diff --git a/tests/queries/0_stateless/01925_test_group_by_const_consistency.reference b/tests/queries/0_stateless/01925_test_group_by_const_consistency.reference index 93f9e3d10db..72e8abb1428 100644 --- a/tests/queries/0_stateless/01925_test_group_by_const_consistency.reference +++ b/tests/queries/0_stateless/01925_test_group_by_const_consistency.reference @@ -1,2 +1,3 @@ 0 1 0 +1 0 diff --git a/tests/queries/0_stateless/01925_test_group_by_const_consistency.sql b/tests/queries/0_stateless/01925_test_group_by_const_consistency.sql index a73c06bbe49..176b5761ac5 100644 --- a/tests/queries/0_stateless/01925_test_group_by_const_consistency.sql +++ b/tests/queries/0_stateless/01925_test_group_by_const_consistency.sql @@ -2,3 +2,5 @@ SELECT 1 as a, count() FROM numbers(10) WHERE 0 GROUP BY a; SELECT count() FROM numbers(10) WHERE 0; SELECT 1 as a, count() FROM numbers(10) WHERE 0 GROUP BY a SETTINGS empty_result_for_aggregation_by_constant_keys_on_empty_set = 0; + +SELECT 1 as a, count() FROM numbers(10) WHERE 0 GROUP BY a SETTINGS empty_result_for_aggregation_by_constant_keys_on_empty_set = 0, optimize_trivial_count_query = 0; diff --git a/tests/queries/0_stateless/01927_query_views_log_current_database.reference b/tests/queries/0_stateless/01927_query_views_log_current_database.reference index eaa1e98c55c..64bf1e501f3 100644 --- a/tests/queries/0_stateless/01927_query_views_log_current_database.reference +++ b/tests/queries/0_stateless/01927_query_views_log_current_database.reference @@ -1,15 +1,15 @@ Row 1: ────── stage: Query log rows -read_rows: 400 +read_rows: 300 written_rows: 201 databases: ['_table_function','default'] -tables: ['_table_function.numbers','default.table_a','default.table_b','default.table_b_live_view','default.table_c'] -views: ['default.matview_a_to_b','default.matview_b_to_c','default.table_b_live_view'] +tables: ['_table_function.numbers','default.table_a','default.table_b','default.table_c'] +views: ['default.matview_a_to_b','default.matview_b_to_c'] sleep_calls: 200 sleep_us: 298 -profile_select_rows: 400 -profile_select_bytes: 5200 +profile_select_rows: 300 +profile_select_bytes: 3600 profile_insert_rows: 201 profile_insert_bytes: 2808 Row 1: @@ -45,23 +45,6 @@ profile_select_rows: 100 profile_select_bytes: 800 profile_insert_rows: 1 profile_insert_bytes: 8 - -Row 3: -────── -stage: Depending views -view_name: default.table_b_live_view -view_type: Live -status: QueryFinish -view_target: default.table_b_live_view -view_query: SELECT sum(a + b) FROM default.table_b -read_rows: 100 -written_rows: 0 
-sleep_calls: 0 -sleep_us: 0 -profile_select_rows: 100 -profile_select_bytes: 1600 -profile_insert_rows: 0 -profile_insert_bytes: 0 Row 1: ────── stage: Query log rows 2 diff --git a/tests/queries/0_stateless/01927_query_views_log_current_database.sql b/tests/queries/0_stateless/01927_query_views_log_current_database.sql index fbfbeab0167..ba42795333c 100644 --- a/tests/queries/0_stateless/01927_query_views_log_current_database.sql +++ b/tests/queries/0_stateless/01927_query_views_log_current_database.sql @@ -1,4 +1,3 @@ -SET allow_experimental_live_view = 1; SET log_queries=0; SET log_query_threads=0; @@ -16,11 +15,6 @@ CREATE MATERIALIZED VIEW matview_a_to_b TO table_b AS SELECT toFloat64(a) AS a, CREATE MATERIALIZED VIEW matview_b_to_c TO table_c AS SELECT SUM(a + sleepEachRow(0.000002)) as a FROM table_b; CREATE MATERIALIZED VIEW matview_join_d_e TO table_f AS SELECT table_d.a as a, table_e.count + sleepEachRow(0.000003) as count FROM table_d LEFT JOIN table_e ON table_d.a = table_e.a; --- SETUP LIVE VIEW ----- table_b_live_view (Int64) -DROP TABLE IF EXISTS table_b_live_view; -CREATE LIVE VIEW table_b_live_view AS SELECT sum(a + b) FROM table_b; - -- ENABLE LOGS SET log_query_views=1; SET log_queries_min_type='QUERY_FINISH'; @@ -36,7 +30,6 @@ SYSTEM FLUSH LOGS; -- CHECK LOGS OF INSERT 1 --- Note that live views currently don't report written rows SELECT 'Query log rows' as stage, read_rows, @@ -133,7 +126,6 @@ ORDER BY view_name FORMAT Vertical; -- TEARDOWN -DROP TABLE table_b_live_view; DROP TABLE matview_a_to_b; DROP TABLE matview_b_to_c; DROP TABLE matview_join_d_e; diff --git a/tests/queries/0_stateless/01934_constexpr_aggregate_function_parameters.sql b/tests/queries/0_stateless/01934_constexpr_aggregate_function_parameters.sql index 730313f1daa..95d411c4cec 100644 --- a/tests/queries/0_stateless/01934_constexpr_aggregate_function_parameters.sql +++ b/tests/queries/0_stateless/01934_constexpr_aggregate_function_parameters.sql @@ -1,12 +1,10 @@ -SET allow_experimental_analyzer = 1; - SELECT groupArray(2 + 3)(number) FROM numbers(10); SELECT groupArray('5'::UInt8)(number) FROM numbers(10); SELECT groupArray(NULL)(number) FROM numbers(10); -- { serverError 36 } SELECT groupArray(NULL + NULL)(number) FROM numbers(10); -- { serverError 36 } SELECT groupArray([])(number) FROM numbers(10); -- { serverError 36 } -SELECT groupArray(throwIf(1))(number) FROM numbers(10); -- { serverError 36 } +SELECT groupArray(throwIf(1))(number) FROM numbers(10); -- { serverError 36, 134 } -- Not the best error message, can be improved. 
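+-- (Two codes are listed because, with the pinned allow_experimental_analyzer setting removed above, the test has to pass under both the old and the new analyzer, which appear to report different error codes here.)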
-SELECT groupArray(number)(number) FROM numbers(10); -- { serverError 36 } +SELECT groupArray(number)(number) FROM numbers(10); -- { serverError 36, 47 } diff --git a/tests/queries/0_stateless/01936_quantiles_cannot_return_null.sql b/tests/queries/0_stateless/01936_quantiles_cannot_return_null.sql index 81ac6224268..46fd8adf570 100644 --- a/tests/queries/0_stateless/01936_quantiles_cannot_return_null.sql +++ b/tests/queries/0_stateless/01936_quantiles_cannot_return_null.sql @@ -1,9 +1,9 @@ set aggregate_functions_null_for_empty=0; -SELECT quantiles(0.95)(x) FROM (SELECT 1 x WHERE 0); -SELECT quantiles(0.95)(number) FROM (SELECT number FROM numbers(10) WHERE number > 10); +SELECT quantiles(0.95)(x) FROM (SELECT 1 x WHERE 0); +SELECT quantiles(0.95)(number) FROM (SELECT number FROM numbers(10) WHERE number > 10); set aggregate_functions_null_for_empty=1; -SELECT quantiles(0.95)(x) FROM (SELECT 1 x WHERE 0); -SELECT quantiles(0.95)(number) FROM (SELECT number FROM numbers(10) WHERE number > 10); +SELECT quantiles(0.95)(x) FROM (SELECT 1 x WHERE 0); +SELECT quantiles(0.95)(number) FROM (SELECT number FROM numbers(10) WHERE number > 10); diff --git a/tests/queries/0_stateless/01936_three_parts_identifiers_in_wrong_places.sql b/tests/queries/0_stateless/01936_three_parts_identifiers_in_wrong_places.sql index d2ca771edc5..83dd708c575 100644 --- a/tests/queries/0_stateless/01936_three_parts_identifiers_in_wrong_places.sql +++ b/tests/queries/0_stateless/01936_three_parts_identifiers_in_wrong_places.sql @@ -1,7 +1,9 @@ -SELECT dictGet(t.nest.a, concat(currentDatabase(), '.dict.dict'), 's', number) FROM numbers(5); -- { serverError 47 } +SET allow_experimental_analyzer = 1; -SELECT dictGetFloat64(t.b.s, 'database_for_dict.dict1', dictGetFloat64('Ta\0', toUInt64('databas\0_for_dict.dict1databas\0_for_dict.dict1', dictGetFloat64('', '', toUInt64(1048577), toDate(NULL)), NULL), toDate(dictGetFloat64(257, 'database_for_dict.dict1database_for_dict.dict1', '', toUInt64(NULL), 2, toDate(NULL)), '2019-05-2\0')), NULL, toUInt64(dictGetFloat64('', '', toUInt64(-9223372036854775808), toDate(NULL)), NULL)); -- { serverError 47 } +SELECT dictGet(t.nest.a, concat(currentDatabase(), '.dict.dict'), 's', number) FROM numbers(5); -- { serverError 36 } -SELECT NULL AND (2147483648 AND NULL) AND -2147483647, toUUID(((1048576 AND NULL) AND (2147483647 AND 257 AND NULL AND -2147483649) AND NULL) IN (test_01103.t1_distr.id), '00000000-e1fe-11e\0-bb8f\0853d60c00749'), stringToH3('89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff'); -- { serverError 47 } +SELECT dictGetFloat64(t.b.s, 'database_for_dict.dict1', dictGetFloat64('Ta\0', toUInt64('databas\0_for_dict.dict1databas\0_for_dict.dict1', dictGetFloat64('', '', toUInt64(1048577), toDate(NULL)), NULL), toDate(dictGetFloat64(257, 'database_for_dict.dict1database_for_dict.dict1', '', toUInt64(NULL), 2, toDate(NULL)), '2019-05-2\0')), NULL, toUInt64(dictGetFloat64('', '', toUInt64(-9223372036854775808), toDate(NULL)), NULL)); -- { serverError 36 } + +SELECT NULL AND (2147483648 AND NULL) AND -2147483647, toUUID(((1048576 AND NULL) AND (2147483647 AND 257 AND NULL AND -2147483649) AND NULL) IN (test_01103.t1_distr.id), '00000000-e1fe-11e\0-bb8f\0853d60c00749'), stringToH3('89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff'); -- { serverError 36 } SELECT 'still alive'; diff --git 
a/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh b/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh index 47cf6e06b48..2f8d8f06dee 100755 --- a/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh +++ b/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh @@ -15,8 +15,8 @@ ${CLICKHOUSE_CLIENT} --multiline --multiquery --query " set min_chunk_bytes_for_parallel_parsing=10485760; set max_read_buffer_size = 65536; set input_format_parallel_parsing = 0; -select * from file('test_01946.zstd', 'JSONEachRow', 'foo String') limit 30 format Null; +select * from file('test_01946.zstd', 'JSONEachRow', 'foo String') order by foo limit 30 format Null; set input_format_parallel_parsing = 1; -select * from file('test_01946.zstd', 'JSONEachRow', 'foo String') limit 30 format Null; +select * from file('test_01946.zstd', 'JSONEachRow', 'foo String') order by foo limit 30 format Null; " diff --git a/tests/queries/0_stateless/01951_distributed_push_down_limit.reference b/tests/queries/0_stateless/01951_distributed_push_down_limit.reference index 7f73a8c6554..b9a7d17e955 100644 --- a/tests/queries/0_stateless/01951_distributed_push_down_limit.reference +++ b/tests/queries/0_stateless/01951_distributed_push_down_limit.reference @@ -13,8 +13,7 @@ Expression (Projection) Limit (preliminary LIMIT (without OFFSET)) Sorting (Merge sorted streams after aggregation stage for ORDER BY) Union - Limit (preliminary LIMIT (with OFFSET)) - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY))))) - ReadFromStorage (SystemNumbers) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY))))) + ReadFromStorage (SystemNumbers) ReadFromRemote (Read from remote replica) diff --git a/tests/queries/0_stateless/01956_skip_unavailable_shards_excessive_attempts.reference b/tests/queries/0_stateless/01956_skip_unavailable_shards_excessive_attempts.reference index f2322e4ffc4..e39f4b962e6 100644 --- a/tests/queries/0_stateless/01956_skip_unavailable_shards_excessive_attempts.reference +++ b/tests/queries/0_stateless/01956_skip_unavailable_shards_excessive_attempts.reference @@ -1 +1,10 @@ -Connection failed at try №1, +255.255.255.255 +HedgedConnectionsFactory: Connection failed at try №1 +executeQuery: Code: 519.: All attempts to get table structure failed. +127.2,255.255.255.255 +0 +HedgedConnectionsFactory: Connection failed at try №1 +255.255.255.255,127.2 +0 +HedgedConnectionsFactory: Connection failed at try №1 +HedgedConnectionsFactory: Connection failed at try №1 diff --git a/tests/queries/0_stateless/01956_skip_unavailable_shards_excessive_attempts.sh b/tests/queries/0_stateless/01956_skip_unavailable_shards_excessive_attempts.sh index 3ddb6346631..488e2fe106a 100755 --- a/tests/queries/0_stateless/01956_skip_unavailable_shards_excessive_attempts.sh +++ b/tests/queries/0_stateless/01956_skip_unavailable_shards_excessive_attempts.sh @@ -1,14 +1,36 @@ #!/usr/bin/env bash -# Tags: shard - -CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=trace CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -opts=( - "--connections_with_failover_max_tries=1" - "--skip_unavailable_shards=1" -) -$CLICKHOUSE_CLIENT --query "select * from remote('255.255.255.255', system.one)" "${opts[@]}" 2>&1 | grep -o 'Connection failed at try.*,' +stderr="$(mktemp "$CURDIR/clickhouse.stderr.XXXXXX.log")" +trap 'rm -f "$stderr"' EXIT + +function process_log_safe() +{ + grep "^\\[" "$@" | sed -e 's/.*> //' -e 's/, reason.*//' -e 's/ DB::NetException//' -e 's/ Log: //' +} +function execute_query() +{ + local hosts=$1 && shift + local opts=( + "--connections_with_failover_max_tries=1" + "--skip_unavailable_shards=1" + ) + + echo "$hosts" + # NOTE: we cannot use process substition here for simplicity because they are async, i.e.: + # + # clickhouse-client 2> >(wc -l) + # + # May dump output of "wc -l" after some other programs. + $CLICKHOUSE_CLIENT "${opts[@]}" --query "select * from remote('$hosts', system.one)" 2>"$stderr" + process_log_safe "$stderr" +} +execute_query 255.255.255.255 +execute_query 127.2,255.255.255.255 +# This will print two errors because there will be two attempts for 255.255.255.255: +# - first for obtaining structure of the table +# - second for the query +execute_query 255.255.255.255,127.2 diff --git a/tests/queries/0_stateless/02001_append_output_file.reference b/tests/queries/0_stateless/02001_append_output_file.reference new file mode 100644 index 00000000000..6f51dfc24e1 --- /dev/null +++ b/tests/queries/0_stateless/02001_append_output_file.reference @@ -0,0 +1,2 @@ +Hello, World! From client. +Hello, World! From local. diff --git a/tests/queries/0_stateless/02001_append_output_file.sh b/tests/queries/0_stateless/02001_append_output_file.sh new file mode 100755 index 00000000000..47ac0183d91 --- /dev/null +++ b/tests/queries/0_stateless/02001_append_output_file.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -e + +[ -e "${CLICKHOUSE_TMP}"/test_append_to_output_file ] && rm "${CLICKHOUSE_TMP}"/test_append_to_output_file + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM (SELECT 'Hello, World! From client.') INTO OUTFILE '${CLICKHOUSE_TMP}/test_append_to_output_file'" +${CLICKHOUSE_LOCAL} --query "SELECT * FROM (SELECT 'Hello, World! 
From local.') INTO OUTFILE '${CLICKHOUSE_TMP}/test_append_to_output_file' APPEND" +cat ${CLICKHOUSE_TMP}/test_append_to_output_file + +rm -f "${CLICKHOUSE_TMP}/test_append_to_output_file" diff --git a/tests/queries/0_stateless/02002_global_subqueries_subquery_or_table_name.sql b/tests/queries/0_stateless/02002_global_subqueries_subquery_or_table_name.sql index 7d9c08c7cb6..8ac8dc35276 100644 --- a/tests/queries/0_stateless/02002_global_subqueries_subquery_or_table_name.sql +++ b/tests/queries/0_stateless/02002_global_subqueries_subquery_or_table_name.sql @@ -4,4 +4,4 @@ SELECT cityHash64(number GLOBAL IN (NULL, -2147483648, -9223372036854775808), nan, 1024, NULL, NULL, 1.000100016593933, NULL), (NULL, cityHash64(inf, -2147483648, NULL, NULL, 10.000100135803223), cityHash64(1.1754943508222875e-38, NULL, NULL, NULL), 2147483647) FROM cluster(test_cluster_two_shards_localhost, numbers((NULL, cityHash64(0., 65536, NULL, NULL, 10000000000., NULL), 0) GLOBAL IN (some_identifier), 65536)) -WHERE number GLOBAL IN [1025] --{serverError 284} +WHERE number GLOBAL IN [1025] --{serverError 36, 284} diff --git a/tests/queries/0_stateless/02006_test_positional_arguments.reference b/tests/queries/0_stateless/02006_test_positional_arguments.reference index e2bbea2149d..40100e8d5be 100644 --- a/tests/queries/0_stateless/02006_test_positional_arguments.reference +++ b/tests/queries/0_stateless/02006_test_positional_arguments.reference @@ -86,8 +86,8 @@ FROM test GROUP BY 1 + greatest(x1, 1), x2 -select max(x1), x2 from test group by 1, 2; -- { serverError 43 } -select 1 + max(x1), x2 from test group by 1, 2; -- { serverError 43 } +select max(x1), x2 from test group by 1, 2; -- { serverError 43, 184 } +select 1 + max(x1), x2 from test group by 1, 2; -- { serverError 43, 184 } explain syntax select x1 + x3, x3 from test group by 1, 2; SELECT x1 + x3, diff --git a/tests/queries/0_stateless/02006_test_positional_arguments.sql b/tests/queries/0_stateless/02006_test_positional_arguments.sql index 67f4fe24c55..159ad6bd427 100644 --- a/tests/queries/0_stateless/02006_test_positional_arguments.sql +++ b/tests/queries/0_stateless/02006_test_positional_arguments.sql @@ -30,8 +30,8 @@ explain syntax select x3, if(x3 > 10, x3, plus(x1, x2)), x1 + x2 from test order explain syntax select max(x1), x2 from test group by 2 order by 1, 2; explain syntax select 1 + greatest(x1, 1), x2 from test group by 1, 2; -select max(x1), x2 from test group by 1, 2; -- { serverError 43 } -select 1 + max(x1), x2 from test group by 1, 2; -- { serverError 43 } +select max(x1), x2 from test group by 1, 2; -- { serverError 43, 184 } +select 1 + max(x1), x2 from test group by 1, 2; -- { serverError 43, 184 } explain syntax select x1 + x3, x3 from test group by 1, 2; diff --git a/tests/queries/0_stateless/02008_materialize_column.sql b/tests/queries/0_stateless/02008_materialize_column.sql index 8a8eb2afe83..a78920d2525 100644 --- a/tests/queries/0_stateless/02008_materialize_column.sql +++ b/tests/queries/0_stateless/02008_materialize_column.sql @@ -8,33 +8,33 @@ INSERT INTO tmp SELECT * FROM system.numbers LIMIT 20; ALTER TABLE tmp MATERIALIZE COLUMN x; -- { serverError 36 } ALTER TABLE tmp ADD COLUMN s String DEFAULT toString(x); -SELECT groupArray(x), groupArray(s) FROM tmp; +SELECT arraySort(arraySort(groupArray(x))), groupArray(s) FROM tmp; ALTER TABLE tmp MODIFY COLUMN s String DEFAULT toString(x+1); -SELECT groupArray(x), groupArray(s) FROM tmp; +SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp; ALTER TABLE tmp MATERIALIZE COLUMN 
s; ALTER TABLE tmp MODIFY COLUMN s String DEFAULT toString(x+2); -SELECT groupArray(x), groupArray(s) FROM tmp; +SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp; ALTER TABLE tmp MATERIALIZE COLUMN s; ALTER TABLE tmp MODIFY COLUMN s String DEFAULT toString(x+3); -SELECT groupArray(x), groupArray(s) FROM tmp; +SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp; ALTER TABLE tmp DROP COLUMN s; ALTER TABLE tmp ADD COLUMN s String MATERIALIZED toString(x); -SELECT groupArray(x), groupArray(s) FROM tmp; +SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp; ALTER TABLE tmp MODIFY COLUMN s String MATERIALIZED toString(x+1); -SELECT groupArray(x), groupArray(s) FROM tmp; +SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp; ALTER TABLE tmp MATERIALIZE COLUMN s; ALTER TABLE tmp MODIFY COLUMN s String MATERIALIZED toString(x+2); -SELECT groupArray(x), groupArray(s) FROM tmp; +SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp; ALTER TABLE tmp MATERIALIZE COLUMN s; ALTER TABLE tmp MODIFY COLUMN s String MATERIALIZED toString(x+3); -SELECT groupArray(x), groupArray(s) FROM tmp; +SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp; ALTER TABLE tmp DROP COLUMN s; DROP TABLE tmp; diff --git a/tests/queries/0_stateless/02008_tuple_to_name_value_pairs.sql b/tests/queries/0_stateless/02008_tuple_to_name_value_pairs.sql index 59987a86590..1f6026bb61e 100644 --- a/tests/queries/0_stateless/02008_tuple_to_name_value_pairs.sql +++ b/tests/queries/0_stateless/02008_tuple_to_name_value_pairs.sql @@ -19,7 +19,7 @@ INSERT INTO test02008 VALUES (tuple(3.3, 5.5, 6.6)); SELECT untuple(arrayJoin(tupleToNameValuePairs(col))) from test02008; DROP TABLE IF EXISTS test02008; -SELECT tupleToNameValuePairs(tuple(1, 1.3)); -- { serverError 43; } -SELECT tupleToNameValuePairs(tuple(1, [1,2])); -- { serverError 43; } -SELECT tupleToNameValuePairs(tuple(1, 'a')); -- { serverError 43; } -SELECT tupleToNameValuePairs(33); -- { serverError 43; } +SELECT tupleToNameValuePairs(tuple(1, 1.3)); -- { serverError 43 } +SELECT tupleToNameValuePairs(tuple(1, [1,2])); -- { serverError 43 } +SELECT tupleToNameValuePairs(tuple(1, 'a')); -- { serverError 43 } +SELECT tupleToNameValuePairs(33); -- { serverError 43 } diff --git a/tests/queries/0_stateless/02010_lc_native.python b/tests/queries/0_stateless/02010_lc_native.python index e6d6f9e1317..a197d32a3b9 100755 --- a/tests/queries/0_stateless/02010_lc_native.python +++ b/tests/queries/0_stateless/02010_lc_native.python @@ -5,13 +5,13 @@ import socket import os import uuid -CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1') -CLICKHOUSE_PORT = int(os.environ.get('CLICKHOUSE_PORT_TCP', '900000')) -CLICKHOUSE_DATABASE = os.environ.get('CLICKHOUSE_DATABASE', 'default') +CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1") +CLICKHOUSE_PORT = int(os.environ.get("CLICKHOUSE_PORT_TCP", "900000")) +CLICKHOUSE_DATABASE = os.environ.get("CLICKHOUSE_DATABASE", "default") + def writeVarUInt(x, ba): for _ in range(0, 9): - byte = x & 0x7F if x > 0x7F: byte |= 0x80 @@ -24,12 +24,12 @@ def writeVarUInt(x, ba): def writeStringBinary(s, ba): - b = bytes(s, 'utf-8') + b = bytes(s, "utf-8") writeVarUInt(len(s), ba) ba.extend(b) -def readStrict(s, size = 1): +def readStrict(s, size=1): res = bytearray() while size: cur = s.recv(size) @@ -48,18 +48,23 @@ def readUInt(s, size=1): val += res[i] << (i * 8) return val + def readUInt8(s): return readUInt(s) + def readUInt16(s): return readUInt(s, 2) + def readUInt32(s): return readUInt(s, 4) + def readUInt64(s): 
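    # like readUInt8/16/32 above, this decodes the bytes little-endian via readUInt (val += res[i] << (i * 8))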
return readUInt(s, 8) + def readVarUInt(s): x = 0 for i in range(9): @@ -75,25 +80,25 @@ def readVarUInt(s): def readStringBinary(s): size = readVarUInt(s) s = readStrict(s, size) - return s.decode('utf-8') + return s.decode("utf-8") def sendHello(s): ba = bytearray() - writeVarUInt(0, ba) # Hello - writeStringBinary('simple native protocol', ba) + writeVarUInt(0, ba) # Hello + writeStringBinary("simple native protocol", ba) writeVarUInt(21, ba) writeVarUInt(9, ba) writeVarUInt(54449, ba) - writeStringBinary('default', ba) # database - writeStringBinary('default', ba) # user - writeStringBinary('', ba) # pwd + writeStringBinary("default", ba) # database + writeStringBinary("default", ba) # user + writeStringBinary("", ba) # pwd s.sendall(ba) def receiveHello(s): p_type = readVarUInt(s) - assert (p_type == 0) # Hello + assert p_type == 0 # Hello server_name = readStringBinary(s) # print("Server name: ", server_name) server_version_major = readVarUInt(s) @@ -111,78 +116,79 @@ def receiveHello(s): def serializeClientInfo(ba, query_id): - writeStringBinary('default', ba) # initial_user - writeStringBinary(query_id, ba) # initial_query_id - writeStringBinary('127.0.0.1:9000', ba) # initial_address - ba.extend([0] * 8) # initial_query_start_time_microseconds - ba.append(1) # TCP - writeStringBinary('os_user', ba) # os_user - writeStringBinary('client_hostname', ba) # client_hostname - writeStringBinary('client_name', ba) # client_name + writeStringBinary("default", ba) # initial_user + writeStringBinary(query_id, ba) # initial_query_id + writeStringBinary("127.0.0.1:9000", ba) # initial_address + ba.extend([0] * 8) # initial_query_start_time_microseconds + ba.append(1) # TCP + writeStringBinary("os_user", ba) # os_user + writeStringBinary("client_hostname", ba) # client_hostname + writeStringBinary("client_name", ba) # client_name writeVarUInt(21, ba) writeVarUInt(9, ba) writeVarUInt(54449, ba) - writeStringBinary('', ba) # quota_key - writeVarUInt(0, ba) # distributed_depth - writeVarUInt(1, ba) # client_version_patch - ba.append(0) # No telemetry + writeStringBinary("", ba) # quota_key + writeVarUInt(0, ba) # distributed_depth + writeVarUInt(1, ba) # client_version_patch + ba.append(0) # No telemetry def sendQuery(s, query): ba = bytearray() query_id = uuid.uuid4().hex - writeVarUInt(1, ba) # query + writeVarUInt(1, ba) # query writeStringBinary(query_id, ba) - ba.append(1) # INITIAL_QUERY + ba.append(1) # INITIAL_QUERY # client info serializeClientInfo(ba, query_id) - writeStringBinary('', ba) # No settings - writeStringBinary('', ba) # No interserver secret - writeVarUInt(2, ba) # Stage - Complete - ba.append(0) # No compression - writeStringBinary(query, ba) # query, finally + writeStringBinary("", ba) # No settings + writeStringBinary("", ba) # No interserver secret + writeVarUInt(2, ba) # Stage - Complete + ba.append(0) # No compression + writeStringBinary(query, ba) # query, finally s.sendall(ba) def serializeBlockInfo(ba): - writeVarUInt(1, ba) # 1 - ba.append(0) # is_overflows - writeVarUInt(2, ba) # 2 - writeVarUInt(0, ba) # 0 - ba.extend([0] * 4) # bucket_num + writeVarUInt(1, ba) # 1 + ba.append(0) # is_overflows + writeVarUInt(2, ba) # 2 + writeVarUInt(0, ba) # 0 + ba.extend([0] * 4) # bucket_num def sendEmptyBlock(s): ba = bytearray() - writeVarUInt(2, ba) # Data - writeStringBinary('', ba) + writeVarUInt(2, ba) # Data + writeStringBinary("", ba) serializeBlockInfo(ba) - writeVarUInt(0, ba) # rows - writeVarUInt(0, ba) # columns + writeVarUInt(0, ba) # rows + writeVarUInt(0, ba) # 
columns s.sendall(ba) def assertPacket(packet, expected): - assert(packet == expected), packet + assert packet == expected, packet + def readHeader(s): packet_type = readVarUInt(s) - if packet_type == 2: # Exception + if packet_type == 2: # Exception raise RuntimeError(readException(s)) - assertPacket(packet_type, 1) # Data + assertPacket(packet_type, 1) # Data - readStringBinary(s) # external table name + readStringBinary(s) # external table name # BlockInfo - assertPacket(readVarUInt(s), 1) # 1 - assertPacket(readUInt8(s), 0) # is_overflows - assertPacket(readVarUInt(s), 2) # 2 - assertPacket(readUInt32(s), 4294967295) # bucket_num - assertPacket(readVarUInt(s), 0) # 0 - columns = readVarUInt(s) # rows - rows = readVarUInt(s) # columns + assertPacket(readVarUInt(s), 1) # 1 + assertPacket(readUInt8(s), 0) # is_overflows + assertPacket(readVarUInt(s), 2) # 2 + assertPacket(readUInt32(s), 4294967295) # bucket_num + assertPacket(readVarUInt(s), 0) # 0 + columns = readVarUInt(s) # rows + rows = readVarUInt(s) # columns print("Rows {} Columns {}".format(rows, columns)) for _ in range(columns): col_name = readStringBinary(s) @@ -194,9 +200,9 @@ def readException(s): code = readUInt32(s) name = readStringBinary(s) text = readStringBinary(s) - readStringBinary(s) # trace - assertPacket(readUInt8(s), 0) # has_nested - return "code {}: {}".format(code, text.replace('DB::Exception:', '')) + readStringBinary(s) # trace + assertPacket(readUInt8(s), 0) # has_nested + return "code {}: {}".format(code, text.replace("DB::Exception:", "")) def insertValidLowCardinalityRow(): @@ -205,7 +211,12 @@ def insertValidLowCardinalityRow(): s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) sendHello(s) receiveHello(s) - sendQuery(s, 'insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV'.format(CLICKHOUSE_DATABASE)) + sendQuery( + s, + "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format( + CLICKHOUSE_DATABASE + ), + ) # external tables sendEmptyBlock(s) @@ -213,25 +224,27 @@ def insertValidLowCardinalityRow(): # Data ba = bytearray() - writeVarUInt(2, ba) # Data - writeStringBinary('', ba) + writeVarUInt(2, ba) # Data + writeStringBinary("", ba) serializeBlockInfo(ba) - writeVarUInt(1, ba) # rows - writeVarUInt(1, ba) # columns - writeStringBinary('x', ba) - writeStringBinary('LowCardinality(String)', ba) - ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys - ba.extend([3, 2] + [0] * 6) # indexes type: UInt64 [3], with additional keys [2] - ba.extend([1] + [0] * 7) # num_keys in dict - writeStringBinary('hello', ba) # key - ba.extend([1] + [0] * 7) # num_indexes - ba.extend([0] * 8) # UInt64 index (0 for 'hello') + writeVarUInt(1, ba) # rows + writeVarUInt(1, ba) # columns + writeStringBinary("x", ba) + writeStringBinary("LowCardinality(String)", ba) + ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys + ba.extend( + [3, 2] + [0] * 6 + ) # indexes type: UInt64 [3], with additional keys [2] + ba.extend([1] + [0] * 7) # num_keys in dict + writeStringBinary("hello", ba) # key + ba.extend([1] + [0] * 7) # num_indexes + ba.extend([0] * 8) # UInt64 index (0 for 'hello') s.sendall(ba) # Fin block sendEmptyBlock(s) - assertPacket(readVarUInt(s), 5) # End of stream + assertPacket(readVarUInt(s), 5) # End of stream s.close() @@ -241,7 +254,12 @@ def insertLowCardinalityRowWithIndexOverflow(): s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) sendHello(s) receiveHello(s) - sendQuery(s, 'insert into {}.tab settings 
input_format_defaults_for_omitted_fields=0 format TSV'.format(CLICKHOUSE_DATABASE)) + sendQuery( + s, + "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format( + CLICKHOUSE_DATABASE + ), + ) # external tables sendEmptyBlock(s) @@ -249,19 +267,21 @@ def insertLowCardinalityRowWithIndexOverflow(): # Data ba = bytearray() - writeVarUInt(2, ba) # Data - writeStringBinary('', ba) + writeVarUInt(2, ba) # Data + writeStringBinary("", ba) serializeBlockInfo(ba) - writeVarUInt(1, ba) # rows - writeVarUInt(1, ba) # columns - writeStringBinary('x', ba) - writeStringBinary('LowCardinality(String)', ba) - ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys - ba.extend([3, 2] + [0] * 6) # indexes type: UInt64 [3], with additional keys [2] - ba.extend([1] + [0] * 7) # num_keys in dict - writeStringBinary('hello', ba) # key - ba.extend([1] + [0] * 7) # num_indexes - ba.extend([0] * 7 + [1]) # UInt64 index (overflow) + writeVarUInt(1, ba) # rows + writeVarUInt(1, ba) # columns + writeStringBinary("x", ba) + writeStringBinary("LowCardinality(String)", ba) + ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys + ba.extend( + [3, 2] + [0] * 6 + ) # indexes type: UInt64 [3], with additional keys [2] + ba.extend([1] + [0] * 7) # num_keys in dict + writeStringBinary("hello", ba) # key + ba.extend([1] + [0] * 7) # num_indexes + ba.extend([0] * 7 + [1]) # UInt64 index (overflow) s.sendall(ba) assertPacket(readVarUInt(s), 2) @@ -275,7 +295,12 @@ def insertLowCardinalityRowWithIncorrectDictType(): s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) sendHello(s) receiveHello(s) - sendQuery(s, 'insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV'.format(CLICKHOUSE_DATABASE)) + sendQuery( + s, + "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format( + CLICKHOUSE_DATABASE + ), + ) # external tables sendEmptyBlock(s) @@ -283,32 +308,40 @@ def insertLowCardinalityRowWithIncorrectDictType(): # Data ba = bytearray() - writeVarUInt(2, ba) # Data - writeStringBinary('', ba) + writeVarUInt(2, ba) # Data + writeStringBinary("", ba) serializeBlockInfo(ba) - writeVarUInt(1, ba) # rows - writeVarUInt(1, ba) # columns - writeStringBinary('x', ba) - writeStringBinary('LowCardinality(String)', ba) - ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys - ba.extend([3, 3] + [0] * 6) # indexes type: UInt64 [3], with global dict and add keys [1 + 2] - ba.extend([1] + [0] * 7) # num_keys in dict - writeStringBinary('hello', ba) # key - ba.extend([1] + [0] * 7) # num_indexes - ba.extend([0] * 8) # UInt64 index (overflow) + writeVarUInt(1, ba) # rows + writeVarUInt(1, ba) # columns + writeStringBinary("x", ba) + writeStringBinary("LowCardinality(String)", ba) + ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys + ba.extend( + [3, 3] + [0] * 6 + ) # indexes type: UInt64 [3], with global dict and add keys [1 + 2] + ba.extend([1] + [0] * 7) # num_keys in dict + writeStringBinary("hello", ba) # key + ba.extend([1] + [0] * 7) # num_indexes + ba.extend([0] * 8) # UInt64 index (overflow) s.sendall(ba) assertPacket(readVarUInt(s), 2) print(readException(s)) s.close() + def insertLowCardinalityRowWithIncorrectAdditionalKeys(): with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: s.settimeout(30) s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) sendHello(s) receiveHello(s) - sendQuery(s, 'insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV'.format(CLICKHOUSE_DATABASE)) + 
sendQuery( + s, + "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format( + CLICKHOUSE_DATABASE + ), + ) # external tables sendEmptyBlock(s) @@ -316,30 +349,34 @@ def insertLowCardinalityRowWithIncorrectAdditionalKeys(): # Data ba = bytearray() - writeVarUInt(2, ba) # Data - writeStringBinary('', ba) + writeVarUInt(2, ba) # Data + writeStringBinary("", ba) serializeBlockInfo(ba) - writeVarUInt(1, ba) # rows - writeVarUInt(1, ba) # columns - writeStringBinary('x', ba) - writeStringBinary('LowCardinality(String)', ba) - ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys - ba.extend([3, 0] + [0] * 6) # indexes type: UInt64 [3], with NO additional keys [0] - ba.extend([1] + [0] * 7) # num_keys in dict - writeStringBinary('hello', ba) # key - ba.extend([1] + [0] * 7) # num_indexes - ba.extend([0] * 8) # UInt64 index (0 for 'hello') + writeVarUInt(1, ba) # rows + writeVarUInt(1, ba) # columns + writeStringBinary("x", ba) + writeStringBinary("LowCardinality(String)", ba) + ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys + ba.extend( + [3, 0] + [0] * 6 + ) # indexes type: UInt64 [3], with NO additional keys [0] + ba.extend([1] + [0] * 7) # num_keys in dict + writeStringBinary("hello", ba) # key + ba.extend([1] + [0] * 7) # num_indexes + ba.extend([0] * 8) # UInt64 index (0 for 'hello') s.sendall(ba) assertPacket(readVarUInt(s), 2) print(readException(s)) s.close() + def main(): insertValidLowCardinalityRow() insertLowCardinalityRowWithIndexOverflow() insertLowCardinalityRowWithIncorrectDictType() insertLowCardinalityRowWithIncorrectAdditionalKeys() + if __name__ == "__main__": main() diff --git a/tests/queries/0_stateless/02015_async_inserts_4.sh b/tests/queries/0_stateless/02015_async_inserts_4.sh index 65598923b96..28f0e250630 100755 --- a/tests/queries/0_stateless/02015_async_inserts_4.sh +++ b/tests/queries/0_stateless/02015_async_inserts_4.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02015_async_inserts_7.sh b/tests/queries/0_stateless/02015_async_inserts_7.sh index c8cbbc48a29..29f908cdc90 100755 --- a/tests/queries/0_stateless/02015_async_inserts_7.sh +++ b/tests/queries/0_stateless/02015_async_inserts_7.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02016_aggregation_spark_bar.reference b/tests/queries/0_stateless/02016_aggregation_spark_bar.reference index 534942fc1d5..35a629d2bc0 100644 --- a/tests/queries/0_stateless/02016_aggregation_spark_bar.reference +++ b/tests/queries/0_stateless/02016_aggregation_spark_bar.reference @@ -46,7 +46,7 @@ SELECT sparkbar(5,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FRO ▃▄▆█ SELECT sparkbar(9,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FROM spark_bar_test; ▂▅▂▃▇▆█ -WITH number DIV 50 AS k, number % 50 AS value SELECT k, sparkbar(50, 0, 99)(number, value) FROM numbers(100) GROUP BY k ORDER BY k; +WITH number DIV 50 AS k, toUInt32(number % 50) AS value SELECT k, sparkbar(50, 0, 99)(number, value) FROM numbers(100) GROUP BY k ORDER BY k; 0 ▁▁▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇█ 1 ▁▁▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇█ SELECT sparkbar(128, 0, 9223372036854775806)(toUInt64(9223372036854775806), number % 65535) FROM numbers(100); @@ -54,7 +54,7 @@ SELECT sparkbar(128, 0, 
9223372036854775806)(toUInt64(9223372036854775806), numb SELECT sparkbar(128)(toUInt64(9223372036854775806), number % 65535) FROM numbers(100); █ SELECT sparkbar(9)(x, y) FROM (SELECT * FROM Values('x UInt64, y UInt8', (18446744073709551615,255), (0,0), (0,0), (4036797895307271799,254))); - ▇ █ + █ █ SELECT sparkbar(8, 0, 7)((number + 1) % 8, 1), sparkbar(8, 0, 7)((number + 2) % 8, 1), sparkbar(8, 0, 7)((number + 3) % 8, 1) FROM numbers(7); ███████ █ ██████ ██ █████ SELECT sparkbar(2)(number, -number) FROM numbers(10); diff --git a/tests/queries/0_stateless/02016_aggregation_spark_bar.sql b/tests/queries/0_stateless/02016_aggregation_spark_bar.sql index 2100a3dd4a6..4d2de566eda 100644 --- a/tests/queries/0_stateless/02016_aggregation_spark_bar.sql +++ b/tests/queries/0_stateless/02016_aggregation_spark_bar.sql @@ -33,7 +33,7 @@ SELECT sparkbar(4,toDate('2020-01-01'),toDate('2020-01-08'))(event_date,cnt) FRO SELECT sparkbar(5,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FROM spark_bar_test; SELECT sparkbar(9,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FROM spark_bar_test; -WITH number DIV 50 AS k, number % 50 AS value SELECT k, sparkbar(50, 0, 99)(number, value) FROM numbers(100) GROUP BY k ORDER BY k; +WITH number DIV 50 AS k, toUInt32(number % 50) AS value SELECT k, sparkbar(50, 0, 99)(number, value) FROM numbers(100) GROUP BY k ORDER BY k; SELECT sparkbar(128, 0, 9223372036854775806)(toUInt64(9223372036854775806), number % 65535) FROM numbers(100); SELECT sparkbar(128)(toUInt64(9223372036854775806), number % 65535) FROM numbers(100); @@ -59,4 +59,10 @@ SELECT sparkbar(2)(toInt32(number), number) FROM numbers(10); -- { serverError SELECT sparkbar(2, 0)(number, number) FROM numbers(10); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT sparkbar(2, 0, 5, 8)(number, number) FROM numbers(10); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +-- It causes overflow; just check that it doesn't crash under UBSan. Do not check the result, it's not really meaningful. +SELECT sparkbar(10)(number, toInt64(number)) FROM numbers(toUInt64(9223372036854775807), 20) FORMAT Null; +SELECT sparkbar(10)(number, -number) FROM numbers(toUInt64(9223372036854775807), 7) FORMAT Null; +SELECT sparkbar(10)(number, number) FROM numbers(18446744073709551615, 7) FORMAT Null; +SELECT sparkbar(16)(number, number) FROM numbers(18446744073709551600, 16) FORMAT Null; + DROP TABLE IF EXISTS spark_bar_test; diff --git a/tests/queries/0_stateless/02020_alter_table_modify_comment.sh b/tests/queries/0_stateless/02020_alter_table_modify_comment.sh index c674f21034c..3448f052f51 100755 --- a/tests/queries/0_stateless/02020_alter_table_modify_comment.sh +++ b/tests/queries/0_stateless/02020_alter_table_modify_comment.sh @@ -43,7 +43,7 @@ EOF get_table_comment_info echo detach table - $CLICKHOUSE_CLIENT --query="DETACH TABLE comment_test_table NO DELAY;" + $CLICKHOUSE_CLIENT --query="DETACH TABLE comment_test_table SYNC;" get_table_comment_info echo re-attach table diff --git a/tests/queries/0_stateless/02030_capnp_format.reference b/tests/queries/0_stateless/02030_capnp_format.reference index 8c3c81b5bc3..2b2307bfc6a 100644 --- a/tests/queries/0_stateless/02030_capnp_format.reference +++ b/tests/queries/0_stateless/02030_capnp_format.reference @@ -50,4 +50,3 @@ OK OK OK OK -OK diff --git a/tests/queries/0_stateless/02030_capnp_format.sh b/tests/queries/0_stateless/02030_capnp_format.sh index cdc1587bccd..c15d6fe442e 100755 ---
b/tests/queries/0_stateless/02030_capnp_format.sh @@ -109,8 +109,6 @@ $CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint6 $CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Enum(\'one\' = 1)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; $CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Tuple(UInt64)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; $CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Nullable(UInt64)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; -$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Int32') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; - $CLICKHOUSE_CLIENT --query="SELECT number AS a, toString(number) as b FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_unnamed_union:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; $CLICKHOUSE_CLIENT --query="SELECT toNullable(toString(number)) as nullable1 FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_fake_nullable:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; diff --git a/tests/queries/0_stateless/02030_function_mapContainsKeyLike.reference b/tests/queries/0_stateless/02030_function_mapContainsKeyLike.reference index eb8bc83384e..1f8f2bf0d0c 100644 --- a/tests/queries/0_stateless/02030_function_mapContainsKeyLike.reference +++ b/tests/queries/0_stateless/02030_function_mapContainsKeyLike.reference @@ -4,3 +4,11 @@ 4 {'4-K1':'4-V1','4-K2':'4-V2'} 5 {'5-K1':'5-V1','5-K2':'5-V2'} 6 {'6-K1':'6-V1','6-K2':'6-V2'} +1 +1 +1 +1 +1 +0 +{'aa':NULL} +{} diff --git a/tests/queries/0_stateless/02030_function_mapContainsKeyLike.sql b/tests/queries/0_stateless/02030_function_mapContainsKeyLike.sql index 7d9722b4c90..b04c5945a08 100644 --- a/tests/queries/0_stateless/02030_function_mapContainsKeyLike.sql +++ b/tests/queries/0_stateless/02030_function_mapContainsKeyLike.sql @@ -10,3 +10,14 @@ SELECT id, map FROM map_containsKeyLike_test WHERE mapContainsKeyLike(map, '1-%' SELECT id, map FROM map_containsKeyLike_test WHERE mapContainsKeyLike(map, '3-%') = 0 order by id; DROP TABLE map_containsKeyLike_test; + +SELECT mapContainsKeyLike(map('aa', 1, 'bb', 2), 'a%'); +SELECT mapContainsKeyLike(map('aa', 1, 'bb', 2), materialize('a%')); +SELECT mapContainsKeyLike(materialize(map('aa', 1, 'bb', 2)), 'a%'); +SELECT mapContainsKeyLike(materialize(map('aa', 1, 'bb', 2)), materialize('a%')); + +SELECT mapContainsKeyLike(map('aa', NULL, 'bb', NULL), 'a%'); +SELECT mapContainsKeyLike(map('aa', NULL, 'bb', NULL), 'q%'); + +SELECT mapExtractKeyLike(map('aa', NULL, 'bb', NULL), 'a%'); +SELECT mapExtractKeyLike(map('aa', NULL, 'bb', NULL), 'q%'); diff --git a/tests/queries/0_stateless/02048_clickhouse_local_stage.reference b/tests/queries/0_stateless/02048_clickhouse_local_stage.reference index 44c39f2a444..00e0f4ddb2e 100644 --- a/tests/queries/0_stateless/02048_clickhouse_local_stage.reference +++ b/tests/queries/0_stateless/02048_clickhouse_local_stage.reference @@ 
-1,15 +1,15 @@ -execute: default +execute: --allow_experimental_analyzer=1 "foo" 1 -execute: --stage fetch_columns -"dummy" +execute: --allow_experimental_analyzer=1 --stage fetch_columns +"system.one.dummy_0" 0 -execute: --stage with_mergeable_state -"1" +execute: --allow_experimental_analyzer=1 --stage with_mergeable_state +"1_UInt8" 1 -execute: --stage with_mergeable_state_after_aggregation -"1" +execute: --allow_experimental_analyzer=1 --stage with_mergeable_state_after_aggregation +"1_UInt8" 1 -execute: --stage complete +execute: --allow_experimental_analyzer=1 --stage complete "foo" 1 diff --git a/tests/queries/0_stateless/02048_clickhouse_local_stage.sh b/tests/queries/0_stateless/02048_clickhouse_local_stage.sh index 5c1303b5160..182acc23a13 100755 --- a/tests/queries/0_stateless/02048_clickhouse_local_stage.sh +++ b/tests/queries/0_stateless/02048_clickhouse_local_stage.sh @@ -5,6 +5,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +opts=( + "--allow_experimental_analyzer=1" +) + function execute_query() { if [ $# -eq 0 ]; then @@ -15,8 +19,8 @@ function execute_query() ${CLICKHOUSE_LOCAL} "$@" --format CSVWithNames -q "SELECT 1 AS foo" } -execute_query # default -- complete -execute_query --stage fetch_columns -execute_query --stage with_mergeable_state -execute_query --stage with_mergeable_state_after_aggregation -execute_query --stage complete +execute_query "${opts[@]}" # default -- complete +execute_query "${opts[@]}" --stage fetch_columns +execute_query "${opts[@]}" --stage with_mergeable_state +execute_query "${opts[@]}" --stage with_mergeable_state_after_aggregation +execute_query "${opts[@]}" --stage complete diff --git a/tests/queries/0_stateless/02048_parallel_reading_from_infile.sh b/tests/queries/0_stateless/02048_parallel_reading_from_infile.sh index d53fe8dd305..f055ea304b2 100755 --- a/tests/queries/0_stateless/02048_parallel_reading_from_infile.sh +++ b/tests/queries/0_stateless/02048_parallel_reading_from_infile.sh @@ -26,13 +26,12 @@ SELECT count() FROM test_infile_parallel WHERE Value='first'; SELECT count() FROM test_infile_parallel WHERE Value='second'; EOF -# Error code is 36 (BAD_ARGUMENTS). It is not ignored. -${CLICKHOUSE_CLIENT} --multiquery " -DROP TABLE IF EXISTS test_infile_parallel; +# Error code is 27 (DB::ParsingException). It is not ignored. 
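+# (The grep at the end of this pipeline only checks that the numeric code appears in stderr; 27 should correspond to CANNOT_PARSE_INPUT_ASSERTION_FAILED in ClickHouse's ErrorCodes.)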
+${CLICKHOUSE_CLIENT} -m --multiquery --query "DROP TABLE IF EXISTS test_infile_parallel; CREATE TABLE test_infile_parallel (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory(); SET input_format_allow_errors_num=0; INSERT INTO test_infile_parallel FROM INFILE '${CLICKHOUSE_TMP}/test_infile_parallel*' FORMAT TSV; -" 2>&1 | grep -q "36" && echo "Correct" || echo 'Fail' +" 2>&1 | grep -q "27" && echo "Correct" || echo 'Fail' ${CLICKHOUSE_LOCAL} --multiquery <&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +USER_FILES_PATH=$(clickhouse client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') DATA_FILE=$USER_FILES_PATH/test_02103.data @@ -14,7 +14,7 @@ FORMATS=('TSVWithNames' 'TSVWithNamesAndTypes' 'TSVRawWithNames' 'TSVRawWithName for format in "${FORMATS[@]}" do $CLICKHOUSE_CLIENT -q "SELECT number, range(number + 10) AS array, toString(number) AS string FROM numbers(10) FORMAT $format" > $DATA_FILE - $CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103.data', '$format', 'number UInt64, array Array(UInt64), string String') SETTINGS input_format_parallel_parsing=1, min_chunk_bytes_for_parallel_parsing=40" + $CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103.data', '$format', 'number UInt64, array Array(UInt64), string String') ORDER BY number SETTINGS input_format_parallel_parsing=1, min_chunk_bytes_for_parallel_parsing=40" done rm $DATA_FILE diff --git a/tests/queries/0_stateless/02111_function_mapExtractKeyLike.reference b/tests/queries/0_stateless/02111_function_mapExtractKeyLike.reference index 45edbc24c75..c6bd4c31538 100644 --- a/tests/queries/0_stateless/02111_function_mapExtractKeyLike.reference +++ b/tests/queries/0_stateless/02111_function_mapExtractKeyLike.reference @@ -21,3 +21,7 @@ The results of query: SELECT id, mapExtractKeyLike(map, \'5-K1\') FROM map_extra 4 {} 5 {'5-K1':'5-V1'} 6 {} +{'aa':1} +{'aa':1} +{'aa':1} +{'aa':1} diff --git a/tests/queries/0_stateless/02111_function_mapExtractKeyLike.sql b/tests/queries/0_stateless/02111_function_mapExtractKeyLike.sql index 31f53642b74..a17b6b74595 100644 --- a/tests/queries/0_stateless/02111_function_mapExtractKeyLike.sql +++ b/tests/queries/0_stateless/02111_function_mapExtractKeyLike.sql @@ -7,7 +7,7 @@ INSERT INTO map_extractKeyLike_test VALUES (3, {'P1-K1':'3-V1','P2-K2':'3-V2'}), INSERT INTO map_extractKeyLike_test VALUES (5, {'5-K1':'5-V1','5-K2':'5-V2'}),(6, {'P3-K1':'6-V1','P4-K2':'6-V2'}); SELECT 'The data of table:'; -SELECT * FROM map_extractKeyLike_test ORDER BY id; +SELECT * FROM map_extractKeyLike_test ORDER BY id; SELECT ''; @@ -20,3 +20,8 @@ SELECT 'The results of query: SELECT id, mapExtractKeyLike(map, \'5-K1\') FROM m SELECT id, mapExtractKeyLike(map, '5-K1') FROM map_extractKeyLike_test ORDER BY id; DROP TABLE map_extractKeyLike_test; + +SELECT mapExtractKeyLike(map('aa', 1, 'bb', 2), 'a%'); +SELECT mapExtractKeyLike(map('aa', 1, 'bb', 2), materialize('a%')); +SELECT mapExtractKeyLike(materialize(map('aa', 1, 'bb', 2)), 'a%'); +SELECT mapExtractKeyLike(materialize(map('aa', 1, 'bb', 2)), materialize('a%')); diff --git a/tests/queries/0_stateless/02116_tuple_element.sql b/tests/queries/0_stateless/02116_tuple_element.sql index 4ce6e5cf136..c911712684d 100644 --- a/tests/queries/0_stateless/02116_tuple_element.sql +++ b/tests/queries/0_stateless/02116_tuple_element.sql @@ -16,7 +16,7 @@ EXPLAIN SYNTAX SELECT tupleElement(t1, 'a') FROM t_tuple_element; SELECT tupleElement(number, 1) 
FROM numbers(1); -- { serverError 43 } SELECT tupleElement(t1) FROM t_tuple_element; -- { serverError 42 } -SELECT tupleElement(t1, 'b') FROM t_tuple_element; -- { serverError 47 } +SELECT tupleElement(t1, 'b') FROM t_tuple_element; -- { serverError 10, 47 } SELECT tupleElement(t1, 0) FROM t_tuple_element; -- { serverError 127 } SELECT tupleElement(t1, 3) FROM t_tuple_element; -- { serverError 127 } SELECT tupleElement(t1, materialize('a')) FROM t_tuple_element; -- { serverError 43 } @@ -28,7 +28,7 @@ SELECT tupleElement(t2, 1) FROM t_tuple_element; EXPLAIN SYNTAX SELECT tupleElement(t2, 1) FROM t_tuple_element; SELECT tupleElement(t2) FROM t_tuple_element; -- { serverError 42 } -SELECT tupleElement(t2, 'a') FROM t_tuple_element; -- { serverError 47 } +SELECT tupleElement(t2, 'a') FROM t_tuple_element; -- { serverError 10, 47 } SELECT tupleElement(t2, 0) FROM t_tuple_element; -- { serverError 127 } SELECT tupleElement(t2, 3) FROM t_tuple_element; -- { serverError 127 } SELECT tupleElement(t2, materialize(1)) FROM t_tuple_element; -- { serverError 43 } diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index f77076bcd5c..09cc62dac00 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -47,7 +47,10 @@ CREATE TABLE system.clusters `default_database` String, `errors_count` UInt32, `slowdowns_count` UInt32, - `estimated_recovery_time` UInt32 + `estimated_recovery_time` UInt32, + `database_shard_name` String, + `database_replica_name` String, + `is_active` Nullable(UInt8) ) ENGINE = SystemClusters COMMENT 'SYSTEM TABLE is built on the fly.' @@ -281,7 +284,12 @@ CREATE TABLE system.functions `alias_to` String, `create_query` String, `origin` Enum8('System' = 0, 'SQLUserDefined' = 1, 'ExecutableUserDefined' = 2), - `description` String + `description` String, + `syntax` String, + `arguments` String, + `returned_value` String, + `examples` String, + `categories` String ) ENGINE = SystemFunctions COMMENT 'SYSTEM TABLE is built on the fly.' 
@@ -289,7 +297,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE FUNCTION' = 53, 'CREATE NAMED COLLECTION' = 54, 'CREATE' = 55, 'DROP DATABASE' = 56, 'DROP TABLE' = 57, 'DROP VIEW' = 58, 'DROP DICTIONARY' = 59, 'DROP FUNCTION' = 60, 'DROP NAMED COLLECTION' = 61, 'DROP' = 62, 'TRUNCATE' = 63, 'OPTIMIZE' = 64, 'BACKUP' = 65, 'KILL QUERY' = 66, 'KILL TRANSACTION' = 67, 'MOVE PARTITION BETWEEN SHARDS' = 68, 'CREATE USER' = 69, 'ALTER USER' = 70, 'DROP USER' = 71, 'CREATE ROLE' = 72, 'ALTER ROLE' = 73, 'DROP ROLE' = 74, 'ROLE ADMIN' = 75, 'CREATE ROW POLICY' = 76, 'ALTER ROW POLICY' = 77, 'DROP ROW POLICY' = 78, 'CREATE QUOTA' = 79, 'ALTER QUOTA' = 80, 'DROP QUOTA' = 81, 'CREATE SETTINGS PROFILE' = 82, 'ALTER SETTINGS PROFILE' = 83, 'DROP SETTINGS PROFILE' = 84, 'SHOW USERS' = 85, 'SHOW ROLES' = 86, 'SHOW ROW POLICIES' = 87, 'SHOW QUOTAS' = 88, 'SHOW SETTINGS PROFILES' = 89, 'SHOW ACCESS' = 90, 'SHOW NAMED COLLECTIONS' = 91, 'SHOW NAMED COLLECTIONS SECRETS' = 92, 'ACCESS MANAGEMENT' = 93, 'SYSTEM SHUTDOWN' = 94, 'SYSTEM DROP DNS CACHE' = 95, 'SYSTEM DROP MARK CACHE' = 96, 'SYSTEM DROP UNCOMPRESSED CACHE' = 97, 'SYSTEM DROP MMAP CACHE' = 98, 'SYSTEM DROP QUERY CACHE' = 99, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 100, 'SYSTEM DROP FILESYSTEM CACHE' = 101, 'SYSTEM DROP SCHEMA CACHE' = 102, 'SYSTEM DROP S3 CLIENT CACHE' = 103, 'SYSTEM DROP CACHE' = 104, 'SYSTEM RELOAD CONFIG' = 105, 'SYSTEM RELOAD USERS' = 106, 'SYSTEM RELOAD SYMBOLS' = 107, 'SYSTEM RELOAD DICTIONARY' = 108, 'SYSTEM RELOAD MODEL' = 109, 'SYSTEM RELOAD FUNCTION' = 110, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 111, 'SYSTEM RELOAD' = 112, 'SYSTEM RESTART DISK' = 113, 'SYSTEM MERGES' = 114, 'SYSTEM TTL MERGES' = 115, 'SYSTEM FETCHES' = 116, 'SYSTEM MOVES' = 117, 'SYSTEM DISTRIBUTED SENDS' = 118, 'SYSTEM REPLICATED SENDS' = 119, 'SYSTEM SENDS' = 120, 'SYSTEM REPLICATION QUEUES' = 121, 'SYSTEM DROP REPLICA' = 122, 'SYSTEM SYNC REPLICA' = 123, 'SYSTEM RESTART REPLICA' = 124, 'SYSTEM RESTORE REPLICA' = 125, 'SYSTEM WAIT LOADING PARTS' = 126, 'SYSTEM SYNC DATABASE REPLICA' = 127, 'SYSTEM SYNC TRANSACTION LOG' = 128, 'SYSTEM SYNC FILE CACHE' = 129, 'SYSTEM 
FLUSH DISTRIBUTED' = 130, 'SYSTEM FLUSH LOGS' = 131, 'SYSTEM FLUSH' = 132, 'SYSTEM THREAD FUZZER' = 133, 'SYSTEM UNFREEZE' = 134, 'SYSTEM' = 135, 'dictGet' = 136, 'addressToLine' = 137, 'addressToLineWithInlines' = 138, 'addressToSymbol' = 139, 'demangle' = 140, 'INTROSPECTION' = 141, 'FILE' = 142, 'URL' = 143, 'REMOTE' = 144, 'MONGO' = 145, 'MEILISEARCH' = 146, 'MYSQL' = 147, 'POSTGRES' = 148, 'SQLITE' = 149, 'ODBC' = 150, 'JDBC' = 151, 'HDFS' = 152, 'S3' = 153, 'HIVE' = 154, 'SOURCES' = 155, 'CLUSTER' = 156, 'ALL' = 157, 'NONE' = 158), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM 
RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'MEILISEARCH' = 151, 'MYSQL' = 152, 'POSTGRES' = 153, 'SQLITE' = 154, 'ODBC' = 155, 'JDBC' = 156, 'HDFS' = 157, 'S3' = 158, 'HIVE' = 159, 'SOURCES' = 160, 'CLUSTER' = 161, 'ALL' = 162, 'NONE' = 163), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -356,6 +364,7 @@ CREATE TABLE system.merges `partition_id` String, `is_mutation` UInt8, `total_size_bytes_compressed` UInt64, + `total_size_bytes_uncompressed` UInt64, `total_size_marks` UInt64, `bytes_read_uncompressed` UInt64, `rows_read` UInt64, @@ -510,7 +519,8 @@ CREATE TABLE system.parts `last_removal_attemp_time` DateTime, `removal_state` String, `bytes` UInt64, - `marks_size` UInt64 + `marks_size` UInt64, + `part_name` String ) ENGINE = SystemParts COMMENT 'SYSTEM TABLE is built on the fly.' @@ -564,16 +574,17 @@ CREATE TABLE system.parts_columns `subcolumns.data_uncompressed_bytes` Array(UInt64), `subcolumns.marks_bytes` Array(UInt64), `bytes` UInt64, - `marks_size` UInt64 + `marks_size` UInt64, + `part_name` String ) ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' 
CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE FUNCTION' = 53, 'CREATE NAMED COLLECTION' = 54, 'CREATE' = 55, 'DROP DATABASE' = 56, 'DROP TABLE' = 57, 'DROP VIEW' = 58, 'DROP DICTIONARY' = 59, 'DROP FUNCTION' = 60, 'DROP NAMED COLLECTION' = 61, 'DROP' = 62, 'TRUNCATE' = 63, 'OPTIMIZE' = 64, 'BACKUP' = 65, 'KILL QUERY' = 66, 'KILL TRANSACTION' = 67, 'MOVE PARTITION BETWEEN SHARDS' = 68, 'CREATE USER' = 69, 'ALTER USER' = 70, 'DROP USER' = 71, 'CREATE ROLE' = 72, 'ALTER ROLE' = 73, 'DROP ROLE' = 74, 'ROLE ADMIN' = 75, 'CREATE ROW POLICY' = 76, 'ALTER ROW POLICY' = 77, 'DROP ROW POLICY' = 78, 'CREATE QUOTA' = 79, 'ALTER QUOTA' = 80, 'DROP QUOTA' = 81, 'CREATE SETTINGS PROFILE' = 82, 'ALTER SETTINGS PROFILE' = 83, 'DROP SETTINGS PROFILE' = 84, 'SHOW USERS' = 85, 'SHOW ROLES' = 86, 'SHOW ROW POLICIES' = 87, 'SHOW QUOTAS' = 88, 'SHOW SETTINGS PROFILES' = 89, 'SHOW ACCESS' = 90, 'SHOW NAMED COLLECTIONS' = 91, 'SHOW NAMED COLLECTIONS SECRETS' = 92, 'ACCESS MANAGEMENT' = 93, 'SYSTEM SHUTDOWN' = 94, 'SYSTEM DROP DNS CACHE' = 95, 'SYSTEM DROP MARK CACHE' = 96, 'SYSTEM DROP UNCOMPRESSED CACHE' = 97, 'SYSTEM DROP MMAP CACHE' = 98, 'SYSTEM DROP QUERY CACHE' = 99, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 100, 'SYSTEM DROP FILESYSTEM CACHE' = 101, 'SYSTEM DROP SCHEMA CACHE' = 102, 'SYSTEM DROP S3 CLIENT CACHE' = 103, 'SYSTEM DROP CACHE' = 104, 'SYSTEM RELOAD CONFIG' = 105, 'SYSTEM RELOAD USERS' = 106, 'SYSTEM RELOAD SYMBOLS' = 107, 'SYSTEM RELOAD DICTIONARY' = 108, 'SYSTEM RELOAD MODEL' = 109, 'SYSTEM RELOAD FUNCTION' = 110, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 111, 'SYSTEM RELOAD' = 112, 'SYSTEM RESTART DISK' = 113, 'SYSTEM MERGES' = 114, 'SYSTEM TTL MERGES' = 115, 'SYSTEM FETCHES' = 116, 'SYSTEM MOVES' = 117, 'SYSTEM DISTRIBUTED SENDS' = 118, 'SYSTEM REPLICATED SENDS' = 119, 'SYSTEM SENDS' = 120, 'SYSTEM REPLICATION QUEUES' = 121, 'SYSTEM DROP REPLICA' = 122, 'SYSTEM SYNC REPLICA' = 123, 'SYSTEM RESTART REPLICA' = 124, 'SYSTEM RESTORE REPLICA' = 125, 'SYSTEM WAIT LOADING PARTS' = 126, 'SYSTEM SYNC DATABASE REPLICA' = 127, 'SYSTEM SYNC TRANSACTION LOG' = 128, 'SYSTEM SYNC FILE CACHE' = 129, 'SYSTEM FLUSH DISTRIBUTED' = 130, 'SYSTEM FLUSH LOGS' = 131, 'SYSTEM FLUSH' = 132, 
'SYSTEM THREAD FUZZER' = 133, 'SYSTEM UNFREEZE' = 134, 'SYSTEM' = 135, 'dictGet' = 136, 'addressToLine' = 137, 'addressToLineWithInlines' = 138, 'addressToSymbol' = 139, 'demangle' = 140, 'INTROSPECTION' = 141, 'FILE' = 142, 'URL' = 143, 'REMOTE' = 144, 'MONGO' = 145, 'MEILISEARCH' = 146, 'MYSQL' = 147, 'POSTGRES' = 148, 'SQLITE' = 149, 'ODBC' = 150, 'JDBC' = 151, 'HDFS' = 152, 'S3' = 153, 'HIVE' = 154, 'SOURCES' = 155, 'CLUSTER' = 156, 'ALL' = 157, 'NONE' = 158), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL 
MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'MEILISEARCH' = 151, 'MYSQL' = 152, 'POSTGRES' = 153, 'SQLITE' = 154, 'ODBC' = 155, 'JDBC' = 156, 'HDFS' = 157, 'S3' = 158, 'HIVE' = 159, 'SOURCES' = 160, 'CLUSTER' = 161, 'ALL' = 162, 'NONE' = 163), `aliases` Array(String), - `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE FUNCTION' = 53, 'CREATE NAMED COLLECTION' = 54, 'CREATE' = 55, 'DROP DATABASE' = 56, 'DROP TABLE' = 57, 'DROP VIEW' = 58, 'DROP DICTIONARY' = 59, 'DROP FUNCTION' = 60, 'DROP NAMED COLLECTION' = 61, 'DROP' = 62, 'TRUNCATE' = 63, 'OPTIMIZE' = 64, 'BACKUP' = 65, 'KILL QUERY' = 66, 'KILL TRANSACTION' = 67, 'MOVE PARTITION BETWEEN SHARDS' = 68, 'CREATE USER' = 69, 'ALTER USER' = 70, 'DROP USER' = 71, 'CREATE ROLE' = 72, 'ALTER ROLE' = 73, 'DROP ROLE' = 74, 'ROLE ADMIN' = 75, 'CREATE ROW POLICY' = 76, 'ALTER ROW POLICY' = 77, 'DROP ROW POLICY' = 78, 'CREATE QUOTA' = 79, 'ALTER QUOTA' = 80, 'DROP QUOTA' = 81, 'CREATE SETTINGS PROFILE' = 82, 'ALTER SETTINGS PROFILE' = 83, 'DROP SETTINGS PROFILE' = 84, 'SHOW USERS' = 85, 'SHOW ROLES' = 86, 'SHOW ROW POLICIES' = 87, 'SHOW QUOTAS' = 88, 'SHOW SETTINGS PROFILES' = 89, 'SHOW ACCESS' = 90, 'SHOW NAMED COLLECTIONS' = 91, 'SHOW NAMED COLLECTIONS SECRETS' = 92, 'ACCESS MANAGEMENT' = 93, 'SYSTEM SHUTDOWN' = 94, 'SYSTEM DROP DNS 
CACHE' = 95, 'SYSTEM DROP MARK CACHE' = 96, 'SYSTEM DROP UNCOMPRESSED CACHE' = 97, 'SYSTEM DROP MMAP CACHE' = 98, 'SYSTEM DROP QUERY CACHE' = 99, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 100, 'SYSTEM DROP FILESYSTEM CACHE' = 101, 'SYSTEM DROP SCHEMA CACHE' = 102, 'SYSTEM DROP S3 CLIENT CACHE' = 103, 'SYSTEM DROP CACHE' = 104, 'SYSTEM RELOAD CONFIG' = 105, 'SYSTEM RELOAD USERS' = 106, 'SYSTEM RELOAD SYMBOLS' = 107, 'SYSTEM RELOAD DICTIONARY' = 108, 'SYSTEM RELOAD MODEL' = 109, 'SYSTEM RELOAD FUNCTION' = 110, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 111, 'SYSTEM RELOAD' = 112, 'SYSTEM RESTART DISK' = 113, 'SYSTEM MERGES' = 114, 'SYSTEM TTL MERGES' = 115, 'SYSTEM FETCHES' = 116, 'SYSTEM MOVES' = 117, 'SYSTEM DISTRIBUTED SENDS' = 118, 'SYSTEM REPLICATED SENDS' = 119, 'SYSTEM SENDS' = 120, 'SYSTEM REPLICATION QUEUES' = 121, 'SYSTEM DROP REPLICA' = 122, 'SYSTEM SYNC REPLICA' = 123, 'SYSTEM RESTART REPLICA' = 124, 'SYSTEM RESTORE REPLICA' = 125, 'SYSTEM WAIT LOADING PARTS' = 126, 'SYSTEM SYNC DATABASE REPLICA' = 127, 'SYSTEM SYNC TRANSACTION LOG' = 128, 'SYSTEM SYNC FILE CACHE' = 129, 'SYSTEM FLUSH DISTRIBUTED' = 130, 'SYSTEM FLUSH LOGS' = 131, 'SYSTEM FLUSH' = 132, 'SYSTEM THREAD FUZZER' = 133, 'SYSTEM UNFREEZE' = 134, 'SYSTEM' = 135, 'dictGet' = 136, 'addressToLine' = 137, 'addressToLineWithInlines' = 138, 'addressToSymbol' = 139, 'demangle' = 140, 'INTROSPECTION' = 141, 'FILE' = 142, 'URL' = 143, 'REMOTE' = 144, 'MONGO' = 145, 'MEILISEARCH' = 146, 'MYSQL' = 147, 'POSTGRES' = 148, 'SQLITE' = 149, 'ODBC' = 150, 'JDBC' = 151, 'HDFS' = 152, 'S3' = 153, 'HIVE' = 154, 'SOURCES' = 155, 'CLUSTER' = 156, 'ALL' = 157, 'NONE' = 158)) + `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5, 'NAMED_COLLECTION' = 6)), + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN 
SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'MEILISEARCH' = 151, 'MYSQL' = 152, 'POSTGRES' = 153, 'SQLITE' = 154, 'ODBC' = 155, 'JDBC' = 156, 'HDFS' = 157, 'S3' = 158, 'HIVE' = 159, 'SOURCES' = 160, 'CLUSTER' = 161, 'ALL' = 162, 'NONE' = 163)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' @@ -685,7 +696,8 @@ CREATE TABLE system.projection_parts `rows_where_ttl_info.min` Array(DateTime), `rows_where_ttl_info.max` Array(DateTime), `bytes` UInt64, - `marks_size` UInt64 + `marks_size` UInt64, + `part_name` String ) ENGINE = SystemProjectionParts COMMENT 'SYSTEM TABLE is built on the fly.' @@ -739,7 +751,8 @@ CREATE TABLE system.projection_parts_columns `column_data_uncompressed_bytes` UInt64, `column_marks_bytes` UInt64, `bytes` UInt64, - `marks_size` UInt64 + `marks_size` UInt64, + `part_name` String ) ENGINE = SystemProjectionPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' 
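Not part of the patch: the system.parts, system.parts_columns, system.projection_parts and system.projection_parts_columns hunks above each gain the same `part_name` String column. As a quick illustration of the added column (a sketch against the schemas shown here, assuming only the pre-existing `active` flag of these tables), it can be queried directly:

SELECT database, table, part_name, bytes
FROM system.parts
WHERE active
ORDER BY bytes DESC
LIMIT 10;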
@@ -846,6 +859,7 @@ CREATE TABLE system.replicas
     `is_session_expired` UInt8,
     `future_parts` UInt32,
     `parts_to_check` UInt32,
+    `zookeeper_name` String,
     `zookeeper_path` String,
     `replica_name` String,
     `replica_path` String,
@@ -867,6 +881,7 @@ CREATE TABLE system.replicas
     `absolute_delay` UInt64,
     `total_replicas` UInt8,
     `active_replicas` UInt8,
+    `lost_part_count` UInt64,
     `last_queue_update_exception` String,
     `zookeeper_exception` String,
     `replica_is_active` Map(String, UInt8)
@@ -1024,10 +1039,12 @@ CREATE TABLE system.storage_policies
     `volume_name` String,
     `volume_priority` UInt64,
     `disks` Array(String),
-    `volume_type` String,
+    `volume_type` Enum8('JBOD' = 0, 'SINGLE_DISK' = 1, 'UNKNOWN' = 2),
     `max_data_part_size` UInt64,
     `move_factor` Float32,
-    `prefer_not_to_merge` UInt8
+    `prefer_not_to_merge` UInt8,
+    `perform_ttl_move_on_insert` UInt8,
+    `load_balancing` Enum8('ROUND_ROBIN' = 0, 'LEAST_USED' = 1)
 )
 ENGINE = SystemStoragePolicies
 COMMENT 'SYSTEM TABLE is built on the fly.'
@@ -1110,7 +1127,7 @@ CREATE TABLE system.users
     `name` String,
     `id` UUID,
     `storage` String,
-    `auth_type` Enum8('no_password' = 0, 'plaintext_password' = 1, 'sha256_password' = 2, 'double_sha1_password' = 3, 'ldap' = 4, 'kerberos' = 5, 'ssl_certificate' = 6),
+    `auth_type` Enum8('no_password' = 0, 'plaintext_password' = 1, 'sha256_password' = 2, 'double_sha1_password' = 3, 'ldap' = 4, 'kerberos' = 5, 'ssl_certificate' = 6, 'bcrypt_password' = 7),
     `auth_params` String,
     `host_ip` Array(String),
     `host_names` Array(String),
diff --git a/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql b/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql
index 4aad7ae3694..822ffb19764 100644
--- a/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql
+++ b/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql
@@ -6,9 +6,9 @@ insert into test values (0);
 select if(0, y, 42) from test;
 select if(1, 42, y) from test;
 select if(toUInt8(0), y, 42) from test;
-select if(toInt8(0), y, 42) from test;
+select if(toUInt8(0), y, 42) from test;
+select if(toUInt8(1), 42, y) from test;
 select if(toUInt8(1), 42, y) from test;
-select if(toInt8(1), 42, y) from test;
 select if(toUInt8(toUInt8(0)), y, 42) from test;
 select if(cast(cast(0, 'UInt8'), 'UInt8'), y, 42) from test;
 explain syntax select x, if((select hasColumnInTable(currentDatabase(), 'test', 'y')), y, x || '_') from test;
diff --git a/tests/queries/0_stateless/02125_many_mutations_2.reference b/tests/queries/0_stateless/02125_many_mutations_2.reference
new file mode 100644
index 00000000000..4bdea51dfc1
--- /dev/null
+++ b/tests/queries/0_stateless/02125_many_mutations_2.reference
@@ -0,0 +1,4 @@
+2000
+20000
+0
+1000
diff --git a/tests/queries/0_stateless/02125_many_mutations_2.sh b/tests/queries/0_stateless/02125_many_mutations_2.sh
new file mode 100755
index 00000000000..df170a402c6
--- /dev/null
+++ b/tests/queries/0_stateless/02125_many_mutations_2.sh
@@ -0,0 +1,51 @@
+#!/usr/bin/env bash
+# Tags: long, no-tsan, no-debug, no-asan, no-msan, no-ubsan
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+$CLICKHOUSE_CLIENT -q "create table many_mutations (x UInt32, y UInt32) engine = MergeTree order by x"
+$CLICKHOUSE_CLIENT -q "insert into many_mutations select number, number + 1 from numbers(2000)"
+$CLICKHOUSE_CLIENT -q "system stop merges many_mutations"
+
+$CLICKHOUSE_CLIENT -q "select count() from many_mutations"
+
+job()
+{
+    for i in {1..1000}
+    do
+        $CLICKHOUSE_CLIENT -q "alter table many_mutations delete where y = ${i} * 2 settings mutations_sync=0"
+    done
+}
+
+job &
+job &
+job &
+job &
+job &
+job &
+job &
+job &
+job &
+job &
+job &
+job &
+job &
+job &
+job &
+job &
+job &
+job &
+job &
+job &
+
+wait
+
+$CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done"
+$CLICKHOUSE_CLIENT -q "system start merges many_mutations"
+$CLICKHOUSE_CLIENT -q "optimize table many_mutations final"
+$CLICKHOUSE_CLIENT -q "system flush logs"
+$CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done"
+$CLICKHOUSE_CLIENT -q "select count() from many_mutations"
+$CLICKHOUSE_CLIENT -q "select * from system.part_log where database = currentDatabase() and table == 'many_mutations' and peak_memory_usage > 1e9"
diff --git a/tests/queries/0_stateless/02125_query_views_log_window_function.sql b/tests/queries/0_stateless/02125_query_views_log_window_function.sql
index 1de2cc95b14..fff1e943c58 100644
--- a/tests/queries/0_stateless/02125_query_views_log_window_function.sql
+++ b/tests/queries/0_stateless/02125_query_views_log_window_function.sql
@@ -1,4 +1,6 @@
+set allow_experimental_analyzer = 0;
 set allow_experimental_window_view = 1;
+
 CREATE TABLE data ( `id` UInt64, `timestamp` DateTime) ENGINE = Memory;
 CREATE WINDOW VIEW wv Engine Memory as select count(id), tumbleStart(w_id) as window_start from data group by tumble(timestamp, INTERVAL '10' SECOND) as w_id;
diff --git a/tests/queries/0_stateless/02125_transform_decimal_bug.reference b/tests/queries/0_stateless/02125_transform_decimal_bug.reference
index 7f59d0ee7bf..d1bf333ec8e 100644
--- a/tests/queries/0_stateless/02125_transform_decimal_bug.reference
+++ b/tests/queries/0_stateless/02125_transform_decimal_bug.reference
@@ -1,3 +1,4 @@
+1
 0
 1
 2
diff --git a/tests/queries/0_stateless/02125_transform_decimal_bug.sql b/tests/queries/0_stateless/02125_transform_decimal_bug.sql
index 4ef471ea875..002f60076e9 100644
--- a/tests/queries/0_stateless/02125_transform_decimal_bug.sql
+++ b/tests/queries/0_stateless/02125_transform_decimal_bug.sql
@@ -1,4 +1,4 @@
-SELECT transform(1, [1], [toDecimal32(1, 2)]); -- { serverError 44 }
+SELECT transform(1, [1], [toDecimal32(1, 2)]);
 SELECT transform(toDecimal32(number, 2), [toDecimal32(3, 2)], [toDecimal32(30, 2)]) FROM system.numbers LIMIT 10;
 SELECT transform(toDecimal32(number, 2), [toDecimal32(3, 2)], [toDecimal32(30, 2)], toDecimal32(1000, 2)) FROM system.numbers LIMIT 10;
 SELECT transform(number, [3, 5, 11], [toDecimal32(30, 2), toDecimal32(50, 2), toDecimal32(70,2)], toDecimal32(1000, 2)) FROM system.numbers LIMIT 10;
diff --git a/tests/queries/0_stateless/02126_url_auth.python b/tests/queries/0_stateless/02126_url_auth.python
index 57b16fb413e..9b2e68a017d 100644
--- a/tests/queries/0_stateless/02126_url_auth.python
+++ b/tests/queries/0_stateless/02126_url_auth.python
@@ -12,6 +12,7 @@ import subprocess
 from io import StringIO
 from http.server import BaseHTTPRequestHandler, HTTPServer
 
+
 def is_ipv6(host):
try: socket.inet_aton(host) @@ -19,6 +20,7 @@ def is_ipv6(host): except: return True + def get_local_port(host, ipv6): if ipv6: family = socket.AF_INET6 @@ -29,8 +31,9 @@ def get_local_port(host, ipv6): fd.bind((host, 0)) return fd.getsockname()[1] -CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1') -CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123') + +CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1") +CLICKHOUSE_PORT_HTTP = os.environ.get("CLICKHOUSE_PORT_HTTP", "8123") ##################################################################################### # This test starts an HTTP server and serves data to clickhouse url-engine based table. @@ -39,27 +42,42 @@ CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123') ##################################################################################### # IP-address of this host accessible from the outside world. Get the first one -HTTP_SERVER_HOST = subprocess.check_output(['hostname', '-i']).decode('utf-8').strip().split()[0] +HTTP_SERVER_HOST = ( + subprocess.check_output(["hostname", "-i"]).decode("utf-8").strip().split()[0] +) IS_IPV6 = is_ipv6(HTTP_SERVER_HOST) HTTP_SERVER_PORT = get_local_port(HTTP_SERVER_HOST, IS_IPV6) # IP address and port of the HTTP server started from this script. HTTP_SERVER_ADDRESS = (HTTP_SERVER_HOST, HTTP_SERVER_PORT) if IS_IPV6: - HTTP_SERVER_URL_STR = 'http://' + f'[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}' + "/" + HTTP_SERVER_URL_STR = ( + "http://" + + f"[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}" + + "/" + ) else: - HTTP_SERVER_URL_STR = 'http://' + f'{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}' + "/" + HTTP_SERVER_URL_STR = ( + "http://" + f"{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}" + "/" + ) + +CSV_DATA = os.path.join( + tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()) +) -CSV_DATA = os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())) def get_ch_answer(query): host = CLICKHOUSE_HOST if IS_IPV6: - host = f'[{host}]' + host = f"[{host}]" - url = os.environ.get('CLICKHOUSE_URL', 'http://{host}:{port}'.format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP)) + url = os.environ.get( + "CLICKHOUSE_URL", + "http://{host}:{port}".format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP), + ) return urllib.request.urlopen(url, data=query.encode()).read().decode() + def check_answers(query, answer): ch_answer = get_ch_answer(query) if ch_answer.strip() != answer.strip(): @@ -68,15 +86,16 @@ def check_answers(query, answer): print("Fetched answer :", ch_answer, file=sys.stderr) raise Exception("Fail on query") + class CSVHTTPServer(BaseHTTPRequestHandler): def _set_headers(self): self.send_response(200) - self.send_header('Content-type', 'text/csv') + self.send_header("Content-type", "text/csv") self.end_headers() def do_GET(self): self._set_headers() - self.wfile.write(('hello, world').encode()) + self.wfile.write(("hello, world").encode()) # with open(CSV_DATA, 'r') as fl: # reader = csv.reader(fl, delimiter=',') # for row in reader: @@ -84,33 +103,33 @@ class CSVHTTPServer(BaseHTTPRequestHandler): return def read_chunk(self): - msg = '' + msg = "" while True: sym = self.rfile.read(1) - if sym == '': + if sym == "": break - msg += sym.decode('utf-8') - if msg.endswith('\r\n'): + msg += sym.decode("utf-8") + if msg.endswith("\r\n"): break length = int(msg[:-2], 16) if length == 0: - return '' + return "" content = 
self.rfile.read(length) - self.rfile.read(2) # read sep \r\n - return content.decode('utf-8') + self.rfile.read(2) # read sep \r\n + return content.decode("utf-8") def do_POST(self): - data = '' + data = "" while True: chunk = self.read_chunk() if not chunk: break data += chunk with StringIO(data) as fl: - reader = csv.reader(fl, delimiter=',') - with open(CSV_DATA, 'a') as d: + reader = csv.reader(fl, delimiter=",") + with open(CSV_DATA, "a") as d: for row in reader: - d.write(','.join(row) + '\n') + d.write(",".join(row) + "\n") self._set_headers() self.wfile.write(b"ok") @@ -121,6 +140,7 @@ class CSVHTTPServer(BaseHTTPRequestHandler): class HTTPServerV6(HTTPServer): address_family = socket.AF_INET6 + def start_server(): if IS_IPV6: httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, CSVHTTPServer) @@ -130,57 +150,87 @@ def start_server(): t = threading.Thread(target=httpd.serve_forever) return t, httpd + # test section -def test_select(table_name="", schema="str String,numuint UInt32,numint Int32,double Float64", requests=[], answers=[], test_data=""): - with open(CSV_DATA, 'w') as f: # clear file - f.write('') + +def test_select( + table_name="", + schema="str String,numuint UInt32,numint Int32,double Float64", + requests=[], + answers=[], + test_data="", +): + with open(CSV_DATA, "w") as f: # clear file + f.write("") if test_data: - with open(CSV_DATA, 'w') as f: + with open(CSV_DATA, "w") as f: f.write(test_data + "\n") if table_name: get_ch_answer("drop table if exists {}".format(table_name)) - get_ch_answer("create table {} ({}) engine=URL('{}', 'CSV')".format(table_name, schema, HTTP_SERVER_URL_STR)) + get_ch_answer( + "create table {} ({}) engine=URL('{}', 'CSV')".format( + table_name, schema, HTTP_SERVER_URL_STR + ) + ) for i in range(len(requests)): tbl = table_name if not tbl: - tbl = "url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema) + tbl = "url('{addr}', 'CSV', '{schema}')".format( + addr=HTTP_SERVER_URL_STR, schema=schema + ) check_answers(requests[i].format(tbl=tbl), answers[i]) if table_name: get_ch_answer("drop table if exists {}".format(table_name)) -def test_insert(table_name="", schema="str String,numuint UInt32,numint Int32,double Float64", requests_insert=[], requests_select=[], answers=[]): - with open(CSV_DATA, 'w') as f: # flush test file - f.write('') + +def test_insert( + table_name="", + schema="str String,numuint UInt32,numint Int32,double Float64", + requests_insert=[], + requests_select=[], + answers=[], +): + with open(CSV_DATA, "w") as f: # flush test file + f.write("") if table_name: get_ch_answer("drop table if exists {}".format(table_name)) - get_ch_answer("create table {} ({}) engine=URL('{}', 'CSV')".format(table_name, schema, HTTP_SERVER_URL_STR)) + get_ch_answer( + "create table {} ({}) engine=URL('{}', 'CSV')".format( + table_name, schema, HTTP_SERVER_URL_STR + ) + ) for req in requests_insert: tbl = table_name if not tbl: - tbl = "table function url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema) + tbl = "table function url('{addr}', 'CSV', '{schema}')".format( + addr=HTTP_SERVER_URL_STR, schema=schema + ) get_ch_answer(req.format(tbl=tbl)) - for i in range(len(requests_select)): tbl = table_name if not tbl: - tbl = "url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema) + tbl = "url('{addr}', 'CSV', '{schema}')".format( + addr=HTTP_SERVER_URL_STR, schema=schema + ) check_answers(requests_select[i].format(tbl=tbl), answers[i]) if table_name: get_ch_answer("drop table 
if exists {}".format(table_name)) + def test_select_url_engine(requests=[], answers=[], test_data=""): for i in range(len(requests)): check_answers(requests[i], answers[i]) + def main(): test_data = "Hello,2,-2,7.7\nWorld,2,-5,8.8" """ @@ -203,19 +253,29 @@ def main(): """ if IS_IPV6: - query = "select * from url('http://guest:guest@" + f'[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}' + "/', 'RawBLOB', 'a String')" + query = ( + "select * from url('http://guest:guest@" + + f"[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}" + + "/', 'RawBLOB', 'a String')" + ) else: - query = "select * from url('http://guest:guest@" + f'{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}' + "/', 'RawBLOB', 'a String')" - - + query = ( + "select * from url('http://guest:guest@" + + f"{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}" + + "/', 'RawBLOB', 'a String')" + ) select_requests_url_auth = { - query : 'hello, world', + query: "hello, world", } t, httpd = start_server() t.start() - test_select(requests=list(select_requests_url_auth.keys()), answers=list(select_requests_url_auth.values()), test_data=test_data) + test_select( + requests=list(select_requests_url_auth.keys()), + answers=list(select_requests_url_auth.values()), + test_data=test_data, + ) httpd.shutdown() t.join() print("PASSED") diff --git a/tests/queries/0_stateless/02127_connection_drain.reference b/tests/queries/0_stateless/02127_connection_drain.reference deleted file mode 100644 index c31f2f40f6d..00000000000 --- a/tests/queries/0_stateless/02127_connection_drain.reference +++ /dev/null @@ -1,2 +0,0 @@ -OK: sync drain -OK: async drain diff --git a/tests/queries/0_stateless/02127_connection_drain.sh b/tests/queries/0_stateless/02127_connection_drain.sh deleted file mode 100755 index 523b02d9bd5..00000000000 --- a/tests/queries/0_stateless/02127_connection_drain.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-parallel - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CUR_DIR"/../shell_config.sh - -# sync drain -for _ in {1..100}; do - prev=$(curl -d@- -sS "${CLICKHOUSE_URL}" <<<"select value from system.metrics where metric = 'SyncDrainedConnections'") - curl -d@- -sS "${CLICKHOUSE_URL}" <<<"select * from remote('127.{2,3}', view(select * from numbers(1e6))) limit 100 settings drain_timeout=-1 format Null" - now=$(curl -d@- -sS "${CLICKHOUSE_URL}" <<<"select value from system.metrics where metric = 'SyncDrainedConnections'") - if [[ "$prev" != $(( now-2 )) ]]; then - continue - fi - echo "OK: sync drain" - break -done - -# async drain -for _ in {1..100}; do - prev=$(curl -d@- -sS "${CLICKHOUSE_URL}" <<<"select value from system.metrics where metric = 'AsyncDrainedConnections'") - curl -d@- -sS "${CLICKHOUSE_URL}" <<<"select * from remote('127.{2,3}', view(select * from numbers(1e6))) limit 100 settings drain_timeout=10 format Null" - now=$(curl -d@- -sS "${CLICKHOUSE_URL}" <<<"select value from system.metrics where metric = 'AsyncDrainedConnections'") - if [[ "$prev" != $(( now-2 )) ]]; then - continue - fi - echo "OK: async drain" - break -done diff --git a/tests/queries/0_stateless/02129_add_column_add_ttl.reference b/tests/queries/0_stateless/02129_add_column_add_ttl.reference index 8b3280ef095..5bffe7b5903 100644 --- a/tests/queries/0_stateless/02129_add_column_add_ttl.reference +++ b/tests/queries/0_stateless/02129_add_column_add_ttl.reference @@ -1,41 +1,42 @@ 0 2021-01-01 0 -0 2021-01-01 0 1 2021-01-01 0 -1 2021-01-01 0 -2 2021-01-01 0 2 2021-01-01 0 3 2021-01-01 0 -3 2021-01-01 0 -4 2021-01-01 0 4 2021-01-01 0 5 2021-01-01 0 -5 2021-01-01 0 -6 2021-01-01 0 6 2021-01-01 0 7 2021-01-01 0 -7 2021-01-01 0 8 2021-01-01 0 -8 2021-01-01 0 -9 2021-01-01 0 9 2021-01-01 0 ========== 0 2021-01-01 0 +0 2021-01-01 1 +1 2021-01-01 0 +1 2021-01-01 1 +2 2021-01-01 0 +2 2021-01-01 1 +3 2021-01-01 0 +3 2021-01-01 1 +4 2021-01-01 0 +4 2021-01-01 1 +5 2021-01-01 0 +5 2021-01-01 1 +6 2021-01-01 0 +6 2021-01-01 1 +7 2021-01-01 0 +7 2021-01-01 1 +8 2021-01-01 0 +8 2021-01-01 1 +9 2021-01-01 0 +9 2021-01-01 1 +========== 0 2021-01-01 0 1 2021-01-01 0 -1 2021-01-01 0 -2 2021-01-01 0 2 2021-01-01 0 3 2021-01-01 0 -3 2021-01-01 0 -4 2021-01-01 0 4 2021-01-01 0 5 2021-01-01 0 -5 2021-01-01 0 -6 2021-01-01 0 6 2021-01-01 0 7 2021-01-01 0 -7 2021-01-01 0 -8 2021-01-01 0 8 2021-01-01 0 9 2021-01-01 0 -9 2021-01-01 0 diff --git a/tests/queries/0_stateless/02129_add_column_add_ttl.sql b/tests/queries/0_stateless/02129_add_column_add_ttl.sql index 7a6dd928a3f..a68868ed3bc 100644 --- a/tests/queries/0_stateless/02129_add_column_add_ttl.sql +++ b/tests/queries/0_stateless/02129_add_column_add_ttl.sql @@ -4,10 +4,12 @@ create table ttl_test_02129(a Int64, b String, d Date) Engine=MergeTree partition by d order by a settings min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0, materialize_ttl_recalculate_only = 0; +system stop ttl merges ttl_test_02129; + insert into ttl_test_02129 select number, '', '2021-01-01' from numbers(10); alter table ttl_test_02129 add column c Int64 settings mutations_sync=2; -insert into ttl_test_02129 select number, '', '2021-01-01', 0 from numbers(10); +insert into ttl_test_02129 select number, '', '2021-01-01', 1 from numbers(10); alter table ttl_test_02129 modify TTL (d + INTERVAL 1 MONTH) DELETE WHERE c=1 settings mutations_sync=2; select * from ttl_test_02129 order by a, b, d, c; @@ -21,11 +23,21 @@ create table ttl_test_02129(a Int64, b String, d Date) Engine=MergeTree partition by d order by a settings min_bytes_for_wide_part = 0, 
min_rows_for_wide_part = 0, materialize_ttl_recalculate_only = 1; +system stop ttl merges ttl_test_02129; + insert into ttl_test_02129 select number, '', '2021-01-01' from numbers(10); alter table ttl_test_02129 add column c Int64 settings mutations_sync=2; -insert into ttl_test_02129 select number, '', '2021-01-01', 0 from numbers(10); +insert into ttl_test_02129 select number, '', '2021-01-01', 1 from numbers(10); alter table ttl_test_02129 modify TTL (d + INTERVAL 1 MONTH) DELETE WHERE c=1 settings mutations_sync=2; +select * from ttl_test_02129 order by a, b, d, c; + +select '=========='; + +system start ttl merges ttl_test_02129; + +optimize table ttl_test_02129 final; + select * from ttl_test_02129 order by a, b, d, c; drop table ttl_test_02129; diff --git a/tests/queries/0_stateless/02136_scalar_read_rows_json.reference b/tests/queries/0_stateless/02136_scalar_read_rows_json.reference index 49020a4432f..f8335125233 100644 --- a/tests/queries/0_stateless/02136_scalar_read_rows_json.reference +++ b/tests/queries/0_stateless/02136_scalar_read_rows_json.reference @@ -30,7 +30,7 @@ "meta": [ { - "type": "Tuple(UInt64, UInt64)" + "type": "Tuple(`max(number)` UInt64, `count(number)` UInt64)" } ], diff --git a/tests/queries/0_stateless/02136_scalar_read_rows_json.sh b/tests/queries/0_stateless/02136_scalar_read_rows_json.sh index 34b4b6909b5..1fe345d266d 100755 --- a/tests/queries/0_stateless/02136_scalar_read_rows_json.sh +++ b/tests/queries/0_stateless/02136_scalar_read_rows_json.sh @@ -7,4 +7,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) echo "#1" ${CLICKHOUSE_CLIENT} --query='SELECT count() FROM numbers(100) FORMAT JSON;' | grep -a -v "elapsed" echo "#2" -${CLICKHOUSE_CLIENT} --query='SELECT (SELECT max(number), count(number) FROM numbers(100000) as n) SETTINGS max_block_size = 65505 FORMAT JSON;' | grep -a -v "elapsed" | grep -v "_subquery" +${CLICKHOUSE_CLIENT} --query='SELECT (SELECT max(number), count(number) FROM numbers(100000) as n) SETTINGS max_block_size = 65505, allow_experimental_analyzer = 1 FORMAT JSON;' | grep -a -v "elapsed" | grep -v "_subquery" diff --git a/tests/queries/0_stateless/02148_in_memory_part_flush.reference b/tests/queries/0_stateless/02148_in_memory_part_flush.reference deleted file mode 100644 index 219c5f4b22f..00000000000 --- a/tests/queries/0_stateless/02148_in_memory_part_flush.reference +++ /dev/null @@ -1,4 +0,0 @@ -before DETACH TABLE -500 -after DETACH TABLE -500 diff --git a/tests/queries/0_stateless/02148_in_memory_part_flush.sql b/tests/queries/0_stateless/02148_in_memory_part_flush.sql deleted file mode 100644 index ec20721186e..00000000000 --- a/tests/queries/0_stateless/02148_in_memory_part_flush.sql +++ /dev/null @@ -1,26 +0,0 @@ -DROP TABLE IF EXISTS mem_part_flush; - -CREATE TABLE mem_part_flush -( -`key` UInt32, -`ts` DateTime, -`db_time` DateTime DEFAULT now() -) -ENGINE = MergeTree -ORDER BY (key, ts) -SETTINGS min_rows_for_compact_part = 1000000, min_bytes_for_compact_part = 200000000, in_memory_parts_enable_wal = 0; - -INSERT INTO mem_part_flush(key, ts) SELECT number % 1000, now() + intDiv(number,1000) FROM numbers(500); - -SELECT 'before DETACH TABLE'; -SELECT count(*) FROM mem_part_flush; - -DETACH TABLE mem_part_flush; - -ATTACH TABLE mem_part_flush; - -SELECT 'after DETACH TABLE'; -SELECT count(*) FROM mem_part_flush; - - -DROP TABLE mem_part_flush; diff --git a/tests/queries/0_stateless/02149_external_schema_inference.sh b/tests/queries/0_stateless/02149_external_schema_inference.sh index df2b9a43565..5e03120c80f 
100755 --- a/tests/queries/0_stateless/02149_external_schema_inference.sh +++ b/tests/queries/0_stateless/02149_external_schema_inference.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -FILE_NAME=test_02149.data +FILE_NAME=test_$CLICKHOUSE_TEST_UNIQUE_NAME.data DATA_FILE=$USER_FILES_PATH/$FILE_NAME touch $DATA_FILE diff --git a/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.reference b/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.reference index 71ca326f952..d1662cdeb81 100644 --- a/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.reference +++ b/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.reference @@ -66,6 +66,18 @@ ExpressionTransform ExpressionTransform (ReadFromMergeTree) MergeTreeInOrder 0 → 1 +(Expression) +ExpressionTransform + (Limit) + Limit + (Sorting) + (Expression) + ExpressionTransform + (Expression) + ExpressionTransform + (ReadFromMergeTree) + ExpressionTransform + MergeTreeInOrder 0 → 1 2020-10-11 0 0 2020-10-11 0 10 2020-10-11 0 20 @@ -82,6 +94,20 @@ ExpressionTransform ExpressionTransform (ReadFromMergeTree) MergeTreeInOrder 0 → 1 +(Expression) +ExpressionTransform + (Limit) + Limit + (Sorting) + FinishSortingTransform + PartialSortingTransform + (Expression) + ExpressionTransform + (Expression) + ExpressionTransform + (ReadFromMergeTree) + ExpressionTransform + MergeTreeInOrder 0 → 1 2020-10-12 0 2020-10-12 1 2020-10-12 2 @@ -104,6 +130,19 @@ ExpressionTransform (ReadFromMergeTree) ReverseTransform MergeTreeReverse 0 → 1 +(Expression) +ExpressionTransform + (Limit) + Limit + (Sorting) + (Expression) + ExpressionTransform + (Expression) + ExpressionTransform + (ReadFromMergeTree) + ExpressionTransform + ReverseTransform + MergeTreeReverse 0 → 1 2020-10-12 99999 2020-10-12 99998 2020-10-12 99997 diff --git a/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.sql b/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.sql index f50aab67d77..5e662bd7842 100644 --- a/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.sql +++ b/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.sql @@ -21,17 +21,20 @@ SELECT toStartOfMonth(date) as d, i FROM t_read_in_order ORDER BY d, -i LIMIT 5; EXPLAIN PIPELINE SELECT toStartOfMonth(date) as d, i FROM t_read_in_order ORDER BY d, -i LIMIT 5; SELECT date, i FROM t_read_in_order WHERE date = '2020-10-11' ORDER BY i LIMIT 5; -EXPLAIN PIPELINE SELECT date, i FROM t_read_in_order WHERE date = '2020-10-11' ORDER BY i LIMIT 5; +EXPLAIN PIPELINE SELECT date, i FROM t_read_in_order WHERE date = '2020-10-11' ORDER BY i LIMIT 5 settings allow_experimental_analyzer=0; +EXPLAIN PIPELINE SELECT date, i FROM t_read_in_order WHERE date = '2020-10-11' ORDER BY i LIMIT 5 settings allow_experimental_analyzer=1; SELECT * FROM t_read_in_order WHERE date = '2020-10-11' ORDER BY i, v LIMIT 5; -EXPLAIN PIPELINE SELECT * FROM t_read_in_order WHERE date = '2020-10-11' ORDER BY i, v LIMIT 5; +EXPLAIN PIPELINE SELECT * FROM t_read_in_order WHERE date = '2020-10-11' ORDER BY i, v LIMIT 5 settings allow_experimental_analyzer=0; +EXPLAIN PIPELINE SELECT * FROM t_read_in_order WHERE date = '2020-10-11' ORDER BY i, v LIMIT 5 settings allow_experimental_analyzer=1; INSERT INTO t_read_in_order SELECT '2020-10-12', number, number FROM numbers(100000); SELECT date, i FROM t_read_in_order WHERE 
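-- (editor's note, not part of the patch: the EXPLAIN PIPELINE statements in
-- this file are duplicated with allow_experimental_analyzer=0 and =1 because
-- the old and new planners emit different pipelines; the extra plans added to
-- the .reference hunk above correspond to the analyzer=1 variants.)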
date = '2020-10-12' ORDER BY i LIMIT 5;
 EXPLAIN SYNTAX SELECT date, i FROM t_read_in_order WHERE date = '2020-10-12' ORDER BY i DESC LIMIT 5;
-EXPLAIN PIPELINE SELECT date, i FROM t_read_in_order WHERE date = '2020-10-12' ORDER BY i DESC LIMIT 5;
+EXPLAIN PIPELINE SELECT date, i FROM t_read_in_order WHERE date = '2020-10-12' ORDER BY i DESC LIMIT 5 settings allow_experimental_analyzer=0;
+EXPLAIN PIPELINE SELECT date, i FROM t_read_in_order WHERE date = '2020-10-12' ORDER BY i DESC LIMIT 5 settings allow_experimental_analyzer=1;
 SELECT date, i FROM t_read_in_order WHERE date = '2020-10-12' ORDER BY i DESC LIMIT 5;
 
 DROP TABLE IF EXISTS t_read_in_order;
diff --git a/tests/queries/0_stateless/02149_schema_inference.sh b/tests/queries/0_stateless/02149_schema_inference.sh
index 1ccec240627..79b26f5b3f2 100755
--- a/tests/queries/0_stateless/02149_schema_inference.sh
+++ b/tests/queries/0_stateless/02149_schema_inference.sh
@@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 
 USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
 
-FILE_NAME=test_02149.data
+FILE_NAME=test_$CLICKHOUSE_TEST_UNIQUE_NAME.data
 DATA_FILE=${USER_FILES_PATH:?}/$FILE_NAME
 
 touch $DATA_FILE
diff --git a/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference b/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference
index 47391a77ee8..9a7a1611a7b 100644
--- a/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference
+++ b/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference
@@ -422,9 +422,9 @@ float32 Float32
 float64 Float64
 0 0
 1.2 0.7692307692307692
-date Int32
-0
-1
+date Date32
+1970-01-01
+1970-01-02
 str String
 fixed_string FixedString(3)
 Str: 0 100
diff --git a/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.sh b/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.sh
index d263ef63681..cf5a086fb5e 100755
--- a/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.sh
+++ b/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.sh
@@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 
 USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
 
-FILE_NAME=test_02149.data
+FILE_NAME=test_$CLICKHOUSE_TEST_UNIQUE_NAME.data
 DATA_FILE=$USER_FILES_PATH/$FILE_NAME
 
 for format in Arrow ArrowStream Parquet ORC Native TSVWithNamesAndTypes TSVRawWithNamesAndTypes CSVWithNamesAndTypes JSONCompactEachRowWithNamesAndTypes JSONCompactStringsEachRowWithNamesAndTypes RowBinaryWithNamesAndTypes CustomSeparatedWithNamesAndTypes
diff --git a/tests/queries/0_stateless/02151_hash_table_sizes_stats.sh b/tests/queries/0_stateless/02151_hash_table_sizes_stats.sh
index ccd6f89e752..4a1eea0a238 100755
--- a/tests/queries/0_stateless/02151_hash_table_sizes_stats.sh
+++ b/tests/queries/0_stateless/02151_hash_table_sizes_stats.sh
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-# Tags: long
+# Tags: long, no-tsan
 
 # shellcheck disable=SC2154
 
diff --git a/tests/queries/0_stateless/02153_native_bounds_check.sh b/tests/queries/0_stateless/02153_native_bounds_check.sh
deleted file mode 100755
index a3475ddacae..00000000000
--- a/tests/queries/0_stateless/02153_native_bounds_check.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/usr/bin/env bash
-
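# (editor's note, not part of the patch: the test deleted below piped Native
# output containing a duplicated column name into clickhouse-local with the
# column declared as LowCardinality(String), and grep-counted the expected
# Exception to verify the error was handled rather than crashing.)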
-CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -# Should correctly handle error. - -${CLICKHOUSE_LOCAL} --query "SELECT toString(number) AS a, toString(number) AS a FROM numbers(10)" --output-format Native | - ${CLICKHOUSE_LOCAL} --query "SELECT * FROM table" --input-format Native --structure 'a LowCardinality(String)' 2>&1 | - grep -c -F Exception diff --git a/tests/queries/0_stateless/02154_dictionary_get_http_json.reference b/tests/queries/0_stateless/02154_dictionary_get_http_json.reference index 7106f551cd7..201d3b122e8 100644 --- a/tests/queries/0_stateless/02154_dictionary_get_http_json.reference +++ b/tests/queries/0_stateless/02154_dictionary_get_http_json.reference @@ -3,11 +3,11 @@ "meta": [ { - "name": "dictGet(02154_test_dictionary, 'value', toUInt64(0))", + "name": "dictGet('02154_test_dictionary', 'value', toUInt64(0))", "type": "String" }, { - "name": "dictGet(02154_test_dictionary, 'value', toUInt64(1))", + "name": "dictGet('02154_test_dictionary', 'value', toUInt64(1))", "type": "String" } ], @@ -15,8 +15,8 @@ "data": [ { - "dictGet(02154_test_dictionary, 'value', toUInt64(0))": "Value", - "dictGet(02154_test_dictionary, 'value', toUInt64(1))": "" + "dictGet('02154_test_dictionary', 'value', toUInt64(0))": "Value", + "dictGet('02154_test_dictionary', 'value', toUInt64(1))": "" } ], diff --git a/tests/queries/0_stateless/02154_dictionary_get_http_json.sh b/tests/queries/0_stateless/02154_dictionary_get_http_json.sh index a2bce866c76..fbaf67fff2f 100755 --- a/tests/queries/0_stateless/02154_dictionary_get_http_json.sh +++ b/tests/queries/0_stateless/02154_dictionary_get_http_json.sh @@ -32,6 +32,7 @@ $CLICKHOUSE_CLIENT -q """ echo """ SELECT dictGet(02154_test_dictionary, 'value', toUInt64(0)), dictGet(02154_test_dictionary, 'value', toUInt64(1)) + SETTINGS allow_experimental_analyzer = 1 FORMAT JSON """ | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&wait_end_of_query=1&output_format_write_statistics=0" -d @- diff --git a/tests/queries/0_stateless/02158_proportions_ztest_cmp.python b/tests/queries/0_stateless/02158_proportions_ztest_cmp.python index d622004db28..0555f8c36ec 100644 --- a/tests/queries/0_stateless/02158_proportions_ztest_cmp.python +++ b/tests/queries/0_stateless/02158_proportions_ztest_cmp.python @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python3 import os import sys from math import sqrt, nan @@ -8,7 +8,7 @@ import pandas as pd import numpy as np CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient @@ -25,7 +25,7 @@ def twosample_proportion_ztest(s1, s2, t1, t2, alpha): return nan, nan, nan, nan z_stat = (p1 - p2) / se - one_side = 1 - stats.norm.cdf(abs(z_stat)) + one_side = 1 - stats.norm.cdf(abs(z_stat)) p_value = one_side * 2 z = stats.norm.ppf(1 - 0.5 * alpha) @@ -38,71 +38,171 @@ def twosample_proportion_ztest(s1, s2, t1, t2, alpha): def test_and_check(name, z_stat, p_value, ci_lower, ci_upper, precision=1e-2): client = ClickHouseClient() real = client.query_return_df( - "SELECT roundBankers({}.1, 16) as z_stat, ".format(name) + - "roundBankers({}.2, 16) as p_value, ".format(name) + - "roundBankers({}.3, 16) as ci_lower, ".format(name) + - "roundBankers({}.4, 16) as ci_upper ".format(name) + - "FORMAT TabSeparatedWithNames;") - real_z_stat = real['z_stat'][0] - real_p_value = real['p_value'][0] - real_ci_lower = 
real['ci_lower'][0]
-    real_ci_upper = real['ci_upper'][0]
-    assert((np.isnan(real_z_stat) and np.isnan(z_stat)) or abs(real_z_stat - np.float64(z_stat)) < precision), "clickhouse_z_stat {}, py_z_stat {}".format(real_z_stat, z_stat)
-    assert((np.isnan(real_p_value) and np.isnan(p_value)) or abs(real_p_value - np.float64(p_value)) < precision), "clickhouse_p_value {}, py_p_value {}".format(real_p_value, p_value)
-    assert((np.isnan(real_ci_lower) and np.isnan(ci_lower)) or abs(real_ci_lower - np.float64(ci_lower)) < precision), "clickhouse_ci_lower {}, py_ci_lower {}".format(real_ci_lower, ci_lower)
-    assert((np.isnan(real_ci_upper) and np.isnan(ci_upper)) or abs(real_ci_upper - np.float64(ci_upper)) < precision), "clickhouse_ci_upper {}, py_ci_upper {}".format(real_ci_upper, ci_upper)
+        "SELECT roundBankers({}.1, 16) as z_stat, ".format(name)
+        + "roundBankers({}.2, 16) as p_value, ".format(name)
+        + "roundBankers({}.3, 16) as ci_lower, ".format(name)
+        + "roundBankers({}.4, 16) as ci_upper ".format(name)
+        + "FORMAT TabSeparatedWithNames;"
+    )
+    real_z_stat = real["z_stat"][0]
+    real_p_value = real["p_value"][0]
+    real_ci_lower = real["ci_lower"][0]
+    real_ci_upper = real["ci_upper"][0]
+    assert (np.isnan(real_z_stat) and np.isnan(z_stat)) or abs(
+        real_z_stat - np.float64(z_stat)
+    ) < precision, "clickhouse_z_stat {}, py_z_stat {}".format(real_z_stat, z_stat)
+    assert (np.isnan(real_p_value) and np.isnan(p_value)) or abs(
+        real_p_value - np.float64(p_value)
+    ) < precision, "clickhouse_p_value {}, py_p_value {}".format(real_p_value, p_value)
+    assert (np.isnan(real_ci_lower) and np.isnan(ci_lower)) or abs(
+        real_ci_lower - np.float64(ci_lower)
+    ) < precision, "clickhouse_ci_lower {}, py_ci_lower {}".format(
+        real_ci_lower, ci_lower
+    )
+    assert (np.isnan(real_ci_upper) and np.isnan(ci_upper)) or abs(
+        real_ci_upper - np.float64(ci_upper)
+    ) < precision, "clickhouse_ci_upper {}, py_ci_upper {}".format(
+        real_ci_upper, ci_upper
+    )


 def test_mean_ztest():
     counts = [0, 0]
     nobs = [0, 0]
-    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05)
-    test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper)
-    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(10, 10, 10, 10, 0.05)
+    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
+        counts[0], counts[1], nobs[0], nobs[1], 0.05
+    )
+    test_and_check(
+        "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')"
+        % (counts[0], counts[1], nobs[0], nobs[1]),
+        z_stat,
+        p_value,
+        ci_lower,
+        ci_upper,
+    )
+    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
+        10, 10, 10, 10, 0.05
+    )
     counts = [10, 10]
     nobs = [10, 10]
-    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05)
-    test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper)
-    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(10, 10, 10, 10, 0.05)
+    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
+        counts[0], counts[1], nobs[0], nobs[1], 0.05
+    )
+    test_and_check(
+        "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')"
+        % (counts[0], counts[1], nobs[0], nobs[1]),
+        z_stat,
+        p_value,
+        ci_lower,
+        ci_upper,
+    )
+    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
+        10, 10, 10, 10, 0.05
+    )
     counts = [16, 16]
     nobs = [16, 18]
-    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05)
-    test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper)
+    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
+        counts[0], counts[1], nobs[0], nobs[1], 0.05
+    )
+    test_and_check(
+        "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')"
+        % (counts[0], counts[1], nobs[0], nobs[1]),
+        z_stat,
+        p_value,
+        ci_lower,
+        ci_upper,
+    )

     counts = [10, 20]
     nobs = [30, 40]
-    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05)
-    test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper)
+    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
+        counts[0], counts[1], nobs[0], nobs[1], 0.05
+    )
+    test_and_check(
+        "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')"
+        % (counts[0], counts[1], nobs[0], nobs[1]),
+        z_stat,
+        p_value,
+        ci_lower,
+        ci_upper,
+    )

     counts = [20, 10]
     nobs = [40, 30]
-    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05)
-    test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper)
+    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
+        counts[0], counts[1], nobs[0], nobs[1], 0.05
+    )
+    test_and_check(
+        "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')"
+        % (counts[0], counts[1], nobs[0], nobs[1]),
+        z_stat,
+        p_value,
+        ci_lower,
+        ci_upper,
+    )

-    counts = [randrange(10,20), randrange(10,20)]
-    nobs = [randrange(counts[0] + 1, counts[0] * 2), randrange(counts[1], counts[1] * 2)]
-    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05)
-    test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper)
+    counts = [randrange(10, 20), randrange(10, 20)]
+    nobs = [
+        randrange(counts[0] + 1, counts[0] * 2),
+        randrange(counts[1], counts[1] * 2),
+    ]
+    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
+        counts[0], counts[1], nobs[0], nobs[1], 0.05
+    )
+    test_and_check(
+        "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')"
+        % (counts[0], counts[1], nobs[0], nobs[1]),
+        z_stat,
+        p_value,
+        ci_lower,
+        ci_upper,
+    )

-    counts = [randrange(1,100), randrange(1,200)]
+    counts = [randrange(1, 100), randrange(1, 200)]
     nobs = [randrange(counts[0], counts[0] * 2), randrange(counts[1], counts[1] * 3)]
-    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05)
-    test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper)
+    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
+        counts[0], counts[1], nobs[0], nobs[1], 0.05
+    )
+    test_and_check(
+        "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')"
+        % (counts[0], counts[1], nobs[0], nobs[1]),
+        z_stat,
+        p_value,
+        ci_lower,
+        ci_upper,
+    )

-    counts = [randrange(1,200), randrange(1,100)]
+    counts = [randrange(1, 200), randrange(1, 100)]
     nobs = [randrange(counts[0], counts[0] * 3), randrange(counts[1], counts[1] * 2)]
-    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05)
-    test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper)
+    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
+        counts[0], counts[1], nobs[0], nobs[1], 0.05
+    )
+    test_and_check(
+        "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')"
+        % (counts[0], counts[1], nobs[0], nobs[1]),
+        z_stat,
+        p_value,
+        ci_lower,
+        ci_upper,
+    )

-    counts = [randrange(1,1000), randrange(1,1000)]
+    counts = [randrange(1, 1000), randrange(1, 1000)]
     nobs = [randrange(counts[0], counts[0] * 2), randrange(counts[1], counts[1] * 2)]
-    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05)
-    test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper)
+    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
+        counts[0], counts[1], nobs[0], nobs[1], 0.05
+    )
+    test_and_check(
+        "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')"
+        % (counts[0], counts[1], nobs[0], nobs[1]),
+        z_stat,
+        p_value,
+        ci_lower,
+        ci_upper,
+    )


 if __name__ == "__main__":
     test_mean_ztest()
     print("Ok.")
-
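Editorial note: `twosample_proportion_ztest` is defined elsewhere in this file, so the sketch below is only a plausible reference implementation of the unpooled two-sample proportion z-test that `proportionsZTest(..., 'unpooled')` is being compared against; the formulas and NaN behaviour are assumptions, not the test's actual helper.

import numpy as np
from scipy import stats

def twosample_proportion_ztest(c1, c2, n1, n2, alpha):
    # Sample proportions; np.float64 division yields NaN for the 0/0 edge
    # cases above instead of raising, matching the NaN-tolerant asserts.
    p1, p2 = np.float64(c1) / n1, np.float64(c2) / n2
    # Unpooled standard error, as in the 'unpooled' variant under test.
    se = np.sqrt(p1 * (1 - p1) / n1 + p2 * (1 - p2) / n2)
    z_stat = (p1 - p2) / se
    p_value = 2 * (1 - stats.norm.cdf(abs(z_stat)))
    d = stats.norm.ppf(1 - alpha / 2) * se
    return z_stat, p_value, (p1 - p2) - d, (p1 - p2) + d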
"clickhouse_ci_low {}, py_ci_low {}".format(real_ci_low, ci_low) - assert(abs(real_ci_high - np.float64(ci_high)) < precision), "clickhouse_ci_high {}, py_ci_high {}".format(real_ci_high, ci_high) + "SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name) + + "roundBankers({}(left, right).2, 16) as p_value, ".format(name) + + "roundBankers({}(left, right).3, 16) as ci_low, ".format(name) + + "roundBankers({}(left, right).4, 16) as ci_high ".format(name) + + "FROM ztest FORMAT TabSeparatedWithNames;" + ) + real_t_stat = real["t_stat"][0] + real_p_value = real["p_value"][0] + real_ci_low = real["ci_low"][0] + real_ci_high = real["ci_high"][0] + assert ( + abs(real_t_stat - np.float64(t_stat)) < precision + ), "clickhouse_t_stat {}, py_t_stat {}".format(real_t_stat, t_stat) + assert ( + abs(real_p_value - np.float64(p_value)) < precision + ), "clickhouse_p_value {}, py_p_value {}".format(real_p_value, p_value) + assert ( + abs(real_ci_low - np.float64(ci_low)) < precision + ), "clickhouse_ci_low {}, py_ci_low {}".format(real_ci_low, ci_low) + assert ( + abs(real_ci_high - np.float64(ci_high)) < precision + ), "clickhouse_ci_high {}, py_ci_high {}".format(real_ci_high, ci_high) client.query("DROP TABLE IF EXISTS ztest;") def test_mean_ztest(): - rvs1 = np.round(stats.norm.rvs(loc=1, scale=5,size=500), 2) - rvs2 = np.round(stats.norm.rvs(loc=10, scale=5,size=500), 2) + rvs1 = np.round(stats.norm.rvs(loc=1, scale=5, size=500), 2) + rvs2 = np.round(stats.norm.rvs(loc=10, scale=5, size=500), 2) s, p, cl, ch = twosample_mean_ztest(rvs1, rvs2) - test_and_check("meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), rvs1, rvs2, s, p, cl, ch) + test_and_check( + "meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), + rvs1, + rvs2, + s, + p, + cl, + ch, + ) - rvs1 = np.round(stats.norm.rvs(loc=0, scale=5,size=500), 2) - rvs2 = np.round(stats.norm.rvs(loc=0, scale=5,size=500), 2) + rvs1 = np.round(stats.norm.rvs(loc=0, scale=5, size=500), 2) + rvs2 = np.round(stats.norm.rvs(loc=0, scale=5, size=500), 2) s, p, cl, ch = twosample_mean_ztest(rvs1, rvs2) - test_and_check("meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), rvs1, rvs2, s, p, cl, ch) + test_and_check( + "meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), + rvs1, + rvs2, + s, + p, + cl, + ch, + ) - rvs1 = np.round(stats.norm.rvs(loc=2, scale=10,size=512), 2) - rvs2 = np.round(stats.norm.rvs(loc=5, scale=20,size=1024), 2) + rvs1 = np.round(stats.norm.rvs(loc=2, scale=10, size=512), 2) + rvs2 = np.round(stats.norm.rvs(loc=5, scale=20, size=1024), 2) s, p, cl, ch = twosample_mean_ztest(rvs1, rvs2) - test_and_check("meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), rvs1, rvs2, s, p, cl, ch) + test_and_check( + "meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), + rvs1, + rvs2, + s, + p, + cl, + ch, + ) - rvs1 = np.round(stats.norm.rvs(loc=0, scale=10,size=1024), 2) - rvs2 = np.round(stats.norm.rvs(loc=0, scale=10,size=512), 2) + rvs1 = np.round(stats.norm.rvs(loc=0, scale=10, size=1024), 2) + rvs2 = np.round(stats.norm.rvs(loc=0, scale=10, size=512), 2) s, p, cl, ch = twosample_mean_ztest(rvs1, rvs2) - test_and_check("meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), rvs1, rvs2, s, p, cl, ch) + test_and_check( + "meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), + rvs1, + rvs2, + s, + p, + cl, + ch, + ) if __name__ == "__main__": diff --git a/tests/queries/0_stateless/02160_untuple_exponential_growth.sh 
diff --git a/tests/queries/0_stateless/02160_untuple_exponential_growth.sh b/tests/queries/0_stateless/02160_untuple_exponential_growth.sh
index 9ec6594af69..2bc8f74a524 100755
--- a/tests/queries/0_stateless/02160_untuple_exponential_growth.sh
+++ b/tests/queries/0_stateless/02160_untuple_exponential_growth.sh
@@ -7,5 +7,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

 # Should finish in reasonable time (milliseconds).
 # In previous versions this query led to exponential complexity of query analysis.
-${CLICKHOUSE_LOCAL} --query "SELECT untuple(tuple(untuple((1, untuple((untuple(tuple(untuple(tuple(untuple((untuple((1, 1, 1, 1)), 1, 1, 1)))))), 1, 1))))))" 2>&1 | grep -cF 'TOO_BIG_AST'
-${CLICKHOUSE_LOCAL} --query "SELECT untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple((1, 1, 1, 1, 1))))))))))))))))))))))))))" 2>&1 | grep -cF 'TOO_BIG_AST'
+${CLICKHOUSE_LOCAL} --query "SELECT untuple(tuple(untuple((1, untuple((untuple(tuple(untuple(tuple(untuple((untuple((1, 1, 1, 1)), 1, 1, 1)))))), 1, 1))))))" 2>&1 | grep -cF 'too big'
+${CLICKHOUSE_LOCAL} --query "SELECT untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple(tuple(untuple((1, 1, 1, 1, 1))))))))))))))))))))))))))" 2>&1 | grep -cF 'too big'
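Editorial note: the stress queries above are easier to read when generated than written by hand. A hypothetical generator (not part of the test; the depth constant is illustrative) shows the shape that used to multiply the work done at each level of query analysis:

def nested_untuple(depth, leaf="(1, 1, 1, 1, 1)"):
    # Each extra wrapping level used to multiply analysis work,
    # hence the exponential blow-up the test guards against.
    expr = "untuple(" + leaf + ")"
    for _ in range(depth):
        expr = "untuple(tuple({}))".format(expr)
    return "SELECT " + expr

print(nested_untuple(12))  # roughly the nesting of the second query above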
diff --git a/tests/queries/0_stateless/02163_operators.sql b/tests/queries/0_stateless/02163_operators.sql
index 4968e448ab2..3f2d7d8bbb7 100644
--- a/tests/queries/0_stateless/02163_operators.sql
+++ b/tests/queries/0_stateless/02163_operators.sql
@@ -1,2 +1,2 @@
-WITH 2 AS `b.c`, [4, 5] AS a, 6 AS u, 3 AS v, 2 AS d, TRUE AS e, 1 AS f, 0 AS g, 2 AS h, 'Hello' AS i, 'World' AS j, TIMESTAMP '2022-02-02 02:02:02' AS w, [] AS k, (1, 2) AS l, 2 AS m, 3 AS n, [] AS o, [1] AS p, 1 AS q, q AS r, 1 AS s, 1 AS t
-SELECT INTERVAL CASE CASE WHEN NOT -a[b.c] * u DIV v + d IS NOT NULL AND e OR f BETWEEN g AND h THEN i ELSE j END WHEN w THEN k END || [l, (m, n)] MINUTE IS NULL OR NOT o::Array(INT) = p <> q < r > s != t AS upyachka;
+WITH 2 AS `b.c`, [4, 5] AS a, 6 AS u, 3 AS v, 2 AS d, TRUE AS e, 1 AS f, 0 AS g, 2 AS h, 'Hello' AS i, 'World' AS j, 'hi' AS w, NULL AS k, (1, 2) AS l, 2 AS m, 3 AS n, [] AS o, [1] AS p, 1 AS q, q AS r, 1 AS s, 1 AS t
+SELECT INTERVAL CASE CASE WHEN NOT -a[`b.c`] * u DIV v + d IS NOT NULL AND e OR f BETWEEN g AND h THEN i ELSE j END WHEN w THEN k END || [l, (m, n)] MINUTE IS NULL OR NOT o::Array(INT) = p <> q < r > s != t AS upyachka;
diff --git a/tests/queries/0_stateless/02163_shard_num.reference b/tests/queries/0_stateless/02163_shard_num.reference
index a109d5d2b6b..77eea7c95b9 100644
--- a/tests/queries/0_stateless/02163_shard_num.reference
+++ b/tests/queries/0_stateless/02163_shard_num.reference
@@ -1,4 +1,5 @@
--- { echo }
+-- { echoOn }
+
 SELECT shardNum() AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system, one) GROUP BY _shard_num;
 2 1
 1 1
@@ -14,4 +15,4 @@ SELECT _shard_num AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system,
 SELECT a._shard_num AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system, one) a GROUP BY shard_num;
 2 1
 1 1
-SELECT _shard_num FROM remote('127.1', system.one) AS a INNER JOIN (SELECT _shard_num FROM system.one) AS b USING (dummy); -- { serverError UNKNOWN_IDENTIFIER }
+SELECT _shard_num FROM remote('127.1', system.one) AS a INNER JOIN (SELECT _shard_num FROM system.one) AS b USING (dummy); -- { serverError UNSUPPORTED_METHOD, UNKNOWN_IDENTIFIER }
diff --git a/tests/queries/0_stateless/02163_shard_num.sql b/tests/queries/0_stateless/02163_shard_num.sql
index 27d40b3c976..cc87140ebaf 100644
--- a/tests/queries/0_stateless/02163_shard_num.sql
+++ b/tests/queries/0_stateless/02163_shard_num.sql
@@ -1,7 +1,10 @@
--- { echo }
+-- { echoOn }
+
 SELECT shardNum() AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system, one) GROUP BY _shard_num;
 SELECT shardNum() AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system, one) GROUP BY shard_num;
 SELECT _shard_num AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system, one) GROUP BY _shard_num;
 SELECT _shard_num AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system, one) GROUP BY shard_num;
 SELECT a._shard_num AS shard_num, sum(1) as rows FROM remote('127.{1,2}', system, one) a GROUP BY shard_num;
-SELECT _shard_num FROM remote('127.1', system.one) AS a INNER JOIN (SELECT _shard_num FROM system.one) AS b USING (dummy); -- { serverError UNKNOWN_IDENTIFIER }
+SELECT _shard_num FROM remote('127.1', system.one) AS a INNER JOIN (SELECT _shard_num FROM system.one) AS b USING (dummy); -- { serverError UNSUPPORTED_METHOD, UNKNOWN_IDENTIFIER }
+
+-- { echoOff }
diff --git a/tests/queries/0_stateless/02169_map_functions.reference b/tests/queries/0_stateless/02169_map_functions.reference
index 160aebbc852..10746a70f06 100644
--- a/tests/queries/0_stateless/02169_map_functions.reference
+++ b/tests/queries/0_stateless/02169_map_functions.reference
@@ -26,8 +26,81 @@
 {}
 {}
 {}
+{'key3':100,'key2':101,'key4':102,'key5':500,'key6':600}
+{'key3':101,'key2':102,'key4':103,'key5':500,'key6':600}
+{'key3':102,'key2':103,'key4':104,'key5':500,'key6':600}
+{'key3':103,'key2':104,'key4':105,'key5':500,'key6':600}
+{'key1':1111,'key2':2222,'key5':500,'key6':600}
+{'key1':1112,'key2':2224,'key5':500,'key6':600}
+{'key1':1113,'key2':2226,'key5':500,'key6':600}
+{'key3':100,'key2':101,'key4':102,'key5':500,'key6':600}
+{'key3':101,'key2':102,'key4':103,'key5':500,'key6':600}
+{'key3':102,'key2':103,'key4':104,'key5':500,'key6':600}
+{'key3':103,'key2':104,'key4':105,'key5':500,'key6':600}
+{'key1':1111,'key2':2222,'key5':500,'key6':600}
+{'key1':1112,'key2':2224,'key5':500,'key6':600}
+{'key1':1113,'key2':2226,'key5':500,'key6':600}
+{'key5':500,'key6':600}
+{'key5':500,'key6':600}
+1
+1
+1
+1
+0
+0
+0
+1
+1
+1
+1
+0
+0
+0
+{'key2':101,'key3':100,'key4':102}
+{'key2':102,'key3':101,'key4':103}
+{'key2':103,'key3':102,'key4':104}
+{'key2':104,'key3':103,'key4':105}
+{'key1':1111,'key2':2222}
+{'key1':1112,'key2':2224}
+{'key1':1113,'key2':2226}
+{'key3':100,'key2':101,'key4':102}
+{'key3':101,'key2':102,'key4':103}
+{'key3':102,'key2':103,'key4':104}
+{'key3':103,'key2':104,'key4':105}
+{'key1':1111,'key2':2222}
+{'key1':1112,'key2':2224}
+{'key1':1113,'key2':2226}
+{'key2':101,'key3':100,'key4':102}
+{'key2':102,'key3':101,'key4':103}
+{'key2':103,'key3':102,'key4':104}
+{'key2':104,'key3':103,'key4':105}
+{'key1':1111,'key2':2222}
+{'key1':1112,'key2':2224}
+{'key1':1113,'key2':2226}
 {3:2,1:0,2:0}
 {1:2,2:3}
 {1:2,2:3}
 {'x':'y','x':'y'}
 {'x':'y','x':'y'}
+{'k1':11,'k2':22}
+{'k1':11,'k2':22}
+{'k1':11,'k2':22}
+{'k1':11,'k2':22}
+{'k1':1,'k2':22,'k3':33,'k4':44}
+{'k1':1,'k2':22,'k3':33,'k4':44}
+{'k1':1,'k2':22,'k3':33,'k4':44}
+{'k1':1,'k2':22,'k3':33,'k4':44}
+{'k1':1,'k2':2,'k3':33,'k4':44}
+{'k1':1,'k2':2,'k3':33,'k4':44}
+{'k1':1,'k2':2,'k3':33,'k4':44}
+{'k1':1,'k2':2,'k3':33,'k4':44}
+{}
+{0:0}
+{1:1,0:0}
+{1:1,0:0,2:4}
+{1:1,3:3,0:0,2:4}
+{1:1,3:3,0:0,2:4,4:16}
+{1:1,3:3,5:5,0:0,2:4,4:16}
+{1:1,3:3,5:5,0:0,2:4,4:16,6:36}
+{1:1,3:3,5:5,7:7,0:0,2:4,4:16,6:36}
+{1:1,3:3,5:5,7:7,0:0,2:4,4:16,6:36,8:64}
diff --git a/tests/queries/0_stateless/02169_map_functions.sql b/tests/queries/0_stateless/02169_map_functions.sql
index 4cccaa56722..febaf2bd9d0 100644
--- a/tests/queries/0_stateless/02169_map_functions.sql
+++ b/tests/queries/0_stateless/02169_map_functions.sql
@@ -7,7 +7,19 @@ SELECT mapFilter((k, v) -> k like '%3' and v > 102, col) FROM table_map ORDER BY
 SELECT col, mapFilter((k, v) -> ((v % 10) > 1), col) FROM table_map ORDER BY id ASC;
 SELECT mapApply((k, v) -> (k, v + 1), col) FROM table_map ORDER BY id;
 SELECT mapFilter((k, v) -> 0, col) from table_map;
-SELECT mapApply((k, v) -> tuple(v + 9223372036854775806), col) FROM table_map; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT mapApply((k, v) -> tuple(v + 9223372036854775806), col) FROM table_map; -- { serverError BAD_ARGUMENTS }
+
+SELECT mapConcat(col, map('key5', 500), map('key6', 600)) FROM table_map ORDER BY id;
+SELECT mapConcat(col, materialize(map('key5', 500)), map('key6', 600)) FROM table_map ORDER BY id;
+SELECT concat(map('key5', 500), map('key6', 600));
+SELECT map('key5', 500) || map('key6', 600);
+
+SELECT mapExists((k, v) -> k LIKE '%3', col) FROM table_map ORDER BY id;
+SELECT mapExists((k, v) -> k LIKE '%2' AND v < 1000, col) FROM table_map ORDER BY id;
+
+SELECT mapSort(col) FROM table_map ORDER BY id;
+SELECT mapSort((k, v) -> v, col) FROM table_map ORDER BY id;
+SELECT mapPartialSort((k, v) -> k, 2, col) FROM table_map ORDER BY id;

 SELECT mapUpdate(map(1, 3, 3, 2), map(1, 0, 2, 0));
 SELECT mapApply((x, y) -> (x, x + 1), map(1, 0, 2, 0));
@@ -15,23 +27,45 @@ SELECT mapApply((x, y) -> (x, x + 1), materialize(map(1, 0, 2, 0)));
 SELECT mapApply((x, y) -> ('x', 'y'), map(1, 0, 2, 0));
 SELECT mapApply((x, y) -> ('x', 'y'), materialize(map(1, 0, 2, 0)));

+SELECT mapUpdate(map('k1', 1, 'k2', 2), map('k1', 11, 'k2', 22));
+SELECT mapUpdate(materialize(map('k1', 1, 'k2', 2)), map('k1', 11, 'k2', 22));
+SELECT mapUpdate(map('k1', 1, 'k2', 2), materialize(map('k1', 11, 'k2', 22)));
+SELECT mapUpdate(materialize(map('k1', 1, 'k2', 2)), materialize(map('k1', 11, 'k2', 22)));
+
+SELECT mapUpdate(map('k1', 1, 'k2', 2, 'k3', 3), map('k2', 22, 'k3', 33, 'k4', 44));
+SELECT mapUpdate(materialize(map('k1', 1, 'k2', 2, 'k3', 3)), map('k2', 22, 'k3', 33, 'k4', 44));
+SELECT mapUpdate(map('k1', 1, 'k2', 2, 'k3', 3), materialize(map('k2', 22, 'k3', 33, 'k4', 44)));
+SELECT mapUpdate(materialize(map('k1', 1, 'k2', 2, 'k3', 3)), materialize(map('k2', 22, 'k3', 33, 'k4', 44)));
+
+SELECT mapUpdate(map('k1', 1, 'k2', 2), map('k3', 33, 'k4', 44));
+SELECT mapUpdate(materialize(map('k1', 1, 'k2', 2)), map('k3', 33, 'k4', 44));
+SELECT mapUpdate(map('k1', 1, 'k2', 2), materialize(map('k3', 33, 'k4', 44)));
+SELECT mapUpdate(materialize(map('k1', 1, 'k2', 2)), materialize(map('k3', 33, 'k4', 44)));
+
+WITH (range(0, number % 10), range(0, number % 10))::Map(UInt64, UInt64) AS m1,
+     (range(0, number % 10, 2), arrayMap(x -> x * x, range(0, number % 10, 2)))::Map(UInt64, UInt64) AS m2
+SELECT DISTINCT mapUpdate(m1, m2) FROM numbers (100000);
+
 SELECT mapApply(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
-SELECT mapApply((x, y) -> (x), map(1, 0, 2, 0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-SELECT mapApply((x, y) -> ('x'), map(1, 0, 2, 0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-SELECT mapApply((x) -> (x, x), map(1, 0, 2, 0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-SELECT mapApply((x, y) -> (x, 1, 2), map(1, 0, 2, 0)); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
-SELECT mapApply((x, y) -> (x, x + 1)); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT mapApply((x, y) -> (x), map(1, 0, 2, 0)); -- { serverError BAD_ARGUMENTS }
+SELECT mapApply((x, y) -> ('x'), map(1, 0, 2, 0)); -- { serverError BAD_ARGUMENTS }
+SELECT mapApply((x) -> (x, x), map(1, 0, 2, 0)); -- { serverError BAD_ARGUMENTS }
+SELECT mapApply((x, y) -> (x, 1, 2), map(1, 0, 2, 0)); -- { serverError BAD_ARGUMENTS }
+SELECT mapApply((x, y) -> (x, x + 1)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 SELECT mapApply(map(1, 0, 2, 0), (x, y) -> (x, x + 1)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-SELECT mapApply((x, y) -> (x, x+1), map(1, 0, 2, 0), map(1, 0, 2, 0)); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT mapApply((x, y) -> (x, x+1), map(1, 0, 2, 0), map(1, 0, 2, 0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }

 SELECT mapFilter(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
 SELECT mapFilter((x, y) -> (toInt32(x)), map(1, 0, 2, 0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 SELECT mapFilter((x, y) -> ('x'), map(1, 0, 2, 0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 SELECT mapFilter((x) -> (x, x), map(1, 0, 2, 0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 SELECT mapFilter((x, y) -> (x, 1, 2), map(1, 0, 2, 0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-SELECT mapFilter((x, y) -> (x, x + 1)); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT mapFilter((x, y) -> (x, x + 1)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 SELECT mapFilter(map(1, 0, 2, 0), (x, y) -> (x > 0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-SELECT mapFilter((x, y) -> (x, x + 1), map(1, 0, 2, 0), map(1, 0, 2, 0)); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT mapFilter((x, y) -> (x, x + 1), map(1, 0, 2, 0), map(1, 0, 2, 0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+
+SELECT mapConcat([1, 2], map(1, 2)); -- { serverError NO_COMMON_TYPE }
+SELECT mapSort(map(1, 2), map(3, 4)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }

 SELECT mapUpdate(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
 SELECT mapUpdate(map(1, 3, 3, 2), map(1, 0, 2, 0), map(1, 0, 2, 0)); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
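Editorial note: the new `mapUpdate` cases pin down merge semantics that can be modelled with plain Python dicts (an analogy only — the reference output above re-appends updated keys at the end, while Python keeps first-insertion order):

def map_update(m1, m2):
    out = dict(m1)
    out.update(m2)  # keys from m2 override, unseen keys are appended
    return out

# Mirrors the reference output {'k1':11,'k2':22} and
# {'k1':1,'k2':22,'k3':33,'k4':44} above, up to key ordering.
assert map_update({"k1": 1, "k2": 2}, {"k1": 11, "k2": 22}) == {"k1": 11, "k2": 22}
assert map_update(
    {"k1": 1, "k2": 2, "k3": 3}, {"k2": 22, "k3": 33, "k4": 44}
) == {"k1": 1, "k2": 22, "k3": 33, "k4": 44}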
diff --git a/tests/queries/0_stateless/02174_cte_scalar_cache.reference b/tests/queries/0_stateless/02174_cte_scalar_cache.reference
index 817116eda88..1acbef35325 100644
--- a/tests/queries/0_stateless/02174_cte_scalar_cache.reference
+++ b/tests/queries/0_stateless/02174_cte_scalar_cache.reference
@@ -1,3 +1,3 @@
-02177_CTE_GLOBAL_ON 5 500 11 0 5
-02177_CTE_GLOBAL_OFF 1 100 5 0 1
-02177_CTE_NEW_ANALYZER 2 200 3 0 2
+02177_CTE_GLOBAL_ON 1 100 4 0 1
+02177_CTE_GLOBAL_OFF 1 100 4 0 1
+02177_CTE_NEW_ANALYZER 1 100 4 0 1
diff --git a/tests/queries/0_stateless/02174_cte_scalar_cache.sql b/tests/queries/0_stateless/02174_cte_scalar_cache.sql
index 9ed80d08cff..50a10834e64 100644
--- a/tests/queries/0_stateless/02174_cte_scalar_cache.sql
+++ b/tests/queries/0_stateless/02174_cte_scalar_cache.sql
@@ -1,3 +1,5 @@
+SET allow_experimental_analyzer = 1;
+
 WITH
     ( SELECT sleep(0.0001) FROM system.one ) as a1,
     ( SELECT sleep(0.0001) FROM system.one ) as a2,
diff --git a/tests/queries/0_stateless/02179_map_cast_to_array.reference b/tests/queries/0_stateless/02179_map_cast_to_array.reference
index c1870e78bb7..81bb9fba537 100644
--- a/tests/queries/0_stateless/02179_map_cast_to_array.reference
+++ b/tests/queries/0_stateless/02179_map_cast_to_array.reference
@@ -6,3 +6,4 @@
 {1:{1:'1234'}} [(1,{1:1234})] [(1,{1:1234})]
 {1:{1:'1234'}} [(1,[(1,'1234')])] [(1,[(1,'1234')])]
 {1:{1:'1234'}} [(1,[(1,1234)])] [(1,[(1,1234)])]
+[(1,'val1'),(2,'val2')] Array(Tuple(k UInt32, v String))
diff --git a/tests/queries/0_stateless/02179_map_cast_to_array.sql b/tests/queries/0_stateless/02179_map_cast_to_array.sql
index b1320d7a43c..25b090c10b7 100644
--- a/tests/queries/0_stateless/02179_map_cast_to_array.sql
+++ b/tests/queries/0_stateless/02179_map_cast_to_array.sql
@@ -24,3 +24,6 @@ SELECT value, cast(value, type), cast(materialize(value), type);

 WITH map(1, map(1, '1234')) as value, 'Array(Tuple(UInt64, Array(Tuple(UInt64, UInt64))))' AS type
 SELECT value, cast(value, type), cast(materialize(value), type);
+
+WITH map(1, 'val1', 2, 'val2') AS map
+SELECT CAST(map, 'Array(Tuple(k UInt32, v String))') AS c, toTypeName(c);
diff --git a/tests/queries/0_stateless/02179_sparse_columns_detach.reference b/tests/queries/0_stateless/02179_sparse_columns_detach.reference
index 2f9714f7a97..04a9b10c09f 100644
--- a/tests/queries/0_stateless/02179_sparse_columns_detach.reference
+++ b/tests/queries/0_stateless/02179_sparse_columns_detach.reference
@@ -1,12 +1,12 @@
-1000
+954
 id Default
 s Sparse
-1000
+954
 id Default
 s Sparse
-1000
+954
 id Default
 s Sparse
-1000
+954
 id Default
 s Sparse
diff --git a/tests/queries/0_stateless/02179_sparse_columns_detach.sql b/tests/queries/0_stateless/02179_sparse_columns_detach.sql
index 4720e6720ba..2ae088fedb4 100644
--- a/tests/queries/0_stateless/02179_sparse_columns_detach.sql
+++ b/tests/queries/0_stateless/02179_sparse_columns_detach.sql
@@ -4,8 +4,8 @@ CREATE TABLE t_sparse_detach(id UInt64, s String) ENGINE = MergeTree ORDER BY id
 SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9;

-INSERT INTO t_sparse_detach SELECT number, number % 20 = 0 ? toString(number) : '' FROM numbers(10000);
-INSERT INTO t_sparse_detach SELECT number, number % 20 = 0 ? toString(number) : '' FROM numbers(10000);
+INSERT INTO t_sparse_detach SELECT number, number % 21 = 0 ? toString(number) : '' FROM numbers(10000);
+INSERT INTO t_sparse_detach SELECT number, number % 21 = 0 ? toString(number) : '' FROM numbers(10000);

 OPTIMIZE TABLE t_sparse_detach FINAL;

@@ -30,8 +30,8 @@ ALTER TABLE t_sparse_detach MODIFY SETTING
     vertical_merge_algorithm_min_rows_to_activate = 1,
     vertical_merge_algorithm_min_columns_to_activate = 1;

-INSERT INTO t_sparse_detach SELECT number, number % 20 = 0 ? toString(number) : '' FROM numbers(10000);
-INSERT INTO t_sparse_detach SELECT number, number % 20 = 0 ? toString(number) : '' FROM numbers(10000);
+INSERT INTO t_sparse_detach SELECT number, number % 21 = 0 ? toString(number) : '' FROM numbers(10000);
+INSERT INTO t_sparse_detach SELECT number, number % 21 = 0 ? toString(number) : '' FROM numbers(10000);

 OPTIMIZE TABLE t_sparse_detach FINAL;
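Editorial note: a quick sanity check of the new reference numbers, assuming the counted value is the rows with a non-empty `s` across the two 10000-row inserts:

rows, step, inserts = 10000, 21, 2
non_default = sum(1 for n in range(rows) if n % step == 0)  # 477
print(non_default * inserts)   # 954, the new expected count
print(1 - non_default / rows)  # ~0.952 of values are defaults per insert,
                               # above the 0.9 ratio, so s stays Sparse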
diff --git a/tests/queries/0_stateless/02184_default_table_engine.sql b/tests/queries/0_stateless/02184_default_table_engine.sql
index 4b5ad6c008c..109875d53a5 100644
--- a/tests/queries/0_stateless/02184_default_table_engine.sql
+++ b/tests/queries/0_stateless/02184_default_table_engine.sql
@@ -82,7 +82,7 @@ SET default_table_engine = 'Log';
 CREATE TEMPORARY TABLE tmp (n int);
 SHOW CREATE TEMPORARY TABLE tmp;
 CREATE TEMPORARY TABLE tmp1 (n int) ENGINE=Memory;
-CREATE TEMPORARY TABLE tmp2 (n int) ENGINE=Log; -- {serverError 80}
+CREATE TEMPORARY TABLE tmp2 (n int) ENGINE=Log;
 CREATE TEMPORARY TABLE tmp2 (n int) ORDER BY n; -- {serverError 80}
 CREATE TEMPORARY TABLE tmp2 (n int, PRIMARY KEY (n)); -- {serverError 80}
diff --git a/tests/queries/0_stateless/02184_hash_functions_and_ip_types.reference b/tests/queries/0_stateless/02184_hash_functions_and_ip_types.reference
index 07705827428..b305806cd08 100644
--- a/tests/queries/0_stateless/02184_hash_functions_and_ip_types.reference
+++ b/tests/queries/0_stateless/02184_hash_functions_and_ip_types.reference
@@ -1,54 +1,54 @@
 Row 1:
 ──────
-ipv4: 1.2.3.4
-halfMD5(toIPv4('1.2.3.4')): 14356538739656272800
-farmFingerprint64(toIPv4('1.2.3.4')): 5715546585361069049
-xxh3(toIPv4('1.2.3.4')): 14355428563589734825
-wyHash64(toIPv4('1.2.3.4')): 13096729196120951355
-xxHash32(toIPv4('1.2.3.4')): 2430391091
-gccMurmurHash(toIPv4('1.2.3.4')): 5478801830569062645
-murmurHash2_32(toIPv4('1.2.3.4')): 1658978282
-javaHashUTF16LE(toIPv4('1.2.3.4')): 24190
-intHash64(toIPv4('1.2.3.4')): 5715546585361069049
-intHash32(toIPv4('1.2.3.4')): 3152671896
-metroHash64(toIPv4('1.2.3.4')): 5715546585361069049
-hex(murmurHash3_128(toIPv4('1.2.3.4'))): 549E9EF692591F6BB55874EF9A0DE88E
-jumpConsistentHash(toIPv4('1.2.3.4'), 42): 37
-sipHash64(toIPv4('1.2.3.4')): 10711397536826262068
-hex(sipHash128(toIPv4('1.2.3.4'))): DBB6A76B92B59789EFB42185DC32311D
-kostikConsistentHash(toIPv4('1.2.3.4'), 42): 0
-xxHash64(toIPv4('1.2.3.4')): 14496144933713060978
-murmurHash2_64(toIPv4('1.2.3.4')): 10829690723193326442
-cityHash64(toIPv4('1.2.3.4')): 5715546585361069049
-hiveHash(toIPv4('1.2.3.4')): 122110
-murmurHash3_64(toIPv4('1.2.3.4')): 16570805747704317665
-murmurHash3_32(toIPv4('1.2.3.4')): 1165084099
-yandexConsistentHash(toIPv4('1.2.3.4'), 42): 0
+ipv4: 1.2.3.4
+halfMD5(ipv4): 14356538739656272800
+farmFingerprint64(ipv4): 5715546585361069049
+xxh3(ipv4): 14355428563589734825
+wyHash64(ipv4): 13096729196120951355
+xxHash32(ipv4): 2430391091
+gccMurmurHash(ipv4): 5478801830569062645
+murmurHash2_32(ipv4): 1658978282
+javaHashUTF16LE(ipv4): 24190
+intHash64(ipv4): 5715546585361069049
+intHash32(ipv4): 3152671896
+metroHash64(ipv4): 5715546585361069049
+hex(murmurHash3_128(ipv4)): 549E9EF692591F6BB55874EF9A0DE88E
+jumpConsistentHash(ipv4, 42): 37
+sipHash64(ipv4): 10711397536826262068
+hex(sipHash128(ipv4)): DBB6A76B92B59789EFB42185DC32311D
+kostikConsistentHash(ipv4, 42): 0
+xxHash64(ipv4): 14496144933713060978
+murmurHash2_64(ipv4): 10829690723193326442
+cityHash64(ipv4): 5715546585361069049
+hiveHash(ipv4): 122110
+murmurHash3_64(ipv4): 16570805747704317665
+murmurHash3_32(ipv4): 1165084099
+yandexConsistentHash(ipv4, 42): 0
 Row 1:
 ──────
-ipv6: fe80::62:5aff:fed1:daf0
-halfMD5(toIPv6('fe80::62:5aff:fed1:daf0')): 9503062220758009199
-hex(MD4(toIPv6('fe80::62:5aff:fed1:daf0'))): E35A1A4FB3A3953421AB348B2E1A4A1A
-hex(MD5(toIPv6('fe80::62:5aff:fed1:daf0'))): 83E1A8BD8AB7456FC229208409F79798
-hex(SHA1(toIPv6('fe80::62:5aff:fed1:daf0'))): A6D5DCE882AC44804382DE4639E6001612E1C8B5
-hex(SHA224(toIPv6('fe80::62:5aff:fed1:daf0'))): F6995FD7BED2BCA21F68DAC6BBABE742DC1BA177BA8594CEF1715C52
-hex(SHA256(toIPv6('fe80::62:5aff:fed1:daf0'))): F75497BAD6F7747BD6B150B6F69BA2DEE354F1C2A34B7BEA6183973B78640250
-hex(SHA512(toIPv6('fe80::62:5aff:fed1:daf0'))): 0C2893CCBF44BC19CCF339AEED5B68CBFD5A2EF38263A48FE21C3379BA4438E7FF7A02F59D7542442C6E6ED538E6D13D65D3573DADB381651D3D8A5DEA232EAC
-farmFingerprint64(toIPv6('fe80::62:5aff:fed1:daf0')): 6643158734288374888
-javaHash(toIPv6('fe80::62:5aff:fed1:daf0')): 684606770
-xxh3(toIPv6('fe80::62:5aff:fed1:daf0')): 4051340969481364358
-wyHash64(toIPv6('fe80::62:5aff:fed1:daf0')): 18071806066582739916
-xxHash32(toIPv6('fe80::62:5aff:fed1:daf0')): 3353862080
-gccMurmurHash(toIPv6('fe80::62:5aff:fed1:daf0')): 11049311547848936878
-murmurHash2_32(toIPv6('fe80::62:5aff:fed1:daf0')): 1039121047
-javaHashUTF16LE(toIPv6('fe80::62:5aff:fed1:daf0')): -666938696
-metroHash64(toIPv6('fe80::62:5aff:fed1:daf0')): 15333045864940909774
-hex(sipHash128(toIPv6('fe80::62:5aff:fed1:daf0'))): 31D50562F877B1F92A99B05B646568B7
-hex(murmurHash3_128(toIPv6('fe80::62:5aff:fed1:daf0'))): 6FFEF0C1DF8B5B472FE2EDF0C76C12B9
-sipHash64(toIPv6('fe80::62:5aff:fed1:daf0')): 5681592867096972315
-xxHash64(toIPv6('fe80::62:5aff:fed1:daf0')): 4533874364641685764
-murmurHash2_64(toIPv6('fe80::62:5aff:fed1:daf0')): 11839090601505681839
-cityHash64(toIPv6('fe80::62:5aff:fed1:daf0')): 1599722731594796935
-hiveHash(toIPv6('fe80::62:5aff:fed1:daf0')): 684606770
-murmurHash3_64(toIPv6('fe80::62:5aff:fed1:daf0')): 18323430650022796352
-murmurHash3_32(toIPv6('fe80::62:5aff:fed1:daf0')): 3971193740
+ipv6: fe80::62:5aff:fed1:daf0
+halfMD5(ipv6): 9503062220758009199
+hex(MD4(ipv6)): E35A1A4FB3A3953421AB348B2E1A4A1A
+hex(MD5(ipv6)): 83E1A8BD8AB7456FC229208409F79798
+hex(SHA1(ipv6)): A6D5DCE882AC44804382DE4639E6001612E1C8B5
+hex(SHA224(ipv6)): F6995FD7BED2BCA21F68DAC6BBABE742DC1BA177BA8594CEF1715C52
+hex(SHA256(ipv6)): F75497BAD6F7747BD6B150B6F69BA2DEE354F1C2A34B7BEA6183973B78640250
+hex(SHA512(ipv6)): 0C2893CCBF44BC19CCF339AEED5B68CBFD5A2EF38263A48FE21C3379BA4438E7FF7A02F59D7542442C6E6ED538E6D13D65D3573DADB381651D3D8A5DEA232EAC
+farmFingerprint64(ipv6): 6643158734288374888
+javaHash(ipv6): 684606770
+xxh3(ipv6): 4051340969481364358
+wyHash64(ipv6): 18071806066582739916
+xxHash32(ipv6): 3353862080
+gccMurmurHash(ipv6): 11049311547848936878
+murmurHash2_32(ipv6): 1039121047
+javaHashUTF16LE(ipv6): -666938696
+metroHash64(ipv6): 15333045864940909774
+hex(sipHash128(ipv6)): 31D50562F877B1F92A99B05B646568B7
+hex(murmurHash3_128(ipv6)): 6FFEF0C1DF8B5B472FE2EDF0C76C12B9
+sipHash64(ipv6): 5681592867096972315
+xxHash64(ipv6): 4533874364641685764
+murmurHash2_64(ipv6): 11839090601505681839
+cityHash64(ipv6): 1599722731594796935
+hiveHash(ipv6): 684606770
+murmurHash3_64(ipv6): 18323430650022796352
+murmurHash3_32(ipv6): 3971193740
diff --git a/tests/queries/0_stateless/02184_hash_functions_and_ip_types.sql b/tests/queries/0_stateless/02184_hash_functions_and_ip_types.sql
index 67aae812144..d96574ef4fe 100644
--- a/tests/queries/0_stateless/02184_hash_functions_and_ip_types.sql
+++ b/tests/queries/0_stateless/02184_hash_functions_and_ip_types.sql
@@ -1,5 +1,7 @@
 -- Tags: no-fasttest

+SET allow_experimental_analyzer = 1;
+
 SELECT
     toIPv4('1.2.3.4') AS ipv4,
     halfMD5(ipv4),
diff --git a/tests/queries/0_stateless/02187_async_inserts_all_formats.python b/tests/queries/0_stateless/02187_async_inserts_all_formats.python
index 65a323ef9db..fa555c78f8b 100644
--- a/tests/queries/0_stateless/02187_async_inserts_all_formats.python
+++ b/tests/queries/0_stateless/02187_async_inserts_all_formats.python
@@ -3,47 +3,71 @@ import os
 import sys

 CURDIR = os.path.dirname(os.path.realpath(__file__))
-sys.path.insert(0, os.path.join(CURDIR, 'helpers'))
+sys.path.insert(0, os.path.join(CURDIR, "helpers"))

-CLICKHOUSE_URL = os.environ.get('CLICKHOUSE_URL')
-CLICKHOUSE_TMP = os.environ.get('CLICKHOUSE_TMP')
+CLICKHOUSE_URL = os.environ.get("CLICKHOUSE_URL")
+CLICKHOUSE_TMP = os.environ.get("CLICKHOUSE_TMP")

 from pure_http_client import ClickHouseClient

 client = ClickHouseClient()

+
 def run_test(data_format, gen_data_template, settings):
     print(data_format)
     client.query("TRUNCATE TABLE t_async_insert")

     expected = client.query(gen_data_template.format("TSV")).strip()
-    data = client.query(gen_data_template.format(data_format), settings=settings,binary_result=True)
+    data = client.query(
+        gen_data_template.format(data_format), settings=settings, binary_result=True
+    )

     insert_query = "INSERT INTO t_async_insert FORMAT {}".format(data_format)
     client.query_with_data(insert_query, data, settings=settings)

     result = client.query("SELECT * FROM t_async_insert FORMAT TSV").strip()
     if result != expected:
-        print("Failed for format {}.\nExpected:\n{}\nGot:\n{}\n".format(data_format, expected, result))
+        print(
+            "Failed for format {}.\nExpected:\n{}\nGot:\n{}\n".format(
+                data_format, expected, result
+            )
+        )
         exit(1)

-formats = client.query("SELECT name FROM system.formats WHERE is_input AND is_output \
-    AND name NOT IN ('CapnProto', 'RawBLOB', 'Template', 'ProtobufSingle', 'LineAsString', 'Protobuf', 'ProtobufList') ORDER BY name").strip().split('\n')
+
+formats = (
+    client.query(
+        "SELECT name FROM system.formats WHERE is_input AND is_output \
+        AND name NOT IN ('CapnProto', 'RawBLOB', 'Template', 'ProtobufSingle', 'LineAsString', 'Protobuf', 'ProtobufList') ORDER BY name"
+    )
+    .strip()
+    .split("\n")
+)

 # Generic formats
 client.query("DROP TABLE IF EXISTS t_async_insert")
-client.query("CREATE TABLE t_async_insert (id UInt64, s String, arr Array(UInt64)) ENGINE = Memory")
+client.query(
+    "CREATE TABLE t_async_insert (id UInt64, s String, arr Array(UInt64)) ENGINE = Memory"
+)
 gen_data_query = "SELECT number AS id, toString(number) AS s, range(number) AS arr FROM numbers(10) FORMAT {}"

 for data_format in formats:
-    run_test(data_format, gen_data_query, settings={"async_insert": 1, "wait_for_async_insert": 1})
+    run_test(
+        data_format,
+        gen_data_query,
+        settings={"async_insert": 1, "wait_for_async_insert": 1},
+    )

 # LineAsString
 client.query("DROP TABLE IF EXISTS t_async_insert")
 client.query("CREATE TABLE t_async_insert (s String) ENGINE = Memory")
 gen_data_query = "SELECT toString(number) AS s FROM numbers(10) FORMAT {}"

-run_test('LineAsString', gen_data_query, settings={"async_insert": 1, "wait_for_async_insert": 1})
+run_test(
+    "LineAsString",
+    gen_data_query,
+    settings={"async_insert": 1, "wait_for_async_insert": 1},
+)

 # TODO: add CapnProto and Protobuf
diff --git a/tests/queries/0_stateless/02187_msg_pack_uuid.sh b/tests/queries/0_stateless/02187_msg_pack_uuid.sh
index 9be92d66790..f04ef09a8c8 100755
--- a/tests/queries/0_stateless/02187_msg_pack_uuid.sh
+++ b/tests/queries/0_stateless/02187_msg_pack_uuid.sh
@@ -5,13 +5,13 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh

-$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_str.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='str'"
+$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_str.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='str', engine_file_truncate_on_insert=1"
 $CLICKHOUSE_CLIENT -q "select * from file('uuid_str.msgpack', 'MsgPack', 'uuid UUID')"

-$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_bin.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='bin'"
+$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_bin.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='bin', engine_file_truncate_on_insert=1"
 $CLICKHOUSE_CLIENT -q "select * from file('uuid_bin.msgpack', 'MsgPack', 'uuid UUID')"

-$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_ext.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='ext'"
+$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_ext.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='ext', engine_file_truncate_on_insert=1"
 $CLICKHOUSE_CLIENT -q "select * from file('uuid_ext.msgpack', 'MsgPack', 'uuid UUID')"

 $CLICKHOUSE_CLIENT -q "select c1, toTypeName(c1) from file('uuid_ext.msgpack') settings input_format_msgpack_number_of_columns=1"
diff --git a/tests/queries/0_stateless/02205_HTTP_user_agent.python b/tests/queries/0_stateless/02205_HTTP_user_agent.python
index 397e06cbe82..d8f8a32b6db 100644
--- a/tests/queries/0_stateless/02205_HTTP_user_agent.python
+++ b/tests/queries/0_stateless/02205_HTTP_user_agent.python
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3

-from http.server import SimpleHTTPRequestHandler,HTTPServer
+from http.server import SimpleHTTPRequestHandler, HTTPServer
 import socket
 import sys
 import threading
@@ -17,6 +17,7 @@ def is_ipv6(host):
     except:
         return True

+
 def get_local_port(host, ipv6):
     if ipv6:
         family = socket.AF_INET6
@@ -27,20 +28,19 @@ def get_local_port(host, ipv6):
         fd.bind((host, 0))
         return fd.getsockname()[1]

-CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', 'localhost')
-CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123')
+
+CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "localhost")
+CLICKHOUSE_PORT_HTTP = os.environ.get("CLICKHOUSE_PORT_HTTP", "8123")

 # Server returns this JSON response.
-SERVER_JSON_RESPONSE = \
-'''{
+SERVER_JSON_RESPONSE = """{
     "login": "ClickHouse",
     "id": 54801242,
     "name": "ClickHouse",
     "company": null
-}'''
+}"""

-EXPECTED_ANSWER = \
-'''{\\n\\t"login": "ClickHouse",\\n\\t"id": 54801242,\\n\\t"name": "ClickHouse",\\n\\t"company": null\\n}'''
+EXPECTED_ANSWER = """{\\n\\t"login": "ClickHouse",\\n\\t"id": 54801242,\\n\\t"name": "ClickHouse",\\n\\t"company": null\\n}"""

 #####################################################################################
 # This test starts an HTTP server and serves data to clickhouse url-engine based table.
@@ -51,26 +51,38 @@ EXPECTED_ANSWER = \
 #####################################################################################

 # IP-address of this host accessible from the outside world. Get the first one
-HTTP_SERVER_HOST = subprocess.check_output(['hostname', '-i']).decode('utf-8').strip().split()[0]
+HTTP_SERVER_HOST = (
+    subprocess.check_output(["hostname", "-i"]).decode("utf-8").strip().split()[0]
+)
 IS_IPV6 = is_ipv6(HTTP_SERVER_HOST)
 HTTP_SERVER_PORT = get_local_port(HTTP_SERVER_HOST, IS_IPV6)

 # IP address and port of the HTTP server started from this script.
 HTTP_SERVER_ADDRESS = (HTTP_SERVER_HOST, HTTP_SERVER_PORT)
 if IS_IPV6:
-    HTTP_SERVER_URL_STR = 'http://' + f'[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}' + "/"
+    HTTP_SERVER_URL_STR = (
+        "http://"
+        + f"[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}"
+        + "/"
+    )
 else:
-    HTTP_SERVER_URL_STR = 'http://' + f'{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}' + "/"
+    HTTP_SERVER_URL_STR = (
+        "http://" + f"{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}" + "/"
+    )


 def get_ch_answer(query):
     host = CLICKHOUSE_HOST
     if IS_IPV6:
-        host = f'[{host}]'
+        host = f"[{host}]"

-    url = os.environ.get('CLICKHOUSE_URL', 'http://{host}:{port}'.format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP))
+    url = os.environ.get(
+        "CLICKHOUSE_URL",
+        "http://{host}:{port}".format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP),
+    )
     return urllib.request.urlopen(url, data=query.encode()).read().decode()

+
 def check_answers(query, answer):
     ch_answer = get_ch_answer(query)
     if ch_answer.strip() != answer.strip():
@@ -79,16 +91,17 @@ def check_answers(query, answer):
         print("Fetched answer :", ch_answer, file=sys.stderr)
         raise Exception("Fail on query")

+
 # Server with check for User-Agent headers.
 class HttpProcessor(SimpleHTTPRequestHandler):
     def _set_headers(self):
-        user_agent = self.headers.get('User-Agent')
-        if user_agent and user_agent.startswith('ClickHouse/'):
+        user_agent = self.headers.get("User-Agent")
+        if user_agent and user_agent.startswith("ClickHouse/"):
             self.send_response(200)
         else:
             self.send_response(403)

-        self.send_header('Content-Type', 'text/csv')
+        self.send_header("Content-Type", "text/csv")
         self.end_headers()

     def do_GET(self):
@@ -98,9 +111,11 @@ class HttpProcessor(SimpleHTTPRequestHandler):
     def log_message(self, format, *args):
         return

+
 class HTTPServerV6(HTTPServer):
     address_family = socket.AF_INET6

+
 def start_server(requests_amount):
     if IS_IPV6:
         httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, HttpProcessor)
@@ -114,15 +129,18 @@ def start_server(requests_amount):
     t = threading.Thread(target=real_func)
     return t

+
 #####################################################################
 # Testing area.
 #####################################################################

+
 def test_select():
     global HTTP_SERVER_URL_STR
-    query = 'SELECT * FROM url(\'{}\',\'JSONAsString\');'.format(HTTP_SERVER_URL_STR)
+    query = "SELECT * FROM url('{}','JSONAsString');".format(HTTP_SERVER_URL_STR)
     check_answers(query, EXPECTED_ANSWER)

+
 def main():
     # HEAD + GET
     t = start_server(2)
@@ -131,6 +149,7 @@ def main():
     t.join()
     print("PASSED")

+
 if __name__ == "__main__":
     try:
         main()
@@ -141,4 +160,3 @@ if __name__ == "__main__":
         sys.stderr.flush()

     os._exit(1)
-
diff --git a/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.sh b/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.sh
index 693f1d817e3..0345a0e6394 100755
--- a/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.sh
+++ b/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.sh
@@ -1,11 +1,5 @@
 #!/usr/bin/env bash
-# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-parallel, no-fasttest
-# Tag no-tsan: requires jemalloc to track small allocations
-# Tag no-asan: requires jemalloc to track small allocations
-# Tag no-ubsan: requires jemalloc to track small allocations
-# Tag no-msan: requires jemalloc to track small allocations
-
-
+# Tags: no-parallel, no-fasttest

 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
diff --git a/tests/queries/0_stateless/02210_processors_profile_log_2.reference b/tests/queries/0_stateless/02210_processors_profile_log_2.reference
index 5467c7ef2ba..b9a848131fd 100644
--- a/tests/queries/0_stateless/02210_processors_profile_log_2.reference
+++ b/tests/queries/0_stateless/02210_processors_profile_log_2.reference
@@ -9,3 +9,4 @@ NullSource 0 0 0 0
 NumbersMt 0 0 1000000 8000000
 Resize 1 8 1 8
 Resize 1 8 1 8
+1
diff --git a/tests/queries/0_stateless/02210_processors_profile_log_2.sh b/tests/queries/0_stateless/02210_processors_profile_log_2.sh
index 93eabc2f0fe..044954a4e96 100755
--- a/tests/queries/0_stateless/02210_processors_profile_log_2.sh
+++ b/tests/queries/0_stateless/02210_processors_profile_log_2.sh
@@ -17,3 +17,5 @@ EOF
 ${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS"

 ${CLICKHOUSE_CLIENT} -q "select name, sum(input_rows), sum(input_bytes), sum(output_rows), sum(output_bytes) from system.processors_profile_log where query_id = '${QUERY_ID}' group by name, plan_step, plan_group order by name, sum(input_rows), sum(input_bytes), sum(output_rows), sum(output_bytes)"
+
+${CLICKHOUSE_CLIENT} -q "select countDistinct(initial_query_id) from system.processors_profile_log where query_id = '${QUERY_ID}'"
\ No newline at end of file
diff --git a/tests/queries/0_stateless/02211_jsonl_format_extension.sql b/tests/queries/0_stateless/02211_jsonl_format_extension.sql
index 08fff5a11f5..61cc2a408fa 100644
--- a/tests/queries/0_stateless/02211_jsonl_format_extension.sql
+++ b/tests/queries/0_stateless/02211_jsonl_format_extension.sql
@@ -1,3 +1,3 @@
--- Tags: no-fasttest
-insert into table function file('data.jsonl', 'JSONEachRow', 'x UInt32') select * from numbers(10);
-select * from file('data.jsonl');
+-- Tags: no-fasttest, no-parallel
+insert into table function file('data.jsonl', 'JSONEachRow', 'x UInt32') select * from numbers(10) SETTINGS engine_file_truncate_on_insert=1;
+select * from file('data.jsonl') order by x;
diff --git a/tests/queries/0_stateless/02221_parallel_replicas_bug.sh b/tests/queries/0_stateless/02221_parallel_replicas_bug.sh
index cce32bf8272..3c44a2a7ba7 100755
--- a/tests/queries/0_stateless/02221_parallel_replicas_bug.sh
+++ b/tests/queries/0_stateless/02221_parallel_replicas_bug.sh
@@ -4,4 +4,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh

-${CLICKHOUSE_CLIENT} --allow_experimental_parallel_reading_from_replicas=1 -nm < "$CURDIR"/01099_parallel_distributed_insert_select.sql > /dev/null
+${CLICKHOUSE_CLIENT} --allow_experimental_parallel_reading_from_replicas=1 --parallel_replicas_for_non_replicated_merge_tree=1 -nm < "$CURDIR"/01099_parallel_distributed_insert_select.sql > /dev/null
diff --git a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.reference b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.reference
index 60d3c78d740..53b44764d5c 100644
--- a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.reference
+++ b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.reference
@@ -36,6 +36,8 @@ log
 log
 log_pointer
 log_pointer
+lost_part_count
+lost_part_count
 max_processed_insert_time
 max_processed_insert_time
 metadata
diff --git a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.reference b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.reference
index c59be6a3af5..ccc3064ccbd 100644
--- a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.reference
+++ b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.reference
@@ -17,6 +17,7 @@ leader_election
 leader_election-0
 log
 log_pointer
+lost_part_count
 max_processed_insert_time
 metadata
 metadata
@@ -58,6 +59,7 @@ leader_election
 leader_election-0
 log
 log_pointer
+lost_part_count
 max_processed_insert_time
 metadata
 metadata
diff --git a/tests/queries/0_stateless/02226_analyzer_or_like_combine.reference b/tests/queries/0_stateless/02226_analyzer_or_like_combine.reference
index 6165079994f..d741391067c 100644
--- a/tests/queries/0_stateless/02226_analyzer_or_like_combine.reference
+++ b/tests/queries/0_stateless/02226_analyzer_or_like_combine.reference
@@ -78,17 +78,17 @@ SELECT materialize(\'Привет, World\') AS s1,
     materialize(\'Привет, World\') AS s2
 WHERE (s1 LIKE \'hell%\') OR (s2 ILIKE \'%привет%\') OR (s1 ILIKE \'world%\')
-SETTINGS optimize_or_like_chain = 1
+SETTINGS optimize_or_like_chain = 1, allow_hyperscan = 0
 SELECT materialize(\'Привет, World\') AS s1,
     materialize(\'Привет, World\') AS s2
 WHERE (s1 LIKE \'hell%\') OR (s2 ILIKE \'%привет%\') OR (s1 ILIKE \'world%\')
-SETTINGS optimize_or_like_chain = 1
+SETTINGS optimize_or_like_chain = 1, max_hyperscan_regexp_length = 10
 SELECT materialize(\'Привет, World\') AS s1,
     materialize(\'Привет, World\') AS s2
 WHERE (s1 LIKE \'hell%\') OR (s2 ILIKE \'%привет%\') OR (s1 ILIKE \'world%\')
-SETTINGS optimize_or_like_chain = 1
+SETTINGS optimize_or_like_chain = 1, max_hyperscan_regexp_total_length = 10
 SELECT materialize(\'Привет, World\') AS s1,
     materialize(\'Привет, World\') AS s2
diff --git a/tests/queries/0_stateless/02226_analyzer_or_like_combine.sql b/tests/queries/0_stateless/02226_analyzer_or_like_combine.sql
index dec73c201ef..fbebfc6d281 100644
--- a/tests/queries/0_stateless/02226_analyzer_or_like_combine.sql
+++ b/tests/queries/0_stateless/02226_analyzer_or_like_combine.sql
@@ -4,9 +4,9 @@ EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hel
 EXPLAIN QUERY TREE run_passes=1 SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s ILIKE 'world%') SETTINGS optimize_or_like_chain = 1, allow_experimental_analyzer = 1;

 EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s1, materialize('Привет, World') AS s2 WHERE (s1 LIKE 'hell%') OR (s2 ILIKE '%привет%') OR (s1 ILIKE 'world%') SETTINGS optimize_or_like_chain = 1;
-EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s1, materialize('Привет, World') AS s2 WHERE (s1 LIKE 'hell%') OR (s2 ILIKE '%привет%') OR (s1 ILIKE 'world%') SETTINGS optimize_or_like_chain = 1 SETTINGS allow_hyperscan = 0;
-EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s1, materialize('Привет, World') AS s2 WHERE (s1 LIKE 'hell%') OR (s2 ILIKE '%привет%') OR (s1 ILIKE 'world%') SETTINGS optimize_or_like_chain = 1 SETTINGS max_hyperscan_regexp_length = 10;
-EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s1, materialize('Привет, World') AS s2 WHERE (s1 LIKE 'hell%') OR (s2 ILIKE '%привет%') OR (s1 ILIKE 'world%') SETTINGS optimize_or_like_chain = 1 SETTINGS max_hyperscan_regexp_total_length = 10;
+EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s1, materialize('Привет, World') AS s2 WHERE (s1 LIKE 'hell%') OR (s2 ILIKE '%привет%') OR (s1 ILIKE 'world%') SETTINGS optimize_or_like_chain = 1, allow_hyperscan = 0;
+EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s1, materialize('Привет, World') AS s2 WHERE (s1 LIKE 'hell%') OR (s2 ILIKE '%привет%') OR (s1 ILIKE 'world%') SETTINGS optimize_or_like_chain = 1, max_hyperscan_regexp_length = 10;
+EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s1, materialize('Привет, World') AS s2 WHERE (s1 LIKE 'hell%') OR (s2 ILIKE '%привет%') OR (s1 ILIKE 'world%') SETTINGS optimize_or_like_chain = 1, max_hyperscan_regexp_total_length = 10;

 EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s1, materialize('Привет, World') AS s2 WHERE (s1 LIKE 'hell%') OR (s2 ILIKE '%привет%') OR (s1 ILIKE 'world%') OR s1 == 'Привет' SETTINGS optimize_or_like_chain = 1;
diff --git a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.reference b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.reference
index d895040ef59..2ee0f256949 100644
--- a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.reference
+++ b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.reference
@@ -1,15 +1,15 @@
 Using storage policy: s3_cache
 1 0
 1 0
 1 0
-0 0 1 0
+0
 Using storage policy: local_cache
 1 0
 1 0
 1 0
-0 0 1 0
+0
 Using storage policy: azure_cache
 1 0
 1 0
 1 0
-0 0 1 0
+0
diff --git a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh
index 96e51a58cc4..f071a570243 100755
--- a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh
+++ b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh
@@ -64,19 +64,6 @@ for STORAGE_POLICY in 's3_cache' 'local_cache' 'azure_cache'; do
         set remote_filesystem_read_method='threadpool';
     """

-    clickhouse client --multiquery --multiline --query """
-    SELECT * FROM test_02226 WHERE value LIKE '%abc%' ORDER BY value LIMIT 10 FORMAT Null;
-
-    SET enable_filesystem_cache_on_write_operations = 1;
-
-    TRUNCATE TABLE test_02226;
-    SELECT count() FROM test_02226;
-
-    SYSTEM DROP FILESYSTEM CACHE;
-
-    INSERT INTO test_02226 SELECT * FROM generateRandom('key UInt32, value String') LIMIT 10000;
-    """
-
     query_id=$(clickhouse client --query "select queryID() from ($query) limit 1")

     clickhouse client --multiquery --multiline --query """
@@ -90,7 +77,20 @@ for STORAGE_POLICY in 's3_cache' 'local_cache' 'azure_cache'; do
         AND current_database = currentDatabase()
     ORDER BY query_start_time DESC
     LIMIT 1;
-
-    DROP TABLE test_02226;
     """
+
+    clickhouse client --multiquery --multiline --query """
+    SELECT * FROM test_02226 WHERE value LIKE '%abc%' ORDER BY value LIMIT 10 FORMAT Null;
+
+    SET enable_filesystem_cache_on_write_operations = 1;
+
+    TRUNCATE TABLE test_02226;
+    SELECT count() FROM test_02226;
+
+    SYSTEM DROP FILESYSTEM CACHE;
+
+    INSERT INTO test_02226 SELECT * FROM generateRandom('key UInt32, value String') LIMIT 10000;
+    """
+
+    clickhouse client --query "DROP TABLE test_02226"
 done
diff --git a/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh b/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh
index 2a163746e20..4b8f8da5480 100755
--- a/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh
+++ b/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh
@@ -15,6 +15,7 @@ as select * from numbers(1);
 # Logical error: 'Coordinator for parallel reading from replicas is not initialized'.
 opts=(
     --allow_experimental_parallel_reading_from_replicas 1
+    --parallel_replicas_for_non_replicated_merge_tree 1
     --max_parallel_replicas 3

     --iterations 1
diff --git a/tests/queries/0_stateless/02227_union_match_by_name.reference b/tests/queries/0_stateless/02227_union_match_by_name.reference
index cebcc42dcba..e51ea983f7f 100644
--- a/tests/queries/0_stateless/02227_union_match_by_name.reference
+++ b/tests/queries/0_stateless/02227_union_match_by_name.reference
@@ -1,40 +1,53 @@
--- { echo }
+-- { echoOn }
+
 EXPLAIN header = 1, optimize = 0 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y);
-Expression (Projection)
+Expression (Project names)
 Header: avgWeighted(x, y) Nullable(Float64)
-  Expression (Before ORDER BY)
-  Header: avgWeighted(x, y) Nullable(Float64)
+  Expression (Projection)
+  Header: avgWeighted(x_0, y_1) Nullable(Float64)
     Aggregating
-    Header: avgWeighted(x, y) Nullable(Float64)
+    Header: avgWeighted(x_0, y_1) Nullable(Float64)
       Expression (Before GROUP BY)
-      Header: x Nullable(UInt8)
-              y UInt8
-        Union
-        Header: x Nullable(UInt8)
-                y UInt8
-          Expression (Conversion before UNION)
-          Header: x Nullable(UInt8)
+      Header: x_0 Nullable(UInt8)
+              y_1 UInt8
+        Expression (Change column names to column identifiers)
+        Header: x_0 Nullable(UInt8)
+                y_1 UInt8
+          Union
+          Header: NULL Nullable(UInt8)
+                  x Nullable(UInt8)
                   y UInt8
-            Expression (Projection)
-            Header: x UInt8
+            Expression (Conversion before UNION)
+            Header: NULL Nullable(UInt8)
+                    x Nullable(UInt8)
                     y UInt8
-              Expression (Before ORDER BY)
-              Header: 255 UInt8
-                      1 UInt8
-                      dummy UInt8
-                ReadFromStorage (SystemOne)
-                Header: dummy UInt8
-          Expression (Conversion before UNION)
-          Header: x Nullable(UInt8)
-                  y UInt8
-            Expression (Projection)
-            Header: x Nullable(Nothing)
+              Expression (Project names)
+              Header: NULL Nullable(Nothing)
+                      x UInt8
+                      y UInt8
+                Expression (Projection)
+                Header: NULL_Nullable(Nothing) Nullable(Nothing)
+                        255_UInt8 UInt8
+                        1_UInt8 UInt8
+                  Expression (Change column names to column identifiers)
+                  Header: system.one.dummy_0 UInt8
+                    ReadFromStorage (SystemOne)
+                    Header: dummy UInt8
+            Expression (Conversion before UNION)
+            Header: NULL Nullable(UInt8)
+                    x Nullable(UInt8)
                     y UInt8
-              Expression (Before ORDER BY)
-              Header: 1 UInt8
-                      NULL Nullable(Nothing)
-                      dummy UInt8
-                ReadFromStorage (SystemOne)
-                Header: dummy UInt8
+              Expression (Project names)
+              Header: y UInt8
+                      x Nullable(Nothing)
+                      y UInt8
+                Expression (Projection)
+                Header: 1_UInt8 UInt8
+                        NULL_Nullable(Nothing) Nullable(Nothing)
+                        1_UInt8 UInt8
+                  Expression (Change column names to column identifiers)
+                  Header: system.one.dummy_0 UInt8
+                    ReadFromStorage (SystemOne)
+                    Header: dummy UInt8
 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y);
 255
diff --git a/tests/queries/0_stateless/02227_union_match_by_name.sql b/tests/queries/0_stateless/02227_union_match_by_name.sql
index cc0ab8ba5aa..6a19add1d37 100644
--- a/tests/queries/0_stateless/02227_union_match_by_name.sql
+++ b/tests/queries/0_stateless/02227_union_match_by_name.sql
@@ -1,3 +1,8 @@
--- { echo }
+SET allow_experimental_analyzer = 1;
+
+-- { echoOn }
+
 EXPLAIN header = 1, optimize = 0 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y);
 SELECT avgWeighted(x, y) FROM (SELECT NULL, 255 AS x, 1 AS y UNION ALL SELECT y, NULL AS x, 1 AS y);
+
+-- { echoOff }
diff --git a/tests/queries/0_stateless/02229_client_stop_multiquery_in_SIGINT.sh b/tests/queries/0_stateless/02229_client_stop_multiquery_in_SIGINT.sh
index 171dcc52c9c..e5d00bc1a1c 100755
--- a/tests/queries/0_stateless/02229_client_stop_multiquery_in_SIGINT.sh
+++ b/tests/queries/0_stateless/02229_client_stop_multiquery_in_SIGINT.sh
@@ -1,4 +1,5 @@
 #!/usr/bin/env bash
+# Tags: no-fasttest

 CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
diff --git a/tests/queries/0_stateless/02231_buffer_aggregate_states_leak.sql b/tests/queries/0_stateless/02231_buffer_aggregate_states_leak.sql
index a53b7f50e51..dbe18953957 100644
--- a/tests/queries/0_stateless/02231_buffer_aggregate_states_leak.sql
+++ b/tests/queries/0_stateless/02231_buffer_aggregate_states_leak.sql
@@ -28,7 +28,7 @@ create materialized view mv_02231 to buffer_02231 as select
 from in_02231
 group by key;

-insert into in_02231 select * from numbers(10e6) settings max_memory_usage='300Mi';
+insert into in_02231 select * from numbers(10e6) settings max_memory_usage='310Mi', max_threads=1;

 drop table buffer_02231;
 drop table out_02231;
diff --git a/tests/queries/0_stateless/02232_dist_insert_send_logs_level_hung.sh b/tests/queries/0_stateless/02232_dist_insert_send_logs_level_hung.sh
index 322e7e73991..734cef06214 100755
--- a/tests/queries/0_stateless/02232_dist_insert_send_logs_level_hung.sh
+++ b/tests/queries/0_stateless/02232_dist_insert_send_logs_level_hung.sh
@@ -49,7 +49,16 @@ insert_client_opts=(
 timeout 250s $CLICKHOUSE_CLIENT "${client_opts[@]}" "${insert_client_opts[@]}" -q "insert into function remote('127.2', currentDatabase(), in_02232) select * from numbers(1e6)"

 # Kill underlying query of remote() to make KILL faster
-timeout 30s $CLICKHOUSE_CLIENT "${client_opts[@]}" -q "KILL QUERY WHERE Settings['log_comment'] = '$CLICKHOUSE_LOG_COMMENT' SYNC" --format Null
+# This test is reproducing very interesting behaviour.
+# The block size is 1, so the secondary query creates InterpreterSelectQuery for each row due to pushing to the MV.
+# It works extremely slowly, and the initial query produces new blocks and writes them to the socket much faster
+# than the secondary query can read and process them. Therefore, it fills network buffers in the kernel.
+# Once a buffer in the kernel is full, send(...) blocks until the secondary query finishes processing the data
+# that it already has in ReadBufferFromPocoSocket and calls recv.
+# Or until the kernel decides to resize the buffer (it seems to have non-trivial rules for that).
+# Anyway, it may look like the initial query got stuck, but actually it did not.
+# Moreover, the initial query cannot be killed at that point, so KILL QUERY ... SYNC will get "stuck" as well.
+timeout 30s $CLICKHOUSE_CLIENT "${client_opts[@]}" -q "KILL QUERY WHERE query like '%INSERT INTO $CLICKHOUSE_DATABASE.in_02232%' SYNC" --format Null
 echo $?

 $CLICKHOUSE_CLIENT "${client_opts[@]}" -nm -q "
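Editorial note: the comment added above describes send() blocking on a full kernel socket buffer. A self-contained sketch of that effect (plain socket behaviour, unrelated to ClickHouse internals):

import socket

srv = socket.socket()
srv.bind(("127.0.0.1", 0))
srv.listen(1)

tx = socket.socket()
tx.setsockopt(socket.SOL_SOCKET, socket.SO_SNDBUF, 4096)  # shrink the buffer
tx.connect(srv.getsockname())
rx, _ = srv.accept()  # rx never calls recv(), like the slow secondary query

tx.settimeout(1.0)
sent = 0
try:
    while True:
        sent += tx.send(b"x" * 4096)
except socket.timeout:
    # send() blocked: the writer is alive, merely waiting for the reader --
    # exactly the "looks stuck but is not" state the comment describes.
    print("send() blocked after", sent, "bytes")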
+# Moreover, the initial query cannot be killed at that point, so KILL QUERY ... SYNC will get "stuck" as well. +timeout 30s $CLICKHOUSE_CLIENT "${client_opts[@]}" -q "KILL QUERY WHERE query like '%INSERT INTO $CLICKHOUSE_DATABASE.in_02232%' SYNC" --format Null echo $? $CLICKHOUSE_CLIENT "${client_opts[@]}" -nm -q " diff --git a/tests/queries/0_stateless/02233_HTTP_ranged.python b/tests/queries/0_stateless/02233_HTTP_ranged.python index e74d494edf5..66ef3304098 100644 --- a/tests/queries/0_stateless/02233_HTTP_ranged.python +++ b/tests/queries/0_stateless/02233_HTTP_ranged.python @@ -122,7 +122,7 @@ class HttpProcessor(BaseHTTPRequestHandler): get_call_num = 0 responses_to_get = [] - def send_head(self, from_get = False): + def send_head(self, from_get=False): if self.headers["Range"] and HttpProcessor.allow_range: try: self.range = parse_byte_range(self.headers["Range"]) @@ -146,7 +146,9 @@ class HttpProcessor(BaseHTTPRequestHandler): self.send_error(416, "Requested Range Not Satisfiable") return None - retry_range_request = first != 0 and from_get is True and len(HttpProcessor.responses_to_get) > 0 + retry_range_request = ( + first != 0 and from_get is True and len(HttpProcessor.responses_to_get) > 0 + ) if retry_range_request: code = HttpProcessor.responses_to_get.pop() if code not in HttpProcessor.responses: @@ -244,7 +246,9 @@ def run_test(allow_range, settings, check_retries=False): raise Exception("HTTP Range was not used when supported") if check_retries and len(HttpProcessor.responses_to_get) > 0: - raise Exception("Expected to get http response 500, which had to be retried, but 200 ok returned and then retried") + raise Exception( + "Expected to get http response 500, which had to be retried, but 200 ok returned and then retried" + ) if retries_num > 0: expected_get_call_num += retries_num - 1 @@ -263,7 +267,7 @@ def run_test(allow_range, settings, check_retries=False): def main(): - settings = {"max_download_buffer_size" : 20} + settings = {"max_download_buffer_size": 20} # Test Accept-Ranges=False run_test(allow_range=False, settings=settings) @@ -271,7 +275,7 @@ def main(): run_test(allow_range=True, settings=settings) # Test Accept-Ranges=True, parallel download is used - settings = {"max_download_buffer_size" : 10} + settings = {"max_download_buffer_size": 10} run_test(allow_range=True, settings=settings) # Test Accept-Ranges=True, parallel download is not used, diff --git a/tests/queries/0_stateless/02233_interpolate_1.sql b/tests/queries/0_stateless/02233_interpolate_1.sql index 229c36e23fb..3d416b27f45 100644 --- a/tests/queries/0_stateless/02233_interpolate_1.sql +++ b/tests/queries/0_stateless/02233_interpolate_1.sql @@ -36,7 +36,7 @@ SELECT n, source, inter FROM ( # Test INTERPOLATE with inconsistent column - should produce error SELECT n, source, inter FROM ( SELECT toFloat32(number % 10) AS n, 'original' AS source, number as inter FROM numbers(10) WHERE number % 3 = 1 -) ORDER BY n WITH FILL FROM 0 TO 11.51 STEP 0.5 INTERPOLATE (inter AS source); -- { serverError 32 } +) ORDER BY n WITH FILL FROM 0 TO 11.51 STEP 0.5 INTERPOLATE (inter AS source); -- { serverError 6, 32 } # Test INTERPOLATE with aliased column SELECT n, source, inter + 1 AS inter_p FROM ( diff --git a/tests/queries/0_stateless/02233_with_total_empty_chunk.sql b/tests/queries/0_stateless/02233_with_total_empty_chunk.sql index bf9ce85b6ed..e1e8186ed76 100644 --- a/tests/queries/0_stateless/02233_with_total_empty_chunk.sql +++ b/tests/queries/0_stateless/02233_with_total_empty_chunk.sql @@ -1 +1,3 @@ 
-SELECT (NULL, NULL, NULL, NULL, NULL, NULL, NULL) FROM numbers(0) GROUP BY number WITH TOTALS HAVING sum(number) <= arrayJoin([]); +SET allow_experimental_analyzer = 1; + +SELECT (NULL, NULL, NULL, NULL, NULL, NULL, NULL) FROM numbers(0) GROUP BY number WITH TOTALS HAVING sum(number) <= arrayJoin([]) -- { serverError 59 }; diff --git a/tests/queries/0_stateless/02234_clickhouse_local_test_mode.sh b/tests/queries/0_stateless/02234_clickhouse_local_test_mode.sh index f736751726d..16200399099 100755 --- a/tests/queries/0_stateless/02234_clickhouse_local_test_mode.sh +++ b/tests/queries/0_stateless/02234_clickhouse_local_test_mode.sh @@ -5,5 +5,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_LOCAL --query="SELECT n" 2>&1 | grep -q "Code: 47. DB::Exception: Missing columns:" && echo 'OK' || echo 'FAIL' ||: +$CLICKHOUSE_LOCAL --query="SELECT n SETTINGS allow_experimental_analyzer = 1" 2>&1 | grep -q "Code: 47. DB::Exception:" && echo 'OK' || echo 'FAIL' ||: $CLICKHOUSE_LOCAL --query="SELECT n -- { serverError 47 }" diff --git a/tests/queries/0_stateless/02240_filesystem_query_cache.reference b/tests/queries/0_stateless/02240_filesystem_query_cache.reference index f4b9f7bb127..16c4cd1c049 100644 --- a/tests/queries/0_stateless/02240_filesystem_query_cache.reference +++ b/tests/queries/0_stateless/02240_filesystem_query_cache.reference @@ -6,6 +6,7 @@ SET skip_download_if_exceeds_query_cache=1; SET filesystem_cache_max_download_size=128; DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_4', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; +SYSTEM DROP FILESYSTEM CACHE; INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; diff --git a/tests/queries/0_stateless/02240_filesystem_query_cache.sql b/tests/queries/0_stateless/02240_filesystem_query_cache.sql index 94eb4bc5ccd..44856a2188c 100644 --- a/tests/queries/0_stateless/02240_filesystem_query_cache.sql +++ b/tests/queries/0_stateless/02240_filesystem_query_cache.sql @@ -9,8 +9,8 @@ SET filesystem_cache_max_download_size=128; DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_4', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; +SYSTEM DROP FILESYSTEM CACHE; INSERT INTO test SELECT number, toString(number) FROM numbers(100); - SELECT * FROM test FORMAT Null; SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; SYSTEM DROP FILESYSTEM CACHE; diff --git a/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference b/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference index d3be4855b36..f960b4eb21c 100644 --- a/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference +++ b/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference @@ -1,10 +1,68 @@ Using storage policy: s3_cache -0 79 80 -0 745 746 -0 745 746 -0 745 746 +0 +Expect cache +DOWNLOADED 0 0 1 +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +3 +Expect cache +DOWNLOADED 0 0 1 +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +3 +Expect no cache +Expect cache +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 
+2 +Expect no cache +Expect cache +DOWNLOADED 0 0 1 +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +3 +Expect cache +DOWNLOADED 0 0 1 +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +3 +Expect no cache +Expect cache +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +2 +Expect no cache Using storage policy: local_cache -0 79 80 -0 745 746 -0 745 746 -0 745 746 +0 +Expect cache +DOWNLOADED 0 0 1 +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +3 +Expect cache +DOWNLOADED 0 0 1 +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +3 +Expect no cache +Expect cache +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +2 +Expect no cache +Expect cache +DOWNLOADED 0 0 1 +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +3 +Expect cache +DOWNLOADED 0 0 1 +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +3 +Expect no cache +Expect cache +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +2 +Expect no cache diff --git a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh index a487f3ca739..c7dc9fbd961 100755 --- a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh +++ b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh @@ -9,34 +9,69 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) for STORAGE_POLICY in 's3_cache' 'local_cache'; do echo "Using storage policy: $STORAGE_POLICY" + ${CLICKHOUSE_CLIENT} --query "SYSTEM STOP MERGES" ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE" + ${CLICKHOUSE_CLIENT} --query "SELECT count() FROM system.filesystem_cache" ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test_02240_storage_policy" - ${CLICKHOUSE_CLIENT} --query "CREATE TABLE test_02240_storage_policy (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='${STORAGE_POLICY}', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false" + ${CLICKHOUSE_CLIENT} --query "CREATE TABLE test_02240_storage_policy (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='${STORAGE_POLICY}', min_bytes_for_wide_part = 1000000, compress_marks=false, compress_primary_key=false" ${CLICKHOUSE_CLIENT} --query "SYSTEM STOP MERGES test_02240_storage_policy" ${CLICKHOUSE_CLIENT} --enable_filesystem_cache_on_write_operations=0 --query "INSERT INTO test_02240_storage_policy SELECT number, toString(number) FROM numbers(100)" + + echo 'Expect cache' + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE" ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy FORMAT Null" - ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size" + ${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size" + ${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache"; + + echo 'Expect cache' + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE" + ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy FORMAT Null" + ${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size" + ${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache"; ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" + echo 'Expect no cache' 
${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" + + echo 'Expect cache' + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE" ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy FORMAT Null" - ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" + ${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size" + ${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache"; + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" + echo 'Expect no cache' ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test_02240_storage_policy_3" - ${CLICKHOUSE_CLIENT} --query "CREATE TABLE test_02240_storage_policy_3 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='${STORAGE_POLICY}_3', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false" + ${CLICKHOUSE_CLIENT} --query "CREATE TABLE test_02240_storage_policy_3 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='${STORAGE_POLICY}_3', min_bytes_for_wide_part = 1000000, compress_marks=false, compress_primary_key=false" ${CLICKHOUSE_CLIENT} --enable_filesystem_cache_on_write_operations=0 --query "INSERT INTO test_02240_storage_policy_3 SELECT number, toString(number) FROM numbers(100)" + + echo 'Expect cache' + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE" ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null" - ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size" + ${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size" + ${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache"; + + echo 'Expect cache' + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE" ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null" - ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size" + ${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size" + ${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache"; + + echo 'Expect no cache' + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" + ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" + + echo 'Expect cache' + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE" + ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null" + ${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size" + ${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache"; 
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" - ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" - ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null" - ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" - ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" + echo 'Expect no cache' ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" done diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference index bbca9bbbfee..b3b7d12d219 100644 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference @@ -1,60 +1,60 @@ Using storage policy: s3_cache 0 -0 +0 0 Row 1: ────── file_segment_range_begin: 0 file_segment_range_end: 745 size: 746 state: DOWNLOADED -7 -7 +8 +8 1100 0 2 2 -7 +8 1100 Row 1: ────── file_segment_range_begin: 0 file_segment_range_end: 1659 size: 1660 state: DOWNLOADED -7 -7 -7 -7 -21 -31 -38 +8 +8 2014 +8 2014 +8 2014 +24 84045 +35 168815 +44 252113 5010500 18816 Using storage policy: local_cache 0 -0 +0 0 Row 1: ────── file_segment_range_begin: 0 file_segment_range_end: 745 size: 746 state: DOWNLOADED -7 -7 +8 +8 1100 0 2 2 -7 +8 1100 Row 1: ────── file_segment_range_begin: 0 file_segment_range_end: 1659 size: 1660 state: DOWNLOADED -7 -7 -7 -7 -21 -31 -38 +8 +8 2014 +8 2014 +8 2014 +24 84045 +35 168815 +44 252113 5010500 18816 diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh index 048fb792e6e..e65bf9cb35f 100755 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh @@ -33,7 +33,7 @@ for STORAGE_POLICY in 's3_cache' 'local_cache'; do FORMAT Vertical" $CLICKHOUSE_CLIENT --query "SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --query "SELECT count(), sum(size) FROM system.filesystem_cache" $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100)" @@ -54,7 +54,7 @@ for STORAGE_POLICY in 's3_cache' 'local_cache'; do FORMAT Vertical" $CLICKHOUSE_CLIENT --query "SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --query "SELECT count(), sum(size) FROM system.filesystem_cache" $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0" @@ -64,7 +64,7 @@ for STORAGE_POLICY in 's3_cache' 'local_cache'; do $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02241 FORMAT Null" $CLICKHOUSE_CLIENT --query "SELECT count() FROM 
system.filesystem_cache WHERE cache_hits > 0" - $CLICKHOUSE_CLIENT --query "SELECT count() size FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --query "SELECT count(), sum(size) size FROM system.filesystem_cache" $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE" @@ -87,24 +87,23 @@ for STORAGE_POLICY in 's3_cache' 'local_cache'; do FORMAT Vertical;" $CLICKHOUSE_CLIENT --query "SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --query "SELECT count(), sum(size) FROM system.filesystem_cache" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --query "SELECT count(), sum(size) FROM system.filesystem_cache" $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=0" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --query "SELECT count(), sum(size) FROM system.filesystem_cache" $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100)" $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(300, 10000)" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --query "SELECT count(), sum(size) FROM system.filesystem_cache" $CLICKHOUSE_CLIENT --query "SYSTEM START MERGES test_02241" $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "OPTIMIZE TABLE test_02241 FINAL" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --query "SELECT count(), sum(size) FROM system.filesystem_cache" $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --mutations_sync=2 --query "ALTER TABLE test_02241 UPDATE value = 'kek' WHERE key = 100" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" - + $CLICKHOUSE_CLIENT --query "SELECT count(), sum(size) FROM system.filesystem_cache" $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(5000000)" $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS" diff --git a/tests/queries/0_stateless/02242_make_date.reference b/tests/queries/0_stateless/02242_make_date.reference index 0e8500baeb7..dc85d872607 100644 --- a/tests/queries/0_stateless/02242_make_date.reference +++ b/tests/queries/0_stateless/02242_make_date.reference @@ -56,6 +56,8 @@ Nullable(Date) \N \N \N +1980-01-01 +1980-01-01 1984-02-03 \N \N diff --git a/tests/queries/0_stateless/02242_make_date.sql b/tests/queries/0_stateless/02242_make_date.sql index 9e66daa2778..78feabfffb3 100644 --- a/tests/queries/0_stateless/02242_make_date.sql +++ b/tests/queries/0_stateless/02242_make_date.sql @@ -79,9 +79,11 @@ select makeDate(1980, NULL, 4); select makeDate(1980, 3, NULL); select makeDate(1980); -- { serverError 42 } -select makeDate(1980, 1); -- { serverError 42 } select makeDate(1980, 1, 1, 1); -- { serverError 42 } +select MAKEDATE(1980, 1, 1); +select MAKEDATE(1980, 1); + select makeDate(year, month, day) 
from (select NULL as year, 2 as month, 3 as day union all select 1984 as year, 2 as month, 3 as day) order by year, month, day; select makeDate(year, month, day) from (select NULL as year, 2 as month, 3 as day union all select NULL as year, 2 as month, 3 as day) order by year, month, day; diff --git a/tests/queries/0_stateless/02242_make_date_mysql.reference b/tests/queries/0_stateless/02242_make_date_mysql.reference new file mode 100644 index 00000000000..0fc51594a8c --- /dev/null +++ b/tests/queries/0_stateless/02242_make_date_mysql.reference @@ -0,0 +1,25 @@ +Date +Nullable(Date) +Nullable(Date) +1970-01-01 +2020-01-08 +1970-01-01 +1970-01-01 +1980-01-09 +1970-01-01 +1980-01-09 +1970-01-01 +1980-01-09 +1970-01-01 +1970-01-01 +1970-01-01 +2149-06-06 +1970-01-01 +1970-01-01 +1970-01-01 +1970-01-01 +1970-01-01 +1970-01-01 +1970-01-02 +\N +\N diff --git a/tests/queries/0_stateless/02242_make_date_mysql.sql b/tests/queries/0_stateless/02242_make_date_mysql.sql new file mode 100644 index 00000000000..82d80579788 --- /dev/null +++ b/tests/queries/0_stateless/02242_make_date_mysql.sql @@ -0,0 +1,41 @@ +select toTypeName(makeDate(1991, 8)); +select toTypeName(makeDate(cast(1991 as Nullable(UInt64)), 8)); +select toTypeName(makeDate(1991, cast(8 as Nullable(UInt64)))); + +select makeDate(1970, 01); +select makeDate(2020, 08); +select makeDate(-1980, 10); +select makeDate(1980, -10); +select makeDate(1980.0, 9); +select makeDate(-1980.0, 9); +select makeDate(cast(1980.1 as Decimal(20,5)), 9); +select makeDate(cast('-1980.1' as Decimal(20,5)), 9); +select makeDate(cast(1980.1 as Float32), 9); +select makeDate(cast(-1980.1 as Float32), 9); + +select makeDate(cast(1980 as Date), 10); -- { serverError 43 } +select makeDate(cast(-1980 as Date), 10); -- { serverError 43 } +select makeDate(cast(1980 as Date32), 10); -- { serverError 43 } +select makeDate(cast(-1980 as Date32), 10); -- { serverError 43 } +select makeDate(cast(1980 as DateTime), 10); -- { serverError 43 } +select makeDate(cast(-1980 as DateTime), 10); -- { serverError 43 } +select makeDate(cast(1980 as DateTime64), 10); -- { serverError 43 } +select makeDate(cast(-1980 as DateTime64), 10); -- { serverError 43 } +select makeDate('1980', '10'); -- { serverError 43 } +select makeDate('-1980', 3); -- { serverError 43 } +select makeDate('aa', 3); -- { serverError 43 } +select makeDate(1994, 'aa'); -- { serverError 43 } + +select makeDate(0, 1); +select makeDate(19800, 12); +select makeDate(2149, 157); +select makeDate(2149, 158); +select makeDate(1969,355); +select makeDate(1969,356); +select makeDate(1969,357); +select makeDate(1970,0); +select makeDate(1970,1); +select makeDate(1970,2); + +select makeDate(NULL, 3); +select makeDate(1980, NULL); diff --git a/tests/queries/0_stateless/02243_make_date32.sql b/tests/queries/0_stateless/02243_make_date32.sql index 2cf4ac6b358..9b0009b33a2 100644 --- a/tests/queries/0_stateless/02243_make_date32.sql +++ b/tests/queries/0_stateless/02243_make_date32.sql @@ -78,7 +78,6 @@ select makeDate32(1980, NULL, 4); select makeDate32(1980, 3, NULL); select makeDate32(1980); -- { serverError 42 } -select makeDate32(1980, 1); -- { serverError 42 } select makeDate32(1980, 1, 1, 1); -- { serverError 42 } select makeDate32(year, month, day) from (select NULL as year, 2 as month, 3 as day union all select 1984 as year, 2 as month, 3 as day) order by year, month, day; diff --git a/tests/queries/0_stateless/02243_make_date32_mysql.reference b/tests/queries/0_stateless/02243_make_date32_mysql.reference new file mode 
100644 index 00000000000..07c08a2ec70 --- /dev/null +++ b/tests/queries/0_stateless/02243_make_date32_mysql.reference @@ -0,0 +1,26 @@ +Date32 +Nullable(Date32) +Nullable(Date32) +1900-01-01 +2020-01-08 +1970-01-01 +1970-01-01 +1980-01-09 +1970-01-01 +1980-01-09 +1970-01-01 +1980-01-09 +1970-01-01 +1970-01-01 +1970-01-01 +2299-12-31 +1970-01-01 +1970-01-01 +1970-01-01 +1970-01-01 +1970-01-01 +1970-01-01 +1900-01-01 +1900-01-02 +\N +\N diff --git a/tests/queries/0_stateless/02243_make_date32_mysql.sql b/tests/queries/0_stateless/02243_make_date32_mysql.sql new file mode 100644 index 00000000000..4a67dcd80de --- /dev/null +++ b/tests/queries/0_stateless/02243_make_date32_mysql.sql @@ -0,0 +1,42 @@ +select toTypeName(makeDate32(1991, 8)); +select toTypeName(makeDate32(cast(1991 as Nullable(UInt64)), 8)); +select toTypeName(makeDate32(1991, cast(8 as Nullable(UInt64)))); + +select makeDate32(1900, 01); +select makeDate32(2020, 08); +select makeDate32(-1980, 10); +select makeDate32(1980, -10); +select makeDate32(1980.0, 9); +select makeDate32(-1980.0, 9); +select makeDate32(cast(1980.1 as Decimal(20,5)), 9); +select makeDate32(cast('-1980.1' as Decimal(20,5)), 9); +select makeDate32(cast(1980.1 as Float32), 9); +select makeDate32(cast(-1980.1 as Float32), 9); + +select makeDate32(cast(1980 as Date), 10); -- { serverError 43 } +select makeDate32(cast(-1980 as Date), 10); -- { serverError 43 } +select makeDate32(cast(1980 as Date32), 10); -- { serverError 43 } +select makeDate32(cast(-1980 as Date32), 10); -- { serverError 43 } +select makeDate32(cast(1980 as DateTime), 10); -- { serverError 43 } +select makeDate32(cast(-1980 as DateTime), 10); -- { serverError 43 } +select makeDate32(cast(1980 as DateTime64), 10); -- { serverError 43 } +select makeDate32(cast(-1980 as DateTime64), 10); -- { serverError 43 } +select makeDate32('1980', '10'); -- { serverError 43 } +select makeDate32('-1980', 3); -- { serverError 43 } +select makeDate32('aa', 3); -- { serverError 43 } +select makeDate32(1994, 'aa'); -- { serverError 43 } + +select makeDate32(0, 1); +select makeDate32(19800, 12); +select makeDate32(2299, 365); +select makeDate32(2299, 366); +select makeDate32(2300, 1); +select makeDate32(1899, 365); +select makeDate32(1899, 366); +select makeDate32(1899, 367); +select makeDate32(1900, 0); +select makeDate32(1900, 1); +select makeDate32(1900, 2); + +select makeDate32(NULL, 3); +select makeDate32(1980, NULL); diff --git a/tests/queries/0_stateless/02271_fix_column_matcher_and_column_transformer.sql b/tests/queries/0_stateless/02271_fix_column_matcher_and_column_transformer.sql index f0c0e2bae46..245b2cc97e3 100644 --- a/tests/queries/0_stateless/02271_fix_column_matcher_and_column_transformer.sql +++ b/tests/queries/0_stateless/02271_fix_column_matcher_and_column_transformer.sql @@ -1,3 +1,5 @@ +SET allow_experimental_analyzer = 1; + DROP TABLE IF EXISTS github_events; CREATE TABLE github_events @@ -59,6 +61,6 @@ CREATE TABLE github_events ) ENGINE = MergeTree ORDER BY (event_type, repo_name, created_at); -with top_repos as ( select repo_name from github_events where event_type = 'WatchEvent' and toDate(created_at) = today() - 1 group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toMonday(created_at) = toMonday(today() - interval 1 week) group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toStartOfMonth(created_at) = 
toStartOfMonth(today()) - interval 1 month group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toYear(created_at) = toYear(today()) - 1 group by repo_name order by count() desc limit 100 ), last_day as ( select repo_name, count() as count_last_day, rowNumberInAllBlocks() + 1 as position_last_day from github_events where repo_name in (select repo_name from top_repos) and toDate(created_at) = today() - 1 group by repo_name order by count_last_day desc ), last_week as ( select repo_name, count() as count_last_week, rowNumberInAllBlocks() + 1 as position_last_week from github_events where repo_name in (select repo_name from top_repos) and toMonday(created_at) = toMonday(today()) - interval 1 week group by repo_name order by count_last_week desc ), last_month as ( select repo_name, count() as count_last_month, rowNumberInAllBlocks() + 1 as position_last_month from github_events where repo_name in (select repo_name from top_repos) and toStartOfMonth(created_at) = toStartOfMonth(today()) - interval 1 month group by repo_name order by count_last_month desc ) select d.repo_name, columns(count) from last_day d join last_week w on d.repo_name = w.repo_name join last_month m on d.repo_name = m.repo_name FORMAT TabSeparatedWithNamesAndTypes; -- { serverError 47 } +with top_repos as ( select repo_name from github_events where event_type = 'WatchEvent' and toDate(created_at) = today() - 1 group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toMonday(created_at) = toMonday(today() - interval 1 week) group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toStartOfMonth(created_at) = toStartOfMonth(today()) - interval 1 month group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toYear(created_at) = toYear(today()) - 1 group by repo_name order by count() desc limit 100 ), last_day as ( select repo_name, count() as count_last_day, rowNumberInAllBlocks() + 1 as position_last_day from github_events where repo_name in (select repo_name from top_repos) and toDate(created_at) = today() - 1 group by repo_name order by count_last_day desc ), last_week as ( select repo_name, count() as count_last_week, rowNumberInAllBlocks() + 1 as position_last_week from github_events where repo_name in (select repo_name from top_repos) and toMonday(created_at) = toMonday(today()) - interval 1 week group by repo_name order by count_last_week desc ), last_month as ( select repo_name, count() as count_last_month, rowNumberInAllBlocks() + 1 as position_last_month from github_events where repo_name in (select repo_name from top_repos) and toStartOfMonth(created_at) = toStartOfMonth(today()) - interval 1 month group by repo_name order by count_last_month desc ) select d.repo_name, columns(count) from last_day d join last_week w on d.repo_name = w.repo_name join last_month m on d.repo_name = m.repo_name; DROP TABLE github_events; diff --git a/tests/queries/0_stateless/02273_full_sort_join.reference.j2 b/tests/queries/0_stateless/02273_full_sort_join.reference.j2 index 98bfd9d9b2b..0af4158e971 100644 --- a/tests/queries/0_stateless/02273_full_sort_join.reference.j2 +++ b/tests/queries/0_stateless/02273_full_sort_join.reference.j2 @@ -1,7 +1,7 @@ {% set table_size = 15 -%} {% for join_algorithm in 
['default', 'full_sorting_merge', 'grace_hash'] -%} -- {{ join_algorithm }} -- -{% for block_size in range(1, table_size + 1) -%} +{% for block_size in range(1, table_size + 1, 4) -%} ALL INNER USING | bs = {{ block_size }} 4 0 0 5 0 0 @@ -50,7 +50,6 @@ ALL LEFT | bs = {{ block_size }} 14 14 val9 0 14 14 val9 0 ALL RIGHT | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 4 4 0 val10 5 5 0 val6 6 6 0 val8 @@ -64,7 +63,6 @@ ALL RIGHT | bs = {{ block_size }} 13 13 0 val9 14 14 0 val3 14 14 0 val7 -{% endif -%} ALL INNER | bs = {{ block_size }} | copmosite key 2 2 2 2 2 2 0 0 2 2 2 2 2 2 0 0 @@ -85,7 +83,6 @@ ALL LEFT | bs = {{ block_size }} | copmosite key 2 2 2 2 2 2 val12 0 2 2 2 2 2 2 val9 0 ALL RIGHT | bs = {{ block_size }} | copmosite key -{% if join_algorithm != 'grace_hash' -%} 0 \N 0 1 1 1 1 val2 0 \N 0 1 1 1 1 val7 0 \N 0 1 1 2 1 val5 @@ -99,7 +96,6 @@ ALL RIGHT | bs = {{ block_size }} | copmosite key 0 \N 0 2 2 \N 1 val9 2 2 2 2 2 2 0 val4 2 2 2 2 2 2 0 val4 -{% endif -%} ANY INNER USING | bs = {{ block_size }} 4 0 0 5 0 0 @@ -137,7 +133,6 @@ ANY LEFT | bs = {{ block_size }} 13 13 val13 0 14 14 val9 0 ANY RIGHT | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 4 4 0 val10 5 5 0 val6 6 6 0 val8 @@ -150,7 +145,6 @@ ANY RIGHT | bs = {{ block_size }} 13 13 0 val9 14 14 0 val3 14 14 0 val7 -{% endif -%} ANY INNER | bs = {{ block_size }} | copmosite key 2 2 2 2 2 2 0 0 ANY LEFT | bs = {{ block_size }} | copmosite key @@ -170,7 +164,6 @@ ANY LEFT | bs = {{ block_size }} | copmosite key 2 2 2 2 2 2 val12 0 2 2 2 2 2 2 val9 0 ANY RIGHT | bs = {{ block_size }} | copmosite key -{% if join_algorithm != 'grace_hash' -%} 0 \N 0 1 1 1 1 val2 0 \N 0 1 1 1 1 val7 0 \N 0 1 1 2 1 val5 @@ -183,7 +176,6 @@ ANY RIGHT | bs = {{ block_size }} | copmosite key 0 \N 0 2 1 \N 1 val3 0 \N 0 2 2 \N 1 val9 2 2 2 2 2 2 0 val4 -{% endif -%} {% endfor -%} ALL INNER | join_use_nulls = 1 4 4 0 0 @@ -219,7 +211,6 @@ ALL LEFT | join_use_nulls = 1 14 14 val9 0 14 14 val9 0 ALL RIGHT | join_use_nulls = 1 -{% if join_algorithm != 'grace_hash' -%} 4 4 0 val10 5 5 0 val6 6 6 0 val8 @@ -233,7 +224,6 @@ ALL RIGHT | join_use_nulls = 1 13 13 0 val9 14 14 0 val3 14 14 0 val7 -{% endif -%} ALL INNER | join_use_nulls = 1 | copmosite key 2 2 2 2 2 2 0 0 2 2 2 2 2 2 0 0 @@ -254,7 +244,6 @@ ALL LEFT | join_use_nulls = 1 | copmosite key 2 2 2 2 2 2 val12 0 2 2 2 2 2 2 val9 0 ALL RIGHT | join_use_nulls = 1 | copmosite key -{% if join_algorithm != 'grace_hash' -%} 2 2 2 2 2 2 0 val4 2 2 2 2 2 2 0 val4 \N \N \N 1 1 1 \N val2 @@ -268,7 +257,6 @@ ALL RIGHT | join_use_nulls = 1 | copmosite key \N \N \N 2 1 2 \N val8 \N \N \N 2 1 \N \N val3 \N \N \N 2 2 \N \N val9 -{% endif -%} ANY INNER | join_use_nulls = 1 4 4 0 0 5 5 0 0 @@ -296,7 +284,6 @@ ANY LEFT | join_use_nulls = 1 13 13 val13 0 14 14 val9 0 ANY RIGHT | join_use_nulls = 1 -{% if join_algorithm != 'grace_hash' -%} 4 4 0 val10 5 5 0 val6 6 6 0 val8 @@ -309,7 +296,6 @@ ANY RIGHT | join_use_nulls = 1 13 13 0 val9 14 14 0 val3 14 14 0 val7 -{% endif -%} ANY INNER | join_use_nulls = 1 | copmosite key 2 2 2 2 2 2 0 0 ANY LEFT | join_use_nulls = 1 | copmosite key @@ -329,7 +315,6 @@ ANY LEFT | join_use_nulls = 1 | copmosite key 2 2 2 2 2 2 val12 0 2 2 2 2 2 2 val9 0 ANY RIGHT | join_use_nulls = 1 | copmosite key -{% if join_algorithm != 'grace_hash' -%} 2 2 2 2 2 2 0 val4 \N \N \N 1 1 1 \N val2 \N \N \N 1 1 1 \N val7 @@ -342,5 +327,4 @@ ANY RIGHT | join_use_nulls = 1 | copmosite key \N \N \N 2 1 2 \N val8 \N \N \N 2 1 \N \N val3 \N \N \N 2 2 \N \N val9 -{% endif 
-%} {% endfor -%} diff --git a/tests/queries/0_stateless/02273_full_sort_join.sql.j2 b/tests/queries/0_stateless/02273_full_sort_join.sql.j2 index 8b739330364..6500306356c 100644 --- a/tests/queries/0_stateless/02273_full_sort_join.sql.j2 +++ b/tests/queries/0_stateless/02273_full_sort_join.sql.j2 @@ -26,9 +26,7 @@ INSERT INTO t2 'val' || toString(number) as s FROM numbers_mt({{ table_size - 3 }}); - {% macro is_implemented(join_algorithm) -%} -{% if join_algorithm == 'grace_hash' %} -- { serverError NOT_IMPLEMENTED } {% endif %} {% endmacro -%} {% for join_algorithm in ['default', 'full_sorting_merge', 'grace_hash'] -%} @@ -38,7 +36,7 @@ SET max_bytes_in_join = '{% if join_algorithm == 'grace_hash' %}10K{% else %}0{% SELECT '-- {{ join_algorithm }} --'; SET join_algorithm = '{{ join_algorithm }}'; -{% for block_size in range(1, table_size + 1) -%} +{% for block_size in range(1, table_size + 1, 4) -%} {% for kind in ['ALL', 'ANY'] -%} SET max_block_size = {{ block_size }}; diff --git a/tests/queries/0_stateless/02274_full_sort_join_nodistinct.reference.j2 b/tests/queries/0_stateless/02274_full_sort_join_nodistinct.reference.j2 index 2cc6c6e85d6..df968e86e8d 100644 --- a/tests/queries/0_stateless/02274_full_sort_join_nodistinct.reference.j2 +++ b/tests/queries/0_stateless/02274_full_sort_join_nodistinct.reference.j2 @@ -1,6 +1,6 @@ {% for join_algorithm in ['full_sorting_merge', 'grace_hash'] -%} --- {{ join_algorithm }} --- -{% for block_size in range(1, 11) -%} +{% for block_size in range(1, 11, 4) -%} t1 ALL INNER JOIN t2 | bs = {{ block_size }} 1 1 4 5 1 1 4 5 @@ -108,7 +108,6 @@ t1 ALL LEFT JOIN t2 | bs = {{ block_size }} 2 2 val27 5 3 3 val3 4 t1 ALL RIGHT JOIN t2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 1 4 val11 1 1 4 val12 2 2 5 val22 @@ -161,7 +160,6 @@ t1 ALL RIGHT JOIN t2 | bs = {{ block_size }} 2 2 5 val28 2 2 5 val28 3 3 4 val3 -{% endif -%} t1 ANY INNER JOIN t2 | bs = {{ block_size }} 1 1 4 5 2 2 5 5 @@ -177,7 +175,6 @@ t1 ANY LEFT JOIN t2 | bs = {{ block_size }} 2 2 val27 5 3 3 val3 4 t1 ANY RIGHT JOIN t2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 1 4 val11 1 1 4 val12 2 2 5 val22 @@ -188,9 +185,7 @@ t1 ANY RIGHT JOIN t2 | bs = {{ block_size }} 2 2 5 val27 2 2 5 val28 3 3 4 val3 -{% endif -%} t1 ALL FULL JOIN t2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 1 4 5 1 1 4 5 2 2 5 5 @@ -243,9 +238,7 @@ t1 ALL FULL JOIN t2 | bs = {{ block_size }} 2 2 5 5 2 2 5 5 3 3 4 4 -{% endif -%} t1 ALL FULL JOIN USING t2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 4 5 1 4 5 2 5 5 @@ -298,7 +291,6 @@ t1 ALL FULL JOIN USING t2 | bs = {{ block_size }} 2 5 5 2 5 5 3 4 4 -{% endif -%} t1 ALL INNER JOIN tn2 | bs = {{ block_size }} 1 1 4 5 1 1 4 5 @@ -315,7 +307,6 @@ t1 ALL LEFT JOIN tn2 | bs = {{ block_size }} 2 \N val27 0 3 3 val3 4 t1 ALL RIGHT JOIN tn2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 0 \N 0 val22 0 \N 0 val23 0 \N 0 val24 @@ -326,7 +317,6 @@ t1 ALL RIGHT JOIN tn2 | bs = {{ block_size }} 1 1 4 val11 1 1 4 val12 3 3 4 val3 -{% endif -%} t1 ANY INNER JOIN tn2 | bs = {{ block_size }} 1 1 4 5 3 3 4 4 @@ -341,7 +331,6 @@ t1 ANY LEFT JOIN tn2 | bs = {{ block_size }} 2 \N val27 0 3 3 val3 4 t1 ANY RIGHT JOIN tn2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 0 \N 0 val22 0 \N 0 val23 0 \N 0 val24 @@ -352,9 +341,7 @@ t1 ANY RIGHT JOIN tn2 | bs = {{ block_size }} 1 1 4 val11 1 1 4 val12 3 3 4 val3 -{% endif -%} t1 ALL FULL JOIN tn2 | bs = {{ block_size }} 
-{% if join_algorithm != 'grace_hash' -%} 0 \N 0 5 0 \N 0 5 0 \N 0 5 @@ -372,9 +359,8 @@ t1 ALL FULL JOIN tn2 | bs = {{ block_size }} 2 \N 5 0 2 \N 5 0 3 3 4 4 -{% endif -%} -t1 ALL FULL JOIN USING tn2 | bs = {{ block_size }} {% if join_algorithm != 'grace_hash' -%} +t1 ALL FULL JOIN USING tn2 | bs = {{ block_size }} 1 4 5 1 4 5 2 5 0 @@ -409,7 +395,6 @@ tn1 ALL LEFT JOIN t2 | bs = {{ block_size }} \N 0 val26 0 \N 0 val27 0 tn1 ALL RIGHT JOIN t2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 1 4 val11 1 1 4 val12 3 3 4 val3 @@ -420,7 +405,6 @@ tn1 ALL RIGHT JOIN t2 | bs = {{ block_size }} \N 2 0 val26 \N 2 0 val27 \N 2 0 val28 -{% endif -%} tn1 ANY INNER JOIN t2 | bs = {{ block_size }} 1 1 4 5 3 3 4 4 @@ -435,7 +419,6 @@ tn1 ANY LEFT JOIN t2 | bs = {{ block_size }} \N 0 val26 0 \N 0 val27 0 tn1 ANY RIGHT JOIN t2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 1 4 val11 1 1 4 val12 3 3 4 val3 @@ -446,9 +429,7 @@ tn1 ANY RIGHT JOIN t2 | bs = {{ block_size }} \N 2 0 val26 \N 2 0 val27 \N 2 0 val28 -{% endif -%} tn1 ALL FULL JOIN t2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 1 4 5 1 1 4 5 3 3 4 4 @@ -466,9 +447,7 @@ tn1 ALL FULL JOIN t2 | bs = {{ block_size }} \N 2 0 5 \N 2 0 5 \N 2 0 5 -{% endif -%} tn1 ALL FULL JOIN USING t2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 4 5 1 4 5 2 0 5 @@ -486,7 +465,6 @@ tn1 ALL FULL JOIN USING t2 | bs = {{ block_size }} \N 5 0 \N 5 0 \N 5 0 -{% endif -%} tn1 ALL INNER JOIN tn2 | bs = {{ block_size }} 1 1 4 5 1 1 4 5 @@ -503,7 +481,6 @@ tn1 ALL LEFT JOIN tn2 | bs = {{ block_size }} \N \N val26 0 \N \N val27 0 tn1 ALL RIGHT JOIN tn2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 1 4 val11 1 1 4 val12 3 3 4 val3 @@ -514,7 +491,6 @@ tn1 ALL RIGHT JOIN tn2 | bs = {{ block_size }} \N \N 0 val26 \N \N 0 val27 \N \N 0 val28 -{% endif -%} tn1 ANY INNER JOIN tn2 | bs = {{ block_size }} 1 1 4 5 3 3 4 4 @@ -529,7 +505,6 @@ tn1 ANY LEFT JOIN tn2 | bs = {{ block_size }} \N \N val26 0 \N \N val27 0 tn1 ANY RIGHT JOIN tn2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 1 4 val11 1 1 4 val12 3 3 4 val3 @@ -540,9 +515,7 @@ tn1 ANY RIGHT JOIN tn2 | bs = {{ block_size }} \N \N 0 val26 \N \N 0 val27 \N \N 0 val28 -{% endif -%} tn1 ALL FULL JOIN tn2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 1 4 5 1 1 4 5 3 3 4 4 @@ -560,9 +533,8 @@ tn1 ALL FULL JOIN tn2 | bs = {{ block_size }} \N \N 5 0 \N \N 5 0 \N \N 5 0 -{% endif -%} -tn1 ALL FULL JOIN USING tn2 | bs = {{ block_size }} {% if join_algorithm != 'grace_hash' -%} +tn1 ALL FULL JOIN USING tn2 | bs = {{ block_size }} 1 4 5 1 4 5 3 4 4 diff --git a/tests/queries/0_stateless/02274_full_sort_join_nodistinct.sql.j2 b/tests/queries/0_stateless/02274_full_sort_join_nodistinct.sql.j2 index 613da65421e..f8eb4b1a53e 100644 --- a/tests/queries/0_stateless/02274_full_sort_join_nodistinct.sql.j2 +++ b/tests/queries/0_stateless/02274_full_sort_join_nodistinct.sql.j2 @@ -16,7 +16,6 @@ INSERT INTO t2 VALUES (1, 'val11'), (1, 'val12'), (2, 'val22'), (2, 'val23'), (2 INSERT INTO tn2 VALUES (1, 'val11'), (1, 'val12'), (NULL, 'val22'), (NULL, 'val23'), (NULL, 'val24'), (NULL, 'val25'), (NULL, 'val26'), (NULL, 'val27'), (NULL, 'val28'), (3, 'val3'); {% macro is_implemented(join_algorithm) -%} -{% if join_algorithm == 'grace_hash' %} -- { serverError NOT_IMPLEMENTED } {% endif %} {% endmacro -%} {% for join_algorithm in ['full_sorting_merge', 'grace_hash'] -%} @@ -27,7 +26,7 @@ SET join_algorithm = 
'{{ join_algorithm }}'; SELECT '--- {{ join_algorithm }} ---'; -{% for block_size in range(1, 11) -%} +{% for block_size in range(1, 11, 4) -%} SET max_block_size = {{ block_size }}; {% for t1, t2 in [('t1', 't2'), ('t1', 'tn2'), ('tn1', 't2'), ('tn1', 'tn2')] -%} @@ -47,9 +46,10 @@ SELECT t1.key, t2.key, length(t1.s), t2.s FROM {{ t1 }} AS t1 {{ kind }} RIGHT J SELECT '{{ t1 }} ALL FULL JOIN {{ t2 }} | bs = {{ block_size }}'; SELECT t1.key, t2.key, length(t1.s), length(t2.s) FROM {{ t1 }} AS t1 {{ kind }} FULL JOIN {{ t2 }} AS t2 ON t1.key == t2.key ORDER BY t1.key, t2.key, length(t1.s), length(t2.s); {{ is_implemented(join_algorithm) }} +{% if join_algorithm == 'full_sorting_merge' or t2 != 'tn2' -%} SELECT '{{ t1 }} ALL FULL JOIN USING {{ t2 }} | bs = {{ block_size }}'; SELECT key, length(t1.s), length(t2.s) FROM {{ t1 }} AS t1 ALL FULL JOIN {{ t2 }} AS t2 USING (key) ORDER BY key, length(t1.s), length(t2.s); {{ is_implemented(join_algorithm) }} - +{% endif -%} {% endfor -%} {% endfor -%} SET max_bytes_in_join = 0; diff --git a/tests/queries/0_stateless/02275_full_sort_join_long.reference b/tests/queries/0_stateless/02275_full_sort_join_long.reference index 9ec06aea3e6..73482358d12 100644 --- a/tests/queries/0_stateless/02275_full_sort_join_long.reference +++ b/tests/queries/0_stateless/02275_full_sort_join_long.reference @@ -41,16 +41,34 @@ ALL INNER ALL LEFT 50195752660639 500353531835 10369589 10369589 1000342 ALL RIGHT -skipped +500353531835 684008812186 1367170 1000342 1367170 ALL INNER 500353531835 500353531835 1000342 1000342 1000342 ALL LEFT 50195752660639 500353531835 10369589 10369589 1000342 ALL RIGHT -skipped +500353531835 684008812186 1367170 1000342 1367170 ALL INNER 500353531835 500353531835 1000342 1000342 1000342 ALL LEFT 50195752660639 500353531835 10369589 10369589 1000342 ALL RIGHT -skipped +500353531835 684008812186 1367170 1000342 1367170 +ANY INNER +199622811843 199622811843 399458 399458 399458 +ANY LEFT +50010619420459 315220291655 10000000 10000000 630753 +ANY RIGHT +316611844056 500267124407 1000000 633172 1000000 +ANY INNER +199622811843 199622811843 399458 399458 399458 +ANY LEFT +50010619420459 315220291655 10000000 10000000 630753 +ANY RIGHT +316611844056 500267124407 1000000 633172 1000000 +ANY INNER +199622811843 199622811843 399458 399458 399458 +ANY LEFT +50010619420459 315220291655 10000000 10000000 630753 +ANY RIGHT +316611844056 500267124407 1000000 633172 1000000 diff --git a/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2 b/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2 index 98cc46c9cb4..0b28fd67050 100644 --- a/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2 +++ b/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2 @@ -22,11 +22,6 @@ INSERT INTO t2 FROM numbers_mt({{ rtable_size }}) ; -{% macro is_implemented(join_algorithm) -%} -{% if join_algorithm == 'grace_hash' %} -- { serverError NOT_IMPLEMENTED } -SELECT 'skipped'; -{% endif -%} -{% endmacro -%} {% for join_algorithm in ['full_sorting_merge', 'grace_hash'] -%} @@ -40,7 +35,6 @@ SET join_algorithm = '{{ join_algorithm }}'; SET max_block_size = {{ block_size }}; -{% if not (kind == 'ANY' and join_algorithm == 'grace_hash') -%} SELECT '{{ kind }} INNER'; SELECT sum(t1.key), sum(t2.key), count(), countIf(t1.key != 0), countIf(t2.key != 0) FROM t1 @@ -58,9 +52,8 @@ SELECT '{{ kind }} RIGHT'; SELECT sum(t1.key), sum(t2.key), count(), countIf(t1.key != 0), countIf(t2.key != 0) FROM t1 {{ kind }} RIGHT JOIN t2 ON t1.key == t2.key -; {{ 
is_implemented(join_algorithm) }} +; -{% endif -%} {% endfor -%} {% endfor -%} diff --git a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh index b563c487646..a6fa0457078 100755 --- a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh +++ b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh @@ -16,7 +16,7 @@ for STORAGE_POLICY in 's3_cache' 'local_cache'; do ORDER BY key SETTINGS storage_policy='$STORAGE_POLICY', min_bytes_for_wide_part = 10485760" - $CLICKHOUSE_CLIENT --query "SYSTEM STOP MERGES" + $CLICKHOUSE_CLIENT --query "SYSTEM STOP MERGES test_02286" $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE" $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" @@ -31,7 +31,7 @@ for STORAGE_POLICY in 's3_cache' 'local_cache'; do $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02286 FORMAT Null" $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" - $CLICKHOUSE_CLIENT --multiline --multiquery --query "SYSTEM DROP FILESYSTEM CACHE './data'; --{serverError 36}" + $CLICKHOUSE_CLIENT --multiline --multiquery --query "SYSTEM DROP FILESYSTEM CACHE 'ff'; --{serverError 36}" $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" @@ -50,7 +50,7 @@ for STORAGE_POLICY in 's3_cache' 'local_cache'; do INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path" - $CLICKHOUSE_CLIENT --query "DROP TABLE test_02286 NO DELAY" + $CLICKHOUSE_CLIENT --query "DROP TABLE test_02286 SYNC" $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" $CLICKHOUSE_CLIENT --query "SELECT cache_path FROM system.filesystem_cache" @@ -77,7 +77,7 @@ for STORAGE_POLICY in 's3_cache' 'local_cache'; do $CLICKHOUSE_CLIENT --query "SELECT * FROM test_022862 FORMAT Null" $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" - $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE '${STORAGE_POLICY}_2/'" + $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE '${STORAGE_POLICY}_2'" $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_022862" diff --git a/tests/queries/0_stateless/02286_mysql_dump_input_format.reference b/tests/queries/0_stateless/02286_mysql_dump_input_format.reference index a736358b9b7..25a78651e33 100644 --- a/tests/queries/0_stateless/02286_mysql_dump_input_format.reference +++ b/tests/queries/0_stateless/02286_mysql_dump_input_format.reference @@ -12,13 +12,13 @@ 4 \N 5 \N 6 7 +7 6 \N 1 \N 2 \N 3 \N 3 \N 4 \N 5 -7 6 OK 1 2 diff --git a/tests/queries/0_stateless/02286_mysql_dump_input_format.sh b/tests/queries/0_stateless/02286_mysql_dump_input_format.sh index 891734e9ad3..1139c1ea68c 100755 --- a/tests/queries/0_stateless/02286_mysql_dump_input_format.sh +++ b/tests/queries/0_stateless/02286_mysql_dump_input_format.sh @@ -9,140 +9,140 @@ USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonex cp $CURDIR/data_mysql_dump/dump*.sql $USER_FILES_PATH -$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32), y Nullable(Int32)')" -$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'a Nullable(Int32), b Nullable(Int32)') settings input_format_mysql_dump_map_column_names = 0" -$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'y Nullable(Int32), x Nullable(Int32)') settings input_format_mysql_dump_map_column_names = 1" -$CLICKHOUSE_CLIENT -q "select * from 
file(dump1.sql, MySQLDump, 'x Nullable(Int32), z String') settings input_format_skip_unknown_fields = 0" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL' -$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32), z String') settings input_format_skip_unknown_fields = 1" +$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32), y Nullable(Int32)') order by x, y" +$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'a Nullable(Int32), b Nullable(Int32)') order by a, b settings input_format_mysql_dump_map_column_names = 0" +$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'y Nullable(Int32), x Nullable(Int32)') order by y, x settings input_format_mysql_dump_map_column_names = 1" +$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32), z String') order by x, z settings input_format_skip_unknown_fields = 0" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32), z String') order by x, z settings input_format_skip_unknown_fields = 1" echo "dump1" $CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" -$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" +$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" -$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" +$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL' $CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32)') settings input_format_mysql_dump_table_name='test 3'" 2>&1 | grep -F -q 'EMPTY_DATA_PASSED' && echo 'OK' || echo 'FAIL' echo "dump2" $CLICKHOUSE_CLIENT -q "desc file(dump2.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump2.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump2.sql, MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump2.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" -$CLICKHOUSE_CLIENT -q "select * from file(dump2.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" +$CLICKHOUSE_CLIENT -q "select * from file(dump2.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1" echo "dump3" $CLICKHOUSE_CLIENT -q "desc file(dump3.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump3.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump3.sql, MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump3.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" -$CLICKHOUSE_CLIENT -q "select * from file(dump3.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" +$CLICKHOUSE_CLIENT -q "select * 
from file(dump3.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1" echo "dump4" $CLICKHOUSE_CLIENT -q "desc file(dump4.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump4.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump4.sql, MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump4.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" -$CLICKHOUSE_CLIENT -q "select * from file(dump4.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" +$CLICKHOUSE_CLIENT -q "select * from file(dump4.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3', max_threads=1" echo "dump5" $CLICKHOUSE_CLIENT -q "desc file(dump5.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump5.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump5.sql, MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump5.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" -$CLICKHOUSE_CLIENT -q "select * from file(dump5.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" +$CLICKHOUSE_CLIENT -q "select * from file(dump5.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1" echo "dump6" $CLICKHOUSE_CLIENT -q "desc file(dump6.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" -$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" +$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" -$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" +$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" -$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" +$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3', max_threads=1" echo "dump7" $CLICKHOUSE_CLIENT -q "desc file(dump7.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "desc file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" +$CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump) settings max_threads=1" +$CLICKHOUSE_CLIENT -q "desc file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1" $CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" $CLICKHOUSE_CLIENT -q "desc file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" -$CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" +$CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump7.sql, MySQLDump) 
settings input_format_mysql_dump_table_name='test 3'" -$CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" +$CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3', max_threads=1" echo "dump8" $CLICKHOUSE_CLIENT -q "desc file(dump8.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump8.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump8.sql, MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump8.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" -$CLICKHOUSE_CLIENT -q "select * from file(dump2.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" +$CLICKHOUSE_CLIENT -q "select * from file(dump2.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1" echo "dump9" $CLICKHOUSE_CLIENT -q "desc file(dump9.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump9.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump9.sql, MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump9.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" -$CLICKHOUSE_CLIENT -q "select * from file(dump9.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" +$CLICKHOUSE_CLIENT -q "select * from file(dump9.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1" echo "dump10" $CLICKHOUSE_CLIENT -q "desc file(dump10.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump10.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump10.sql, MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump10.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" -$CLICKHOUSE_CLIENT -q "select * from file(dump10.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" +$CLICKHOUSE_CLIENT -q "select * from file(dump10.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1" echo "dump11" $CLICKHOUSE_CLIENT -q "desc file(dump11.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump11.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump11.sql, MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump11.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" -$CLICKHOUSE_CLIENT -q "select * from file(dump11.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" +$CLICKHOUSE_CLIENT -q "select * from file(dump11.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1" echo "dump12" $CLICKHOUSE_CLIENT -q "desc file(dump12.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" -$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" +$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" -$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" +$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings 
input_format_mysql_dump_table_name='test2', max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" -$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" +$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3', max_threads=1" echo "dump13" $CLICKHOUSE_CLIENT -q "desc file(dump13.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump13.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump13.sql, MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump13.sql, MySQLDump) settings input_format_mysql_dump_table_name='fruits'" -$CLICKHOUSE_CLIENT -q "select * from file(dump13.sql, MySQLDump) settings input_format_mysql_dump_table_name='fruits'" +$CLICKHOUSE_CLIENT -q "select * from file(dump13.sql, MySQLDump) settings input_format_mysql_dump_table_name='fruits', max_threads=1" echo "dump14" $CLICKHOUSE_CLIENT -q "desc file(dump14.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" -$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" +$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" -$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" +$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" -$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" +$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3', max_threads=1" echo "dump15" $CLICKHOUSE_CLIENT -q "desc file(dump15.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" -$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" +$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" -$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" +$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" -$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" +$CLICKHOUSE_CLIENT -q "select * from 
file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3', max_threads=1" rm $USER_FILES_PATH/dump*.sql diff --git a/tests/queries/0_stateless/02293_arrow_dictionary_indexes.sql b/tests/queries/0_stateless/02293_arrow_dictionary_indexes.sql index 3ff6a5ffbb3..3ea229a1152 100644 --- a/tests/queries/0_stateless/02293_arrow_dictionary_indexes.sql +++ b/tests/queries/0_stateless/02293_arrow_dictionary_indexes.sql @@ -1,3 +1,3 @@ --- Tags: no-fasttest +-- Tags: no-fasttest, no-parallel insert into function file(02293_data.arrow) select toLowCardinality(toString(number)) from numbers(300) settings output_format_arrow_low_cardinality_as_dictionary=1, engine_file_truncate_on_insert=1; -select * from file(02293_data.arrow); +select * from file(02293_data.arrow) settings max_threads=1; diff --git a/tests/queries/0_stateless/02293_formats_json_columns.reference b/tests/queries/0_stateless/02293_formats_json_columns.reference index f59f02ad42b..d3fb006a73f 100644 --- a/tests/queries/0_stateless/02293_formats_json_columns.reference +++ b/tests/queries/0_stateless/02293_formats_json_columns.reference @@ -86,18 +86,18 @@ d Nullable(String) \N \N 3 \N \N \N \N String OK -3 -2 1 +2 +3 c1 Nullable(Int64) c2 Nullable(Int64) c3 Nullable(String) 1 1 \N -2 2 \N -3 3 \N -1 \N \N -2 \N \N -3 \N \N 1 2 String +1 \N \N +2 2 \N +2 \N \N +3 3 \N +3 \N \N OK OK diff --git a/tests/queries/0_stateless/02293_formats_json_columns.sh b/tests/queries/0_stateless/02293_formats_json_columns.sh index 7a21f8d9bab..0aaf2abfc45 100755 --- a/tests/queries/0_stateless/02293_formats_json_columns.sh +++ b/tests/queries/0_stateless/02293_formats_json_columns.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +USER_FILES_PATH=$(clickhouse client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') DATA_FILE=$USER_FILES_PATH/data_02293 @@ -17,13 +17,13 @@ echo "JSONColumns" $CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONColumns" $CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONColumns" > $DATA_FILE $CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns)" -$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns)" +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns) order by a" echo "JSONCompactColumns" $CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONCompactColumns" $CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONCompactColumns" > $DATA_FILE $CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONCompactColumns)" -$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns)" +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns) order by c1, c2, c3" echo "JSONColumnsWithMetadata" $CLICKHOUSE_CLIENT -q "select sum(a) as sum, avg(a) as avg from test_02293 group by a % 4 with totals order by tuple(sum, avg) format JSONColumnsWithMetadata" --extremes=1 | grep -v "elapsed" @@ -49,9 +49,9 @@ echo ' ' > $DATA_FILE $CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns)" -$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns)" -$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns, 'a UInt32, t 
String') settings input_format_skip_unknown_fields=0" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL' -$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns, 'a UInt32, t String') settings input_format_skip_unknown_fields=1" +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns) order by b, a, c, d" +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns, 'a UInt32, t String') order by a, t settings input_format_skip_unknown_fields=0" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns, 'a UInt32, t String') order by a, t settings input_format_skip_unknown_fields=1" echo ' [ @@ -75,8 +75,8 @@ echo ' ' > $DATA_FILE $CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONCompactColumns)" -$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns)" -$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns, 'a UInt32, t UInt32')" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns) order by c1, c2, c3" +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns, 'a UInt32, t UInt32') order by a, t" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL' echo ' { diff --git a/tests/queries/0_stateless/02293_grouping_function.sql b/tests/queries/0_stateless/02293_grouping_function.sql index cf076c8e51c..c858eae269d 100644 --- a/tests/queries/0_stateless/02293_grouping_function.sql +++ b/tests/queries/0_stateless/02293_grouping_function.sql @@ -1,3 +1,5 @@ +set optimize_group_by_function_keys=0; + SELECT number, grouping(number, number % 2, number % 3) AS gr diff --git a/tests/queries/0_stateless/02293_grouping_function_group_by.sql b/tests/queries/0_stateless/02293_grouping_function_group_by.sql index d438a8a5277..da6477a1822 100644 --- a/tests/queries/0_stateless/02293_grouping_function_group_by.sql +++ b/tests/queries/0_stateless/02293_grouping_function_group_by.sql @@ -1,3 +1,5 @@ +set optimize_group_by_function_keys=0; + SELECT number, grouping(number, number % 2, number % 3) = 6 diff --git a/tests/queries/0_stateless/02294_anova_cmp.python b/tests/queries/0_stateless/02294_anova_cmp.python index 7597b3712d1..2212a887b2f 100644 --- a/tests/queries/0_stateless/02294_anova_cmp.python +++ b/tests/queries/0_stateless/02294_anova_cmp.python @@ -7,7 +7,7 @@ import pandas as pd import numpy as np CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient @@ -22,15 +22,22 @@ def test_and_check(rvs, n_groups, f_stat, p_value, precision=1e-2): client.query("DROP TABLE IF EXISTS anova;") client.query("CREATE TABLE anova (left Float64, right UInt64) ENGINE = Memory;") for group in range(n_groups): - client.query(f'''INSERT INTO anova VALUES {", ".join([f'({i},{group})' for i in rvs[group]])};''') + client.query( + f"""INSERT INTO anova VALUES {", ".join([f'({i},{group})' for i in rvs[group]])};""" + ) real = client.query_return_df( - '''SELECT roundBankers(a.1, 16) as f_stat, roundBankers(a.2, 16) as p_value FROM (SELECT anova(left, right) as a FROM anova) FORMAT TabSeparatedWithNames;''') + """SELECT roundBankers(a.1, 16) as f_stat, roundBankers(a.2, 16) as p_value FROM (SELECT anova(left, right) as a FROM anova) FORMAT TabSeparatedWithNames;""" + ) - real_f_stat = real['f_stat'][0] - real_p_value = 
real['p_value'][0] - assert(abs(real_f_stat - np.float64(f_stat)) < precision), f"clickhouse_f_stat {real_f_stat}, py_f_stat {f_stat}" - assert(abs(real_p_value - np.float64(p_value)) < precision), f"clickhouse_p_value {real_p_value}, py_p_value {p_value}" + real_f_stat = real["f_stat"][0] + real_p_value = real["p_value"][0] + assert ( + abs(real_f_stat - np.float64(f_stat)) < precision + ), f"clickhouse_f_stat {real_f_stat}, py_f_stat {f_stat}" + assert ( + abs(real_p_value - np.float64(p_value)) < precision + ), f"clickhouse_p_value {real_p_value}, py_p_value {p_value}" client.query("DROP TABLE IF EXISTS anova;") diff --git a/tests/queries/0_stateless/02303_query_kind.reference b/tests/queries/0_stateless/02303_query_kind.reference index 163f8b0ed5e..5af8c2b743f 100644 --- a/tests/queries/0_stateless/02303_query_kind.reference +++ b/tests/queries/0_stateless/02303_query_kind.reference @@ -1,36 +1,36 @@ -clickhouse-client --query_kind secondary_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy -Expression ((Projection + Before ORDER BY)) +clickhouse-client --allow_experimental_analyzer=1 --query_kind secondary_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy +Expression ((Project names + Projection)) Header: dummy String Aggregating - Header: toString(dummy) String - Expression (Before GROUP BY) - Header: toString(dummy) String + Header: toString(system.one.dummy_0) String + Expression ((Before GROUP BY + Change column names to column identifiers)) + Header: toString(system.one.dummy_0) String ReadFromStorage (SystemOne) Header: dummy UInt8 -clickhouse-local --query_kind secondary_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy -Expression ((Projection + Before ORDER BY)) +clickhouse-local --allow_experimental_analyzer=1 --query_kind secondary_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy +Expression ((Project names + Projection)) Header: dummy String Aggregating - Header: toString(dummy) String - Expression (Before GROUP BY) - Header: toString(dummy) String + Header: toString(system.one.dummy_0) String + Expression ((Before GROUP BY + Change column names to column identifiers)) + Header: toString(system.one.dummy_0) String ReadFromStorage (SystemOne) Header: dummy UInt8 -clickhouse-client --query_kind initial_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy -Expression ((Projection + Before ORDER BY)) +clickhouse-client --allow_experimental_analyzer=1 --query_kind initial_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy +Expression ((Project names + Projection)) Header: dummy String Aggregating - Header: toString(dummy) String - Expression (Before GROUP BY) - Header: toString(dummy) String + Header: toString(system.one.dummy_0) String + Expression ((Before GROUP BY + Change column names to column identifiers)) + Header: toString(system.one.dummy_0) String ReadFromStorage (SystemOne) Header: dummy UInt8 -clickhouse-local --query_kind initial_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy -Expression ((Projection + Before ORDER BY)) +clickhouse-local --allow_experimental_analyzer=1 --query_kind initial_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy +Expression ((Project names + Projection)) Header: dummy String Aggregating - 
Header: toString(dummy) String - Expression (Before GROUP BY) - Header: toString(dummy) String + Header: toString(system.one.dummy_0) String + Expression ((Before GROUP BY + Change column names to column identifiers)) + Header: toString(system.one.dummy_0) String ReadFromStorage (SystemOne) Header: dummy UInt8 diff --git a/tests/queries/0_stateless/02303_query_kind.sh b/tests/queries/0_stateless/02303_query_kind.sh index 5ad5f9ec6f4..1d883a2dcc7 100755 --- a/tests/queries/0_stateless/02303_query_kind.sh +++ b/tests/queries/0_stateless/02303_query_kind.sh @@ -4,6 +4,10 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh +opts=( + "--allow_experimental_analyzer=1" +) + function run_query() { echo "clickhouse-client $*" @@ -12,5 +16,5 @@ function run_query() echo "clickhouse-local $*" $CLICKHOUSE_LOCAL "$@" } -run_query --query_kind secondary_query -q "explain plan header=1 select toString(dummy) as dummy from system.one group by dummy" -run_query --query_kind initial_query -q "explain plan header=1 select toString(dummy) as dummy from system.one group by dummy" +run_query "${opts[@]}" --query_kind secondary_query -q "explain plan header=1 select toString(dummy) as dummy from system.one group by dummy" +run_query "${opts[@]}" --query_kind initial_query -q "explain plan header=1 select toString(dummy) as dummy from system.one group by dummy" diff --git a/tests/queries/0_stateless/02314_csv_tsv_skip_first_lines.sql b/tests/queries/0_stateless/02314_csv_tsv_skip_first_lines.sql index 4a0cef35310..a569b6e9b84 100644 --- a/tests/queries/0_stateless/02314_csv_tsv_skip_first_lines.sql +++ b/tests/queries/0_stateless/02314_csv_tsv_skip_first_lines.sql @@ -3,10 +3,10 @@ insert into function file(data_02314.csv) select number, number + 1 from numbers(5) settings engine_file_truncate_on_insert=1; insert into function file(data_02314.csv) select number, number + 1, number + 2 from numbers(5); desc file(data_02314.csv) settings input_format_csv_skip_first_lines=5; -select * from file(data_02314.csv) settings input_format_csv_skip_first_lines=5; +select * from file(data_02314.csv) order by c1 settings input_format_csv_skip_first_lines=5; insert into function file(data_02314.tsv) select number, number + 1 from numbers(5) settings engine_file_truncate_on_insert=1; insert into function file(data_02314.tsv) select number, number + 1, number + 2 from numbers(5); desc file(data_02314.tsv) settings input_format_tsv_skip_first_lines=5; -select * from file(data_02314.tsv) settings input_format_tsv_skip_first_lines=5; +select * from file(data_02314.tsv) order by c1 settings input_format_tsv_skip_first_lines=5; diff --git a/tests/queries/0_stateless/02315_grouping_constant_folding.reference b/tests/queries/0_stateless/02315_grouping_constant_folding.reference index 6e591de2661..31816318a42 100644 --- a/tests/queries/0_stateless/02315_grouping_constant_folding.reference +++ b/tests/queries/0_stateless/02315_grouping_constant_folding.reference @@ -27,3 +27,17 @@ SELECT count() AS amount, a, b, GROUPING(a, b) FROM test02315 GROUP BY ROLLUP(a, 5 0 0 2 5 1 0 2 10 0 0 0 +SELECT count() AS amount, a, b, GROUPING(a, b) FROM test02315 GROUP BY GROUPING SETS ((a, b), (a, a), ()) ORDER BY (amount, a, b) SETTINGS force_grouping_standard_compatibility=0, allow_experimental_analyzer=1; +1 0 0 3 +1 0 2 3 +1 0 4 3 +1 0 6 3 +1 0 8 3 +1 1 1 3 +1 1 3 3 +1 1 5 3 +1 1 7 3 +1 1 9 3 +5 0 0 2 +5 1 0 2 +10 0 0 0 diff --git 
a/tests/queries/0_stateless/02315_grouping_constant_folding.sql b/tests/queries/0_stateless/02315_grouping_constant_folding.sql index ff259b7be79..f992aa0da32 100644 --- a/tests/queries/0_stateless/02315_grouping_constant_folding.sql +++ b/tests/queries/0_stateless/02315_grouping_constant_folding.sql @@ -9,5 +9,7 @@ SELECT count() AS amount, a, b, GROUPING(a, b) FROM test02315 GROUP BY GROUPING SELECT count() AS amount, a, b, GROUPING(a, b) FROM test02315 GROUP BY ROLLUP(a, b) ORDER BY (amount, a, b) SETTINGS force_grouping_standard_compatibility=0; +SELECT count() AS amount, a, b, GROUPING(a, b) FROM test02315 GROUP BY GROUPING SETS ((a, b), (a, a), ()) ORDER BY (amount, a, b) SETTINGS force_grouping_standard_compatibility=0, allow_experimental_analyzer=1; + -- { echoOff } DROP TABLE test02315; diff --git a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql index 00b90bb38b5..9413c664293 100644 --- a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql +++ b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql @@ -74,7 +74,7 @@ select table, partition, name, rows from system.parts where database = currentDa drop table t_light; SELECT '-----Test lightweight delete in multi blocks-----'; -CREATE TABLE t_large(a UInt32, b int) ENGINE=MergeTree order BY a settings min_bytes_for_wide_part=0; +CREATE TABLE t_large(a UInt32, b int) ENGINE=MergeTree order BY a settings min_bytes_for_wide_part=0, index_granularity=8192, index_granularity_bytes='10Mi'; INSERT INTO t_large SELECT number + 1, number + 1 FROM numbers(100000); DELETE FROM t_large WHERE a = 50000; diff --git a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree_compact_parts.sql b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree_compact_parts.sql index 4e9f3db0b96..db0567f252a 100644 --- a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree_compact_parts.sql +++ b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree_compact_parts.sql @@ -80,7 +80,7 @@ select table, partition, name, rows from system.parts where database = currentDa drop table t_light; SELECT '-----Test lightweight delete in multi blocks-----'; -CREATE TABLE t_large(a UInt32, b int) ENGINE=MergeTree order BY a settings min_bytes_for_wide_part=0; +CREATE TABLE t_large(a UInt32, b int) ENGINE=MergeTree order BY a settings min_bytes_for_wide_part=0, index_granularity=8192, index_granularity_bytes='10Mi'; INSERT INTO t_large SELECT number + 1, number + 1 FROM numbers(100000); DELETE FROM t_large WHERE a = 50000; diff --git a/tests/queries/0_stateless/02319_no_columns_in_row_level_filter.reference b/tests/queries/0_stateless/02319_no_columns_in_row_level_filter.reference index c0911ffc598..d344f57649d 100644 --- a/tests/queries/0_stateless/02319_no_columns_in_row_level_filter.reference +++ b/tests/queries/0_stateless/02319_no_columns_in_row_level_filter.reference @@ -1,4 +1,4 @@ -1000000 +0 0 0 0 diff --git a/tests/queries/0_stateless/02337_multiple_joins_original_names.reference b/tests/queries/0_stateless/02337_multiple_joins_original_names.reference index 6ed281c757a..e8183f05f5d 100644 --- a/tests/queries/0_stateless/02337_multiple_joins_original_names.reference +++ b/tests/queries/0_stateless/02337_multiple_joins_original_names.reference @@ -1,2 +1,3 @@ 1 1 +1 diff --git a/tests/queries/0_stateless/02337_multiple_joins_original_names.sql 
b/tests/queries/0_stateless/02337_multiple_joins_original_names.sql index afafee9f8eb..63bbfe6873d 100644 --- a/tests/queries/0_stateless/02337_multiple_joins_original_names.sql +++ b/tests/queries/0_stateless/02337_multiple_joins_original_names.sql @@ -1,11 +1,13 @@ -- https://github.com/ClickHouse/ClickHouse/issues/34697 +SET allow_experimental_analyzer = 1; + SELECT table1_id FROM ( SELECT first.table1_id FROM (SELECT number+1 as table1_id FROM numbers(1)) as first JOIN (SELECT number+1 as table2_id FROM numbers(1)) as second ON first.table1_id = second.table2_id JOIN (SELECT number+1 as table3_id FROM numbers(1)) as third ON first.table1_id = third.table3_id -); -- { serverError UNKNOWN_IDENTIFIER } +); SELECT table1_id FROM ( SELECT first.table1_id diff --git a/tests/queries/0_stateless/02339_analyzer_matcher_basic.reference b/tests/queries/0_stateless/02339_analyzer_matcher_basic.reference index 3ca0d303793..7e263647f98 100644 --- a/tests/queries/0_stateless/02339_analyzer_matcher_basic.reference +++ b/tests/queries/0_stateless/02339_analyzer_matcher_basic.reference @@ -38,6 +38,21 @@ id UInt64 value String 0 Value Database and table qualified matchers +-- +id UInt64 +value String +0 Value +-- +id UInt64 +0 +-- +id UInt64 +value String +0 Value +-- +id UInt64 +value String +0 Value APPLY transformer -- toString(id) String @@ -74,20 +89,20 @@ toString(value) String Value REPLACE transformer -- -5 UInt8 +id UInt8 value String 5 Value -- -5 UInt8 +id UInt8 value String 5 Value -- -5 UInt8 -6 UInt8 +id UInt8 +value UInt8 5 6 -- -5 UInt8 -6 UInt8 +id UInt8 +value UInt8 5 6 Combine EXCEPT, REPLACE, APPLY transformers -- diff --git a/tests/queries/0_stateless/02339_analyzer_matcher_basic.sql b/tests/queries/0_stateless/02339_analyzer_matcher_basic.sql index 9d7c486b28a..a09325fa43b 100644 --- a/tests/queries/0_stateless/02339_analyzer_matcher_basic.sql +++ b/tests/queries/0_stateless/02339_analyzer_matcher_basic.sql @@ -80,27 +80,25 @@ CREATE TABLE 02339_db.test_table INSERT INTO 02339_db.test_table VALUES (0, 'Value'); --- TODO: Qualified COLUMNS where identifier has more than 2 parts are not supported on parser level +SELECT '--'; --- SELECT '--'; +DESCRIBE (SELECT 02339_db.test_table.* FROM 02339_db.test_table); +SELECT 02339_db.test_table.* FROM 02339_db.test_table; --- DESCRIBE (SELECT 02339_db.test_table.* FROM 02339_db.test_table); --- SELECT 02339_db.test_table.* FROM 02339_db.test_table; +SELECT '--'; --- SELECT '--'; +DESCRIBE (SELECT 02339_db.test_table.COLUMNS(id) FROM 02339_db.test_table); +SELECT 02339_db.test_table.COLUMNS(id) FROM 02339_db.test_table; --- DESCRIBE (SELECT 02339_db.test_table.COLUMNS(id) FROM 02339_db.test_table); --- SELECT 02339_db.test_table.COLUMNS(id) FROM 02339_db.test_table; +SELECT '--'; --- SELECT '--'; +DESCRIBE (SELECT 02339_db.test_table.COLUMNS(id), 02339_db.test_table.COLUMNS(value) FROM 02339_db.test_table); +SELECT 02339_db.test_table.COLUMNS(id), 02339_db.test_table.COLUMNS(value) FROM 02339_db.test_table; --- DESCRIBE (SELECT 02339_db.test_table.COLUMNS(id), 02339_db.test_table.COLUMNS(value) FROM 02339_db.test_table); --- SELECT 02339_db.test_table.COLUMNS(id), 02339_db.test_table.COLUMNS(value) FROM 02339_db.test_table; +SELECT '--'; --- SELECT '--'; - --- DESCRIBE (SELECT 02339_db.test_table.COLUMNS('i'), 02339_db.test_table.COLUMNS('v') FROM 02339_db.test_table); --- SELECT 02339_db.test_table.COLUMNS('i'), 02339_db.test_table.COLUMNS('v') FROM 02339_db.test_table; +DESCRIBE (SELECT 02339_db.test_table.COLUMNS('i'), 
02339_db.test_table.COLUMNS('v') FROM 02339_db.test_table); +SELECT 02339_db.test_table.COLUMNS('i'), 02339_db.test_table.COLUMNS('v') FROM 02339_db.test_table; DROP TABLE 02339_db.test_table; DROP DATABASE 02339_db; diff --git a/tests/queries/0_stateless/02342_window_view_different_struct.sql b/tests/queries/0_stateless/02342_window_view_different_struct.sql index c5bf8899cae..a5b2b8daa5a 100644 --- a/tests/queries/0_stateless/02342_window_view_different_struct.sql +++ b/tests/queries/0_stateless/02342_window_view_different_struct.sql @@ -1,3 +1,4 @@ +SET allow_experimental_analyzer = 0; SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS data_02342; diff --git a/tests/queries/0_stateless/02343_aggregation_pipeline.reference b/tests/queries/0_stateless/02343_aggregation_pipeline.reference index ec9a394d05d..ca838fdf4e0 100644 --- a/tests/queries/0_stateless/02343_aggregation_pipeline.reference +++ b/tests/queries/0_stateless/02343_aggregation_pipeline.reference @@ -92,12 +92,15 @@ ExpressionTransform × 16 (MergingAggregated) Resize 1 → 16 MergingAggregatedTransform - Resize 2 → 1 + Resize 17 → 1 (Union) - (ReadFromStorage) - AggregatingTransform - ExpressionTransform - MergeTreeInOrder 0 → 1 + (Aggregating) + Resize 1 → 16 + AggregatingTransform + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeInOrder 0 → 1 (ReadFromRemote) explain pipeline SELECT k1, k3, sum(value) v FROM remote('127.0.0.{1,2}', currentDatabase(), proj_agg_02343) GROUP BY k1, k3 SETTINGS distributed_aggregation_memory_efficient = 1; (Expression) @@ -109,9 +112,11 @@ ExpressionTransform × 16 Resize 1 → 16 GroupingAggregatedTransform 2 → 1 (Union) - (ReadFromStorage) + (Aggregating) AggregatingTransform + (Expression) ExpressionTransform + (ReadFromMergeTree) MergeTreeInOrder 0 → 1 (ReadFromRemote) -- { echoOn } diff --git a/tests/queries/0_stateless/02343_group_by_use_nulls.sql b/tests/queries/0_stateless/02343_group_by_use_nulls.sql index a979a78be0d..e1d4021a943 100644 --- a/tests/queries/0_stateless/02343_group_by_use_nulls.sql +++ b/tests/queries/0_stateless/02343_group_by_use_nulls.sql @@ -1,3 +1,4 @@ +set optimize_group_by_function_keys=0; -- { echoOn } SELECT number, number % 2, sum(number) AS val FROM numbers(10) diff --git a/tests/queries/0_stateless/02343_group_by_use_nulls_distributed.sql b/tests/queries/0_stateless/02343_group_by_use_nulls_distributed.sql index 15ac1127de7..96485728231 100644 --- a/tests/queries/0_stateless/02343_group_by_use_nulls_distributed.sql +++ b/tests/queries/0_stateless/02343_group_by_use_nulls_distributed.sql @@ -1,3 +1,5 @@ +set optimize_group_by_function_keys=0; + -- { echoOn } SELECT number, number % 2, sum(number) AS val FROM remote('127.0.0.{2,3}', numbers(10)) diff --git a/tests/queries/0_stateless/02344_describe_cache.reference b/tests/queries/0_stateless/02344_describe_cache.reference index d3bb37af5cf..7561b32bae1 100644 --- a/tests/queries/0_stateless/02344_describe_cache.reference +++ b/tests/queries/0_stateless/02344_describe_cache.reference @@ -1,2 +1,2 @@ -2147483648 1048576 104857600 1 0 0 0 s3_cache/ 0 -2147483648 1048576 104857600 0 0 0 0 s3_cache_2/ 0 +134217728 1048576 104857600 1 0 0 0 /var/lib/clickhouse/caches/s3_cache/ 0 +134217728 1048576 104857600 0 0 0 0 /var/lib/clickhouse/caches/s3_cache_2/ 0 diff --git a/tests/queries/0_stateless/02344_describe_cache.sql b/tests/queries/0_stateless/02344_describe_cache.sql index fef004cb27f..a687ad01394 100644 --- a/tests/queries/0_stateless/02344_describe_cache.sql +++ 
b/tests/queries/0_stateless/02344_describe_cache.sql @@ -1,7 +1,7 @@ -- Tags: no-fasttest, no-parallel -SYSTEM DROP FILESYSTEM CACHE 's3_cache/'; -SYSTEM DROP FILESYSTEM CACHE 's3_cache_2/'; +SYSTEM DROP FILESYSTEM CACHE 's3_cache'; +SYSTEM DROP FILESYSTEM CACHE 's3_cache_2'; DESCRIBE FILESYSTEM CACHE 's3_cache'; DESCRIBE FILESYSTEM CACHE 's3_cache_2'; diff --git a/tests/queries/0_stateless/02344_show_caches.reference b/tests/queries/0_stateless/02344_show_caches.reference deleted file mode 100644 index 2ee4f902ba1..00000000000 --- a/tests/queries/0_stateless/02344_show_caches.reference +++ /dev/null @@ -1,14 +0,0 @@ -cached_azure -s3_cache_2 -s3_cache -s3_cache_3 -s3_cache_multi -s3_cache_4 -s3_cache_5 -s3_cache_small_segment_size -local_cache -s3_cache_6 -s3_cache_small -local_cache_2 -local_cache_3 -s3_cache_multi_2 diff --git a/tests/queries/0_stateless/02344_show_caches.sql b/tests/queries/0_stateless/02344_show_caches.sql deleted file mode 100644 index 56f00b89051..00000000000 --- a/tests/queries/0_stateless/02344_show_caches.sql +++ /dev/null @@ -1,2 +0,0 @@ --- Tags: no-fasttest, no-replicated-database, no-cpu-aarch64 -SHOW FILESYSTEM CACHES; diff --git a/tests/queries/0_stateless/02345_implicit_transaction.reference b/tests/queries/0_stateless/02345_implicit_transaction.reference index e4dd35600f7..fb4254ec5a7 100644 --- a/tests/queries/0_stateless/02345_implicit_transaction.reference +++ b/tests/queries/0_stateless/02345_implicit_transaction.reference @@ -12,3 +12,6 @@ in_transaction 10000 out_transaction 0 {"'implicit_True'":"implicit_True","all":"2","is_empty":0} {"'implicit_False'":"implicit_False","all":"2","is_empty":1} +0 +0 +0 diff --git a/tests/queries/0_stateless/02345_implicit_transaction.sql b/tests/queries/0_stateless/02345_implicit_transaction.sql index e3f9cca37d1..b0cb4ab6305 100644 --- a/tests/queries/0_stateless/02345_implicit_transaction.sql +++ b/tests/queries/0_stateless/02345_implicit_transaction.sql @@ -1,4 +1,4 @@ --- Tags: no-ordinary-database +-- Tags: no-ordinary-database, no-fasttest CREATE TABLE landing (n Int64) engine=MergeTree order by n; CREATE TABLE target (n Int64) engine=MergeTree order by n; @@ -92,3 +92,13 @@ WHERE query LIKE '-- Verify that the transaction_id column is NOT populated without transaction%' GROUP BY transaction_id FORMAT JSONEachRow; + +SET implicit_transaction=1; +SET throw_on_unsupported_query_inside_transaction=1; +SELECT * FROM system.one; +SELECT * FROM cluster('test_cluster_interserver_secret', system, one); -- { serverError NOT_IMPLEMENTED } +SELECT * FROM cluster('test_cluster_two_shards', system, one); -- { serverError NOT_IMPLEMENTED } +SET throw_on_unsupported_query_inside_transaction=0; +-- there's no session in the interserver mode +SELECT * FROM cluster('test_cluster_interserver_secret', system, one) FORMAT Null; -- { serverError INVALID_TRANSACTION } +SELECT * FROM cluster('test_cluster_two_shards', system, one); diff --git a/tests/queries/0_stateless/02346_full_text_search.reference b/tests/queries/0_stateless/02346_full_text_search.reference index f1e21e511d0..d6e510b9375 100644 --- a/tests/queries/0_stateless/02346_full_text_search.reference +++ b/tests/queries/0_stateless/02346_full_text_search.reference @@ -1,3 +1,4 @@ +Test inverted(2) af inverted 1 101 Alick a01 @@ -10,6 +11,7 @@ af inverted 113 Click b03 118 Click b08 1 +Test inverted() af inverted 101 Alick a01 106 Alick a06 @@ -21,9 +23,11 @@ af inverted 101 Alick a01 111 Alick b01 1 +Test on array columns af inverted 3 ['Click a03','Click b03'] 1 +Test
on map columns af inverted 103 {'Click':'Click a03'} 108 {'Click':'Click a08'} @@ -32,22 +36,13 @@ af inverted 1 103 {'Click':'Click a03'} 1 +Test inverted(2) on a column with two parts af inverted 101 Alick a01 111 Alick b01 201 rick c01 1 +Test inverted(2) on UTF-8 data af inverted 102 clickhouse你好 1 -af inverted -BC614E,05397FB1,6969696969898240,CF3304 -1 -af inverted -1 -1 -af inverted -1 -1 -1 -1 diff --git a/tests/queries/0_stateless/02346_full_text_search.sql b/tests/queries/0_stateless/02346_full_text_search.sql index 2b10800e78f..18d1ce0fd96 100644 --- a/tests/queries/0_stateless/02346_full_text_search.sql +++ b/tests/queries/0_stateless/02346_full_text_search.sql @@ -2,7 +2,7 @@ SET allow_experimental_inverted_index = 1; SET log_queries = 1; ---------------------------------------------------- --- Test inverted(2) +SELECT 'Test inverted(2)'; DROP TABLE IF EXISTS tab; @@ -58,7 +58,7 @@ SELECT read_rows==8 from system.query_log LIMIT 1; ---------------------------------------------------- --- Test inverted() +SELECT 'Test inverted()'; DROP TABLE IF EXISTS tab_x; @@ -111,7 +111,7 @@ SELECT read_rows==4 from system.query_log LIMIT 1; ---------------------------------------------------- --- Test on array columns +SELECT 'Test on array columns'; DROP TABLE IF EXISTS tab; @@ -138,7 +138,7 @@ SELECT read_rows==2 from system.query_log LIMIT 1; ---------------------------------------------------- --- Test on map columns +SELECT 'Test on map columns'; DROP TABLE IF EXISTS tab; @@ -178,7 +178,8 @@ SELECT read_rows==8 from system.query_log LIMIT 1; ---------------------------------------------------- --- Test inverted(2) on a column with two parts +SELECT 'Test inverted(2) on a column with two parts'; + DROP TABLE IF EXISTS tab; @@ -206,7 +207,7 @@ SELECT read_rows==6 from system.query_log LIMIT 1; ---------------------------------------------------- --- Test inverted(2) on UTF-8 data +SELECT 'Test inverted(2) on UTF-8 data'; DROP TABLE IF EXISTS tab; @@ -233,105 +234,108 @@ SELECT read_rows==2 from system.query_log AND result_rows==1 LIMIT 1; ----------------------------------------------------- --- Test max_digestion_size_per_segment -DROP TABLE IF EXISTS tab; +-- Tests with parameter max_digestion_size_per_segment are flaky in CI, not clear why --> comment out for the time being: -CREATE TABLE tab(k UInt64, s String, INDEX af(s) TYPE inverted(0)) - Engine=MergeTree - ORDER BY (k) - SETTINGS max_digestion_size_per_segment = 1024, index_granularity = 256 - AS - SELECT - number, - format('{},{},{},{}', hex(12345678), hex(87654321), hex(number/17 + 5), hex(13579012)) as s - FROM numbers(10240); - --- check inverted index was created -SELECT name, type FROM system.data_skipping_indices WHERE table == 'tab' AND database = currentDatabase() LIMIT 1; - --- search inverted index -SELECT s FROM tab WHERE hasToken(s, '6969696969898240'); - --- check the query only read 1 granule (1 row total; each granule has 256 rows) -SYSTEM FLUSH LOGS; -SELECT read_rows==256 from system.query_log - WHERE query_kind ='Select' - AND current_database = currentDatabase() - AND endsWith(trimRight(query), 'SELECT s FROM tab WHERE hasToken(s, \'6969696969898240\');') - AND type='QueryFinish' - AND result_rows==1 - LIMIT 1; - ----------------------------------------------------- --- Test density==1 - -DROP TABLE IF EXISTS tab; - -CREATE TABLE tab(k UInt64, s String, INDEX af(s) TYPE inverted(0, 1.0)) - Engine=MergeTree - ORDER BY (k) - SETTINGS max_digestion_size_per_segment = 1, index_granularity = 512 - AS - SELECT 
number, if(number%2, format('happy {}', hex(number)), format('birthday {}', hex(number))) - FROM numbers(1024); - --- check inverted index was created -SELECT name, type FROM system.data_skipping_indices WHERE table == 'tab' AND database = currentDatabase() LIMIT 1; - --- search inverted index, no row has 'happy birthday' -SELECT count() == 0 FROM tab WHERE s =='happy birthday'; - --- check the query only skip all granules (0 row total; each granule has 512 rows) -SYSTEM FLUSH LOGS; -SELECT read_rows==0 from system.query_log - WHERE query_kind ='Select' - AND current_database = currentDatabase() - AND endsWith(trimRight(query), 'SELECT count() == 0 FROM tab WHERE s ==\'happy birthday\';') - AND type='QueryFinish' - AND result_rows==1 - LIMIT 1; - ----------------------------------------------------- --- Test density==0.1 - -DROP TABLE IF EXISTS tab; - -CREATE TABLE tab(k UInt64, s String, INDEX af(s) TYPE inverted(0, 0.1)) - Engine=MergeTree - ORDER BY (k) - SETTINGS max_digestion_size_per_segment = 1, index_granularity = 512 - AS - SELECT number, if(number==1023, 'happy new year', if(number%2, format('happy {}', hex(number)), format('birthday {}', hex(number)))) - FROM numbers(1024); - --- check inverted index was created - -SELECT name, type FROM system.data_skipping_indices WHERE table == 'tab' AND database = currentDatabase() LIMIT 1; - --- search inverted index, no row has 'happy birthday' -SELECT count() == 0 FROM tab WHERE s == 'happy birthday'; - --- check the query does not skip any of the 2 granules(1024 rows total; each granule has 512 rows) -SYSTEM FLUSH LOGS; -SELECT read_rows==1024 from system.query_log - WHERE query_kind ='Select' - AND current_database = currentDatabase() - AND endsWith(trimRight(query), 'SELECT count() == 0 FROM tab WHERE s == \'happy birthday\';') - AND type='QueryFinish' - AND result_rows==1 - LIMIT 1; - --- search inverted index, no row has 'happy new year' -SELECT count() == 1 FROM tab WHERE s == 'happy new year'; - --- check the query only read 1 granule because of density (1024 rows total; each granule has 512 rows) -SYSTEM FLUSH LOGS; -SELECT read_rows==512 from system.query_log - WHERE query_kind ='Select' - AND current_database = currentDatabase() - AND endsWith(trimRight(query), 'SELECT count() == 1 FROM tab WHERE s == \'happy new year\';') - AND type='QueryFinish' - AND result_rows==1 - LIMIT 1; +-- ---------------------------------------------------- +-- SELECT 'Test max_digestion_size_per_segment'; +-- +-- DROP TABLE IF EXISTS tab; +-- +-- CREATE TABLE tab(k UInt64, s String, INDEX af(s) TYPE inverted(0)) +-- Engine=MergeTree +-- ORDER BY (k) +-- SETTINGS max_digestion_size_per_segment = 1024, index_granularity = 256 +-- AS +-- SELECT +-- number, +-- format('{},{},{},{}', hex(12345678), hex(87654321), hex(number/17 + 5), hex(13579012)) as s +-- FROM numbers(10240); +-- +-- -- check inverted index was created +-- SELECT name, type FROM system.data_skipping_indices WHERE table == 'tab' AND database = currentDatabase() LIMIT 1; +-- +-- -- search inverted index +-- SELECT s FROM tab WHERE hasToken(s, '6969696969898240'); +-- +-- -- check the query only read 1 granule (1 row total; each granule has 256 rows) +-- SYSTEM FLUSH LOGS; +-- SELECT read_rows==256 from system.query_log +-- WHERE query_kind ='Select' +-- AND current_database = currentDatabase() +-- AND endsWith(trimRight(query), 'SELECT s FROM tab WHERE hasToken(s, \'6969696969898240\');') +-- AND type='QueryFinish' +-- AND result_rows==1 +-- LIMIT 1; +-- +-- 
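+-- [Editor's note -- a hedged sketch, not part of the patch: every disabled
+-- check above and below follows one pattern -- run a search, flush the logs,
+-- then assert on read_rows from system.query_log to prove how many granules
+-- were actually read. Table name and the expected count of 512 are
+-- illustrative only (one granule at index_granularity = 512):
+--     SYSTEM FLUSH LOGS;
+--     SELECT read_rows == 512
+--     FROM system.query_log
+--     WHERE query_kind = 'Select'
+--       AND current_database = currentDatabase()
+--       AND type = 'QueryFinish'
+--     LIMIT 1;
+-- ]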
---------------------------------------------------- +-- SELECT 'Test density==1'; +-- +-- DROP TABLE IF EXISTS tab; +-- +-- CREATE TABLE tab(k UInt64, s String, INDEX af(s) TYPE inverted(0, 1.0)) +-- Engine=MergeTree +-- ORDER BY (k) +-- SETTINGS max_digestion_size_per_segment = 1, index_granularity = 512 +-- AS +-- SELECT number, if(number%2, format('happy {}', hex(number)), format('birthday {}', hex(number))) +-- FROM numbers(1024); +-- +-- -- check inverted index was created +-- SELECT name, type FROM system.data_skipping_indices WHERE table == 'tab' AND database = currentDatabase() LIMIT 1; +-- +-- -- search inverted index, no row has 'happy birthday' +-- SELECT count() == 0 FROM tab WHERE s =='happy birthday'; +-- +-- -- check the query only skip all granules (0 row total; each granule has 512 rows) +-- SYSTEM FLUSH LOGS; +-- SELECT read_rows==0 from system.query_log +-- WHERE query_kind ='Select' +-- AND current_database = currentDatabase() +-- AND endsWith(trimRight(query), 'SELECT count() == 0 FROM tab WHERE s ==\'happy birthday\';') +-- AND type='QueryFinish' +-- AND result_rows==1 +-- LIMIT 1; +-- +-- ---------------------------------------------------- +-- SELECT 'Test density==0.1'; +-- +-- DROP TABLE IF EXISTS tab; +-- +-- CREATE TABLE tab(k UInt64, s String, INDEX af(s) TYPE inverted(0, 0.1)) +-- Engine=MergeTree +-- ORDER BY (k) +-- SETTINGS max_digestion_size_per_segment = 1, index_granularity = 512 +-- AS +-- SELECT number, if(number==1023, 'happy new year', if(number%2, format('happy {}', hex(number)), format('birthday {}', hex(number)))) +-- FROM numbers(1024); +-- +-- -- check inverted index was created +-- +-- SELECT name, type FROM system.data_skipping_indices WHERE table == 'tab' AND database = currentDatabase() LIMIT 1; +-- +-- -- search inverted index, no row has 'happy birthday' +-- SELECT count() == 0 FROM tab WHERE s == 'happy birthday'; +-- +-- -- check the query does not skip any of the 2 granules(1024 rows total; each granule has 512 rows) +-- SYSTEM FLUSH LOGS; +-- SELECT read_rows==1024 from system.query_log +-- WHERE query_kind ='Select' +-- AND current_database = currentDatabase() +-- AND endsWith(trimRight(query), 'SELECT count() == 0 FROM tab WHERE s == \'happy birthday\';') +-- AND type='QueryFinish' +-- AND result_rows==1 +-- LIMIT 1; +-- +-- -- search inverted index, no row has 'happy new year' +-- SELECT count() == 1 FROM tab WHERE s == 'happy new year'; +-- +-- -- check the query only read 1 granule because of density (1024 rows total; each granule has 512 rows) +-- SYSTEM FLUSH LOGS; +-- SELECT read_rows==512 from system.query_log +-- WHERE query_kind ='Select' +-- AND current_database = currentDatabase() +-- AND endsWith(trimRight(query), 'SELECT count() == 1 FROM tab WHERE s == \'happy new year\';') +-- AND type='QueryFinish' +-- AND result_rows==1 +-- LIMIT 1; diff --git a/tests/queries/0_stateless/02346_read_in_order_fixed_prefix.python b/tests/queries/0_stateless/02346_read_in_order_fixed_prefix.python index 399533480a9..7f52daeb408 100644 --- a/tests/queries/0_stateless/02346_read_in_order_fixed_prefix.python +++ b/tests/queries/0_stateless/02346_read_in_order_fixed_prefix.python @@ -123,10 +123,14 @@ Uses FinishSortingTransform: {} for query in queries: check_query(query["where"], query["order_by"], query["optimize"], False) - check_query(query["where"], query["order_by"] + ["e"], query["optimize"], query["optimize"]) + check_query( + query["where"], query["order_by"] + ["e"], query["optimize"], query["optimize"] + ) where_columns = 
[f"bitNot({col})" for col in query["where"]] check_query(where_columns, query["order_by"], query["optimize"], False) - check_query(where_columns, query["order_by"] + ["e"], query["optimize"], query["optimize"]) + check_query( + where_columns, query["order_by"] + ["e"], query["optimize"], query["optimize"] + ) print("OK") diff --git a/tests/queries/0_stateless/02352_lightweight_delete.sql b/tests/queries/0_stateless/02352_lightweight_delete.sql index e1759e56a3a..b13688282a4 100644 --- a/tests/queries/0_stateless/02352_lightweight_delete.sql +++ b/tests/queries/0_stateless/02352_lightweight_delete.sql @@ -1,6 +1,6 @@ DROP TABLE IF EXISTS lwd_test; -CREATE TABLE lwd_test (id UInt64 , value String) ENGINE MergeTree() ORDER BY id; +CREATE TABLE lwd_test (id UInt64 , value String) ENGINE MergeTree() ORDER BY id SETTINGS index_granularity=8192, index_granularity_bytes='10Mi'; INSERT INTO lwd_test SELECT number, randomString(10) FROM system.numbers LIMIT 1000000; diff --git a/tests/queries/0_stateless/02352_lightweight_delete_and_object_column.reference b/tests/queries/0_stateless/02352_lightweight_delete_and_object_column.reference new file mode 100644 index 00000000000..3ad5abd03ae --- /dev/null +++ b/tests/queries/0_stateless/02352_lightweight_delete_and_object_column.reference @@ -0,0 +1 @@ +99 diff --git a/tests/queries/0_stateless/02352_lightweight_delete_and_object_column.sql b/tests/queries/0_stateless/02352_lightweight_delete_and_object_column.sql new file mode 100644 index 00000000000..cd29d0ac445 --- /dev/null +++ b/tests/queries/0_stateless/02352_lightweight_delete_and_object_column.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS t_obj SYNC; + +SET allow_experimental_object_type=1; + +CREATE TABLE t_obj(id Int32, name Object('json')) ENGINE = MergeTree() ORDER BY id; + +INSERT INTO t_obj select number, '{"a" : "' || toString(number) || '"}' FROM numbers(100); + +DELETE FROM t_obj WHERE id = 10; + +SELECT COUNT() FROM t_obj; + +DROP TABLE t_obj SYNC; + diff --git a/tests/queries/0_stateless/02354_with_statement_non_exist_column.reference b/tests/queries/0_stateless/02354_with_statement_non_exist_column.reference index d00491fd7e5..e69de29bb2d 100644 --- a/tests/queries/0_stateless/02354_with_statement_non_exist_column.reference +++ b/tests/queries/0_stateless/02354_with_statement_non_exist_column.reference @@ -1 +0,0 @@ -1 diff --git a/tests/queries/0_stateless/02354_with_statement_non_exist_column.sql b/tests/queries/0_stateless/02354_with_statement_non_exist_column.sql index 1a989c1d952..869c335b621 100644 --- a/tests/queries/0_stateless/02354_with_statement_non_exist_column.sql +++ b/tests/queries/0_stateless/02354_with_statement_non_exist_column.sql @@ -1,5 +1,3 @@ -WITH x AS y SELECT 1; - DROP TEMPORARY TABLE IF EXISTS t1; DROP TEMPORARY TABLE IF EXISTS t2; diff --git a/tests/queries/0_stateless/02361_fsync_profile_events.sh b/tests/queries/0_stateless/02361_fsync_profile_events.sh index 44a1bd58d36..5b603133f6c 100755 --- a/tests/queries/0_stateless/02361_fsync_profile_events.sh +++ b/tests/queries/0_stateless/02361_fsync_profile_events.sh @@ -44,8 +44,8 @@ for i in {1..100}; do ")" # Non retriable errors - if [[ $FileSync -ne 7 ]]; then - echo "FileSync: $FileSync != 11" >&2 + if [[ $FileSync -ne 8 ]]; then + echo "FileSync: $FileSync != 8" >&2 exit 2 fi # Check that all files was synced diff --git a/tests/queries/0_stateless/02364_window_view_segfault.sh b/tests/queries/0_stateless/02364_window_view_segfault.sh index d03a1e5ae3e..3def22f4a9e 100755 --- 
a/tests/queries/0_stateless/02364_window_view_segfault.sh +++ b/tests/queries/0_stateless/02364_window_view_segfault.sh @@ -5,7 +5,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --multiquery --multiline --query """ +opts=( + "--allow_experimental_analyzer=0" +) + +${CLICKHOUSE_CLIENT} "${opts[@]}" --multiquery --multiline --query """ DROP TABLE IF EXISTS mt ON CLUSTER test_shard_localhost; DROP TABLE IF EXISTS wv ON CLUSTER test_shard_localhost; CREATE TABLE mt ON CLUSTER test_shard_localhost (a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); diff --git a/tests/queries/0_stateless/02368_cancel_write_into_hdfs.sh b/tests/queries/0_stateless/02368_cancel_write_into_hdfs.sh index 8262cd7eab5..65d0b3f434f 100755 --- a/tests/queries/0_stateless/02368_cancel_write_into_hdfs.sh +++ b/tests/queries/0_stateless/02368_cancel_write_into_hdfs.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-stress +# Tags: no-fasttest, no-asan, no-tsan, no-msan, no-ubsan, no-debug +# FIXME https://github.com/ClickHouse/ClickHouse/issues/47207 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02370_lost_part_intersecting_merges.reference b/tests/queries/0_stateless/02370_lost_part_intersecting_merges.reference new file mode 100644 index 00000000000..bc44a664c04 --- /dev/null +++ b/tests/queries/0_stateless/02370_lost_part_intersecting_merges.reference @@ -0,0 +1,8 @@ +1 0 all_0_0_0 +1 1 all_1_2_1 +1 2 all_1_2_1 +0 +3 0 all_0_3_2 +3 1 all_0_3_2 +3 2 all_0_3_2 +3 3 all_0_3_2 diff --git a/tests/queries/0_stateless/02370_lost_part_intersecting_merges.sh b/tests/queries/0_stateless/02370_lost_part_intersecting_merges.sh new file mode 100755 index 00000000000..bc297cbb963 --- /dev/null +++ b/tests/queries/0_stateless/02370_lost_part_intersecting_merges.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +# Tags: long, zookeeper + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists rmt1 sync;" +$CLICKHOUSE_CLIENT -q "drop table if exists rmt2 sync;" + +$CLICKHOUSE_CLIENT -q "create table rmt1 (n int) engine=ReplicatedMergeTree('/test/02369/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/{database}', '1') order by n + settings cleanup_delay_period=0, cleanup_delay_period_random_add=0, old_parts_lifetime=0" +$CLICKHOUSE_CLIENT -q "create table rmt2 (n int) engine=ReplicatedMergeTree('/test/02369/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/{database}', '2') order by n" + +$CLICKHOUSE_CLIENT -q "system stop replicated sends rmt2" +$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into rmt2 values (0);" + +$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into rmt1 values (1);" +$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into rmt1 values (2);" + +$CLICKHOUSE_CLIENT -q "system sync replica rmt1 pull;" + +# There's a stupid effect from "zero copy replication": +# MERGE_PARTS all_1_2_1 can be executed by rmt2 even if it was assigned by rmt1 +# After that, rmt1 will not be able to execute that merge and will only try to fetch the part from rmt2 +# But sends are stopped on rmt2...
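+# [Editor's note -- a hedged illustration, not part of the original test: while
+# sends are stopped, the entry stuck on the replica that is waiting for the
+# merged part can be observed in its replication queue, e.g.:
+#   $CLICKHOUSE_CLIENT -q "select type, new_part_name, num_tries, last_exception
+#     from system.replication_queue
+#     where database = currentDatabase() and table = 'rmt1'"
+# ]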
+ +(sleep 5 && $CLICKHOUSE_CLIENT -q "system start replicated sends rmt2") & + +$CLICKHOUSE_CLIENT --optimize_throw_if_noop=1 -q "optimize table rmt1;" +$CLICKHOUSE_CLIENT -q "system sync replica rmt1;" + +$CLICKHOUSE_CLIENT -q "select 1, *, _part from rmt1 order by n;" + +path=$($CLICKHOUSE_CLIENT -q "select path from system.parts where database='$CLICKHOUSE_DATABASE' and table='rmt1' and name='all_1_2_1'") +# ensure that path is absolute before removing +$CLICKHOUSE_CLIENT -q "select throwIf(substring('$path', 1, 1) != '/', 'Path is relative: $path')" || exit +rm -rf $path + +$CLICKHOUSE_CLIENT -q "select * from rmt1;" 2>&1 | grep LOGICAL_ERROR +$CLICKHOUSE_CLIENT --min_bytes_to_use_direct_io=1 --local_filesystem_read_method=pread_threadpool -q "select * from rmt1;" 2>&1 | grep LOGICAL_ERROR + +$CLICKHOUSE_CLIENT -q "select sleep(0.1) from numbers($(($RANDOM % 30))) settings max_block_size=1 format Null" + +$CLICKHOUSE_CLIENT -q "detach table rmt1;" +$CLICKHOUSE_CLIENT -q "attach table rmt1;" + +$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into rmt1 values (3);" +$CLICKHOUSE_CLIENT -q "system sync replica rmt1 pull;" +$CLICKHOUSE_CLIENT -q "optimize table rmt1 final;" + +$CLICKHOUSE_CLIENT -q "system sync replica rmt1;" +$CLICKHOUSE_CLIENT -q "select 3, *, _part from rmt1 order by n;" + +$CLICKHOUSE_CLIENT -q "drop table rmt1 sync;" +$CLICKHOUSE_CLIENT -q "drop table rmt2 sync;" diff --git a/tests/queries/0_stateless/02371_select_projection_normal_agg.sql b/tests/queries/0_stateless/02371_select_projection_normal_agg.sql index 283aec0b122..8650fb6b843 100644 --- a/tests/queries/0_stateless/02371_select_projection_normal_agg.sql +++ b/tests/queries/0_stateless/02371_select_projection_normal_agg.sql @@ -11,7 +11,8 @@ CREATE TABLE video_log ) ENGINE = MergeTree PARTITION BY toDate(datetime) -ORDER BY (user_id, device_id); +ORDER BY (user_id, device_id) +SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; DROP TABLE IF EXISTS rng; @@ -57,7 +58,8 @@ CREATE TABLE video_log_result ) ENGINE = MergeTree PARTITION BY toDate(hour) -ORDER BY sum_bytes; +ORDER BY sum_bytes +SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO video_log_result SELECT toStartOfHour(datetime) AS hour, diff --git a/tests/queries/0_stateless/02377_executable_function_settings.reference b/tests/queries/0_stateless/02377_executable_function_settings.reference index 5eef5774e14..edef1cca7a1 100644 --- a/tests/queries/0_stateless/02377_executable_function_settings.reference +++ b/tests/queries/0_stateless/02377_executable_function_settings.reference @@ -6,3 +6,4 @@ FROM executable(\'\', \'JSON\', \'data String\', SETTINGS max_command_execution_ -------------------- SELECT data FROM executable(\'\', \'JSON\', \'data String\', SETTINGS max_command_execution_time = 100, command_read_timeout = 1) +-------------------- diff --git a/tests/queries/0_stateless/02377_executable_function_settings.sql b/tests/queries/0_stateless/02377_executable_function_settings.sql index be60ad2d89b..ae0dc49c2fc 100644 --- a/tests/queries/0_stateless/02377_executable_function_settings.sql +++ b/tests/queries/0_stateless/02377_executable_function_settings.sql @@ -3,3 +3,7 @@ SELECT '--------------------'; EXPLAIN SYNTAX SELECT * from executable('', 'JSON', 'data String', SETTINGS max_command_execution_time=100); SELECT '--------------------'; EXPLAIN SYNTAX SELECT * from executable('', 'JSON', 'data String', SETTINGS max_command_execution_time=100, command_read_timeout=1); 
+SELECT '--------------------'; + +SELECT * from executable('JSON', 'data String', SETTINGS max_command_execution_time=100); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT * from executable('JSON', 'data String', 'TEST', 'TEST'); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.reference b/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.reference index 9d78707429e..69c325c21a9 100644 --- a/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.reference +++ b/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.reference @@ -5,6 +5,12 @@ Sorting (Sorting for ORDER BY) Sorting (Global): a ASC Sorting (None) Sorting (None) +-- QUERY (analyzer): set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a +Sorting (Global): a ASC +Sorting (Sorting for ORDER BY) +Sorting (Global): default.optimize_sorting.a_0 ASC +Sorting (None) +Sorting (None) -- disable optimization -> sorting order is NOT propagated from subquery -> full sort -- QUERY: set optimize_sorting_by_input_stream_properties=0;set query_plan_read_in_order=0;set max_threads=3;EXPLAIN PIPELINE SELECT a FROM (SELECT a FROM optimize_sorting) ORDER BY a MergingSortedTransform 3 → 1 @@ -27,12 +33,24 @@ Sorting (Sorting for ORDER BY) Sorting (Global): a ASC Sorting (Stream): a ASC Sorting (Stream): a ASC +-- QUERY (analyzer): set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a +Sorting (Global): a ASC +Sorting (Sorting for ORDER BY) +Sorting (Global): default.optimize_sorting.a_0 ASC +Sorting (Stream): default.optimize_sorting.a_0 ASC +Sorting (Stream): a ASC -- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a+1 Sorting (None) Sorting (Sorting for ORDER BY) Sorting (Global): plus(a, 1) ASC Sorting (Chunk): a ASC Sorting (Chunk): a ASC +-- QUERY (analyzer): set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a+1 +Sorting (None) +Sorting (Sorting for ORDER BY) +Sorting (Global): plus(default.optimize_sorting.a_0, 1_UInt8) ASC +Sorting (Chunk): default.optimize_sorting.a_0 ASC +Sorting (Chunk): a ASC -- ExpressionStep breaks sort mode -- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a+1 FROM optimize_sorting ORDER BY a+1 Sorting (Global): plus(a, 1) ASC @@ -40,28 +58,54 @@ Sorting (Sorting for ORDER BY) Sorting (Global): plus(a, 1) ASC Sorting (None) Sorting (Chunk): a ASC +-- QUERY (analyzer): set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a+1 FROM optimize_sorting ORDER BY a+1 +Sorting (Global): plus(a, 1) ASC +Sorting (Sorting for ORDER BY) +Sorting (Global): plus(default.optimize_sorting.a_0, 1_UInt8) ASC +Sorting (None) +Sorting (Chunk): a ASC -- FilterStep preserves sort mode -- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN 
actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting WHERE a > 0 Sorting (Chunk): a ASC Sorting (Chunk): a ASC Sorting (Chunk): a ASC +-- QUERY (analyzer): set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting WHERE a > 0 +Sorting (Chunk): a ASC +Sorting (Chunk): default.optimize_sorting.a_0 ASC +Sorting (Chunk): a ASC -- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting WHERE a+1 > 0 Sorting (Chunk): a ASC Sorting (Chunk): a ASC Sorting (Chunk): a ASC +-- QUERY (analyzer): set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting WHERE a+1 > 0 +Sorting (Chunk): a ASC +Sorting (Chunk): default.optimize_sorting.a_0 ASC +Sorting (Chunk): a ASC -- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, a+1 FROM optimize_sorting WHERE a+1 > 0 Sorting (Chunk): a ASC Sorting (Chunk): a ASC Sorting (Chunk): a ASC +-- QUERY (analyzer): set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, a+1 FROM optimize_sorting WHERE a+1 > 0 +Sorting (Chunk): a ASC +Sorting (Chunk): default.optimize_sorting.a_0 ASC +Sorting (Chunk): a ASC -- FilterStep breaks sort mode -- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a > 0 FROM optimize_sorting WHERE a > 0 Sorting (None) Sorting (None) Sorting (Chunk): a ASC +-- QUERY (analyzer): set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a > 0 FROM optimize_sorting WHERE a > 0 +Sorting (None) +Sorting (None) +Sorting (Chunk): a ASC -- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a+1 FROM optimize_sorting WHERE a+1 > 0 Sorting (None) Sorting (None) Sorting (Chunk): a ASC +-- QUERY (analyzer): set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a+1 FROM optimize_sorting WHERE a+1 > 0 +Sorting (None) +Sorting (None) +Sorting (Chunk): a ASC -- aliases break sorting order -- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM (SELECT sipHash64(a) AS a FROM (SELECT a FROM optimize_sorting ORDER BY a)) ORDER BY a Sorting (Global): a ASC @@ -72,12 +116,27 @@ Sorting (Sorting for ORDER BY) Sorting (Global): a ASC Sorting (Stream): a ASC Sorting (Stream): a ASC +-- QUERY (analyzer): set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM (SELECT sipHash64(a) AS a FROM (SELECT a FROM optimize_sorting ORDER BY a)) ORDER BY a +Sorting (Global): a ASC +Sorting (Sorting for ORDER BY) +Sorting (Global): a_0 ASC +Sorting (None) +Sorting (Sorting for ORDER BY) +Sorting (Global): default.optimize_sorting.a_2 ASC +Sorting (Stream): default.optimize_sorting.a_2 ASC +Sorting (Stream): a ASC -- aliases DONT 
break sorting order -- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, b FROM (SELECT x AS a, y AS b FROM (SELECT a AS x, b AS y FROM optimize_sorting) ORDER BY x, y) -Sorting (Global): x ASC, y ASC +Sorting (Global): a ASC, b ASC Sorting (Sorting for ORDER BY) Sorting (Global): x ASC, y ASC +Sorting (Stream): x ASC, y ASC Sorting (Stream): a ASC, b ASC +-- QUERY (analyzer): set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, b FROM (SELECT x AS a, y AS b FROM (SELECT a AS x, b AS y FROM optimize_sorting) ORDER BY x, y) +Sorting (Global): a ASC, b ASC +Sorting (Sorting for ORDER BY) +Sorting (Global): x_2 ASC, y_3 ASC +Sorting (Stream): x_2 ASC, y_3 ASC Sorting (Stream): a ASC, b ASC -- actions chain breaks sorting order: input(column a)->sipHash64(column a)->alias(sipHash64(column a), a)->plus(alias a, 1) -- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, z FROM (SELECT sipHash64(a) AS a, a + 1 AS z FROM (SELECT a FROM optimize_sorting ORDER BY a + 1)) ORDER BY a + 1 @@ -89,6 +148,15 @@ Sorting (Sorting for ORDER BY) Sorting (Global): plus(a, 1) ASC Sorting (Chunk): a ASC Sorting (Chunk): a ASC +-- QUERY (analyzer): set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, z FROM (SELECT sipHash64(a) AS a, a + 1 AS z FROM (SELECT a FROM optimize_sorting ORDER BY a + 1)) ORDER BY a + 1 +Sorting (None) +Sorting (Sorting for ORDER BY) +Sorting (Global): plus(a_0, 1_UInt8) ASC +Sorting (Global): plus(default.optimize_sorting.a_3, 1_UInt8) ASC +Sorting (Sorting for ORDER BY) +Sorting (Global): plus(default.optimize_sorting.a_3, 1_UInt8) ASC +Sorting (Chunk): default.optimize_sorting.a_3 ASC +Sorting (Chunk): a ASC -- check that correct sorting info is provided in case of only prefix of sorting key is in ORDER BY clause but all sorting key columns returned by query -- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN sorting=1 SELECT a, b FROM optimize_sorting ORDER BY a Sorting (Global): a ASC @@ -96,3 +164,9 @@ Sorting (Sorting for ORDER BY) Sorting (Global): a ASC Sorting (Stream): a ASC Sorting (Stream): a ASC +-- QUERY (analyzer): set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN sorting=1 SELECT a, b FROM optimize_sorting ORDER BY a +Sorting (Global): a ASC +Sorting (Sorting for ORDER BY) +Sorting (Global): default.optimize_sorting.a_0 ASC +Sorting (Stream): default.optimize_sorting.a_0 ASC +Sorting (Stream): a ASC diff --git a/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.sh b/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.sh index 0678ff63e3f..7e937ac42b6 100755 --- a/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.sh +++ b/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.sh @@ -17,9 +17,12 @@ function explain_sorting { echo "-- QUERY: "$1 $CLICKHOUSE_CLIENT -nq "$1" | eval $FIND_SORTING } + function explain_sortmode { echo "-- QUERY: "$1 - $CLICKHOUSE_CLIENT -nq "$1" | eval $FIND_SORTMODE + $CLICKHOUSE_CLIENT --allow_experimental_analyzer=0 -nq "$1" | eval 
$FIND_SORTMODE + echo "-- QUERY (analyzer): "$1 + $CLICKHOUSE_CLIENT --allow_experimental_analyzer=1 -nq "$1" | eval $FIND_SORTMODE } $CLICKHOUSE_CLIENT -q "drop table if exists optimize_sorting sync" @@ -62,10 +65,6 @@ explain_sortmode "$MAKE_OUTPUT_STABLE;EXPLAIN PLAN actions=1, header=1, sorting= echo "-- aliases break sorting order" explain_sortmode "$MAKE_OUTPUT_STABLE;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM (SELECT sipHash64(a) AS a FROM (SELECT a FROM optimize_sorting ORDER BY a)) ORDER BY a" -# FIXME: we still do full sort here, - it's because, for most inner subqueury, sorting description contains original column names but header contains only aliases on those columns: -#| Header: x Int32 │ -#│ y Int32 │ -#│ Sort Mode: Chunk: a ASC, b ASC │ echo "-- aliases DONT break sorting order" explain_sortmode "$MAKE_OUTPUT_STABLE;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, b FROM (SELECT x AS a, y AS b FROM (SELECT a AS x, b AS y FROM optimize_sorting) ORDER BY x, y)" diff --git a/tests/queries/0_stateless/02378_analyzer_projection_names.reference b/tests/queries/0_stateless/02378_analyzer_projection_names.reference index 9e72fe0d100..a82d4d4c5d2 100644 --- a/tests/queries/0_stateless/02378_analyzer_projection_names.reference +++ b/tests/queries/0_stateless/02378_analyzer_projection_names.reference @@ -136,7 +136,7 @@ plus(id, id) UInt64 SELECT '--'; -- DESCRIBE (SELECT test_table.* REPLACE id + (id AS id_alias) AS id, id_alias FROM test_table); -plus(id, id_alias) UInt64 +id UInt64 value String id_alias UInt64 SELECT 'Matcher'; diff --git a/tests/queries/0_stateless/02378_part_log_profile_events_replicated.sql b/tests/queries/0_stateless/02378_part_log_profile_events_replicated.sql index d61b680bb87..4f52740c498 100644 --- a/tests/queries/0_stateless/02378_part_log_profile_events_replicated.sql +++ b/tests/queries/0_stateless/02378_part_log_profile_events_replicated.sql @@ -1,8 +1,8 @@ -- Tags: long, replica, no-replicated-database, no-parallel -DROP TABLE IF EXISTS part_log_profile_events_r1 NO DELAY; -DROP TABLE IF EXISTS part_log_profile_events_r2 NO DELAY; +DROP TABLE IF EXISTS part_log_profile_events_r1 SYNC; +DROP TABLE IF EXISTS part_log_profile_events_r2 SYNC; CREATE TABLE part_log_profile_events_r1 (x UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_02378/part_log_profile_events', 'r1') @@ -36,5 +36,5 @@ WHERE event_time > now() - INTERVAL 10 MINUTE AND event_type == 'DownloadPart' ; -DROP TABLE part_log_profile_events_r1 NO DELAY; -DROP TABLE part_log_profile_events_r2 NO DELAY; +DROP TABLE part_log_profile_events_r1 SYNC; +DROP TABLE part_log_profile_events_r2 SYNC; diff --git a/tests/queries/0_stateless/02381_join_dup_columns_in_plan.reference b/tests/queries/0_stateless/02381_join_dup_columns_in_plan.reference index bbf288c45d7..31a37862663 100644 --- a/tests/queries/0_stateless/02381_join_dup_columns_in_plan.reference +++ b/tests/queries/0_stateless/02381_join_dup_columns_in_plan.reference @@ -2,51 +2,51 @@ Expression Header: key String value String Join - Header: key String - value String + Header: s1.key_0 String + s2.value_1 String Expression - Header: key String + Header: s1.key_0 String ReadFromStorage Header: dummy UInt8 Union - Header: s2.key String - value String + Header: s2.key_2 String + s2.value_1 String Expression - Header: s2.key String - value String + Header: s2.key_2 String + s2.value_1 String ReadFromStorage Header: dummy UInt8 Expression - Header: s2.key String - value String + Header: s2.key_2 String + 
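#
# A minimal sketch of the two-pass pattern used in explain_sortmode above: run
# the same EXPLAIN once with the old name resolution and once with the new
# analyzer, so the .reference file records both outputs (the analyzer variant
# prints fully qualified names such as default.optimize_sorting.a_0). Assumes
# shell_config.sh has been sourced so that $CLICKHOUSE_CLIENT is defined; the
# function name and the query below are illustrative.
function explain_with_both_analyzers()
{
    local query="$1"
    for analyzer in 0 1; do
        echo "-- analyzer=$analyzer: $query"
        $CLICKHOUSE_CLIENT --allow_experimental_analyzer="$analyzer" -nq "$query"
    done
}
# explain_with_both_analyzers "EXPLAIN PLAN sorting=1 SELECT a FROM optimize_sorting ORDER BY a"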
s2.value_1 String ReadFromStorage Header: dummy UInt8 Expression Header: key String value String Join - Header: key String - s2.key String - value String + Header: s1.key_0 String + s2.key_2 String + s2.value_1 String Sorting - Header: key String + Header: s1.key_0 String Expression - Header: key String + Header: s1.key_0 String ReadFromStorage Header: dummy UInt8 Sorting - Header: s2.key String - value String + Header: s2.key_2 String + s2.value_1 String Union - Header: s2.key String - value String + Header: s2.key_2 String + s2.value_1 String Expression - Header: s2.key String - value String + Header: s2.key_2 String + s2.value_1 String ReadFromStorage Header: dummy UInt8 Expression - Header: s2.key String - value String + Header: s2.key_2 String + s2.value_1 String ReadFromStorage Header: dummy UInt8 diff --git a/tests/queries/0_stateless/02381_join_dup_columns_in_plan.sql b/tests/queries/0_stateless/02381_join_dup_columns_in_plan.sql index 4ed6d965292..dfcd8c12e11 100644 --- a/tests/queries/0_stateless/02381_join_dup_columns_in_plan.sql +++ b/tests/queries/0_stateless/02381_join_dup_columns_in_plan.sql @@ -1,3 +1,4 @@ +SET allow_experimental_analyzer = 1; SET join_algorithm = 'hash'; EXPLAIN actions=0, description=0, header=1 diff --git a/tests/queries/0_stateless/02382_filesystem_cache_persistent_files.reference b/tests/queries/0_stateless/02382_filesystem_cache_persistent_files.reference index 083f0f69dc8..e77afc98007 100644 --- a/tests/queries/0_stateless/02382_filesystem_cache_persistent_files.reference +++ b/tests/queries/0_stateless/02382_filesystem_cache_persistent_files.reference @@ -8,7 +8,7 @@ SYSTEM STOP MERGES nopers; INSERT INTO nopers SELECT number, toString(number) FROM numbers(10); SELECT * FROM nopers FORMAT Null; SELECT sum(size) FROM system.filesystem_cache; -194 +195 SELECT extract(local_path, '.*/([\w.]+)') as file, extract(cache_path, '.*/([\w.]+)') as cache, size FROM ( @@ -21,17 +21,18 @@ ON data_paths.cache_path = caches.cache_path ORDER BY file, cache, size; data.bin 0 114 data.mrk3 0 80 +format_version.txt 0 1 DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_small', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; SYSTEM STOP MERGES test; INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; SELECT sum(size) FROM system.filesystem_cache; -1020 +1021 SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; -4 +5 SELECT count() FROM system.filesystem_cache; -4 +5 SELECT extract(local_path, '.*/([\w.]+)') as file, extract(cache_path, '.*/([\w.]+)') as cache, size FROM ( @@ -46,17 +47,18 @@ data.bin 0 114 data.bin 0 746 data.mrk3 0 80 data.mrk3 0_persistent 80 +format_version.txt 0 1 DROP TABLE IF EXISTS test2; CREATE TABLE test2 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_small', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; SYSTEM STOP MERGES test2; INSERT INTO test2 SELECT number, toString(number) FROM numbers(100000); SELECT * FROM test2 FORMAT Null; SELECT sum(size) FROM system.filesystem_cache; -794 +795 SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS 
data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; -4 +5 SELECT count() FROM system.filesystem_cache; -4 +5 SELECT extract(local_path, '.*/([\w.]+)') as file, extract(cache_path, '.*/([\w.]+)') as cache, size FROM ( @@ -71,6 +73,7 @@ data.bin 0 114 data.mrk3 0 80 data.mrk3 0_persistent 80 data.mrk3 0_persistent 520 +format_version.txt 0 1 DROP TABLE test; DROP TABLE test2; DROP TABLE nopers; diff --git a/tests/queries/0_stateless/02383_arrow_dict_special_cases.sh b/tests/queries/0_stateless/02383_arrow_dict_special_cases.sh index 86f95873f14..40487f16551 100755 --- a/tests/queries/0_stateless/02383_arrow_dict_special_cases.sh +++ b/tests/queries/0_stateless/02383_arrow_dict_special_cases.sh @@ -6,24 +6,24 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +UNIQ_DEST_PATH=$USER_FILES_PATH/test-02383-$RANDOM-$RANDOM +mkdir -p $UNIQ_DEST_PATH -mkdir -p $USER_FILES_PATH/test_02383 -cp $CURDIR/data_arrow/dictionary*.arrow $USER_FILES_PATH/test_02383/ -cp $CURDIR/data_arrow/corrupted.arrow $USER_FILES_PATH/test_02383/ -cp $CURDIR/data_arrow/dict_with_nulls.arrow $USER_FILES_PATH/test_02383/ +cp $CURDIR/data_arrow/dictionary*.arrow $UNIQ_DEST_PATH/ +cp $CURDIR/data_arrow/corrupted.arrow $UNIQ_DEST_PATH/ +cp $CURDIR/data_arrow/dict_with_nulls.arrow $UNIQ_DEST_PATH/ -$CLICKHOUSE_CLIENT -q "desc file('test_02383/dictionary1.arrow')" -$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dictionary1.arrow')" -$CLICKHOUSE_CLIENT -q "desc file('test_02383/dictionary2.arrow')" -$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dictionary2.arrow')" -$CLICKHOUSE_CLIENT -q "desc file('test_02383/dictionary3.arrow')" -$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dictionary3.arrow')" +$CLICKHOUSE_CLIENT -q "desc file('$UNIQ_DEST_PATH/dictionary1.arrow')" +$CLICKHOUSE_CLIENT -q "select * from file('$UNIQ_DEST_PATH/dictionary1.arrow') settings max_threads=1" +$CLICKHOUSE_CLIENT -q "desc file('$UNIQ_DEST_PATH/dictionary2.arrow')" +$CLICKHOUSE_CLIENT -q "select * from file('$UNIQ_DEST_PATH/dictionary2.arrow') settings max_threads=1" +$CLICKHOUSE_CLIENT -q "desc file('$UNIQ_DEST_PATH/dictionary3.arrow')" +$CLICKHOUSE_CLIENT -q "select * from file('$UNIQ_DEST_PATH/dictionary3.arrow') settings max_threads=1" -$CLICKHOUSE_CLIENT -q "desc file('test_02383/corrupted.arrow')" -$CLICKHOUSE_CLIENT -q "select * from file('test_02383/corrupted.arrow')" 2>&1 | grep -F -q "INCORRECT_DATA" && echo OK || echo FAIL +$CLICKHOUSE_CLIENT -q "desc file('$UNIQ_DEST_PATH/corrupted.arrow')" +$CLICKHOUSE_CLIENT -q "select * from file('$UNIQ_DEST_PATH/corrupted.arrow')" 2>&1 | grep -F -q "INCORRECT_DATA" && echo OK || echo FAIL -$CLICKHOUSE_CLIENT -q "desc file('test_02383/dict_with_nulls.arrow')" -$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dict_with_nulls.arrow')" +$CLICKHOUSE_CLIENT -q "desc file('$UNIQ_DEST_PATH/dict_with_nulls.arrow')" +$CLICKHOUSE_CLIENT -q "select * from file('$UNIQ_DEST_PATH/dict_with_nulls.arrow') settings max_threads=1" - -rm -rf $USER_FILES_PATH/test_02383 +rm -rf $UNIQ_DEST_PATH diff --git a/tests/queries/0_stateless/02402_external_disk_mertrics.sql b/tests/queries/0_stateless/02402_external_disk_mertrics.sql index b675c05f45c..e9696eb7122 100644 --- a/tests/queries/0_stateless/02402_external_disk_mertrics.sql +++ 
b/tests/queries/0_stateless/02402_external_disk_mertrics.sql @@ -20,7 +20,8 @@ SET join_algorithm = 'partial_merge'; SET default_max_bytes_in_join = 0; SET max_bytes_in_join = 10000000; -SELECT number * 200000 as n, j * 2097152 FROM numbers(5) nums +SELECT n, j * 2097152 FROM +(SELECT number * 200000 as n FROM numbers(5)) nums ANY LEFT JOIN ( SELECT number * 2 AS n, number AS j FROM numbers(1000000) ) js2 USING n ORDER BY n diff --git a/tests/queries/0_stateless/02402_merge_engine_with_view.sql b/tests/queries/0_stateless/02402_merge_engine_with_view.sql index 64822784845..ae9de1426e7 100644 --- a/tests/queries/0_stateless/02402_merge_engine_with_view.sql +++ b/tests/queries/0_stateless/02402_merge_engine_with_view.sql @@ -1,7 +1,7 @@ -- #40014 -CREATE TABLE m0 (id UInt64) ENGINE=MergeTree ORDER BY id SETTINGS index_granularity = 1; +CREATE TABLE m0 (id UInt64) ENGINE=MergeTree ORDER BY id SETTINGS index_granularity = 1, ratio_of_defaults_for_sparse_serialization = 1.0; INSERT INTO m0 SELECT number FROM numbers(10); -CREATE TABLE m1 (id UInt64, s String) ENGINE=MergeTree ORDER BY id SETTINGS index_granularity = 1; +CREATE TABLE m1 (id UInt64, s String) ENGINE=MergeTree ORDER BY id SETTINGS index_granularity = 1, ratio_of_defaults_for_sparse_serialization = 1.0; INSERT INTO m1 SELECT number, 'boo' FROM numbers(10); CREATE VIEW m1v AS SELECT id FROM m1; diff --git a/tests/queries/0_stateless/02403_big_http_chunk_size.python b/tests/queries/0_stateless/02403_big_http_chunk_size.python index 4e2e97e487b..4d2f01db55b 100644 --- a/tests/queries/0_stateless/02403_big_http_chunk_size.python +++ b/tests/queries/0_stateless/02403_big_http_chunk_size.python @@ -8,8 +8,8 @@ TRANSFER_ENCODING_HEADER = "Transfer-Encoding" def main(): - host = os.environ['CLICKHOUSE_HOST'] - port = int(os.environ['CLICKHOUSE_PORT_HTTP']) + host = os.environ["CLICKHOUSE_HOST"] + port = int(os.environ["CLICKHOUSE_PORT_HTTP"]) sock = socket(AF_INET, SOCK_STREAM) sock.connect((host, port)) @@ -47,4 +47,3 @@ def main(): if __name__ == "__main__": main() - diff --git a/tests/queries/0_stateless/02404_memory_bound_merging.reference b/tests/queries/0_stateless/02404_memory_bound_merging.reference index f774abe834d..d9fac433189 100644 --- a/tests/queries/0_stateless/02404_memory_bound_merging.reference +++ b/tests/queries/0_stateless/02404_memory_bound_merging.reference @@ -113,21 +113,13 @@ ExpressionTransform (Expression) ExpressionTransform × 4 (MergingAggregated) - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - FinishAggregatingInOrderTransform 3 → 1 - (Union) - (Aggregating) - SortingAggregatedForMemoryBoundMergingTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - FinishAggregatingInOrderTransform 4 → 1 - AggregatingInOrderTransform × 4 - (Expression) - ExpressionTransform × 4 - (ReadFromMergeTree) - MergeTreeInOrder × 4 0 → 1 - (ReadFromRemoteParallelReplicas) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 3 → 1 + (Union) + (ReadFromRemoteParallelReplicas) select a, count() from pr_t group by a order by a limit 5 offset 500; 500 1000 501 1000 diff --git a/tests/queries/0_stateless/02404_memory_bound_merging.sql b/tests/queries/0_stateless/02404_memory_bound_merging.sql index b6299de9aae..23c29ca8d93 100644 --- a/tests/queries/0_stateless/02404_memory_bound_merging.sql +++ b/tests/queries/0_stateless/02404_memory_bound_merging.sql @@ -61,6 +61,7 @@ create table pr_t(a UInt64, b UInt64) engine=MergeTree order by 
a; insert into pr_t select number % 1000, number % 1000 from numbers_mt(1e6); set allow_experimental_parallel_reading_from_replicas = 1; +set parallel_replicas_for_non_replicated_merge_tree = 1; set max_parallel_replicas = 3; set use_hedged_requests = 0; set cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost'; diff --git a/tests/queries/0_stateless/02410_inmemory_wal_cleanup.reference b/tests/queries/0_stateless/02410_inmemory_wal_cleanup.reference deleted file mode 100644 index 6727d83a6f4..00000000000 --- a/tests/queries/0_stateless/02410_inmemory_wal_cleanup.reference +++ /dev/null @@ -1,35 +0,0 @@ --- { echo } - -DROP TABLE IF EXISTS in_memory; -CREATE TABLE in_memory (a UInt32) ENGINE = MergeTree ORDER BY a SETTINGS min_rows_for_compact_part = 1000, min_bytes_for_wide_part = 10485760; -INSERT INTO in_memory VALUES (1); -INSERT INTO in_memory VALUES (2); -SELECT name, active, part_type FROM system.parts WHERE database = currentDatabase() AND table = 'in_memory'; -all_1_1_0 1 InMemory -all_2_2_0 1 InMemory -SELECT * FROM in_memory ORDER BY a; -1 -2 --- no WAL remove since parts are still in use -DETACH TABLE in_memory; -ATTACH TABLE in_memory; -SELECT name, active, part_type FROM system.parts WHERE database = currentDatabase() AND table = 'in_memory'; -all_1_1_0 1 InMemory -all_2_2_0 1 InMemory -SELECT * FROM in_memory ORDER BY a; -1 -2 --- WAL should be removed, since on disk part covers all parts in WAL -OPTIMIZE TABLE in_memory; -DETACH TABLE in_memory; -ATTACH TABLE in_memory; -SELECT name, active, part_type FROM system.parts WHERE database = currentDatabase() AND table = 'in_memory'; -all_1_2_1 1 Compact --- check that the WAL will be reinitialized after remove -INSERT INTO in_memory VALUES (3); -DETACH TABLE in_memory; -ATTACH TABLE in_memory; -SELECT * FROM in_memory ORDER BY a; -1 -2 -3 diff --git a/tests/queries/0_stateless/02410_inmemory_wal_cleanup.sql b/tests/queries/0_stateless/02410_inmemory_wal_cleanup.sql deleted file mode 100644 index 7f832d980ba..00000000000 --- a/tests/queries/0_stateless/02410_inmemory_wal_cleanup.sql +++ /dev/null @@ -1,29 +0,0 @@ --- Tags: no-s3-storage - --- { echo } - -DROP TABLE IF EXISTS in_memory; - -CREATE TABLE in_memory (a UInt32) ENGINE = MergeTree ORDER BY a SETTINGS min_rows_for_compact_part = 1000, min_bytes_for_wide_part = 10485760; -INSERT INTO in_memory VALUES (1); -INSERT INTO in_memory VALUES (2); -SELECT name, active, part_type FROM system.parts WHERE database = currentDatabase() AND table = 'in_memory'; -SELECT * FROM in_memory ORDER BY a; - --- no WAL remove since parts are still in use -DETACH TABLE in_memory; -ATTACH TABLE in_memory; -SELECT name, active, part_type FROM system.parts WHERE database = currentDatabase() AND table = 'in_memory'; -SELECT * FROM in_memory ORDER BY a; - --- WAL should be removed, since on disk part covers all parts in WAL -OPTIMIZE TABLE in_memory; -DETACH TABLE in_memory; -ATTACH TABLE in_memory; -SELECT name, active, part_type FROM system.parts WHERE database = currentDatabase() AND table = 'in_memory'; - --- check that the WAL will be reinitialized after remove -INSERT INTO in_memory VALUES (3); -DETACH TABLE in_memory; -ATTACH TABLE in_memory; -SELECT * FROM in_memory ORDER BY a; diff --git a/tests/queries/0_stateless/02414_all_new_table_functions_must_be_documented.sql b/tests/queries/0_stateless/02414_all_new_table_functions_must_be_documented.sql index a73993f6a5a..ef339b760aa 100644 --- 
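#
# A sketch of the minimal settings block the memory-bound-merging test above
# now relies on: parallel_replicas_for_non_replicated_merge_tree is required
# because pr_t is a plain (non-replicated) MergeTree table. All setting and
# cluster names are taken from the test itself; treat this as illustrative.
$CLICKHOUSE_CLIENT -nq "
    SET allow_experimental_parallel_reading_from_replicas = 1;
    SET parallel_replicas_for_non_replicated_merge_tree = 1;
    SET max_parallel_replicas = 3;
    SET use_hedged_requests = 0;
    SET cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost';
    SELECT a, count() FROM pr_t GROUP BY a ORDER BY a LIMIT 5 OFFSET 500;
"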
a/tests/queries/0_stateless/02414_all_new_table_functions_must_be_documented.sql +++ b/tests/queries/0_stateless/02414_all_new_table_functions_must_be_documented.sql @@ -2,5 +2,5 @@ -- Please help shorten this list down to zero elements. SELECT name FROM system.table_functions WHERE length(description) < 10 AND name NOT IN ( - 'cosn', 'oss', 'hdfs', 'hdfsCluster', 'hive', 'mysql', 'postgresql', 's3', 's3Cluster', 'sqlite' -- these functions are not enabled in fast test + 'cosn', 'oss', 'hdfs', 'hdfsCluster', 'hive', 'mysql', 'postgresql', 's3', 's3Cluster', 'sqlite', 'urlCluster' -- these functions are not enabled in fast test ) ORDER BY name; diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index e41249af54c..7ab26982402 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -97,6 +97,7 @@ arrayCumSum arrayCumSumNonNegative arrayDifference arrayDistinct +arrayDotProduct arrayElement arrayEnumerate arrayEnumerateDense @@ -389,6 +390,7 @@ javaHashUTF16LE joinGet joinGetOrNull jumpConsistentHash +kafkaMurmurHash kostikConsistentHash lcm least @@ -416,16 +418,10 @@ makeDateTime makeDateTime64 map mapAdd -mapApply -mapContains -mapContainsKeyLike -mapExtractKeyLike -mapFilter -mapKeys +mapFromArrays mapPopulateSeries mapSubtract mapUpdate -mapValues match materialize max2 @@ -512,6 +508,7 @@ nullIf nullIn nullInIgnoreSet or +parseDateTime parseDateTime32BestEffort parseDateTime32BestEffortOrNull parseDateTime32BestEffortOrZero @@ -527,6 +524,11 @@ parseDateTimeBestEffortOrZero parseDateTimeBestEffortUS parseDateTimeBestEffortUSOrNull parseDateTimeBestEffortUSOrZero +parseDateTimeInJodaSyntax +parseDateTimeInJodaSyntaxOrNull +parseDateTimeInJodaSyntaxOrZero +parseDateTimeOrNull +parseDateTimeOrZero parseTimeDelta partitionId path @@ -653,6 +655,7 @@ sleep sleepEachRow snowflakeToDateTime snowflakeToDateTime64 +space splitByChar splitByNonAlpha splitByRegexp diff --git a/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.reference b/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.reference index dde07d4540d..98827438920 100644 --- a/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.reference +++ b/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.reference @@ -1,8 +1,18 @@ -{"operation_name":"void DB::DistributedSink::writeToLocal(const Cluster::ShardInfo &, const DB::Block &, size_t)","cluster":"test_cluster_two_shards_localhost","shard":"1","rows":"1","bytes":"8"} -{"operation_name":"void DB::DistributedSink::writeToLocal(const Cluster::ShardInfo &, const DB::Block &, size_t)","cluster":"test_cluster_two_shards_localhost","shard":"2","rows":"1","bytes":"8"} -{"operation_name":"void DB::StorageDistributedDirectoryMonitor::processFile(const std::string &)","cluster":"test_cluster_two_shards_localhost","shard":"1","rows":"1","bytes":"8"} -{"operation_name":"void DB::StorageDistributedDirectoryMonitor::processFile(const std::string &)","cluster":"test_cluster_two_shards_localhost","shard":"2","rows":"1","bytes":"8"} -{"operation_name":"auto DB::DistributedSink::runWritingJob(DB::DistributedSink::JobReplica &, const DB::Block &, size_t)::(anonymous class)::operator()() const","cluster":"test_cluster_two_shards_localhost","shard":"1","rows":"1","bytes":"8"} 
-{"operation_name":"auto DB::DistributedSink::runWritingJob(DB::DistributedSink::JobReplica &, const DB::Block &, size_t)::(anonymous class)::operator()() const","cluster":"test_cluster_two_shards_localhost","shard":"2","rows":"1","bytes":"8"} -{"operation_name":"auto DB::DistributedSink::runWritingJob(DB::DistributedSink::JobReplica &, const DB::Block &, size_t)::(anonymous class)::operator()() const","cluster":"test_cluster_two_shards_localhost","shard":"1","rows":"1","bytes":"8"} -{"operation_name":"auto DB::DistributedSink::runWritingJob(DB::DistributedSink::JobReplica &, const DB::Block &, size_t)::(anonymous class)::operator()() const","cluster":"test_cluster_two_shards_localhost","shard":"2","rows":"1","bytes":"8"} +===1=== +{"operation_name":"void DB::DistributedSink::writeToLocal(const Cluster::ShardInfo &, const Block &, size_t)","cluster":"test_cluster_two_shards_localhost","shard":"1","rows":"1","bytes":"8"} +{"operation_name":"void DB::DistributedSink::writeToLocal(const Cluster::ShardInfo &, const Block &, size_t)","cluster":"test_cluster_two_shards_localhost","shard":"2","rows":"1","bytes":"8"} +1 +===2=== +{"operation_name":"void DB::DistributedAsyncInsertDirectoryQueue::processFile(const std::string &)","cluster":"test_cluster_two_shards_localhost","shard":"1","rows":"1","bytes":"8"} +{"operation_name":"void DB::DistributedAsyncInsertDirectoryQueue::processFile(const std::string &)","cluster":"test_cluster_two_shards_localhost","shard":"2","rows":"1","bytes":"8"} +3 +2 +===3=== +{"operation_name":"auto DB::DistributedSink::runWritingJob(JobReplica &, const Block &, size_t)::(anonymous class)::operator()() const","cluster":"test_cluster_two_shards_localhost","shard":"1","rows":"1","bytes":"8"} +{"operation_name":"auto DB::DistributedSink::runWritingJob(JobReplica &, const Block &, size_t)::(anonymous class)::operator()() const","cluster":"test_cluster_two_shards_localhost","shard":"2","rows":"1","bytes":"8"} +1 +===4=== +{"operation_name":"auto DB::DistributedSink::runWritingJob(JobReplica &, const Block &, size_t)::(anonymous class)::operator()() const","cluster":"test_cluster_two_shards_localhost","shard":"1","rows":"1","bytes":"8"} +{"operation_name":"auto DB::DistributedSink::runWritingJob(JobReplica &, const Block &, size_t)::(anonymous class)::operator()() const","cluster":"test_cluster_two_shards_localhost","shard":"2","rows":"1","bytes":"8"} +3 +2 diff --git a/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.sh b/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.sh index 9ac5f061d4a..edc3d06e5bf 100755 --- a/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.sh +++ b/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.sh @@ -20,6 +20,7 @@ function insert() -H "tracestate: $4" \ "${CLICKHOUSE_URL}" \ --data @- + ${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH DISTRIBUTED ${CLICKHOUSE_DATABASE}.dist_opentelemetry" } function check_span() @@ -42,6 +43,22 @@ ${CLICKHOUSE_CLIENT} -nq " ;" } +# +# $1 - OpenTelemetry Trace Id +# $2 - value of insert_distributed_sync +function check_span_kind() +{ +${CLICKHOUSE_CLIENT} -nq " + SYSTEM FLUSH LOGS; + + SELECT count() + FROM system.opentelemetry_span_log + WHERE finish_date >= yesterday() + AND lower(hex(trace_id)) = '${1}' + AND kind = '${2}' + ;" +} + # # Prepare tables for tests @@ -57,30 +74,46 @@ CREATE TABLE ${CLICKHOUSE_DATABASE}.local_opentelemetry (key UInt64) Engine=Merg # # test1 # +echo "===1===" trace_id=$(${CLICKHOUSE_CLIENT} -q 
"select lower(hex(generateUUIDv4()))"); insert $trace_id 0 1 "async-insert-writeToLocal" check_span $trace_id +# 1 HTTP SERVER spans +check_span_kind $trace_id 'SERVER' # # test2 # +echo "===2===" trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); insert $trace_id 0 0 "async-insert-writeToRemote" check_span $trace_id +# 3 SERVER spans, 1 for HTTP, 2 for TCP +check_span_kind $trace_id 'SERVER' +# 2 CLIENT spans +check_span_kind $trace_id 'CLIENT' # # test3 # trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); insert $trace_id 1 1 "sync-insert-writeToLocal" +echo "===3===" check_span $trace_id +# 1 HTTP SERVER spans +check_span_kind $trace_id 'SERVER' # # test4 # +echo "===4===" trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); insert $trace_id 1 0 "sync-insert-writeToRemote" check_span $trace_id +# 3 SERVER spans, 1 for HTTP, 2 for TCP +check_span_kind $trace_id 'SERVER' +# 2 CLIENT spans +check_span_kind $trace_id 'CLIENT' # # Cleanup diff --git a/tests/queries/0_stateless/02420_final_setting_analyzer.reference b/tests/queries/0_stateless/02420_final_setting_analyzer.reference index ee7c2541bcf..9a03c484765 100644 --- a/tests/queries/0_stateless/02420_final_setting_analyzer.reference +++ b/tests/queries/0_stateless/02420_final_setting_analyzer.reference @@ -108,9 +108,6 @@ select left_table.id,val_left, val_middle, val_right from left_table ORDER BY left_table.id, val_left, val_middle, val_right; 1 c a c 1 c b c --- no distributed tests because it is not currently supported: --- JOIN with remote storages is unsupported. - -- Quite exotic with Merge engine DROP TABLE IF EXISTS table_to_merge_a; DROP TABLE IF EXISTS table_to_merge_b; diff --git a/tests/queries/0_stateless/02420_final_setting_analyzer.sql b/tests/queries/0_stateless/02420_final_setting_analyzer.sql index 5937e536239..14c832cfaf5 100644 --- a/tests/queries/0_stateless/02420_final_setting_analyzer.sql +++ b/tests/queries/0_stateless/02420_final_setting_analyzer.sql @@ -79,9 +79,6 @@ select left_table.id,val_left, val_middle, val_right from left_table inner join (SELECT * FROM right_table WHERE id = 1) r on middle_table.id = r.id ORDER BY left_table.id, val_left, val_middle, val_right; --- no distributed tests because it is not currently supported: --- JOIN with remote storages is unsupported. 
- -- Quite exotic with Merge engine DROP TABLE IF EXISTS table_to_merge_a; DROP TABLE IF EXISTS table_to_merge_b; diff --git a/tests/queries/0_stateless/02421_decimal_in_precision_issue_41125.reference b/tests/queries/0_stateless/02421_decimal_in_precision_issue_41125.reference index d3d171221e8..71c9a23879f 100644 --- a/tests/queries/0_stateless/02421_decimal_in_precision_issue_41125.reference +++ b/tests/queries/0_stateless/02421_decimal_in_precision_issue_41125.reference @@ -8,3 +8,6 @@ 1 1 1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02421_decimal_in_precision_issue_41125.sql b/tests/queries/0_stateless/02421_decimal_in_precision_issue_41125.sql index f5978a34061..f5d182be3e3 100644 --- a/tests/queries/0_stateless/02421_decimal_in_precision_issue_41125.sql +++ b/tests/queries/0_stateless/02421_decimal_in_precision_issue_41125.sql @@ -8,16 +8,16 @@ INSERT INTO dtest VALUES ('33', '44.4', '35'); SELECT count() == 0 FROM dtest WHERE a IN toDecimal32('33.3000', 4); SELECT count() == 0 FROM dtest WHERE a IN toDecimal64('33.3000', 4); SELECT count() == 0 FROM dtest WHERE a IN toDecimal128('33.3000', 4); -SELECT count() == 0 FROM dtest WHERE a IN toDecimal256('33.3000', 4); -- { serverError 53 } +SELECT count() == 0 FROM dtest WHERE a IN toDecimal256('33.3000', 4); SELECT count() == 0 FROM dtest WHERE b IN toDecimal32('44.4000', 0); SELECT count() == 0 FROM dtest WHERE b IN toDecimal64('44.4000', 0); SELECT count() == 0 FROM dtest WHERE b IN toDecimal128('44.4000', 0); -SELECT count() == 0 FROM dtest WHERE b IN toDecimal256('44.4000', 0); -- { serverError 53 } +SELECT count() == 0 FROM dtest WHERE b IN toDecimal256('44.4000', 0); SELECT count() == 1 FROM dtest WHERE b IN toDecimal32('44.4000', 4); SELECT count() == 1 FROM dtest WHERE b IN toDecimal64('44.4000', 4); SELECT count() == 1 FROM dtest WHERE b IN toDecimal128('44.4000', 4); -SELECT count() == 1 FROM dtest WHERE b IN toDecimal256('44.4000', 4); -- { serverError 53 } +SELECT count() == 1 FROM dtest WHERE b IN toDecimal256('44.4000', 4); DROP TABLE IF EXISTS dtest; diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh index 6164ff97d9f..7136698d5b7 100755 --- a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh +++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh @@ -15,13 +15,15 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # $3 - Query Settings function execute_query() { - # Some queries are supposed to fail, use -f to suppress error messages - echo $2 | ${CLICKHOUSE_CURL_COMMAND} -q -s --max-time 180 \ - -X POST \ - -H "traceparent: 00-$1-5150000000000515-01" \ - -H "tracestate: a\nb cd" \ - "${CLICKHOUSE_URL}&${3}" \ - --data @- + local trace_id=$1 && shift + local ddl_version=$1 && shift + local opts=( + --opentelemetry-traceparent "00-$trace_id-5150000000000515-01" + --opentelemetry-tracestate $'a\nb cd' + --distributed_ddl_output_mode "none" + --distributed_ddl_entry_format_version "$ddl_version" + ) + ${CLICKHOUSE_CLIENT} "${opts[@]}" "$@" } # This function takes following argument: @@ -82,9 +84,9 @@ for ddl_version in 3 4; do echo "===ddl_format_version ${ddl_version}====" trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); - execute_query $trace_id "CREATE TABLE ${CLICKHOUSE_DATABASE}.ddl_test_for_opentelemetry ON CLUSTER ${cluster_name} (id UInt64) Engine=MergeTree ORDER BY id" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=${ddl_version}" + execute_query $trace_id 
$ddl_version -q "CREATE TABLE ${CLICKHOUSE_DATABASE}.ddl_test_for_opentelemetry ON CLUSTER ${cluster_name} (id UInt64) Engine=MergeTree ORDER BY id" - check_span 1 $trace_id "HTTPHandler" + check_span 1 $trace_id "TCPHandler" if [ $cluster_name = "test_shard_localhost" ]; then check_span 1 $trace_id "%executeDDLQueryOnCluster%" "attribute['clickhouse.cluster']='${cluster_name}'" @@ -106,7 +108,7 @@ for ddl_version in 3 4; do check_span $expected $trace_id "%DDLWorker::processTask%" # For queries that tracing are enabled(format version is 4 or Replicated database engine), there should be two 'query' spans, - # one is for the HTTPHandler, the other is for the DDL executing in DDLWorker. + # one is for the TCPHandler, the other is for the DDL executing in DDLWorker. # # For other format, there should be only one 'query' span if [ $cluster_name = "test_shard_localhost" ]; then @@ -134,9 +136,9 @@ done echo "===exception====" trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))"); -execute_query $trace_id "DROP TABLE ${CLICKHOUSE_DATABASE}.ddl_test_for_opentelemetry_non_exist ON CLUSTER ${cluster_name}" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=4" 2>&1| grep -Fv "UNKNOWN_TABLE" +execute_query $trace_id 4 -q "DROP TABLE ${CLICKHOUSE_DATABASE}.ddl_test_for_opentelemetry_non_exist ON CLUSTER ${cluster_name}" 2>&1 | grep 'DB::Exception ' | grep -Fv "UNKNOWN_TABLE" -check_span 1 $trace_id "HTTPHandler" +check_span 1 $trace_id "TCPHandler" if [ $cluster_name = "test_shard_localhost" ]; then expected=1 @@ -144,11 +146,11 @@ else # For Replicated database it will fail on initiator before enqueueing distributed DDL expected=0 fi -check_span $expected $trace_id "%executeDDLQueryOnCluster%" "attribute['clickhouse.cluster']='${cluster_name}'" -check_span $expected $trace_id "%DDLWorker::processTask%" +check_span $expected $trace_id "%executeDDLQueryOnCluster%" "attribute['clickhouse.cluster']='${cluster_name}' AND kind = 'PRODUCER'" +check_span $expected $trace_id "%DDLWorker::processTask%" "kind = 'CONSUMER'" if [ $cluster_name = "test_shard_localhost" ]; then - # There should be two 'query' spans, one is for the HTTPHandler, the other is for the DDL executing in DDLWorker. + # There should be two 'query' spans, one is for the TCPHandler, the other is for the DDL executing in DDLWorker. 
# Both of these two spans contain exception expected=2 else diff --git a/tests/queries/0_stateless/02423_drop_memory_parts.reference b/tests/queries/0_stateless/02423_drop_memory_parts.reference deleted file mode 100644 index d69a5f07a05..00000000000 --- a/tests/queries/0_stateless/02423_drop_memory_parts.reference +++ /dev/null @@ -1,14 +0,0 @@ -init state -30 -0_1_1_0 InMemory 10 1 -1_2_2_0 InMemory 10 1 -2_3_3_0 InMemory 10 1 -drop part 0 -20 -1_2_2_0 InMemory 10 1 -2_3_3_0 InMemory 10 1 -detach table -attach table -20 -1_2_2_0 InMemory 10 1 -2_3_3_0 InMemory 10 1 diff --git a/tests/queries/0_stateless/02423_drop_memory_parts.sql b/tests/queries/0_stateless/02423_drop_memory_parts.sql deleted file mode 100644 index 9326f159b0c..00000000000 --- a/tests/queries/0_stateless/02423_drop_memory_parts.sql +++ /dev/null @@ -1,40 +0,0 @@ --- Tags: no-s3-storage - -DROP TABLE IF EXISTS table_in_memory; - -CREATE TABLE table_in_memory -( - `id` UInt64, - `value` UInt64 -) -ENGINE = MergeTree -PARTITION BY id -ORDER BY value -SETTINGS min_bytes_for_wide_part=1000, min_bytes_for_compact_part=900; - -SELECT 'init state'; -INSERT INTO table_in_memory SELECT intDiv(number, 10), number FROM numbers(30); - -SELECT count() FROM table_in_memory; -SELECT name, part_type, rows, active from system.parts -WHERE table='table_in_memory' AND database=currentDatabase(); - -SELECT 'drop part 0'; -ALTER TABLE table_in_memory DROP PARTITION 0; - -SELECT count() FROM table_in_memory; -SELECT name, part_type, rows, active from system.parts -WHERE table='table_in_memory' AND database=currentDatabase() AND active; - -SELECT 'detach table'; -DETACH TABLE table_in_memory; - -SELECT name, part_type, rows, active from system.parts -WHERE table='table_in_memory' AND database=currentDatabase(); - -SELECT 'attach table'; -ATTACH TABLE table_in_memory; - -SELECT count() FROM table_in_memory; -SELECT name, part_type, rows, active from system.parts -WHERE table='table_in_memory' AND database=currentDatabase() and active; diff --git a/tests/queries/0_stateless/02426_orc_bug.reference b/tests/queries/0_stateless/02426_orc_bug.reference index e5ad2b49289..baa88da2158 100644 Binary files a/tests/queries/0_stateless/02426_orc_bug.reference and b/tests/queries/0_stateless/02426_orc_bug.reference differ diff --git a/tests/queries/0_stateless/02426_orc_bug.sh b/tests/queries/0_stateless/02426_orc_bug.sh new file mode 100755 index 00000000000..7a7ad9f1783 --- /dev/null +++ b/tests/queries/0_stateless/02426_orc_bug.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --query="SELECT arrayJoin([[], [1]]) FORMAT ORC SETTINGS output_format_orc_compression_method='none'" | md5sum; + diff --git a/tests/queries/0_stateless/02426_orc_bug.sql b/tests/queries/0_stateless/02426_orc_bug.sql deleted file mode 100644 index 7016f1ceb70..00000000000 --- a/tests/queries/0_stateless/02426_orc_bug.sql +++ /dev/null @@ -1,3 +0,0 @@ --- Tags: no-fasttest - -SELECT arrayJoin([[], [1]]) FORMAT ORC; diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference index 52a31f53cc1..b73c52c478f 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.reference +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -19,7 +19,8 @@ ERROR 50 ERROR 10 -ERROR +10 +20 20 ERROR 30 @@ -35,3 +36,7 @@ ERROR 10 20 10 +10 +10 +10 +1 diff --git a/tests/queries/0_stateless/02428_parameterized_view.sh b/tests/queries/0_stateless/02428_parameterized_view.sh index 6118013b665..3abfbfc22fc 100755 --- a/tests/queries/0_stateless/02428_parameterized_view.sh +++ b/tests/queries/0_stateless/02428_parameterized_view.sh @@ -15,13 +15,19 @@ $CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv6" $CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv7" $CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv8" $CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv9" +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv10" +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv11" +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv12" $CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_v1" $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test_02428_Catalog" $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ${CLICKHOUSE_TEST_UNIQUE_NAME}.pv1" $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ${CLICKHOUSE_TEST_UNIQUE_NAME}.Catalog" $CLICKHOUSE_CLIENT -q "DROP DATABASE IF EXISTS ${CLICKHOUSE_TEST_UNIQUE_NAME}" +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS 02428_trace_view" +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS 02428_otel_traces_trace_id_ts" +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS 02428_otel_traces" -$CLICKHOUSE_CLIENT -q "CREATE TABLE test_02428_Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory" +$CLICKHOUSE_CLIENT -q "CREATE TABLE test_02428_Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/parameterized_view', 'r1') ORDER BY Name" $CLICKHOUSE_CLIENT -q "INSERT INTO test_02428_Catalog VALUES ('Pen', 10, 3)" $CLICKHOUSE_CLIENT -q "INSERT INTO test_02428_Catalog VALUES ('Book', 50, 2)" @@ -56,7 +62,8 @@ $CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv2(price=50)" 2>&1 | grep $CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv3 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64} AND Quantity=3" $CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv3(price=10)" -$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv4 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64} AND Quantity={price:UInt64}" 2>&1 | grep -Fq "DUPLICATE_COLUMN" && echo 'ERROR' || echo 'OK' +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv4 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64} OR Price={price:UInt64}*2" +$CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv4(price=10) ORDER BY Price" $CLICKHOUSE_CLIENT -q "CREATE DATABASE ${CLICKHOUSE_TEST_UNIQUE_NAME}" $CLICKHOUSE_CLIENT -q "CREATE TABLE ${CLICKHOUSE_TEST_UNIQUE_NAME}.Catalog (Name 
String, Price UInt64, Quantity UInt64) ENGINE = Memory" @@ -70,7 +77,7 @@ $CLICKHOUSE_CLIENT -q "SELECT Price FROM \`${CLICKHOUSE_TEST_UNIQUE_NAME}.pv1\`( $CLICKHOUSE_CLIENT -q "INSERT INTO test_02428_Catalog VALUES ('Book2', 30, 8)" $CLICKHOUSE_CLIENT -q "INSERT INTO test_02428_Catalog VALUES ('Book3', 30, 8)" -$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv5 AS SELECT Price FROM test_02428_Catalog WHERE {price:UInt64} HAVING Quantity in (SELECT {quantity:UInt64}) LIMIT {limit:UInt64}" +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv5 AS SELECT Price FROM test_02428_Catalog WHERE Price={price:UInt64} HAVING Quantity in (SELECT {quantity:UInt64}) LIMIT {limit:UInt64}" $CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv5(price=30, quantity=8, limit=1)" $CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv6 AS SELECT Price+{price:UInt64} FROM test_02428_Catalog GROUP BY Price+{price:UInt64} ORDER BY Price+{price:UInt64}" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_02428_pv6(price=10)" @@ -83,6 +90,29 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM test_02428_pv8(prices=[10,20])" $CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv9 AS SELECT Price FROM test_02428_Catalog WHERE Price IN (10,20) AND Quantity={quantity:UInt64} ORDER BY Price" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_02428_pv9(quantity=3)" +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv10 AS SELECT Price FROM test_02428_Catalog WHERE Price={Pri:UInt64} ORDER BY Price" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_02428_pv10(Pri=10)" + +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv11 AS SELECT * from ( SELECT Price FROM test_02428_Catalog WHERE Price={price:UInt64} )" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_02428_pv11(price=10)" + +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv12 AS SELECT * from ( SELECT Price FROM test_02428_Catalog WHERE Price IN (SELECT number FROM numbers({price:UInt64})) )" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_02428_pv12(price=11)" + +$CLICKHOUSE_CLIENT -q "CREATE TABLE 02428_otel_traces (TraceId String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/otel_traces', 'r1') ORDER BY TraceId" +$CLICKHOUSE_CLIENT -q "CREATE TABLE 02428_otel_traces_trace_id_ts (TraceId String, Start Timestamp) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/otel_traces_trace_id_ts', 'r1') ORDER BY TraceId" + +$CLICKHOUSE_CLIENT -q "INSERT INTO 02428_otel_traces(TraceId) VALUES ('1')" +$CLICKHOUSE_CLIENT -q "INSERT INTO 02428_otel_traces_trace_id_ts(TraceId, Start) VALUES('1', now())" + +$CLICKHOUSE_CLIENT -q "CREATE VIEW 02428_trace_view AS WITH {trace_id:String} AS trace_id, + ( SELECT min(Start) FROM 02428_otel_traces_trace_id_ts WHERE TraceId = trace_id + ) AS start SELECT + TraceId AS traceID + FROM 02428_otel_traces" +$CLICKHOUSE_CLIENT -q "SELECT * FROM 02428_trace_view(trace_id='1')" + + $CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv1" $CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv2" $CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv3" @@ -91,8 +121,14 @@ $CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv6" $CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv7" $CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv8" $CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv9" +$CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv10" +$CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv11" +$CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv12" $CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_v1" $CLICKHOUSE_CLIENT -q "DROP TABLE test_02428_Catalog" $CLICKHOUSE_CLIENT -q "DROP TABLE 
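#
# A condensed sketch of the feature the test_02428_pv* statements above
# exercise: a view whose body contains {name:Type} placeholders becomes a
# parameterized view, and the parameters are bound at query time with
# function-call syntax. Table and view names here are illustrative.
$CLICKHOUSE_CLIENT -nq "
    CREATE TABLE catalog_sketch (Name String, Price UInt64) ENGINE = Memory;
    INSERT INTO catalog_sketch VALUES ('Pen', 10), ('Book', 50);
    CREATE VIEW pv_sketch AS SELECT Price FROM catalog_sketch WHERE Price = {price:UInt64};
    SELECT * FROM pv_sketch(price=10);
    DROP VIEW pv_sketch;
    DROP TABLE catalog_sketch;
"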
${CLICKHOUSE_TEST_UNIQUE_NAME}.pv1" $CLICKHOUSE_CLIENT -q "DROP TABLE ${CLICKHOUSE_TEST_UNIQUE_NAME}.Catalog" -$CLICKHOUSE_CLIENT -q "DROP DATABASE ${CLICKHOUSE_TEST_UNIQUE_NAME}" \ No newline at end of file +$CLICKHOUSE_CLIENT -q "DROP DATABASE ${CLICKHOUSE_TEST_UNIQUE_NAME}" +$CLICKHOUSE_CLIENT -q "DROP VIEW 02428_trace_view" +$CLICKHOUSE_CLIENT -q "DROP TABLE 02428_otel_traces_trace_id_ts" +$CLICKHOUSE_CLIENT -q "DROP TABLE 02428_otel_traces" diff --git a/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql b/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql index 235a3335d9d..88fb2cdf9b1 100644 --- a/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql +++ b/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql @@ -1,5 +1,7 @@ -- Tags: no-fasttest +SET send_logs_level = 'fatal'; + drop table if exists rmt; drop table if exists rmt2; @@ -7,7 +9,7 @@ drop table if exists rmt2; create table rmt (n int, m int, k int) engine=ReplicatedMergeTree('/test/02432/{database}', '1') order by tuple() settings storage_policy = 's3_cache', allow_remote_fs_zero_copy_replication=1, max_part_removal_threads=10, concurrent_part_removal_threshold=1, cleanup_delay_period=1, cleanup_delay_period_random_add=1, - max_replicated_merges_in_queue=0, max_replicated_mutations_in_queue=0, min_bytes_for_compact_part=0, min_rows_for_compact_part=0; + max_replicated_merges_in_queue=0, max_replicated_mutations_in_queue=0, min_bytes_for_wide_part=0, min_rows_for_wide_part=0; insert into rmt(n, m) values (1, 42); insert into rmt(n, m) values (2, 42); @@ -37,7 +39,7 @@ select count(), sum(n), sum(m) from rmt; create table rmt2 (n int, m int, k String) engine=ReplicatedMergeTree('/test/02432/{database}', '2') order by tuple() settings storage_policy = 's3_cache', allow_remote_fs_zero_copy_replication=1, max_part_removal_threads=10, concurrent_part_removal_threshold=1, cleanup_delay_period=1, cleanup_delay_period_random_add=1, - min_bytes_for_compact_part=0, min_rows_for_compact_part=0, max_replicated_merges_in_queue=1, + min_bytes_for_wide_part=0, min_rows_for_wide_part=0, max_replicated_merges_in_queue=1, old_parts_lifetime=0; alter table rmt2 modify column k Nullable(String); diff --git a/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.reference b/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.reference new file mode 100644 index 00000000000..d2475419998 --- /dev/null +++ b/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.reference @@ -0,0 +1,3 @@ +5000000 +5000000 +1 diff --git a/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh b/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh new file mode 100755 index 00000000000..5e2da509314 --- /dev/null +++ b/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh @@ -0,0 +1,100 @@ +#!/usr/bin/env bash +# Tags: no-random-settings +# shellcheck disable=SC2009 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +export DATA_FILE="$CLICKHOUSE_TMP/deduptest.tsv" +export TEST_MARK="02434_insert_${CLICKHOUSE_DATABASE}_" + +$CLICKHOUSE_CLIENT -q 'select * from numbers(5000000) format TSV' > $DATA_FILE +$CLICKHOUSE_CLIENT -q 'create table dedup_test(A Int64) Engine = MergeTree order by A settings non_replicated_deduplication_window=1000;' +$CLICKHOUSE_CLIENT -q "create table dedup_dist(A Int64) Engine = Distributed('test_cluster_one_shard_two_replicas', currentDatabase(), dedup_test)" + +function insert_data +{ + SETTINGS="query_id=$ID&max_insert_block_size=110000&min_insert_block_size_rows=110000" + # max_block_size=10000, so external table will contain smaller blocks that will be squashed on insert-select (more chances to catch a bug on query cancellation) + TRASH_SETTINGS="query_id=$ID&input_format_parallel_parsing=0&max_threads=1&max_insert_threads=1&max_insert_block_size=110000&max_block_size=10000&min_insert_block_size_bytes=0&min_insert_block_size_rows=110000&max_insert_block_size=110000" + TYPE=$(( RANDOM % 5 )) + + if [[ "$TYPE" -eq 0 ]]; then + # client will send 10000-rows blocks, server will squash them into 110000-rows blocks (more chances to catch a bug on query cancellation) + $CLICKHOUSE_CLIENT --max_block_size=10000 --max_insert_block_size=10000 --query_id="$ID" \ + -q 'insert into dedup_test settings max_insert_block_size=110000, min_insert_block_size_rows=110000 format TSV' < $DATA_FILE + elif [[ "$TYPE" -eq 1 ]]; then + $CLICKHOUSE_CLIENT --max_block_size=10000 --max_insert_block_size=10000 --query_id="$ID" --prefer_localhost_replica="$(( RANDOM % 2))" \ + -q 'insert into dedup_dist settings max_insert_block_size=110000, min_insert_block_size_rows=110000 format TSV' < $DATA_FILE + elif [[ "$TYPE" -eq 2 ]]; then + $CLICKHOUSE_CURL -sS -X POST --data-binary @- "$CLICKHOUSE_URL&$SETTINGS&query=insert+into+dedup_test+format+TSV" < $DATA_FILE + elif [[ "$TYPE" -eq 3 ]]; then + $CLICKHOUSE_CURL -sS -X POST -H "Transfer-Encoding: chunked" --data-binary @- "$CLICKHOUSE_URL&$SETTINGS&query=insert+into+dedup_test+format+TSV" < $DATA_FILE + else + $CLICKHOUSE_CURL -sS -F 'file=@-' "$CLICKHOUSE_URL&$TRASH_SETTINGS&file_format=TSV&file_types=UInt64" -X POST --form-string 'query=insert into dedup_test select * from file' < $DATA_FILE + fi +} + +export -f insert_data + +ID="02434_insert_init_${CLICKHOUSE_DATABASE}_$RANDOM" +insert_data +$CLICKHOUSE_CLIENT -q "system flush distributed dedup_dist" +$CLICKHOUSE_CLIENT -q 'select count() from dedup_test' + +function thread_insert +{ + # supress "Killed" messages from bash + i=0 + while true; do + export ID="$TEST_MARK$RANDOM-$RANDOM-$i" + bash -c insert_data 2>&1| grep -Fav "Killed" + i=$((i + 1)) + done +} + +function thread_select +{ + while true; do + $CLICKHOUSE_CLIENT -q "with (select count() from dedup_test) as c select throwIf(c != 5000000, 'Expected 5000000 rows, got ' || toString(c)) format Null" + sleep 0.$RANDOM; + done +} + +function thread_cancel +{ + while true; do + SIGNAL="INT" + if (( RANDOM % 2 )); then + SIGNAL="KILL" + fi + PID=$(grep -Fa "$TEST_MARK" /proc/*/cmdline | grep -Fav grep | grep -Eoa "/proc/[0-9]*/cmdline:" | grep -Eo "[0-9]*" | head -1) + if [ ! 
-z "$PID" ]; then kill -s "$SIGNAL" "$PID"; fi + sleep 0.$RANDOM; + sleep 0.$RANDOM; + sleep 0.$RANDOM; + done +} + +export -f thread_insert; +export -f thread_select; +export -f thread_cancel; + +TIMEOUT=40 + +timeout $TIMEOUT bash -c thread_insert & +timeout $TIMEOUT bash -c thread_select & +timeout $TIMEOUT bash -c thread_cancel 2> /dev/null & + +wait + +$CLICKHOUSE_CLIENT -q 'select count() from dedup_test' + +$CLICKHOUSE_CLIENT -q 'system flush logs' + +# Ensure that thread_cancel actually did something +$CLICKHOUSE_CLIENT -q "select count() > 0 from system.text_log where event_date >= yesterday() and query_id like '$TEST_MARK%' and ( + message_format_string in ('Unexpected end of file while reading chunk header of HTTP chunked data', 'Unexpected EOF, got {} of {} bytes', + 'Query was cancelled or a client has unexpectedly dropped the connection') or + message like '%Connection reset by peer%' or message like '%Broken pipe, while writing to socket%')" diff --git a/tests/queries/0_stateless/02435_rollback_cancelled_queries.reference b/tests/queries/0_stateless/02435_rollback_cancelled_queries.reference new file mode 100644 index 00000000000..2d32c17ec7c --- /dev/null +++ b/tests/queries/0_stateless/02435_rollback_cancelled_queries.reference @@ -0,0 +1,3 @@ +1000000 +0 +1 diff --git a/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh b/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh new file mode 100755 index 00000000000..8f8e8cc7ee0 --- /dev/null +++ b/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh @@ -0,0 +1,120 @@ +#!/usr/bin/env bash +# Tags: no-random-settings, no-ordinary-database +# shellcheck disable=SC2009 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +export DATA_FILE="$CLICKHOUSE_TMP/deduptest.tsv" +export TEST_MARK="02435_insert_${CLICKHOUSE_DATABASE}_" +export SESSION="02435_session_${CLICKHOUSE_DATABASE}" + +$CLICKHOUSE_CLIENT -q 'select * from numbers(1000000) format TSV' > $DATA_FILE +$CLICKHOUSE_CLIENT -q 'create table dedup_test(A Int64) Engine = MergeTree order by sin(A) partition by intDiv(A, 100000)' + +function insert_data +{ + IMPLICIT=$(( RANDOM % 2 )) + SESSION_ID="${SESSION}_$RANDOM.$RANDOM.$NUM" + TXN_SETTINGS="session_id=$SESSION_ID&throw_on_unsupported_query_inside_transaction=0&implicit_transaction=$IMPLICIT" + BEGIN="" + COMMIT="" + SETTINGS="query_id=$ID&$TXN_SETTINGS&max_insert_block_size=110000&min_insert_block_size_rows=110000" + if [[ "$IMPLICIT" -eq 0 ]]; then + $CLICKHOUSE_CURL -sS -d 'begin transaction' "$CLICKHOUSE_URL&$TXN_SETTINGS" + SETTINGS="$SETTINGS&session_check=1" + BEGIN="begin transaction;" + COMMIT=$(echo -ne "\n\ncommit") + fi + + # max_block_size=10000, so external table will contain smaller blocks that will be squashed on insert-select (more chances to catch a bug on query cancellation) + TRASH_SETTINGS="$SETTINGS&input_format_parallel_parsing=0&max_threads=1&max_insert_threads=1&max_block_size=10000&min_insert_block_size_bytes=0" + TYPE=$(( RANDOM % 6 )) + + if [[ "$TYPE" -eq 0 ]]; then + $CLICKHOUSE_CURL -sS -X POST --data-binary @- "$CLICKHOUSE_URL&$SETTINGS&query=insert+into+dedup_test+format+TSV" < $DATA_FILE + elif [[ "$TYPE" -eq 1 ]]; then + $CLICKHOUSE_CURL -sS -X POST -H "Transfer-Encoding: chunked" --data-binary @- "$CLICKHOUSE_URL&$SETTINGS&query=insert+into+dedup_test+format+TSV" < $DATA_FILE + elif [[ "$TYPE" -eq 2 ]]; then + $CLICKHOUSE_CURL -sS -F 'file=@-' "$CLICKHOUSE_URL&$TRASH_SETTINGS&file_format=TSV&file_types=UInt64" -X POST --form-string 'query=insert into dedup_test select * from file' < $DATA_FILE + else + # client will send 1000-rows blocks, server will squash them into 110000-rows blocks (more chances to catch a bug on query cancellation) + $CLICKHOUSE_CLIENT --stacktrace --query_id="$ID" --throw_on_unsupported_query_inside_transaction=0 --implicit_transaction="$IMPLICIT" \ + --max_block_size=1000 --max_insert_block_size=1000 --multiquery -q \ + "${BEGIN}insert into dedup_test settings max_insert_block_size=110000, min_insert_block_size_rows=110000 format TSV$COMMIT" < $DATA_FILE \ + | grep -Fv "Transaction is not in RUNNING state" + fi + + if [[ "$IMPLICIT" -eq 0 ]]; then + $CLICKHOUSE_CURL -sS -d 'commit' "$CLICKHOUSE_URL&$TXN_SETTINGS&close_session=1" 2>&1| grep -Fav "Transaction is not in RUNNING state" + fi +} + +export -f insert_data + +ID="02435_insert_init_${CLICKHOUSE_DATABASE}_$RANDOM" +insert_data 0 +$CLICKHOUSE_CLIENT -q 'select count() from dedup_test' + +function thread_insert +{ + # supress "Killed" messages from bash + i=2 + while true; do + export ID="$TEST_MARK$RANDOM-$RANDOM-$i" + export NUM="$i" + bash -c insert_data 2>&1| grep -Fav "Killed" | grep -Fav "SESSION_IS_LOCKED" | grep -Fav "SESSION_NOT_FOUND" + i=$((i + 1)) + done +} + +function thread_select +{ + while true; do + $CLICKHOUSE_CLIENT --implicit_transaction=1 -q "with (select count() from dedup_test) as c select throwIf(c % 1000000 != 0, 'Expected 1000000 * N rows, got ' || toString(c)) format Null" + sleep 0.$RANDOM; + done +} + +function thread_cancel +{ + while true; do + SIGNAL="INT" + if (( RANDOM % 2 )); then + SIGNAL="KILL" + fi + PID=$(grep -Fa "$TEST_MARK" /proc/*/cmdline | grep -Fav grep | grep -Eoa "/proc/[0-9]*/cmdline:" | grep -Eo "[0-9]*" | 
head -1) + if [ ! -z "$PID" ]; then kill -s "$SIGNAL" "$PID"; fi + sleep 0.$RANDOM; + done +} + +export -f thread_insert; +export -f thread_select; +export -f thread_cancel; + +TIMEOUT=20 + +timeout $TIMEOUT bash -c thread_insert & +timeout $TIMEOUT bash -c thread_select & +timeout $TIMEOUT bash -c thread_cancel 2> /dev/null & + +wait + +$CLICKHOUSE_CLIENT -q 'system flush logs' + +ID="02435_insert_last_${CLICKHOUSE_DATABASE}_$RANDOM" +insert_data 1 + +$CLICKHOUSE_CLIENT --implicit_transaction=1 -q 'select throwIf(count() % 1000000 != 0 or count() = 0) from dedup_test' \ + || $CLICKHOUSE_CLIENT -q "select name, rows, active, visible, creation_tid, creation_csn from system.parts where database=currentDatabase();" + +# Ensure that thread_cancel actually did something +$CLICKHOUSE_CLIENT -q "select count() > 0 from system.text_log where event_date >= yesterday() and query_id like '$TEST_MARK%' and ( + message_format_string in ('Unexpected end of file while reading chunk header of HTTP chunked data', 'Unexpected EOF, got {} of {} bytes', + 'Query was cancelled or a client has unexpectedly dropped the connection') or + message like '%Connection reset by peer%' or message like '%Broken pipe, while writing to socket%')" + +wait_for_queries_to_finish 30 +$CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=0 -q "drop table dedup_test" diff --git a/tests/queries/0_stateless/00971_live_view_watch_http_heartbeat.reference b/tests/queries/0_stateless/02437_drop_mv_restart_replicas.reference similarity index 100% rename from tests/queries/0_stateless/00971_live_view_watch_http_heartbeat.reference rename to tests/queries/0_stateless/02437_drop_mv_restart_replicas.reference diff --git a/tests/queries/0_stateless/02437_drop_mv_restart_replicas.sh b/tests/queries/0_stateless/02437_drop_mv_restart_replicas.sh new file mode 100755 index 00000000000..ca5e1245046 --- /dev/null +++ b/tests/queries/0_stateless/02437_drop_mv_restart_replicas.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash +# Tags: long, zookeeper, race, no-ordinary-database, no-replicated-database +# FIXME remove no-replicated-database tag + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "create user u_$CLICKHOUSE_DATABASE" +$CLICKHOUSE_CLIENT -q "grant all on db_$CLICKHOUSE_DATABASE.* to u_$CLICKHOUSE_DATABASE" + +# For tests with Replicated +ENGINE=$($CLICKHOUSE_CLIENT -q "select replace(engine_full, '$CLICKHOUSE_DATABASE', 'db_$CLICKHOUSE_DATABASE') from system.databases where name='$CLICKHOUSE_DATABASE' format TSVRaw") +export ENGINE + +function thread_ddl() +{ + while true; do + $CLICKHOUSE_CLIENT -q "create database if not exists db_$CLICKHOUSE_DATABASE engine=$ENGINE" + $CLICKHOUSE_CLIENT -q "CREATE TABLE if not exists db_$CLICKHOUSE_DATABASE.test (test String, A Int64, B Int64) ENGINE = ReplicatedMergeTree ('/clickhouse/tables/{database}/test_02124/{table}', '1') ORDER BY tuple();" + $CLICKHOUSE_CLIENT -q "CREATE MATERIALIZED VIEW if not exists db_$CLICKHOUSE_DATABASE.test_mv_a Engine=ReplicatedMergeTree ('/clickhouse/tables/{database}/test_02124/{table}', '1') order by tuple() AS SELECT test, A, count() c FROM db_$CLICKHOUSE_DATABASE.test group by test, A;" + $CLICKHOUSE_CLIENT -q "CREATE MATERIALIZED VIEW if not exists db_$CLICKHOUSE_DATABASE.test_mv_b Engine=ReplicatedMergeTree ('/clickhouse/tables/{database}/test_02124/{table}', '1') partition by A order by tuple() AS SELECT test, A, count() c FROM db_$CLICKHOUSE_DATABASE.test group by test, A;" + $CLICKHOUSE_CLIENT -q "CREATE MATERIALIZED VIEW if not exists db_$CLICKHOUSE_DATABASE.test_mv_c Engine=ReplicatedMergeTree ('/clickhouse/tables/{database}/test_02124/{table}', '1') order by tuple() AS SELECT test, A, count() c FROM db_$CLICKHOUSE_DATABASE.test group by test, A;" + sleep 0.$RANDOM; + + # A kind of backoff + timeout 5s $CLICKHOUSE_CLIENT -q "select sleepEachRow(0.1) from system.dropped_tables format Null" 2>/dev/null ||: + + $CLICKHOUSE_CLIENT -q "drop database if exists db_$CLICKHOUSE_DATABASE" + done +} + +function thread_insert() +{ + while true; do + $CLICKHOUSE_CLIENT -q "INSERT INTO db_$CLICKHOUSE_DATABASE.test SELECT 'case1', number%3, rand() FROM numbers(5)" + sleep 0.$RANDOM; + done +} + +function thread_restart() +{ + while true; do + # The simplest way to restart only replicas from a specific database is to use a special user + $CLICKHOUSE_CLIENT --user "u_$CLICKHOUSE_DATABASE" -q "system restart replicas" + sleep 0.$RANDOM; + done +} + +export -f thread_ddl; +export -f thread_insert; +export -f thread_restart; + +TIMEOUT=15 + +timeout $TIMEOUT bash -c thread_ddl 2>&1| grep -Fa "Exception: " | grep -Fv -e "TABLE_IS_DROPPED" -e "UNKNOWN_TABLE" -e "DATABASE_NOT_EMPTY" & +timeout $TIMEOUT bash -c thread_insert 2> /dev/null & +timeout $TIMEOUT bash -c thread_restart 2>&1| grep -Fa "Exception: " | grep -Fv -e "is currently dropped or renamed" & + +wait + +timeout 45s $CLICKHOUSE_CLIENT -q "select sleepEachRow(0.3) from system.dropped_tables format Null" 2>/dev/null ||: + +$CLICKHOUSE_CLIENT -q "drop database if exists db_$CLICKHOUSE_DATABASE" 2>&1| grep -Fa "Exception: " | grep -Fv -e "TABLE_IS_DROPPED" -e "UNKNOWN_TABLE" -e "DATABASE_NOT_EMPTY" ||: diff --git a/tests/queries/0_stateless/02438_sync_replica_lightweight.reference b/tests/queries/0_stateless/02438_sync_replica_lightweight.reference new file mode 100644 index 00000000000..25abaad13e2 --- /dev/null +++ b/tests/queries/0_stateless/02438_sync_replica_lightweight.reference @@ -0,0 +1,15 @@ +GET_PART all_0_0_0 +GET_PART all_1_1_0 +1 1 all_0_0_0 +1 2 all_1_1_0 +MERGE_PARTS all_0_1_1 +3 1 all_0_1_1 +3 2 all_0_1_1 +4 1 all_0_1_1 +4 2 all_0_1_1 +5 1 all_0_2_2 +5 2 all_0_2_2 +5 3 all_0_2_2 
+6 1 all_0_2_2 +6 2 all_0_2_2 +6 3 all_0_2_2 diff --git a/tests/queries/0_stateless/02438_sync_replica_lightweight.sql b/tests/queries/0_stateless/02438_sync_replica_lightweight.sql new file mode 100644 index 00000000000..1da48d95d9b --- /dev/null +++ b/tests/queries/0_stateless/02438_sync_replica_lightweight.sql @@ -0,0 +1,42 @@ +-- Tags: no-replicated-database +-- Tag no-replicated-database: different number of replicas + +create table rmt1 (n int) engine=ReplicatedMergeTree('/test/{database}/02438/', '1') order by tuple(); +create table rmt2 (n int) engine=ReplicatedMergeTree('/test/{database}/02438/', '2') order by tuple(); + +system stop replicated sends rmt1; +system stop merges rmt2; + +set insert_keeper_fault_injection_probability=0; + +insert into rmt1 values (1); +insert into rmt1 values (2); +system sync replica rmt2 pull; -- does not wait +select type, new_part_name from system.replication_queue where database=currentDatabase() and table='rmt2' order by new_part_name; +select 1, n, _part from rmt1 order by n; +select 2, n, _part from rmt2 order by n; + +set optimize_throw_if_noop = 1; +system sync replica rmt1 pull; +optimize table rmt1 final; + +system start replicated sends rmt1; +system sync replica rmt2 lightweight; -- waits for fetches, not merges +select type, new_part_name from system.replication_queue where database=currentDatabase() and table='rmt2' order by new_part_name; +select 3, n, _part from rmt1 order by n; +select 4, n, _part from rmt2 order by n; + +system start merges rmt2; +system sync replica rmt2; + +insert into rmt2 values (3); +system sync replica rmt2 pull; +optimize table rmt2 final; + +system sync replica rmt1 strict; + +select 5, n, _part from rmt1 order by n; +select 6, n, _part from rmt2 order by n; + +drop table rmt1; +drop table rmt2; diff --git a/tests/queries/0_stateless/02439_merge_selecting_partitions.reference b/tests/queries/0_stateless/02439_merge_selecting_partitions.reference new file mode 100644 index 00000000000..e836994b3aa --- /dev/null +++ b/tests/queries/0_stateless/02439_merge_selecting_partitions.reference @@ -0,0 +1 @@ +/test/02439/s1/default/block_numbers/123 diff --git a/tests/queries/0_stateless/02439_merge_selecting_partitions.sql b/tests/queries/0_stateless/02439_merge_selecting_partitions.sql new file mode 100644 index 00000000000..88ce2834d6b --- /dev/null +++ b/tests/queries/0_stateless/02439_merge_selecting_partitions.sql @@ -0,0 +1,28 @@ + +drop table if exists rmt; + +create table rmt (n int, m int) engine=ReplicatedMergeTree('/test/02439/{shard}/{database}', '{replica}') partition by n order by n; +insert into rmt select number, number from numbers(50); +insert into rmt values (1, 2); +insert into rmt values (1, 3); +insert into rmt values (1, 4); +insert into rmt values (1, 5); +insert into rmt values (1, 6); +insert into rmt values (1, 7); +insert into rmt values (1, 8); +insert into rmt values (1, 9); +-- there's nothing to merge in all partitions but '1' + +optimize table rmt partition tuple(123); + +set optimize_throw_if_noop=1; +optimize table rmt partition tuple(123); -- { serverError CANNOT_ASSIGN_OPTIMIZE } + +select sleepEachRow(3) as higher_probability_of_reproducing_the_issue format Null; +system flush logs; + +-- it should not list unneeded partitions where we cannot merge anything +select distinct path from system.zookeeper_log where path like '/test/02439/s1/' || currentDatabase() || '/block_numbers/%' + and op_num in ('List', 'SimpleList', 'FilteredList') and path not like '%/block_numbers/1'; + +drop 
table rmt; diff --git a/tests/queries/0_stateless/02440_mutations_finalization.reference b/tests/queries/0_stateless/02440_mutations_finalization.reference new file mode 100644 index 00000000000..c4bad0a3806 --- /dev/null +++ b/tests/queries/0_stateless/02440_mutations_finalization.reference @@ -0,0 +1,5 @@ +0000000000 UPDATE n = 2 WHERE n = 1 ['all_0_0_0'] 0 +1 +0000000000 UPDATE n = 2 WHERE n = 1 ['all_0_0_0'] 0 +2 +0000000000 UPDATE n = 2 WHERE n = 1 [] diff --git a/tests/queries/0_stateless/02440_mutations_finalization.sql b/tests/queries/0_stateless/02440_mutations_finalization.sql new file mode 100644 index 00000000000..c522d8ab9df --- /dev/null +++ b/tests/queries/0_stateless/02440_mutations_finalization.sql @@ -0,0 +1,34 @@ + +create table mut (n int) engine=ReplicatedMergeTree('/test/02440/{database}/mut', '1') order by tuple(); +set insert_keeper_fault_injection_probability=0; +insert into mut values (1); +system stop merges mut; +alter table mut update n = 2 where n = 1; +-- it will create a MUTATE_PART entry, but will not execute it + +system sync replica mut pull; +select mutation_id, command, parts_to_do_names, is_done from system.mutations where database=currentDatabase() and table='mut'; + +-- merges (and mutations) will start again after detach/attach, we need to avoid this somehow... +create table tmp (n int) engine=MergeTree order by tuple() settings index_granularity=1; +insert into tmp select * from numbers(1000); +alter table tmp update n = sleepEachRow(1) where 1; +select sleepEachRow(2) as higher_probability_of_reproducing_the_issue format Null; + +-- it will not execute MUTATE_PART, because another mutation is currently executing (in tmp) +alter table mut modify setting max_number_of_mutations_for_replica=1; +detach table mut; +attach table mut; + +-- the mutation should not be finished yet +select * from mut; +select mutation_id, command, parts_to_do_names, is_done from system.mutations where database=currentDatabase() and table='mut'; + +alter table mut modify setting max_number_of_mutations_for_replica=100; +system sync replica mut; + +-- and now it should be finished (is_done may be 0, but it's okay) +select * from mut; +select mutation_id, command, parts_to_do_names from system.mutations where database=currentDatabase() and table='mut'; + +drop table tmp; -- btw, it will check that a mutation can be cancelled between blocks on shutdown diff --git a/tests/queries/0_stateless/02441_alter_delete_and_drop_column.reference b/tests/queries/0_stateless/02441_alter_delete_and_drop_column.reference new file mode 100644 index 00000000000..e9858167301 --- /dev/null +++ b/tests/queries/0_stateless/02441_alter_delete_and_drop_column.reference @@ -0,0 +1,2 @@ +MUTATE_PART all_0_0_0_1 ['all_0_0_0'] +1 2 diff --git a/tests/queries/0_stateless/02441_alter_delete_and_drop_column.sql b/tests/queries/0_stateless/02441_alter_delete_and_drop_column.sql new file mode 100644 index 00000000000..9c4697362df --- /dev/null +++ b/tests/queries/0_stateless/02441_alter_delete_and_drop_column.sql @@ -0,0 +1,27 @@ +-- Tags: no-replicated-database + +create table mut (n int, m int, k int) engine=ReplicatedMergeTree('/test/02441/{database}/mut', '1') order by n; +set insert_keeper_fault_injection_probability=0; +insert into mut values (1, 2, 3), (10, 20, 30); + +system stop merges mut; +alter table mut delete where n = 10; + +-- a funny way to wait for a MUTATE_PART to be assigned +select sleepEachRow(2) from url('http://localhost:8123/?param_tries={1..10}&query=' || encodeURLComponent( + 'select 1 where 
''MUTATE_PART'' not in (select type from system.replication_queue where database=''' || currentDatabase() || ''' and table=''mut'')' + ), 'LineAsString', 's String') settings max_threads=1 format Null; + +alter table mut drop column k settings alter_sync=0; +system sync replica mut pull; + +-- a funny way to wait for ALTER_METADATA to disappear from the replication queue +select sleepEachRow(2) from url('http://localhost:8123/?param_tries={1..10}&query=' || encodeURLComponent( + 'select * from system.replication_queue where database=''' || currentDatabase() || ''' and table=''mut'' and type=''ALTER_METADATA''' + ), 'LineAsString', 's String') settings max_threads=1 format Null; + +select type, new_part_name, parts_to_merge from system.replication_queue where database=currentDatabase() and table='mut'; +system start merges mut; +set receive_timeout=30; +system sync replica mut; +select * from mut; diff --git a/tests/queries/0_stateless/02442_auxiliary_zookeeper_endpoint_id.reference b/tests/queries/0_stateless/02442_auxiliary_zookeeper_endpoint_id.reference new file mode 100644 index 00000000000..4a9341ba3f6 --- /dev/null +++ b/tests/queries/0_stateless/02442_auxiliary_zookeeper_endpoint_id.reference @@ -0,0 +1 @@ +10013 diff --git a/tests/queries/0_stateless/02442_auxiliary_zookeeper_endpoint_id.sql b/tests/queries/0_stateless/02442_auxiliary_zookeeper_endpoint_id.sql new file mode 100644 index 00000000000..ff3552b2a42 --- /dev/null +++ b/tests/queries/0_stateless/02442_auxiliary_zookeeper_endpoint_id.sql @@ -0,0 +1,21 @@ +-- Tags: no-fasttest + +drop table if exists t1_r1 sync; +drop table if exists t1_r2 sync; +drop table if exists t2 sync; + +create table t1_r1 (x Int32) engine=ReplicatedMergeTree('/test/02442/{database}/t', 'r1') order by x; + +create table t1_r2 (x Int32) engine=ReplicatedMergeTree('/test/02442/{database}/t', 'r2') order by x; + +-- create table with same replica_path as t1_r1 +create table t2 (x Int32) engine=ReplicatedMergeTree('zookeeper2:/test/02442/{database}/t', 'r1') order by x; +drop table t2 sync; + +-- insert data into one replica +insert into t1_r1 select * from generateRandom('x Int32') LIMIT 10013; +system sync replica t1_r2; +select count() from t1_r2; + +drop table t1_r1 sync; +drop table t1_r2 sync; diff --git a/tests/queries/0_stateless/02447_drop_database_replica.reference b/tests/queries/0_stateless/02447_drop_database_replica.reference index 1d65fe66c6e..f2b41569540 100644 --- a/tests/queries/0_stateless/02447_drop_database_replica.reference +++ b/tests/queries/0_stateless/02447_drop_database_replica.reference @@ -6,10 +6,16 @@ t 2 2 2 -rdb_default 1 1 -rdb_default 1 2 2 2 2 +2 +rdb_default 1 1 s1 r1 1 +2 +2 +rdb_default 1 1 s1 r1 1 +rdb_default 1 2 s1 r2 0 +2 +2 t -rdb_default_3 1 1 +rdb_default_4 1 1 s1 r1 1 diff --git a/tests/queries/0_stateless/02447_drop_database_replica.sh b/tests/queries/0_stateless/02447_drop_database_replica.sh index 4bfd6243c2e..47a6cf10bda 100755 --- a/tests/queries/0_stateless/02447_drop_database_replica.sh +++ b/tests/queries/0_stateless/02447_drop_database_replica.sh @@ -13,35 +13,49 @@ $CLICKHOUSE_CLIENT -q "show tables from $db" $CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from table t" 2>&1| grep -Fac "SYNTAX_ERROR" $CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from database $db" 2>&1| grep -Fac "There is a local database" +$CLICKHOUSE_CLIENT -q "system drop database replica 'r1' from shard 's1' from database $db" 2>&1| grep -Fac "There is a local database" $CLICKHOUSE_CLIENT -q "system 
drop database replica 's1|r1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb'" 2>&1| grep -Fac "There is a local database" $CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb/'" 2>&1| grep -Fac "There is a local database" +$CLICKHOUSE_CLIENT -q "system drop database replica 'r1' from shard 's1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb/'" 2>&1| grep -Fac "There is a local database" $CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from zkpath '/test/$CLICKHOUSE_DATABASE/'" 2>&1| grep -Fac "does not look like a path of Replicated database" $CLICKHOUSE_CLIENT -q "system drop database replica 's2|r1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb'" 2>&1| grep -Fac "does not exist" +$CLICKHOUSE_CLIENT -q "system drop database replica 's1' from shard 'r1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb'" 2>&1| grep -Fac "does not exist" +$CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from shard 's1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb'" 2>&1| grep -Fac "does not exist" $CLICKHOUSE_CLIENT -q "system drop database replica 's2/r1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb'" 2>&1| grep -Fac "Invalid replica name" db2="${db}_2" +db3="${db}_3" $CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 -q "create database $db2 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's1', 'r2')" +$CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 -q "create database $db3 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's2', 'r1')" $CLICKHOUSE_CLIENT -q "system sync database replica $db" -$CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num from system.clusters where cluster='$db' order by shard_num, replica_num" +$CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num, database_shard_name, database_replica_name, is_active from system.clusters where cluster='$db' and shard_num=1 and replica_num=1" $CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from database $db2" 2>&1| grep -Fac "is active, cannot drop it" +$CLICKHOUSE_CLIENT -q "detach database $db3" +$CLICKHOUSE_CLIENT -q "system drop database replica 'r1' from shard 's2' from database $db" +$CLICKHOUSE_CLIENT -q "attach database $db3" 2>/dev/null +$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none -q "create table $db3.t2 as system.query_log" 2>&1| grep -Fac "Database is in readonly mode" # Suppress style check: current_database=$CLICKHOUSE_DATABASE + $CLICKHOUSE_CLIENT -q "detach database $db2" +$CLICKHOUSE_CLIENT -q "system sync database replica $db" +$CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num, database_shard_name, database_replica_name, is_active from system.clusters where cluster='$db' order by shard_num, replica_num" $CLICKHOUSE_CLIENT -q "system drop database replica 's1|r2' from database $db" $CLICKHOUSE_CLIENT -q "attach database $db2" 2>/dev/null $CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none -q "create table $db2.t2 as system.query_log" 2>&1| grep -Fac "Database is in readonly mode" # Suppress style check: current_database=$CLICKHOUSE_DATABASE $CLICKHOUSE_CLIENT -q "detach database $db" -$CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb/'" +$CLICKHOUSE_CLIENT -q "system drop database replica 'r1' from shard 's1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb/'" $CLICKHOUSE_CLIENT -q "attach database $db" 2>/dev/null $CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none -q "create table $db.t2 as system.query_log" 2>&1| grep -Fac "Database is in 
readonly mode" # Suppress style check: current_database=$CLICKHOUSE_DATABASE $CLICKHOUSE_CLIENT -q "show tables from $db" -db3="${db}_3" -$CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 -q "create database $db3 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's1', 'r1')" -$CLICKHOUSE_CLIENT -q "system sync database replica $db3" -$CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num from system.clusters where cluster='$db3'" +db4="${db}_4" +$CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 -q "create database $db4 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's1', 'r1')" +$CLICKHOUSE_CLIENT -q "system sync database replica $db4" +$CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num, database_shard_name, database_replica_name, is_active from system.clusters where cluster='$db4'" $CLICKHOUSE_CLIENT -q "drop database $db" $CLICKHOUSE_CLIENT -q "drop database $db2" $CLICKHOUSE_CLIENT -q "drop database $db3" +$CLICKHOUSE_CLIENT -q "drop database $db4" diff --git a/tests/queries/0_stateless/02451_order_by_monotonic.reference b/tests/queries/0_stateless/02451_order_by_monotonic.reference index d3de324a7e1..f9f0ef38be1 100644 --- a/tests/queries/0_stateless/02451_order_by_monotonic.reference +++ b/tests/queries/0_stateless/02451_order_by_monotonic.reference @@ -4,19 +4,19 @@ 2022-09-09 12:00:00 0x 2022-09-09 12:00:00 1 2022-09-09 12:00:00 1x - Prefix sort description: toStartOfMinute(t) ASC - Result sort description: toStartOfMinute(t) ASC, c1 ASC - Prefix sort description: toStartOfMinute(t) ASC - Result sort description: toStartOfMinute(t) ASC - Prefix sort description: negate(a) ASC - Result sort description: negate(a) ASC - Prefix sort description: negate(a) ASC, negate(b) ASC - Result sort description: negate(a) ASC, negate(b) ASC - Prefix sort description: a DESC, negate(b) ASC - Result sort description: a DESC, negate(b) ASC - Prefix sort description: negate(a) ASC, b DESC - Result sort description: negate(a) ASC, b DESC - Prefix sort description: negate(a) ASC - Result sort description: negate(a) ASC, b ASC - Prefix sort description: a ASC - Result sort description: a ASC, negate(b) ASC + Prefix sort description: toStartOfMinute(test.t_0) ASC + Result sort description: toStartOfMinute(test.t_0) ASC, test.c1_1 ASC + Prefix sort description: toStartOfMinute(test.t_0) ASC + Result sort description: toStartOfMinute(test.t_0) ASC + Prefix sort description: negate(test.a_0) ASC + Result sort description: negate(test.a_0) ASC + Prefix sort description: negate(test.a_0) ASC, negate(test.b_1) ASC + Result sort description: negate(test.a_0) ASC, negate(test.b_1) ASC + Prefix sort description: test.a_0 DESC, negate(test.b_1) ASC + Result sort description: test.a_0 DESC, negate(test.b_1) ASC + Prefix sort description: negate(test.a_0) ASC, test.b_1 DESC + Result sort description: negate(test.a_0) ASC, test.b_1 DESC + Prefix sort description: negate(test.a_0) ASC + Result sort description: negate(test.a_0) ASC, test.b_1 ASC + Prefix sort description: test.a_0 ASC + Result sort description: test.a_0 ASC, negate(test.b_1) ASC diff --git a/tests/queries/0_stateless/02451_order_by_monotonic.sh b/tests/queries/0_stateless/02451_order_by_monotonic.sh index cc26ba91e1c..7d1356b4445 100755 --- a/tests/queries/0_stateless/02451_order_by_monotonic.sh +++ b/tests/queries/0_stateless/02451_order_by_monotonic.sh @@ -4,37 +4,41 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh +opts=( + "--allow_experimental_analyzer=1" +) + function explain_sort_description() { - out=$($CLICKHOUSE_CLIENT --optimize_read_in_order=1 -q "EXPLAIN PLAN actions = 1 $1") + out=$($CLICKHOUSE_CLIENT "${opts[@]}" --optimize_read_in_order=1 -q "EXPLAIN PLAN actions = 1 $1") echo "$out" | grep "Prefix sort description:" echo "$out" | grep "Result sort description:" } -$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_order_by_monotonic" -$CLICKHOUSE_CLIENT -q "CREATE TABLE t_order_by_monotonic (t DateTime, c1 String) ENGINE = MergeTree ORDER BY (t, c1) +$CLICKHOUSE_CLIENT "${opts[@]}" -q "DROP TABLE IF EXISTS t_order_by_monotonic" +$CLICKHOUSE_CLIENT "${opts[@]}" -q "CREATE TABLE t_order_by_monotonic (t DateTime, c1 String) ENGINE = MergeTree ORDER BY (t, c1) AS SELECT '2022-09-09 12:00:00', toString(number % 2) FROM numbers(2) UNION ALL SELECT '2022-09-09 12:00:30', toString(number % 2)|| 'x' FROM numbers(3)" -$CLICKHOUSE_CLIENT --optimize_aggregation_in_order=1 -q "SELECT count() FROM - (SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic GROUP BY s, c1)" +$CLICKHOUSE_CLIENT "${opts[@]}" --optimize_aggregation_in_order=1 -q "SELECT count() FROM + (SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic AS test GROUP BY s, c1)" -$CLICKHOUSE_CLIENT --optimize_read_in_order=1 -q "SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic ORDER BY s, c1" +$CLICKHOUSE_CLIENT "${opts[@]}" --optimize_read_in_order=1 -q "SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic AS test ORDER BY s, c1" -explain_sort_description "SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic ORDER BY s, c1" -explain_sort_description "SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic ORDER BY s" +explain_sort_description "SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic AS test ORDER BY s, c1" +explain_sort_description "SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic AS test ORDER BY s" -$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_order_by_monotonic" +$CLICKHOUSE_CLIENT "${opts[@]}" -q "DROP TABLE IF EXISTS t_order_by_monotonic" -$CLICKHOUSE_CLIENT -q "CREATE TABLE t_order_by_monotonic (a Int64, b Int64) ENGINE = MergeTree ORDER BY (a, b)" +$CLICKHOUSE_CLIENT "${opts[@]}" -q "CREATE TABLE t_order_by_monotonic (a Int64, b Int64) ENGINE = MergeTree ORDER BY (a, b)" -$CLICKHOUSE_CLIENT -q "INSERT INTO t_order_by_monotonic VALUES (1, 1) (1, 2), (2, 1) (2, 2)" +$CLICKHOUSE_CLIENT "${opts[@]}" -q "INSERT INTO t_order_by_monotonic VALUES (1, 1) (1, 2), (2, 1) (2, 2)" -explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY -a" -explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY -a, -b" -explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY a DESC, -b" -explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY -a, b DESC" -explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY -a, b" -explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY a, -b" +explain_sort_description "SELECT * FROM t_order_by_monotonic AS test ORDER BY -a" +explain_sort_description "SELECT * FROM t_order_by_monotonic AS test ORDER BY -a, -b" +explain_sort_description "SELECT * FROM t_order_by_monotonic AS test ORDER BY a DESC, -b" +explain_sort_description "SELECT * FROM t_order_by_monotonic AS test ORDER BY -a, b DESC" +explain_sort_description "SELECT * FROM t_order_by_monotonic AS test ORDER BY -a, b" +explain_sort_description "SELECT * FROM t_order_by_monotonic AS 
test ORDER BY a, -b" -$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_order_by_monotonic" +$CLICKHOUSE_CLIENT "${opts[@]}" -q "DROP TABLE IF EXISTS t_order_by_monotonic" diff --git a/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh b/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh index 7506e78455d..5b54666a6a3 100755 --- a/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh +++ b/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh @@ -1,11 +1,15 @@ #!/usr/bin/env bash +# Tags: no-parallel CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -user_files_path=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}') -cp "$CUR_DIR"/data_csv/10m_rows.csv.xz $user_files_path/ +USER_FILES_PATH=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}') -${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('$user_files_path/10m_rows.csv.xz' , 'CSVWithNames') LIMIT 1 settings max_memory_usage=1000000000" -${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('$user_files_path/10m_rows.csv.xz' , 'CSVWithNames') LIMIT 1 settings max_memory_usage=100000000" +cp "$CUR_DIR"/data_csv/10m_rows.csv.xz $USER_FILES_PATH/ + +${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings max_threads=1, max_memory_usage=1000000000" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings max_threads=1, max_memory_usage=100000000" + +rm $USER_FILES_PATH/10m_rows.csv.xz diff --git a/tests/queries/0_stateless/02456_async_inserts_logs.reference b/tests/queries/0_stateless/02456_async_inserts_logs.reference index efd8a88eca4..ba1b19fb184 100644 --- a/tests/queries/0_stateless/02456_async_inserts_logs.reference +++ b/tests/queries/0_stateless/02456_async_inserts_logs.reference @@ -1,7 +1,10 @@ 5 - Values 21 1 Ok 1 -t_async_inserts_logs JSONEachRow 39 1 Ok 1 -t_async_inserts_logs Values 8 1 Ok 1 -t_async_inserts_logs JSONEachRow 6 0 ParsingError 1 -t_async_inserts_logs Values 6 0 ParsingError 1 -t_async_inserts_logs Values 8 0 FlushError 1 + Values 21 2 1 Ok 1 +t_async_inserts_logs JSONEachRow 39 2 1 Ok 1 +t_async_inserts_logs Values 8 1 1 Ok 1 +t_async_inserts_logs JSONEachRow 6 0 0 ParsingError 1 +t_async_inserts_logs Values 6 0 0 ParsingError 1 +t_async_inserts_logs Values 8 1 0 FlushError 1 +AsyncInsertBytes 1 +AsyncInsertQuery 1 +AsyncInsertRows 1 diff --git a/tests/queries/0_stateless/02456_async_inserts_logs.sh b/tests/queries/0_stateless/02456_async_inserts_logs.sh index 006455e2d42..43cd73d7231 100755 --- a/tests/queries/0_stateless/02456_async_inserts_logs.sh +++ b/tests/queries/0_stateless/02456_async_inserts_logs.sh @@ -30,10 +30,15 @@ ${CLICKHOUSE_CLIENT} -q "SELECT count() FROM t_async_inserts_logs" ${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" ${CLICKHOUSE_CLIENT} -q " - SELECT table, format, bytes, empty(exception), status, + SELECT table, format, bytes, rows, empty(exception), status, status = 'ParsingError' ? 
flush_time_microseconds = 0 : flush_time_microseconds > event_time_microseconds AS time_ok FROM system.asynchronous_insert_log WHERE database = '$CLICKHOUSE_DATABASE' OR query ILIKE 'INSERT INTO FUNCTION%$CLICKHOUSE_DATABASE%' ORDER BY table, status, format" ${CLICKHOUSE_CLIENT} -q "DROP TABLE t_async_inserts_logs" + +${CLICKHOUSE_CLIENT} -q " +SELECT event, value > 0 FROM system.events +WHERE event IN ('AsyncInsertQuery', 'AsyncInsertBytes', 'AsyncInsertRows') +ORDER BY event" diff --git a/tests/queries/0_stateless/02457_s3_cluster_schema_inference.reference b/tests/queries/0_stateless/02457_s3_cluster_schema_inference.reference index b918bf2b155..8d744ba7b46 100644 --- a/tests/queries/0_stateless/02457_s3_cluster_schema_inference.reference +++ b/tests/queries/0_stateless/02457_s3_cluster_schema_inference.reference @@ -10,6 +10,138 @@ c3 Nullable(Int64) c1 Nullable(Int64) c2 Nullable(Int64) c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 0 0 0 0 0 0 1 2 3 diff --git a/tests/queries/0_stateless/02457_s3_cluster_schema_inference.sql b/tests/queries/0_stateless/02457_s3_cluster_schema_inference.sql index 03e8785b24b..6182a1a222e 100644 --- a/tests/queries/0_stateless/02457_s3_cluster_schema_inference.sql +++ b/tests/queries/0_stateless/02457_s3_cluster_schema_inference.sql @@ -5,9 +5,34 @@ desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localh desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV'); desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'test', 'testtest'); desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'test', 'testtest', 'TSV'); +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'auto'); +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV', 'auto'); +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 
'http://localhost:11111/test/{a,b}.tsv', 'TSV', 'auto', 'auto'); +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'test', 'testtest', 'auto'); +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'test', 'testtest', 'TSV', 'auto'); +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'test', 'testtest', 'TSV', 'auto', 'auto'); +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', NOSIGN); +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', NOSIGN, 'TSV'); +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', NOSIGN, 'TSV', 'auto'); +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', NOSIGN, 'TSV', 'auto', 'auto'); +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', headers(MyCustomHeader = 'SomeValue')); +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV', 'auto', headers(MyCustomHeader = 'SomeValue'), 'auto'); + select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv') order by c1, c2, c3; select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV') order by c1, c2, c3; select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'test', 'testtest') order by c1, c2, c3; select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'test', 'testtest', 'TSV') order by c1, c2, c3; +select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'auto') order by c1, c2, c3; +select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV', 'auto') order by c1, c2, c3; +select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV', 'auto', 'auto') order by c1, c2, c3; +select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'test', 'testtest', 'auto') order by c1, c2, c3; +select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'test', 'testtest', 'TSV', 'auto') order by c1, c2, c3; +select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'test', 'testtest', 'TSV', 'auto', 'auto') order by c1, c2, c3; +select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', NOSIGN) order by c1, c2, c3; +select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', NOSIGN, 'TSV') order by c1, c2, c3; +select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', NOSIGN, 'TSV', 'auto') order by c1, c2, c3; +select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', NOSIGN, 'TSV', 'auto', 'auto') order by c1, c2, c3; +select * 
from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', headers(MyCustomHeader = 'SomeValue')) order by c1, c2, c3; +select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV', 'auto', headers(MyCustomHeader = 'SomeValue'), 'auto') order by c1, c2, c3; diff --git a/tests/queries/0_stateless/02458_insert_select_progress_tcp.python b/tests/queries/0_stateless/02458_insert_select_progress_tcp.python index c638b3d2040..696eb01ff7e 100644 --- a/tests/queries/0_stateless/02458_insert_select_progress_tcp.python +++ b/tests/queries/0_stateless/02458_insert_select_progress_tcp.python @@ -5,13 +5,13 @@ import os import uuid import json -CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1') -CLICKHOUSE_PORT = int(os.environ.get('CLICKHOUSE_PORT_TCP', '900000')) -CLICKHOUSE_DATABASE = os.environ.get('CLICKHOUSE_DATABASE', 'default') +CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1") +CLICKHOUSE_PORT = int(os.environ.get("CLICKHOUSE_PORT_TCP", "900000")) +CLICKHOUSE_DATABASE = os.environ.get("CLICKHOUSE_DATABASE", "default") + def writeVarUInt(x, ba): for _ in range(0, 9): - byte = x & 0x7F if x > 0x7F: byte |= 0x80 @@ -24,12 +24,12 @@ def writeVarUInt(x, ba): def writeStringBinary(s, ba): - b = bytes(s, 'utf-8') + b = bytes(s, "utf-8") writeVarUInt(len(s), ba) ba.extend(b) -def readStrict(s, size = 1): +def readStrict(s, size=1): res = bytearray() while size: cur = s.recv(size) @@ -48,18 +48,23 @@ def readUInt(s, size=1): val += res[i] << (i * 8) return val + def readUInt8(s): return readUInt(s) + def readUInt16(s): return readUInt(s, 2) + def readUInt32(s): return readUInt(s, 4) + def readUInt64(s): return readUInt(s, 8) + def readVarUInt(s): x = 0 for i in range(9): @@ -75,25 +80,25 @@ def readVarUInt(s): def readStringBinary(s): size = readVarUInt(s) s = readStrict(s, size) - return s.decode('utf-8') + return s.decode("utf-8") def sendHello(s): ba = bytearray() - writeVarUInt(0, ba) # Hello - writeStringBinary('simple native protocol', ba) + writeVarUInt(0, ba) # Hello + writeStringBinary("simple native protocol", ba) writeVarUInt(21, ba) writeVarUInt(9, ba) writeVarUInt(54449, ba) - writeStringBinary(CLICKHOUSE_DATABASE, ba) # database - writeStringBinary('default', ba) # user - writeStringBinary('', ba) # pwd + writeStringBinary(CLICKHOUSE_DATABASE, ba) # database + writeStringBinary("default", ba) # user + writeStringBinary("", ba) # pwd s.sendall(ba) def receiveHello(s): p_type = readVarUInt(s) - assert (p_type == 0) # Hello + assert p_type == 0 # Hello server_name = readStringBinary(s) # print("Server name: ", server_name) server_version_major = readVarUInt(s) @@ -111,65 +116,65 @@ def receiveHello(s): def serializeClientInfo(ba, query_id): - writeStringBinary('default', ba) # initial_user - writeStringBinary(query_id, ba) # initial_query_id - writeStringBinary('127.0.0.1:9000', ba) # initial_address - ba.extend([0] * 8) # initial_query_start_time_microseconds - ba.append(1) # TCP - writeStringBinary('os_user', ba) # os_user - writeStringBinary('client_hostname', ba) # client_hostname - writeStringBinary('client_name', ba) # client_name + writeStringBinary("default", ba) # initial_user + writeStringBinary(query_id, ba) # initial_query_id + writeStringBinary("127.0.0.1:9000", ba) # initial_address + ba.extend([0] * 8) # initial_query_start_time_microseconds + ba.append(1) # TCP + writeStringBinary("os_user", ba) # os_user + 
writeStringBinary("client_hostname", ba) # client_hostname + writeStringBinary("client_name", ba) # client_name writeVarUInt(21, ba) writeVarUInt(9, ba) writeVarUInt(54449, ba) - writeStringBinary('', ba) # quota_key - writeVarUInt(0, ba) # distributed_depth - writeVarUInt(1, ba) # client_version_patch - ba.append(0) # No telemetry + writeStringBinary("", ba) # quota_key + writeVarUInt(0, ba) # distributed_depth + writeVarUInt(1, ba) # client_version_patch + ba.append(0) # No telemetry def sendQuery(s, query): ba = bytearray() query_id = uuid.uuid4().hex - writeVarUInt(1, ba) # query + writeVarUInt(1, ba) # query writeStringBinary(query_id, ba) - ba.append(1) # INITIAL_QUERY + ba.append(1) # INITIAL_QUERY # client info serializeClientInfo(ba, query_id) - writeStringBinary('', ba) # No settings - writeStringBinary('', ba) # No interserver secret - writeVarUInt(2, ba) # Stage - Complete - ba.append(0) # No compression - writeStringBinary(query, ba) # query, finally + writeStringBinary("", ba) # No settings + writeStringBinary("", ba) # No interserver secret + writeVarUInt(2, ba) # Stage - Complete + ba.append(0) # No compression + writeStringBinary(query, ba) # query, finally s.sendall(ba) def serializeBlockInfo(ba): - writeVarUInt(1, ba) # 1 - ba.append(0) # is_overflows - writeVarUInt(2, ba) # 2 - writeVarUInt(0, ba) # 0 - ba.extend([0] * 4) # bucket_num + writeVarUInt(1, ba) # 1 + ba.append(0) # is_overflows + writeVarUInt(2, ba) # 2 + writeVarUInt(0, ba) # 0 + ba.extend([0] * 4) # bucket_num def sendEmptyBlock(s): ba = bytearray() - writeVarUInt(2, ba) # Data - writeStringBinary('', ba) + writeVarUInt(2, ba) # Data + writeStringBinary("", ba) serializeBlockInfo(ba) - writeVarUInt(0, ba) # rows - writeVarUInt(0, ba) # columns + writeVarUInt(0, ba) # rows + writeVarUInt(0, ba) # columns s.sendall(ba) def assertPacket(packet, expected): - assert(packet == expected), packet + assert packet == expected, packet -class Progress(): +class Progress: def __init__(self): # NOTE: this is done in ctor to initialize __dict__ self.read_rows = 0 @@ -198,11 +203,12 @@ class Progress(): def __bool__(self): return ( - self.read_rows > 0 or - self.read_bytes > 0 or - self.total_rows_to_read > 0 or - self.written_rows > 0 or - self.written_bytes > 0) + self.read_rows > 0 + or self.read_bytes > 0 + or self.total_rows_to_read > 0 + or self.written_rows > 0 + or self.written_bytes > 0 + ) def readProgress(s): @@ -219,13 +225,14 @@ def readProgress(s): progress.readPacket(s) return progress + def readException(s): code = readUInt32(s) name = readStringBinary(s) text = readStringBinary(s) - readStringBinary(s) # trace - assertPacket(readUInt8(s), 0) # has_nested - return "code {}: {}".format(code, text.replace('DB::Exception:', '')) + readStringBinary(s) # trace + assertPacket(readUInt8(s), 0) # has_nested + return "code {}: {}".format(code, text.replace("DB::Exception:", "")) def main(): @@ -236,7 +243,10 @@ def main(): receiveHello(s) # For 1 second sleep and 1000ms of interactive_delay we definitelly should have non zero progress packet. # NOTE: interactive_delay=0 cannot be used since in this case CompletedPipelineExecutor will not call cancelled callback. 
- sendQuery(s, "insert into function null('_ Int') select sleep(1) from numbers(2) settings max_block_size=1, interactive_delay=1000") + sendQuery( + s, + "insert into function null('_ Int') select sleep(1) from numbers(2) settings max_block_size=1, interactive_delay=1000", + ) # external tables sendEmptyBlock(s) diff --git a/tests/queries/0_stateless/02458_use_structure_from_insertion_table.reference b/tests/queries/0_stateless/02458_use_structure_from_insertion_table.reference index 0ca28640270..53abb49c4e1 100644 --- a/tests/queries/0_stateless/02458_use_structure_from_insertion_table.reference +++ b/tests/queries/0_stateless/02458_use_structure_from_insertion_table.reference @@ -1,9 +1,12 @@ +\N 0 \N 1 1 2 \N 42 \N 42 \N 42 \N 42 +42 +42 \N \N \N diff --git a/tests/queries/0_stateless/02458_use_structure_from_insertion_table.sql b/tests/queries/0_stateless/02458_use_structure_from_insertion_table.sql index a609dc361fe..97d493fa031 100644 --- a/tests/queries/0_stateless/02458_use_structure_from_insertion_table.sql +++ b/tests/queries/0_stateless/02458_use_structure_from_insertion_table.sql @@ -10,7 +10,7 @@ insert into test select * from file(02458_data.jsonl); insert into test select x, 1 from file(02458_data.jsonl); insert into test select x, y from file(02458_data.jsonl); insert into test select x + 1, y from file(02458_data.jsonl); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} -insert into test select x, z from file(02458_data.jsonl); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} +insert into test select x, z from file(02458_data.jsonl); insert into test select * from file(02458_data.jsoncompacteachrow); insert into test select x, 1 from file(02458_data.jsoncompacteachrow); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} @@ -28,8 +28,8 @@ drop table test; create table test (x Nullable(UInt32)) engine=Memory(); insert into test select * from file(02458_data.jsonl); insert into test select x from file(02458_data.jsonl); -insert into test select y from file(02458_data.jsonl); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} -insert into test select y as x from file(02458_data.jsonl); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} +insert into test select y from file(02458_data.jsonl); +insert into test select y as x from file(02458_data.jsonl); insert into test select c1 from input() format CSV 1,2; -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} insert into test select x from input() format JSONEachRow {"x" : null, "y" : 42} diff --git a/tests/queries/0_stateless/02466_distributed_query_profiler.sql b/tests/queries/0_stateless/02466_distributed_query_profiler.sql index 9fc2fe7b4bd..171cc2a7563 100644 --- a/tests/queries/0_stateless/02466_distributed_query_profiler.sql +++ b/tests/queries/0_stateless/02466_distributed_query_profiler.sql @@ -12,10 +12,4 @@ settings -- This is to activate as much signals as possible to trigger EINTR query_profiler_real_time_period_ns=1, -- This is to use MultiplexedConnections - use_hedged_requests=0, - -- This is to make the initiator waiting for cancel packet in MultiplexedConnections::getReplicaForReading() - -- - -- NOTE: that even smaller sleep will be enough to trigger this problem - -- with 100% probability, however just to make it more reliable, increase - -- it to 2 seconds. 
- sleep_in_receive_cancel_ms=2000; + use_hedged_requests=0; diff --git a/tests/queries/0_stateless/02473_multistep_prewhere.python b/tests/queries/0_stateless/02473_multistep_prewhere.python index 37a7280dac2..a942568233c 100644 --- a/tests/queries/0_stateless/02473_multistep_prewhere.python +++ b/tests/queries/0_stateless/02473_multistep_prewhere.python @@ -4,18 +4,19 @@ import os import sys CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient class Tester: - ''' + """ - Creates test table - Deletes the specified range of rows - Masks another range using row-level policy - Runs some read queries and checks that the results are as expected - ''' + """ + def __init__(self, session, url, index_granularity, total_rows): self.session = session self.url = url @@ -25,10 +26,10 @@ class Tester: self.repro_queries = [] def report_error(self): - print('Repro steps:', '\n\n\t'.join(self.repro_queries)) + print("Repro steps:", "\n\n\t".join(self.repro_queries)) exit(1) - def query(self, query_text, include_in_repro_steps = True, expected_data = None): + def query(self, query_text, include_in_repro_steps=True, expected_data=None): self.repro_queries.append(query_text) resp = self.session.post(self.url, data=query_text) if resp.status_code != 200: @@ -36,113 +37,187 @@ class Tester: error = resp.text[0:40] if error not in self.reported_errors: self.reported_errors.add(error) - print('Code:', resp.status_code) - print('Result:', resp.text) + print("Code:", resp.status_code) + print("Result:", resp.text) self.report_error() result = resp.text # Check that the result is as expected - if ((not expected_data is None) and (int(result) != len(expected_data))): - print('Expected {} rows, got {}'.format(len(expected_data), result)) - print('Expected data:' + str(expected_data)) + if (not expected_data is None) and (int(result) != len(expected_data)): + print("Expected {} rows, got {}".format(len(expected_data), result)) + print("Expected data:" + str(expected_data)) self.report_error() if not include_in_repro_steps: self.repro_queries.pop() - - def check_data(self, all_data, delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end): + def check_data( + self, + all_data, + delete_range_start, + delete_range_end, + row_level_policy_range_start, + row_level_policy_range_end, + ): all_data_after_delete = all_data[ - ~((all_data.a == 0) & - (all_data.b > delete_range_start) & - (all_data.b <= delete_range_end))] + ~( + (all_data.a == 0) + & (all_data.b > delete_range_start) + & (all_data.b <= delete_range_end) + ) + ] all_data_after_row_policy = all_data_after_delete[ - (all_data_after_delete.b <= row_level_policy_range_start) | - (all_data_after_delete.b > row_level_policy_range_end)] + (all_data_after_delete.b <= row_level_policy_range_start) + | (all_data_after_delete.b > row_level_policy_range_end) + ] - for to_select in ['count()', 'sum(d)']: # Test reading with and without column with default value - self.query('SELECT {} FROM tab_02473;'.format(to_select), False, all_data_after_row_policy) + for to_select in [ + "count()", + "sum(d)", + ]: # Test reading with and without column with default value + self.query( + "SELECT {} FROM tab_02473;".format(to_select), + False, + all_data_after_row_policy, + ) delta = 10 for query_range_start in [0, delta]: - for query_range_end in [self.total_rows - delta]: #, self.total_rows]: + for 
query_range_end in [self.total_rows - delta]: # , self.total_rows]: expected = all_data_after_row_policy[ - (all_data_after_row_policy.a == 0) & - (all_data_after_row_policy.b > query_range_start) & - (all_data_after_row_policy.b <= query_range_end)] - self.query('SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} WHERE a == 0;'.format( - to_select, query_range_start, query_range_end), False, expected) + (all_data_after_row_policy.a == 0) + & (all_data_after_row_policy.b > query_range_start) + & (all_data_after_row_policy.b <= query_range_end) + ] + self.query( + "SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} WHERE a == 0;".format( + to_select, query_range_start, query_range_end + ), + False, + expected, + ) expected = all_data_after_row_policy[ - (all_data_after_row_policy.a == 0) & - (all_data_after_row_policy.c > query_range_start) & - (all_data_after_row_policy.c <= query_range_end)] - self.query('SELECT {} from tab_02473 PREWHERE c > {} AND c <= {} WHERE a == 0;'.format( - to_select, query_range_start, query_range_end), False, expected) + (all_data_after_row_policy.a == 0) + & (all_data_after_row_policy.c > query_range_start) + & (all_data_after_row_policy.c <= query_range_end) + ] + self.query( + "SELECT {} from tab_02473 PREWHERE c > {} AND c <= {} WHERE a == 0;".format( + to_select, query_range_start, query_range_end + ), + False, + expected, + ) expected = all_data_after_row_policy[ - (all_data_after_row_policy.a == 0) & - ((all_data_after_row_policy.c <= query_range_start) | - (all_data_after_row_policy.c > query_range_end))] - self.query('SELECT {} from tab_02473 PREWHERE c <= {} OR c > {} WHERE a == 0;'.format( - to_select, query_range_start, query_range_end), False, expected) + (all_data_after_row_policy.a == 0) + & ( + (all_data_after_row_policy.c <= query_range_start) + | (all_data_after_row_policy.c > query_range_end) + ) + ] + self.query( + "SELECT {} from tab_02473 PREWHERE c <= {} OR c > {} WHERE a == 0;".format( + to_select, query_range_start, query_range_end + ), + False, + expected, + ) - - def run_test(self, delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end): + def run_test( + self, + delete_range_start, + delete_range_end, + row_level_policy_range_start, + row_level_policy_range_end, + ): self.repro_queries = [] - self.query(''' + self.query( + """ CREATE TABLE tab_02473 (a Int8, b Int32, c Int32, PRIMARY KEY (a)) ENGINE = MergeTree() ORDER BY (a, b) - SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};'''.format(self.index_granularity)) + SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};""".format( + self.index_granularity + ) + ) - self.query('INSERT INTO tab_02473 select 0, number+1, number+1 FROM numbers({});'.format(self.total_rows)) + self.query( + "INSERT INTO tab_02473 select 0, number+1, number+1 FROM numbers({});".format( + self.total_rows + ) + ) client = ClickHouseClient() - all_data = client.query_return_df("SELECT a, b, c, 1 as d FROM tab_02473 FORMAT TabSeparatedWithNames;") + all_data = client.query_return_df( + "SELECT a, b, c, 1 as d FROM tab_02473 FORMAT TabSeparatedWithNames;" + ) - self.query('OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;') + self.query("OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;") # After all data has been written add a column with default value - self.query('ALTER TABLE tab_02473 ADD COLUMN d Int64 DEFAULT 1;') + self.query("ALTER TABLE tab_02473 ADD COLUMN d Int64 DEFAULT 1;") self.check_data(all_data, -100, -100, -100, 
-100) - self.query('DELETE FROM tab_02473 WHERE a = 0 AND b > {} AND b <= {};'.format( - delete_range_start, delete_range_end)) + self.query( + "DELETE FROM tab_02473 WHERE a = 0 AND b > {} AND b <= {};".format( + delete_range_start, delete_range_end + ) + ) self.check_data(all_data, delete_range_start, delete_range_end, -100, -100) - self.query('CREATE ROW POLICY policy_tab_02473 ON tab_02473 FOR SELECT USING b <= {} OR b > {} TO default;'.format( - row_level_policy_range_start, row_level_policy_range_end)) + self.query( + "CREATE ROW POLICY policy_tab_02473 ON tab_02473 FOR SELECT USING b <= {} OR b > {} TO default;".format( + row_level_policy_range_start, row_level_policy_range_end + ) + ) - self.check_data(all_data, delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end) + self.check_data( + all_data, + delete_range_start, + delete_range_end, + row_level_policy_range_start, + row_level_policy_range_end, + ) - self.query('DROP POLICY policy_tab_02473 ON tab_02473;') - - self.query('DROP TABLE tab_02473;') + self.query("DROP POLICY policy_tab_02473 ON tab_02473;") + self.query("DROP TABLE tab_02473;") def main(): # Set mutations to synchronous mode and enable lightweight DELETE's - url = os.environ['CLICKHOUSE_URL'] + '&max_threads=1' + url = os.environ["CLICKHOUSE_URL"] + "&max_threads=1" - default_index_granularity = 10; + default_index_granularity = 10 total_rows = 8 * default_index_granularity step = default_index_granularity session = requests.Session() - for index_granularity in [default_index_granularity-1, default_index_granularity]: # [default_index_granularity-1, default_index_granularity+1, default_index_granularity]: + for index_granularity in [ + default_index_granularity - 1, + default_index_granularity, + ]: # [default_index_granularity-1, default_index_granularity+1, default_index_granularity]: tester = Tester(session, url, index_granularity, total_rows) # Test combinations of ranges of various size masked by lightweight DELETES # along with ranges of various size masked by row-level policies for delete_range_start in range(0, total_rows, 3 * step): - for delete_range_end in range(delete_range_start + 3 * step, total_rows, 2 * step): + for delete_range_end in range( + delete_range_start + 3 * step, total_rows, 2 * step + ): for row_level_policy_range_start in range(0, total_rows, 3 * step): - for row_level_policy_range_end in range(row_level_policy_range_start + 3 * step, total_rows, 2 * step): - tester.run_test(delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end) + for row_level_policy_range_end in range( + row_level_policy_range_start + 3 * step, total_rows, 2 * step + ): + tester.run_test( + delete_range_start, + delete_range_end, + row_level_policy_range_start, + row_level_policy_range_end, + ) if __name__ == "__main__": main() - diff --git a/tests/queries/0_stateless/02473_multistep_split_prewhere.python b/tests/queries/0_stateless/02473_multistep_split_prewhere.python index 41d8a746e11..19444994fd2 100644 --- a/tests/queries/0_stateless/02473_multistep_split_prewhere.python +++ b/tests/queries/0_stateless/02473_multistep_split_prewhere.python @@ -4,16 +4,17 @@ import os import sys CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient class Tester: - ''' + """ - Creates test table with multiple integer columns - Runs read queries with 
multiple range conditions on different columns in PREWHERE and check that the result is correct - ''' + """ + def __init__(self, session, url, index_granularity, total_rows): self.session = session self.url = url @@ -23,10 +24,10 @@ class Tester: self.repro_queries = [] def report_error(self): - print('Repro steps:', '\n\n\t'.join(self.repro_queries)) + print("Repro steps:", "\n\n\t".join(self.repro_queries)) exit(1) - def query(self, query_text, include_in_repro_steps = True, expected_data = None): + def query(self, query_text, include_in_repro_steps=True, expected_data=None): self.repro_queries.append(query_text) resp = self.session.post(self.url, data=query_text) if resp.status_code != 200: @@ -34,98 +35,150 @@ class Tester: error = resp.text[0:40] if error not in self.reported_errors: self.reported_errors.add(error) - print('Code:', resp.status_code) - print('Result:', resp.text) + print("Code:", resp.status_code) + print("Result:", resp.text) self.report_error() result = resp.text # Check that the result is as expected - if ((not expected_data is None) and (int(result) != len(expected_data))): - print('Expected {} rows, got {}'.format(len(expected_data), result)) - print('Expected data:' + str(expected_data)) + if (not expected_data is None) and (int(result) != len(expected_data)): + print("Expected {} rows, got {}".format(len(expected_data), result)) + print("Expected data:" + str(expected_data)) self.report_error() if not include_in_repro_steps: self.repro_queries.pop() - - def check_data(self, all_data, c_range_start, c_range_end, d_range_start, d_range_end): - for to_select in ['count()', 'sum(e)']: # Test reading with and without column with default value - self.query('SELECT {} FROM tab_02473;'.format(to_select), False, all_data) + def check_data( + self, all_data, c_range_start, c_range_end, d_range_start, d_range_end + ): + for to_select in [ + "count()", + "sum(e)", + ]: # Test reading with and without column with default value + self.query("SELECT {} FROM tab_02473;".format(to_select), False, all_data) delta = 10 for b_range_start in [0, delta]: - for b_range_end in [self.total_rows - delta]: #, self.total_rows]: + for b_range_end in [self.total_rows - delta]: # , self.total_rows]: expected = all_data[ - (all_data.a == 0) & - (all_data.b > b_range_start) & - (all_data.b <= b_range_end)] - self.query('SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} WHERE a == 0;'.format( - to_select, b_range_start, b_range_end), False, expected) + (all_data.a == 0) + & (all_data.b > b_range_start) + & (all_data.b <= b_range_end) + ] + self.query( + "SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} WHERE a == 0;".format( + to_select, b_range_start, b_range_end + ), + False, + expected, + ) expected = all_data[ - (all_data.a == 0) & - (all_data.b > b_range_start) & - (all_data.b <= b_range_end) & - (all_data.c > c_range_start) & - (all_data.c <= c_range_end)] - self.query('SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} AND c > {} AND c <= {} WHERE a == 0;'.format( - to_select, b_range_start, b_range_end, c_range_start, c_range_end), False, expected) + (all_data.a == 0) + & (all_data.b > b_range_start) + & (all_data.b <= b_range_end) + & (all_data.c > c_range_start) + & (all_data.c <= c_range_end) + ] + self.query( + "SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} AND c > {} AND c <= {} WHERE a == 0;".format( + to_select, + b_range_start, + b_range_end, + c_range_start, + c_range_end, + ), + False, + expected, + ) expected = all_data[ - (all_data.a == 0) & - (all_data.b > 
b_range_start) & - (all_data.b <= b_range_end) & - (all_data.c > c_range_start) & - (all_data.c <= c_range_end) & - (all_data.d > d_range_start) & - (all_data.d <= d_range_end)] - self.query('SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} AND c > {} AND c <= {} AND d > {} AND d <= {} WHERE a == 0;'.format( - to_select, b_range_start, b_range_end, c_range_start, c_range_end, d_range_start, d_range_end), False, expected) - + (all_data.a == 0) + & (all_data.b > b_range_start) + & (all_data.b <= b_range_end) + & (all_data.c > c_range_start) + & (all_data.c <= c_range_end) + & (all_data.d > d_range_start) + & (all_data.d <= d_range_end) + ] + self.query( + "SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} AND c > {} AND c <= {} AND d > {} AND d <= {} WHERE a == 0;".format( + to_select, + b_range_start, + b_range_end, + c_range_start, + c_range_end, + d_range_start, + d_range_end, + ), + False, + expected, + ) def run_test(self, c_range_start, c_range_end, d_range_start, d_range_end): self.repro_queries = [] - self.query(''' + self.query( + """ CREATE TABLE tab_02473 (a Int8, b Int32, c Int32, d Int32, PRIMARY KEY (a)) ENGINE = MergeTree() ORDER BY (a, b) - SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};'''.format(self.index_granularity)) + SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};""".format( + self.index_granularity + ) + ) - self.query('INSERT INTO tab_02473 select 0, number+1, number+1, number+1 FROM numbers({});'.format(self.total_rows)) + self.query( + "INSERT INTO tab_02473 select 0, number+1, number+1, number+1 FROM numbers({});".format( + self.total_rows + ) + ) client = ClickHouseClient() - all_data = client.query_return_df("SELECT a, b, c, d, 1 as e FROM tab_02473 FORMAT TabSeparatedWithNames;") + all_data = client.query_return_df( + "SELECT a, b, c, d, 1 as e FROM tab_02473 FORMAT TabSeparatedWithNames;" + ) - self.query('OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;') + self.query("OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;") # After all data has been written add a column with default value - self.query('ALTER TABLE tab_02473 ADD COLUMN e Int64 DEFAULT 1;') + self.query("ALTER TABLE tab_02473 ADD COLUMN e Int64 DEFAULT 1;") - self.check_data(all_data, c_range_start, c_range_end, d_range_start, d_range_end) - - self.query('DROP TABLE tab_02473;') + self.check_data( + all_data, c_range_start, c_range_end, d_range_start, d_range_end + ) + self.query("DROP TABLE tab_02473;") def main(): # Enable multiple prewhere read steps - url = os.environ['CLICKHOUSE_URL'] + '&enable_multiple_prewhere_read_steps=1&move_all_conditions_to_prewhere=0&max_threads=1' + url = ( + os.environ["CLICKHOUSE_URL"] + + "&enable_multiple_prewhere_read_steps=1&move_all_conditions_to_prewhere=0&max_threads=1" + ) - default_index_granularity = 10; + default_index_granularity = 10 total_rows = 8 * default_index_granularity step = default_index_granularity session = requests.Session() - for index_granularity in [default_index_granularity-1, default_index_granularity]: + for index_granularity in [default_index_granularity - 1, default_index_granularity]: tester = Tester(session, url, index_granularity, total_rows) # Test combinations of ranges of columns c and d for c_range_start in range(0, total_rows, int(2.3 * step)): - for c_range_end in range(c_range_start + 3 * step, total_rows, int(2.1 * step)): - for d_range_start in range(int(0.5 * step), total_rows, int(2.7 * step)): - for d_range_end in range(d_range_start + 3 * step, total_rows, int(2.2 * 
step)): - tester.run_test(c_range_start, c_range_end, d_range_start, d_range_end) + for c_range_end in range( + c_range_start + 3 * step, total_rows, int(2.1 * step) + ): + for d_range_start in range( + int(0.5 * step), total_rows, int(2.7 * step) + ): + for d_range_end in range( + d_range_start + 3 * step, total_rows, int(2.2 * step) + ): + tester.run_test( + c_range_start, c_range_end, d_range_start, d_range_end + ) if __name__ == "__main__": main() - diff --git a/tests/queries/0_stateless/02473_optimize_old_parts.reference b/tests/queries/0_stateless/02473_optimize_old_parts.reference index 9002d73ff27..7d08cd5bbef 100644 --- a/tests/queries/0_stateless/02473_optimize_old_parts.reference +++ b/tests/queries/0_stateless/02473_optimize_old_parts.reference @@ -4,9 +4,4 @@ With merge any part range 1 With merge partition only 1 -With merge replicated any part range 1 -With merge replicated partition only -1 -With merge partition only and new parts -3 diff --git a/tests/queries/0_stateless/02473_optimize_old_parts.sh b/tests/queries/0_stateless/02473_optimize_old_parts.sh new file mode 100755 index 00000000000..0c2dd04d024 --- /dev/null +++ b/tests/queries/0_stateless/02473_optimize_old_parts.sh @@ -0,0 +1,67 @@ +#!/usr/bin/env bash +# Tags: long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Wait for number of parts in table $1 to become $2. +# Print the changed value. If no changes for $3 seconds, prints initial value. +wait_for_number_of_parts() { + for _ in `seq $3` + do + sleep 1 + res=`$CLICKHOUSE_CLIENT -q "SELECT count(*) FROM system.parts WHERE database = currentDatabase() AND table='$1' AND active"` + if [ "$res" -eq "$2" ] + then + echo "$res" + return + fi + done + echo "$res" +} + +$CLICKHOUSE_CLIENT -nmq " +DROP TABLE IF EXISTS test_without_merge; +DROP TABLE IF EXISTS test_with_merge; + +SELECT 'Without merge'; + +CREATE TABLE test_without_merge (i Int64) ENGINE = MergeTree ORDER BY i SETTINGS merge_selecting_sleep_ms=1000; +INSERT INTO test_without_merge SELECT 1; +INSERT INTO test_without_merge SELECT 2; +INSERT INTO test_without_merge SELECT 3;" + +wait_for_number_of_parts 'test_without_merge' 1 10 + +$CLICKHOUSE_CLIENT -nmq " +DROP TABLE test_without_merge; + +SELECT 'With merge any part range'; + +CREATE TABLE test_with_merge (i Int64) ENGINE = MergeTree ORDER BY i +SETTINGS min_age_to_force_merge_seconds=1, merge_selecting_sleep_ms=1000, min_age_to_force_merge_on_partition_only=false; +INSERT INTO test_with_merge SELECT 1; +INSERT INTO test_with_merge SELECT 2; +INSERT INTO test_with_merge SELECT 3;" + +wait_for_number_of_parts 'test_with_merge' 1 100 + +$CLICKHOUSE_CLIENT -nmq " +DROP TABLE test_with_merge; + +SELECT 'With merge partition only'; + +CREATE TABLE test_with_merge (i Int64) ENGINE = MergeTree ORDER BY i +SETTINGS min_age_to_force_merge_seconds=1, merge_selecting_sleep_ms=1000, min_age_to_force_merge_on_partition_only=true; +INSERT INTO test_with_merge SELECT 1; +INSERT INTO test_with_merge SELECT 2; +INSERT INTO test_with_merge SELECT 3;" + +wait_for_number_of_parts 'test_with_merge' 1 100 + +$CLICKHOUSE_CLIENT -nmq " +SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; -- Sleep for 9 seconds and verify that we keep the old part because it's the only one +SELECT (now() - modification_time) > 5 FROM system.parts WHERE database = currentDatabase() AND table='test_with_merge' AND active; + +DROP TABLE test_with_merge;" diff --git 
a/tests/queries/0_stateless/02473_optimize_old_parts.sql b/tests/queries/0_stateless/02473_optimize_old_parts.sql deleted file mode 100644 index c2bd37033c1..00000000000 --- a/tests/queries/0_stateless/02473_optimize_old_parts.sql +++ /dev/null @@ -1,87 +0,0 @@ --- Tags: long - -DROP TABLE IF EXISTS test_without_merge; -DROP TABLE IF EXISTS test_with_merge; -DROP TABLE IF EXISTS test_replicated; - -SELECT 'Without merge'; - -CREATE TABLE test_without_merge (i Int64) ENGINE = MergeTree ORDER BY i; -INSERT INTO test_without_merge SELECT 1; -INSERT INTO test_without_merge SELECT 2; -INSERT INTO test_without_merge SELECT 3; - -SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; -SELECT count(*) FROM system.parts WHERE database = currentDatabase() AND table='test_without_merge' AND active; - -DROP TABLE test_without_merge; - -SELECT 'With merge any part range'; - -CREATE TABLE test_with_merge (i Int64) ENGINE = MergeTree ORDER BY i -SETTINGS min_age_to_force_merge_seconds=3, min_age_to_force_merge_on_partition_only=false; -INSERT INTO test_with_merge SELECT 1; -INSERT INTO test_with_merge SELECT 2; -INSERT INTO test_with_merge SELECT 3; - -SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; -SELECT count(*) FROM system.parts WHERE database = currentDatabase() AND table='test_with_merge' AND active; - -DROP TABLE test_with_merge; - -SELECT 'With merge partition only'; - -CREATE TABLE test_with_merge (i Int64) ENGINE = MergeTree ORDER BY i -SETTINGS min_age_to_force_merge_seconds=3, min_age_to_force_merge_on_partition_only=true; -INSERT INTO test_with_merge SELECT 1; -INSERT INTO test_with_merge SELECT 2; -INSERT INTO test_with_merge SELECT 3; - -SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; -SELECT count(*) FROM system.parts WHERE database = currentDatabase() AND table='test_with_merge' AND active; - -DROP TABLE test_with_merge; - -SELECT 'With merge replicated any part range'; - -CREATE TABLE test_replicated (i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test02473', 'node') ORDER BY i -SETTINGS min_age_to_force_merge_seconds=3, min_age_to_force_merge_on_partition_only=false; -INSERT INTO test_replicated SELECT 1; -INSERT INTO test_replicated SELECT 2; -INSERT INTO test_replicated SELECT 3; - -SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; -SELECT count(*) FROM system.parts WHERE database = currentDatabase() AND table='test_replicated' AND active; - -DROP TABLE test_replicated; - -SELECT 'With merge replicated partition only'; - -CREATE TABLE test_replicated (i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test02473_partition_only', 'node') ORDER BY i -SETTINGS min_age_to_force_merge_seconds=3, min_age_to_force_merge_on_partition_only=true; -INSERT INTO test_replicated SELECT 1; -INSERT INTO test_replicated SELECT 2; -INSERT INTO test_replicated SELECT 3; - -SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; -SELECT count(*) FROM system.parts WHERE database = currentDatabase() AND table='test_replicated' AND active; - -DROP TABLE test_replicated; - -SELECT 'With merge partition only and new parts'; - -CREATE TABLE test_with_merge (i Int64) ENGINE = MergeTree ORDER BY i -SETTINGS min_age_to_force_merge_seconds=3, min_age_to_force_merge_on_partition_only=true; -SYSTEM STOP MERGES test_with_merge; --- These three parts will have min_age=6 at the time of merge -INSERT INTO test_with_merge SELECT 1; -INSERT INTO test_with_merge SELECT 2; -SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; --- These three parts will have min_age=0 at the time of 
merge --- and so, nothing will be merged. -INSERT INTO test_with_merge SELECT 3; -SYSTEM START MERGES test_with_merge; - -SELECT count(*) FROM system.parts WHERE database = currentDatabase() AND table='test_with_merge' AND active; - -DROP TABLE test_with_merge; diff --git a/tests/queries/0_stateless/02475_bson_each_row_format.reference b/tests/queries/0_stateless/02475_bson_each_row_format.reference index 5922167dc97..f90867d92b1 100644 --- a/tests/queries/0_stateless/02475_bson_each_row_format.reference +++ b/tests/queries/0_stateless/02475_bson_each_row_format.reference @@ -121,7 +121,7 @@ Nullable 2 0 4 -FAIL +OK null Nullable(Int64) 0 \N @@ -191,8 +191,11 @@ tuple Tuple(Nullable(Int64), Nullable(String)) (3,'Hello') (4,'Hello') Map -OK -OK +{1:0,2:1} +{1:1,2:2} +{1:2,2:3} +{1:3,2:4} +{1:4,2:5} {'a':0,'b':1} {'a':1,'b':2} {'a':2,'b':3} diff --git a/tests/queries/0_stateless/02475_bson_each_row_format.sh b/tests/queries/0_stateless/02475_bson_each_row_format.sh index b4efea7e326..aa58d27fa50 100755 --- a/tests/queries/0_stateless/02475_bson_each_row_format.sh +++ b/tests/queries/0_stateless/02475_bson_each_row_format.sh @@ -88,7 +88,7 @@ echo "Nullable" $CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'null Nullable(UInt32)') select number % 2 ? NULL : number from numbers(5) settings engine_file_truncate_on_insert=1" $CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'null Nullable(UInt32)')" $CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'null UInt32')" -$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'null UInt32') settings input_format_null_as_default=0" 2>&1 | grep -q -F "INCORRECT_DATA" && echo "OK" || echo "FAIL" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'null UInt32') settings input_format_null_as_default=0" 2>&1 | grep -q -F "ILLEGAL_COLUMN" && echo "OK" || echo "FAIL" $CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" $CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" @@ -132,10 +132,10 @@ $CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" echo "Map" -$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'map Map(UInt64, UInt64)') select map(1, number, 2, number + 1) from numbers(5) settings engine_file_truncate_on_insert=1" 2>&1 | grep -q -F "ILLEGAL_COLUMN" && echo "OK" || echo "FAIL" -$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'map Map(String, UInt64)') select map('a', number, 'b', number + 1) from numbers(5) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'map Map(UInt64, UInt64)') select map(1, number, 2, number + 1) from numbers(5) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'map Map(UInt64, UInt64)')" -$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'map Map(UInt64, UInt64)')" 2>&1 | grep -q -F "ILLEGAL_COLUMN" && echo "OK" || echo "FAIL" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'map Map(String, UInt64)') select map('a', number, 'b', number + 1) from numbers(5) settings engine_file_truncate_on_insert=1" $CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'map Map(String, UInt64)')" $CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" diff --git a/tests/queries/0_stateless/02476_analyzer_identifier_hints.sh 
b/tests/queries/0_stateless/02476_analyzer_identifier_hints.sh index cd99d1fbd02..0702c146426 100755 --- a/tests/queries/0_stateless/02476_analyzer_identifier_hints.sh +++ b/tests/queries/0_stateless/02476_analyzer_identifier_hints.sh @@ -174,6 +174,15 @@ $CLICKHOUSE_CLIENT -q "SELECT t1.value_ FROM test_table_1 AS t1 INNER JOIN test_ $CLICKHOUSE_CLIENT -q "SELECT t2.value_ FROM test_table_1 AS t1 INNER JOIN test_table_2 AS t2 ON t1.id = t2.id SETTINGS allow_experimental_analyzer = 1;" 2>&1 \ | grep "Maybe you meant: \['t2.value'\]" &>/dev/null; +$CLICKHOUSE_CLIENT -q "SELECT [1] AS a, a.size1 SETTINGS allow_experimental_analyzer = 1;" 2>&1 \ + | grep "Maybe you meant: \['a.size0'\]" &>/dev/null; + +$CLICKHOUSE_CLIENT -q "SELECT ((1))::Tuple(a Tuple(b UInt32)) AS t, t.c SETTINGS allow_experimental_analyzer = 1;" 2>&1 \ + | grep "Maybe you meant: \['t.a'\]" &>/dev/null; + +$CLICKHOUSE_CLIENT -q "SELECT ((1))::Tuple(a Tuple(b UInt32)) AS t, t.a.c SETTINGS allow_experimental_analyzer = 1;" 2>&1 \ + | grep "Maybe you meant: \['t.a.b'\]" &>/dev/null; + $CLICKHOUSE_CLIENT -q "SELECT 1"; $CLICKHOUSE_CLIENT -n -q " diff --git a/tests/queries/0_stateless/02477_age.reference b/tests/queries/0_stateless/02477_age.reference index 249c413d923..4afe3df8ad8 100644 --- a/tests/queries/0_stateless/02477_age.reference +++ b/tests/queries/0_stateless/02477_age.reference @@ -37,7 +37,7 @@ Constant and non-constant arguments -524160 1440 Case insensitive --10 +-1 Dependance of timezones 0 0 diff --git a/tests/queries/0_stateless/02477_age.sql b/tests/queries/0_stateless/02477_age.sql index 9b612276b01..72a692f61d0 100644 --- a/tests/queries/0_stateless/02477_age.sql +++ b/tests/queries/0_stateless/02477_age.sql @@ -45,7 +45,7 @@ SELECT age('minute', materialize(toDate('2017-12-31')), materialize(toDate('2018 SELECT 'Case insensitive'; -SELECT age('year', today(), today() - INTERVAL 10 YEAR); +SELECT age('YeAr', toDate('2017-12-31'), toDate('2016-01-01')); SELECT 'Dependance of timezones'; diff --git a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference index dcfcac737c3..84589668d64 100644 --- a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference +++ b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference @@ -1,6 +1,62 @@ SELECT a FROM t_logical_expressions_optimizer_low_cardinality WHERE a IN (\'x\', \'y\') +QUERY id: 0 + PROJECTION COLUMNS + a LowCardinality(String) + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.t_logical_expressions_optimizer_low_cardinality + WHERE + FUNCTION id: 4, function_name: in, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3 + CONSTANT id: 6, constant_value: Tuple_(\'x\', \'y\'), constant_value_type: Tuple(String, String) + SETTINGS allow_experimental_analyzer=1 +SELECT a +FROM t_logical_expressions_optimizer_low_cardinality +WHERE (a = \'x\') OR (\'y\' = a) +QUERY id: 0 + PROJECTION COLUMNS + a LowCardinality(String) + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.t_logical_expressions_optimizer_low_cardinality + WHERE + FUNCTION id: 4, 
function_name: in, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3 + CONSTANT id: 6, constant_value: Tuple_(\'x\', \'y\'), constant_value_type: Tuple(String, String) + SETTINGS allow_experimental_analyzer=1 SELECT a FROM t_logical_expressions_optimizer_low_cardinality WHERE (b = 0) OR (b = 1) +QUERY id: 0 + PROJECTION COLUMNS + a LowCardinality(String) + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.t_logical_expressions_optimizer_low_cardinality + WHERE + FUNCTION id: 4, function_name: or, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: b, result_type: UInt32, source_id: 3 + CONSTANT id: 9, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 10, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + COLUMN id: 8, column_name: b, result_type: UInt32, source_id: 3 + CONSTANT id: 12, constant_value: UInt64_1, constant_value_type: UInt8 + SETTINGS allow_experimental_analyzer=1 diff --git a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.sql b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.sql index be355a05675..14f8ad830e7 100644 --- a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.sql +++ b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.sql @@ -4,7 +4,11 @@ CREATE TABLE t_logical_expressions_optimizer_low_cardinality (a LowCardinality(S -- LowCardinality case, ignore optimize_min_equality_disjunction_chain_length limit, optimzer applied EXPLAIN SYNTAX SELECT a FROM t_logical_expressions_optimizer_low_cardinality WHERE a = 'x' OR a = 'y'; +EXPLAIN QUERY TREE SELECT a FROM t_logical_expressions_optimizer_low_cardinality WHERE a = 'x' OR a = 'y' SETTINGS allow_experimental_analyzer = 1; +EXPLAIN SYNTAX SELECT a FROM t_logical_expressions_optimizer_low_cardinality WHERE a = 'x' OR 'y' = a; +EXPLAIN QUERY TREE SELECT a FROM t_logical_expressions_optimizer_low_cardinality WHERE a = 'x' OR 'y' = a SETTINGS allow_experimental_analyzer = 1; -- Non-LowCardinality case, optimizer not applied for short chains EXPLAIN SYNTAX SELECT a FROM t_logical_expressions_optimizer_low_cardinality WHERE b = 0 OR b = 1; +EXPLAIN QUERY TREE SELECT a FROM t_logical_expressions_optimizer_low_cardinality WHERE b = 0 OR b = 1 SETTINGS allow_experimental_analyzer = 1; DROP TABLE t_logical_expressions_optimizer_low_cardinality; diff --git a/tests/queries/0_stateless/02477_projection_materialize_and_zero_copy.sql b/tests/queries/0_stateless/02477_projection_materialize_and_zero_copy.sql index d4c24b31da2..1845919890c 100644 --- a/tests/queries/0_stateless/02477_projection_materialize_and_zero_copy.sql +++ b/tests/queries/0_stateless/02477_projection_materialize_and_zero_copy.sql @@ -1,7 +1,7 @@ DROP TABLE IF EXISTS t; create table t (c1 Int64, c2 String, c3 DateTime, c4 Int8, c5 String, c6 String, c7 String, c8 String, c9 String, c10 String, c11 String, c12 String, c13 Int8, c14 Int64, c15 String, c16 String, c17 String, c18 Int64, c19 Int64, c20 Int64) engine ReplicatedMergeTree('/clickhouse/test/{database}/test_02477', '1') order by 
c18 -SETTINGS allow_remote_fs_zero_copy_replication=1; +SETTINGS allow_remote_fs_zero_copy_replication=1, index_granularity=8092, index_granularity_bytes='10Mi'; insert into t (c1, c18) select number, -number from numbers(2000000); diff --git a/tests/queries/0_stateless/02478_projection_with_group_by_alter.reference b/tests/queries/0_stateless/02478_projection_with_group_by_alter.reference index 6ad2c8ec8db..0755be238e3 100644 --- a/tests/queries/0_stateless/02478_projection_with_group_by_alter.reference +++ b/tests/queries/0_stateless/02478_projection_with_group_by_alter.reference @@ -13,7 +13,7 @@ SELECT c FROM testing ORDER BY e, d; 4 1 3 --- update all colums used by proj_1 +-- update all columns used by proj_1 ALTER TABLE testing UPDATE c = c+1, d = d+2 WHERE True SETTINGS mutations_sync=2; SELECT * FROM system.mutations WHERE database = currentDatabase() AND table = 'testing' AND not is_done; SELECT c FROM testing ORDER BY d; diff --git a/tests/queries/0_stateless/02478_projection_with_group_by_alter.sql b/tests/queries/0_stateless/02478_projection_with_group_by_alter.sql index 9ed644fd7da..600d37956a9 100644 --- a/tests/queries/0_stateless/02478_projection_with_group_by_alter.sql +++ b/tests/queries/0_stateless/02478_projection_with_group_by_alter.sql @@ -25,7 +25,7 @@ OPTIMIZE TABLE testing FINAL; SELECT c FROM testing ORDER BY d; SELECT c FROM testing ORDER BY e, d; --- update all colums used by proj_1 +-- update all columns used by proj_1 ALTER TABLE testing UPDATE c = c+1, d = d+2 WHERE True SETTINGS mutations_sync=2; SELECT * FROM system.mutations WHERE database = currentDatabase() AND table = 'testing' AND not is_done; diff --git a/tests/queries/0_stateless/02479_mysql_connect_to_self.reference b/tests/queries/0_stateless/02479_mysql_connect_to_self.reference index 573541ac970..f4dd01bc184 100644 --- a/tests/queries/0_stateless/02479_mysql_connect_to_self.reference +++ b/tests/queries/0_stateless/02479_mysql_connect_to_self.reference @@ -1 +1,72 @@ -0 +--- +1 one -1 een +2 two -2 twee +3 three -3 drie +4 four -4 vier +5 five -5 vijf +--- +5 +--- +1 +1 +1 +1 +1 +--- +1 +2 +3 +4 +5 +--- +-5 five +-4 four +-1 one +-3 three +-2 two +--- +-3 three +-1 one +-2 two +-4 four +-5 five +--- +-1 +-3 +-4 +-5 +--- +4 +QUERY id: 0 + PROJECTION COLUMNS + key String + a String + b String + c String + PROJECTION + LIST id: 1, nodes: 4 + COLUMN id: 2, column_name: key, result_type: String, source_id: 3 + COLUMN id: 4, column_name: a, result_type: String, source_id: 3 + COLUMN id: 5, column_name: b, result_type: String, source_id: 3 + COLUMN id: 6, column_name: c, result_type: String, source_id: 3 + JOIN TREE + TABLE_FUNCTION id: 3, table_function_name: mysql + ARGUMENTS + LIST id: 7, nodes: 5 + CONSTANT id: 8, constant_value: \'127.0.0.1:9004\', constant_value_type: String + CONSTANT id: 9, constant_value: \'default\', constant_value_type: String + EXPRESSION + FUNCTION id: 10, function_name: currentDatabase, function_type: ordinary, result_type: String + IDENTIFIER id: 11, identifier: foo + CONSTANT id: 12, constant_value: \'default\', constant_value_type: String + CONSTANT id: 13, constant_value: \'\', constant_value_type: String + SETTINGS connection_wait_timeout=123 connect_timeout=40123002 read_write_timeout=40123001 connection_pool_size=3 + +SELECT + key AS key, + a AS a, + b AS b, + c AS c +FROM mysql(\'127.0.0.1:9004\', \'default\', foo, \'default\', \'\', SETTINGS connection_wait_timeout = 123, connect_timeout = 40123002, read_write_timeout = 40123001, connection_pool_size = 3) +--- +5 
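The reference output above shows the mysql() table function dialing the server back over its own MySQL-protocol port (9004), with the SETTINGS clause (connection_wait_timeout, connect_timeout, read_write_timeout, connection_pool_size) attached to the table function itself, as visible in the EXPLAIN QUERY TREE dump; the matching .sql diff follows. A minimal sketch of the same round-trip driven over HTTP, in the style of the Python tests in this patch: CLICKHOUSE_URL, the 127.0.0.1:9004 endpoint, the 'default' user, and the table foo mirror the test, while the q() helper is illustrative.

import os
import requests

# The test drives everything over the HTTP interface; while evaluating
# mysql(...), the server then connects back to itself over the MySQL
# wire protocol on port 9004.
url = os.environ["CLICKHOUSE_URL"]
session = requests.Session()

def q(text):
    resp = session.post(url, data=text)
    resp.raise_for_status()  # surface HTTP-level failures immediately
    return resp.text

q("CREATE TABLE foo (key UInt32, a String) ENGINE = TinyLog")
q("INSERT INTO foo VALUES (1, 'one'), (2, 'two')")
# Per-call SETTINGS belong to the table function and end up in its MySQL
# connection pool, as the query tree dump above shows.
print(q("SELECT count() FROM mysql('127.0.0.1:9004', currentDatabase(), foo, "
        "'default', '', SETTINGS connection_pool_size = 3)"))
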
diff --git a/tests/queries/0_stateless/02479_mysql_connect_to_self.sql b/tests/queries/0_stateless/02479_mysql_connect_to_self.sql index 7ff5b3e3382..ff46ebf1ed2 100644 --- a/tests/queries/0_stateless/02479_mysql_connect_to_self.sql +++ b/tests/queries/0_stateless/02479_mysql_connect_to_self.sql @@ -1,4 +1,42 @@ -- Tags: no-fasttest -SELECT * -FROM mysql('127.0.0.1:9004', system, one, 'default', '') -SETTINGS send_logs_level = 'fatal'; -- failed connection tries are ok, if it succeeded after retry. + +SET send_logs_level = 'fatal'; -- failed connection tries are ok, if it succeeded after retry. + +CREATE TABLE foo (key UInt32, a String, b Int64, c String) ENGINE = TinyLog; +INSERT INTO foo VALUES (1, 'one', -1, 'een'), (2, 'two', -2, 'twee'), (3, 'three', -3, 'drie'), (4, 'four', -4, 'vier'), (5, 'five', -5, 'vijf'); + +SET allow_experimental_analyzer = 1; + +SELECT '---'; +SELECT * FROM mysql('127.0.0.1:9004', currentDatabase(), foo, 'default', '') ORDER BY key; + +SELECT '---'; +SELECT count() FROM mysql('127.0.0.1:9004', currentDatabase(), foo, 'default', ''); + +SELECT '---'; +SELECT 1 FROM mysql('127.0.0.1:9004', currentDatabase(), foo, 'default', ''); + +SELECT '---'; +SELECT key FROM mysql('127.0.0.1:9004', currentDatabase(), foo, 'default', '') ORDER BY key; + +SELECT '---'; +SELECT b, a FROM mysql('127.0.0.1:9004', currentDatabase(), foo, 'default', '') ORDER BY a; + +SELECT '---'; +SELECT b, a FROM mysql('127.0.0.1:9004', currentDatabase(), foo, 'default', '') ORDER BY c; + +SELECT '---'; +SELECT b FROM mysql('127.0.0.1:9004', currentDatabase(), foo, 'default', '') WHERE c != 'twee' ORDER BY b; + +SELECT '---'; +SELECT count() FROM mysql('127.0.0.1:9004', currentDatabase(), foo, 'default', '') WHERE c != 'twee'; + +EXPLAIN QUERY TREE dump_ast = 1 +SELECT * FROM mysql( + '127.0.0.1:9004', currentDatabase(), foo, 'default', '', + SETTINGS connection_wait_timeout = 123, connect_timeout = 40123002, read_write_timeout = 40123001, connection_pool_size = 3 +); + +SELECT '---'; +SELECT count() FROM mysql('127.0.0.1:9004', currentDatabase(), foo, 'default', '', SETTINGS connection_pool_size = 1); +SELECT count() FROM mysql('127.0.0.1:9004', currentDatabase(), foo, 'default', '', SETTINGS connection_pool_size = 0); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02479_nullable_primary_key_non_first_column.reference b/tests/queries/0_stateless/02479_nullable_primary_key_non_first_column.reference new file mode 100644 index 00000000000..ed6ac232d9c --- /dev/null +++ b/tests/queries/0_stateless/02479_nullable_primary_key_non_first_column.reference @@ -0,0 +1,2 @@ +a \N +1 1 \N diff --git a/tests/queries/0_stateless/02479_nullable_primary_key_non_first_column.sql b/tests/queries/0_stateless/02479_nullable_primary_key_non_first_column.sql new file mode 100644 index 00000000000..2d56e315bd1 --- /dev/null +++ b/tests/queries/0_stateless/02479_nullable_primary_key_non_first_column.sql @@ -0,0 +1,11 @@ +drop table if exists test_table; +create table test_table (A Nullable(String), B Nullable(String)) engine MergeTree order by (A,B) settings index_granularity = 1, allow_nullable_key=1; +insert into test_table values ('a', 'b'), ('a', null), (null, 'b'); +select * from test_table where B is null; +drop table test_table; + +DROP TABLE IF EXISTS dm_metric_small2; +CREATE TABLE dm_metric_small2 (`x` Nullable(Int64), `y` Nullable(Int64), `z` Nullable(Int64)) ENGINE = MergeTree() ORDER BY (x, y, z) SETTINGS index_granularity = 1, allow_nullable_key = 1; +INSERT INTO 
dm_metric_small2 VALUES (1,1,NULL) (1,1,1) (1,2,0) (1,2,1) (1,2,NULL) (1,2,NULL); +SELECT * FROM dm_metric_small2 WHERE (x = 1) AND (y = 1) AND z IS NULL; +DROP TABLE dm_metric_small2; \ No newline at end of file diff --git a/tests/queries/0_stateless/02479_nullable_primary_key_second_column.reference b/tests/queries/0_stateless/02479_nullable_primary_key_second_column.reference deleted file mode 100644 index f0227e1a41e..00000000000 --- a/tests/queries/0_stateless/02479_nullable_primary_key_second_column.reference +++ /dev/null @@ -1 +0,0 @@ -a \N diff --git a/tests/queries/0_stateless/02479_nullable_primary_key_second_column.sql b/tests/queries/0_stateless/02479_nullable_primary_key_second_column.sql deleted file mode 100644 index ad0c09222c2..00000000000 --- a/tests/queries/0_stateless/02479_nullable_primary_key_second_column.sql +++ /dev/null @@ -1,9 +0,0 @@ -drop table if exists test_table; - -create table test_table (A Nullable(String), B Nullable(String)) engine MergeTree order by (A,B) settings index_granularity = 1, allow_nullable_key=1; - -insert into test_table values ('a', 'b'), ('a', null), (null, 'b'); - -select * from test_table where B is null; - -drop table test_table; diff --git a/tests/queries/0_stateless/02480_tlp_nan.reference b/tests/queries/0_stateless/02480_tlp_nan.reference index ea4aa44fa89..befd1f66564 100644 --- a/tests/queries/0_stateless/02480_tlp_nan.reference +++ b/tests/queries/0_stateless/02480_tlp_nan.reference @@ -1,10 +1,21 @@ +-- {echo} +SELECT sqrt(-1) as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=1; nan 0 1 0 +SELECT sqrt(-1) as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=0; nan 0 1 0 +SELECT -inf as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=1; -inf 0 1 0 +SELECT -inf as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=0; -inf 0 1 0 +SELECT NULL as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=1; \N \N \N 1 +SELECT NULL as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=0; \N \N \N 1 +SELECT inf as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=1; inf 0 1 0 +SELECT inf as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=0; inf 0 1 0 +SELECT nan as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=1; nan 0 1 0 +SELECT nan as x, not(x), not(not(x)), (not(x)) IS NULL SETTINGS allow_experimental_analyzer=0; nan 0 1 0 diff --git a/tests/queries/0_stateless/02481_aggregation_in_order_plan.reference b/tests/queries/0_stateless/02481_aggregation_in_order_plan.reference index d8b62435bfa..bb4eb4ddd75 100644 --- a/tests/queries/0_stateless/02481_aggregation_in_order_plan.reference +++ b/tests/queries/0_stateless/02481_aggregation_in_order_plan.reference @@ -4,6 +4,7 @@ 0 1 2 200 Aggregating Order: a ASC, c ASC - Sorting (Stream): a ASC, b ASC, c ASC ReadFromMergeTree (default.tab) - Sorting (Stream): a ASC, b ASC, c ASC + Aggregating + Order: default.tab.a_0 ASC, default.tab.c_2 ASC + ReadFromMergeTree (default.tab) diff --git a/tests/queries/0_stateless/02481_aggregation_in_order_plan.sql b/tests/queries/0_stateless/02481_aggregation_in_order_plan.sql index 9c99609ca46..490060dee6f 100644 --- a/tests/queries/0_stateless/02481_aggregation_in_order_plan.sql +++ b/tests/queries/0_stateless/02481_aggregation_in_order_plan.sql @@ -5,4 +5,5 @@ insert into tab select 0, number % 3, 2 - 
intDiv(number, 3), (number % 3 + 1) * insert into tab select 0, number % 3, 2 - intDiv(number, 3), (number % 3 + 1) * 100 from numbers(6); select a, any(b), c, d from tab where b = 1 group by a, c, d order by c, d settings optimize_aggregation_in_order=1, query_plan_aggregation_in_order=1; -select * from (explain actions = 1, sorting=1 select a, any(b), c, d from tab where b = 1 group by a, c, d settings optimize_aggregation_in_order=1, query_plan_aggregation_in_order=1) where explain like '%Sorting (Stream)%' or explain like '%ReadFromMergeTree%' or explain like '%Aggregating%' or explain like '%Order:%'; +select * from (explain actions = 1, sorting=1 select a, any(b), c, d from tab where b = 1 group by a, c, d settings optimize_aggregation_in_order=1, query_plan_aggregation_in_order=1) where explain like '%ReadFromMergeTree%' or explain like '%Aggregating%' or explain like '%Order:%' settings allow_experimental_analyzer=0; +select * from (explain actions = 1, sorting=1 select a, any(b), c, d from tab where b = 1 group by a, c, d settings optimize_aggregation_in_order=1, query_plan_aggregation_in_order=1) where explain like '%ReadFromMergeTree%' or explain like '%Aggregating%' or explain like '%Order:%' settings allow_experimental_analyzer=1; diff --git a/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.sql b/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.sql index b51233f734c..de9208ef009 100644 --- a/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.sql +++ b/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.sql @@ -1,3 +1,5 @@ +set allow_experimental_analyzer = 1; + EXPLAIN QUERY TREE run_passes=1 SELECT avg(log(2) * number) AS k FROM numbers(10000000) GROUP BY GROUPING SETS (((number % 2) * (number % 3)), number % 3, number % 2) diff --git a/tests/queries/0_stateless/02481_async_insert_dedup.python b/tests/queries/0_stateless/02481_async_insert_dedup.python index 0cea7301ce5..9fd82da1038 100644 --- a/tests/queries/0_stateless/02481_async_insert_dedup.python +++ b/tests/queries/0_stateless/02481_async_insert_dedup.python @@ -8,37 +8,47 @@ import time from threading import Thread CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient client = ClickHouseClient() # test table without partition -client.query("DROP TABLE IF EXISTS t_async_insert_dedup_no_part NO DELAY") -client.query(''' +client.query("DROP TABLE IF EXISTS t_async_insert_dedup_no_part SYNC") +client.query( + """ CREATE TABLE t_async_insert_dedup_no_part ( KeyID UInt32 ) Engine = ReplicatedMergeTree('/clickhouse/tables/{shard}/{database}/t_async_insert_dedup', '{replica}') ORDER BY (KeyID) -''') +""" +) -client.query("insert into t_async_insert_dedup_no_part values (1), (2), (3), (4), (5)", settings = {"async_insert": 1, "wait_for_async_insert": 1, "insert_keeper_fault_injection_probability": 0}) +client.query( + "insert into t_async_insert_dedup_no_part values (1), (2), (3), (4), (5)", + settings={ + "async_insert": 1, + "wait_for_async_insert": 1, + "insert_keeper_fault_injection_probability": 0, + }, +) result = client.query("select count(*) from t_async_insert_dedup_no_part") print(result, flush=True) -client.query("DROP TABLE IF EXISTS t_async_insert_dedup_no_part NO DELAY") +client.query("DROP TABLE IF EXISTS t_async_insert_dedup_no_part SYNC") + # generate data and push to queue def 
generate_data(q, total_number): old_data = [] max_chunk_size = 30 - partitions = ['2022-11-11 10:10:10', '2022-12-12 10:10:10'] + partitions = ["2022-11-11 10:10:10", "2022-12-12 10:10:10"] last_number = 0 while True: - dup_simulate = random.randint(0,3) + dup_simulate = random.randint(0, 3) # insert old data randomly. 25% of them are dup. if dup_simulate == 0: - last_idx = len(old_data)-1 + last_idx = len(old_data) - 1 if last_idx < 0: continue idx = last_idx - random.randint(0, 50) @@ -53,7 +63,7 @@ def generate_data(q, total_number): end = start + chunk_size if end > total_number: end = total_number - for i in range(start, end+1): + for i in range(start, end + 1): partition = partitions[random.randint(0, 1)] insert_stmt += "('{}', {}),".format(partition, i) insert_stmt = insert_stmt[:-1] @@ -65,33 +75,46 @@ def generate_data(q, total_number): # wait all the tasks is done. q.join() + def fetch_and_insert_data(q, client): while True: insert = q.get() - client.query(insert, settings = {"async_insert": 1, "async_insert_deduplicate": 1, "wait_for_async_insert": 0, "async_insert_busy_timeout_ms": 1500, "insert_keeper_fault_injection_probability": 0}) + client.query( + insert, + settings={ + "async_insert": 1, + "async_insert_deduplicate": 1, + "wait_for_async_insert": 0, + "async_insert_busy_timeout_ms": 1500, + "insert_keeper_fault_injection_probability": 0, + }, + ) q.task_done() sleep_time = random.randint(50, 500) - time.sleep(sleep_time/1000.0) + time.sleep(sleep_time / 1000.0) + # main process -client.query("DROP TABLE IF EXISTS t_async_insert_dedup NO DELAY") -client.query(''' +client.query("DROP TABLE IF EXISTS t_async_insert_dedup SYNC") +client.query( + """ CREATE TABLE t_async_insert_dedup ( EventDate DateTime, KeyID UInt32 ) Engine = ReplicatedMergeTree('/clickhouse/tables/{shard}/{database}/t_async_insert_dedup', '{replica}') PARTITION BY toYYYYMM(EventDate) ORDER BY (KeyID, EventDate) SETTINGS use_async_block_ids_cache = 1 -''') +""" +) q = queue.Queue(100) total_number = 10000 -gen = Thread(target = generate_data, args = [q, total_number]) +gen = Thread(target=generate_data, args=[q, total_number]) gen.start() for i in range(3): - insert = Thread(target = fetch_and_insert_data, args = [q, client]) + insert = Thread(target=fetch_and_insert_data, args=[q, client]) insert.start() gen.join() @@ -109,7 +132,7 @@ while True: errMsg = f"the size of result is {len(result)}. we expect {total_number}." else: for i in range(total_number): - expect = str(i+1) + expect = str(i + 1) real = result[i] if expect != real: err = True @@ -117,7 +140,7 @@ while True: break # retry several times to get stable results. 
if err and retry >= 5: - print (errMsg, flush=True) + print(errMsg, flush=True) elif err: retry += 1 continue @@ -125,15 +148,19 @@ while True: print(len(result), flush=True) break -result = client.query("SELECT value FROM system.metrics where metric = 'AsyncInsertCacheSize'") +result = client.query( + "SELECT value FROM system.metrics where metric = 'AsyncInsertCacheSize'" +) result = int(result.split()[0]) if result <= 0: raise Exception(f"AsyncInsertCacheSize should > 0, but got {result}") -result = client.query("SELECT value FROM system.events where event = 'AsyncInsertCacheHits'") +result = client.query( + "SELECT value FROM system.events where event = 'AsyncInsertCacheHits'" +) result = int(result.split()[0]) if result <= 0: raise Exception(f"AsyncInsertCacheHits should > 0, but got {result}") -client.query("DROP TABLE IF EXISTS t_async_insert_dedup NO DELAY") +client.query("DROP TABLE IF EXISTS t_async_insert_dedup SYNC") os._exit(os.EX_OK) diff --git a/tests/queries/0_stateless/02481_low_cardinality_with_short_circuit_functins_mutations.reference b/tests/queries/0_stateless/02481_low_cardinality_with_short_circuit_functins_mutations.reference new file mode 100644 index 00000000000..9a6e97d4503 --- /dev/null +++ b/tests/queries/0_stateless/02481_low_cardinality_with_short_circuit_functins_mutations.reference @@ -0,0 +1,3 @@ +0 xxxx yyyy +1 yyyy yyyy +2 xxxx yyyy diff --git a/tests/queries/0_stateless/02481_low_cardinality_with_short_circuit_functins_mutations.sql b/tests/queries/0_stateless/02481_low_cardinality_with_short_circuit_functins_mutations.sql new file mode 100644 index 00000000000..9d183dde91d --- /dev/null +++ b/tests/queries/0_stateless/02481_low_cardinality_with_short_circuit_functins_mutations.sql @@ -0,0 +1,14 @@ +drop table if exists issue_46128; + +create table issue_46128 ( + id Int64, + a LowCardinality(Nullable(String)), + b LowCardinality(Nullable(String)) +) Engine = MergeTree order by id +as select number%100, 'xxxx', 'yyyy' from numbers(10); + +ALTER TABLE issue_46128 UPDATE a = b WHERE id= 1 settings mutations_sync=2; + +select * from issue_46128 where id <= 2 order by id; + +drop table issue_46128; diff --git a/tests/queries/0_stateless/02483_cuturlparameter_with_arrays.reference b/tests/queries/0_stateless/02483_cuturlparameter_with_arrays.reference index dd677873c7c..348408a15cc 100644 --- a/tests/queries/0_stateless/02483_cuturlparameter_with_arrays.reference +++ b/tests/queries/0_stateless/02483_cuturlparameter_with_arrays.reference @@ -1,4 +1,5 @@ -- { echoOn } + SELECT cutURLParameter('http://bigmir.net/?a=b&c=d', []), cutURLParameter('http://bigmir.net/?a=b&c=d', ['a']), @@ -30,7 +31,7 @@ SELECT FORMAT Vertical; Row 1: ────── -cutURLParameter('http://bigmir.net/?a=b&c=d', []): http://bigmir.net/?a=b&c=d +cutURLParameter('http://bigmir.net/?a=b&c=d', array()): http://bigmir.net/?a=b&c=d cutURLParameter('http://bigmir.net/?a=b&c=d', ['a']): http://bigmir.net/?c=d cutURLParameter('http://bigmir.net/?a=b&c=d', ['a', 'c']): http://bigmir.net/? 
cutURLParameter('http://bigmir.net/?a=b&c=d', ['c']): http://bigmir.net/?a=b @@ -43,7 +44,7 @@ cutURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', ['c', 'g']): http: cutURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', ['e', 'g']): http://bigmir.net/?a=b&c=d#e cutURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', ['test', 'e']): http://bigmir.net/?a=b&c=d#test?g=h cutURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', ['test', 'g']): http://bigmir.net/?a=b&c=d#test?e=f -cutURLParameter('//bigmir.net/?a=b&c=d', []): //bigmir.net/?a=b&c=d +cutURLParameter('//bigmir.net/?a=b&c=d', array()): //bigmir.net/?a=b&c=d cutURLParameter('//bigmir.net/?a=b&c=d', ['a']): //bigmir.net/?c=d cutURLParameter('//bigmir.net/?a=b&c=d', ['a', 'c']): //bigmir.net/? cutURLParameter('//bigmir.net/?a=b&c=d#e=f', ['a', 'e']): //bigmir.net/?c=d# @@ -88,7 +89,7 @@ SELECT FORMAT Vertical; Row 1: ────── -cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), []): http://bigmir.net/?a=b&c=d +cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), array()): http://bigmir.net/?a=b&c=d cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), ['a']): http://bigmir.net/?c=d cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), ['a', 'c']): http://bigmir.net/? cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), ['c']): http://bigmir.net/?a=b @@ -101,7 +102,7 @@ cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), ['c', 'g']): cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), ['e', 'g']): http://bigmir.net/?a=b&c=d#e cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), ['test', 'e']): http://bigmir.net/?a=b&c=d#test?g=h cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), ['test', 'g']): http://bigmir.net/?a=b&c=d#test?e=f -cutURLParameter(materialize('//bigmir.net/?a=b&c=d'), []): //bigmir.net/?a=b&c=d +cutURLParameter(materialize('//bigmir.net/?a=b&c=d'), array()): //bigmir.net/?a=b&c=d cutURLParameter(materialize('//bigmir.net/?a=b&c=d'), ['a']): //bigmir.net/?c=d cutURLParameter(materialize('//bigmir.net/?a=b&c=d'), ['a', 'c']): //bigmir.net/? cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e=f'), ['a', 'e']): //bigmir.net/?c=d# diff --git a/tests/queries/0_stateless/02483_cuturlparameter_with_arrays.sql b/tests/queries/0_stateless/02483_cuturlparameter_with_arrays.sql index ea2d6ae104f..6d64d2685b7 100644 --- a/tests/queries/0_stateless/02483_cuturlparameter_with_arrays.sql +++ b/tests/queries/0_stateless/02483_cuturlparameter_with_arrays.sql @@ -1,4 +1,7 @@ +SET allow_experimental_analyzer = 1; + -- { echoOn } + SELECT cutURLParameter('http://bigmir.net/?a=b&c=d', []), cutURLParameter('http://bigmir.net/?a=b&c=d', ['a']), diff --git a/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.sql b/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.sql index 615d42f12fa..8549300d49f 100644 --- a/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.sql +++ b/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.sql @@ -8,33 +8,33 @@ DROP TABLE IF EXISTS test; CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version) Order by (uid); INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0); SELECT '== Test SELECT ... 
FINAL - no is_deleted =='; -select * from test FINAL; +select * from test FINAL order by uid; OPTIMIZE TABLE test FINAL CLEANUP; -select * from test; +select * from test order by uid; DROP TABLE IF EXISTS test; CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version) Order by (uid) SETTINGS clean_deleted_rows='Always'; INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0); SELECT '== Test SELECT ... FINAL - no is_deleted SETTINGS clean_deleted_rows=Always =='; -select * from test FINAL; +select * from test FINAL order by uid; OPTIMIZE TABLE test FINAL CLEANUP; -select * from test; +select * from test order by uid; -- Test the new behaviour DROP TABLE IF EXISTS test; CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid); INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0); SELECT '== Test SELECT ... FINAL =='; -select * from test FINAL; -select * from test; +select * from test FINAL order by uid; +select * from test order by uid; SELECT '== Insert backups =='; INSERT INTO test (*) VALUES ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1); -select * from test FINAL; +select * from test FINAL order by uid; SELECT '== Insert a second batch with overlaping data =='; INSERT INTO test (*) VALUES ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 1), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0), ('d2', 2, 1), ('d2', 3, 0), ('d3', 2, 1), ('d3', 3, 0); -select * from test FINAL; +select * from test FINAL order by uid; DROP TABLE IF EXISTS test; CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid); @@ -45,7 +45,7 @@ INSERT INTO test (*) VALUES ('d1', 1, 0), ('d1', 2, 1), ('d1', 3, 0), ('d1', 4, INSERT INTO test (*) VALUES ('d1', 1, 0), ('d1', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d1', 5, 0), ('d2', 1, 0), ('d3', 1, 0), ('d4', 1, 0), ('d5', 1, 0), ('d6', 1, 0), ('d6', 2, 1); SELECT '== Only last version remains after OPTIMIZE W/ CLEANUP =='; OPTIMIZE TABLE test FINAL CLEANUP; -select * from test; +select * from test order by uid; -- insert d6 v=3 is_deleted=true (timestamp more recent so this version should be the one take into acount) INSERT INTO test (*) VALUES ('d1', 1, 0), ('d1', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d1', 5, 0), ('d2', 1, 0), ('d3', 1, 0), ('d4', 1, 0), ('d5', 1, 0), ('d6', 1, 0), ('d6', 3, 1); @@ -53,7 +53,7 @@ INSERT INTO test (*) VALUES ('d1', 1, 0), ('d1', 2, 1), ('d1', 3, 0), ('d1', 4, SELECT '== OPTIMIZE W/ CLEANUP (remove d6) =='; OPTIMIZE TABLE test FINAL CLEANUP; -- No d6 anymore -select * from test; +select * from test order by uid; DROP TABLE IF EXISTS test; CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid) SETTINGS clean_deleted_rows='Always'; @@ -61,12 +61,12 @@ CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = Replac SELECT '== Test of the SETTINGS clean_deleted_rows as Always =='; INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 
1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0); -- Even if the setting is set to Always, the SELECT FINAL doesn't delete rows -select * from test FINAL; -select * from test; +select * from test FINAL order by uid; +select * from test order by uid; OPTIMIZE TABLE test FINAL; -- d6 has to be removed since we set clean_deleted_rows as 'Always' -select * from test; +select * from test order by uid; SELECT '== Test of the SETTINGS clean_deleted_rows as Never =='; ALTER TABLE test MODIFY SETTING clean_deleted_rows='Never'; @@ -74,7 +74,7 @@ INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0); OPTIMIZE TABLE test FINAL; -- d6 has NOT to be removed since we set clean_deleted_rows as 'Never' -select * from test; +select * from test order by uid; DROP TABLE IF EXISTS testCleanupR1; @@ -92,7 +92,7 @@ OPTIMIZE TABLE testCleanupR1 FINAL CLEANUP; -- Only d3 to d5 remain SELECT '== (Replicas) Test optimize =='; -SELECT * FROM testCleanupR1; +SELECT * FROM testCleanupR1 order by uid; ------------------------------ @@ -110,7 +110,7 @@ OPTIMIZE TABLE testSettingsR1 FINAL; -- Only d3 to d5 remain SELECT '== (Replicas) Test settings =='; -SELECT * FROM testSettingsR1; +SELECT * FROM testSettingsR1 order by col1; ------------------------------ @@ -133,28 +133,28 @@ CREATE TABLE testMT (uid String, version UInt32, is_deleted UInt8) ENGINE = Merg INSERT INTO testMT (*) VALUES ('d1', 1, 1); OPTIMIZE TABLE testMT FINAL CLEANUP; -- { serverError CANNOT_ASSIGN_OPTIMIZE } OPTIMIZE TABLE testMT FINAL; -SELECT * FROM testMT; +SELECT * FROM testMT order by uid; CREATE TABLE testSummingMT (uid String, version UInt32, is_deleted UInt8) ENGINE = SummingMergeTree() Order by (uid) SETTINGS clean_deleted_rows='Always'; INSERT INTO testSummingMT (*) VALUES ('d1', 1, 1); OPTIMIZE TABLE testSummingMT FINAL CLEANUP; -- { serverError CANNOT_ASSIGN_OPTIMIZE } OPTIMIZE TABLE testSummingMT FINAL; -SELECT * FROM testSummingMT; +SELECT * FROM testSummingMT order by uid; CREATE TABLE testAggregatingMT (uid String, version UInt32, is_deleted UInt8) ENGINE = AggregatingMergeTree() Order by (uid) SETTINGS clean_deleted_rows='Always'; INSERT INTO testAggregatingMT (*) VALUES ('d1', 1, 1); OPTIMIZE TABLE testAggregatingMT FINAL CLEANUP; -- { serverError CANNOT_ASSIGN_OPTIMIZE } OPTIMIZE TABLE testAggregatingMT FINAL; -SELECT * FROM testAggregatingMT; +SELECT * FROM testAggregatingMT order by uid; CREATE TABLE testCollapsingMT (uid String, version UInt32, is_deleted UInt8, sign Int8) ENGINE = CollapsingMergeTree(sign) Order by (uid) SETTINGS clean_deleted_rows='Always'; INSERT INTO testCollapsingMT (*) VALUES ('d1', 1, 1, 1); OPTIMIZE TABLE testCollapsingMT FINAL CLEANUP; -- { serverError CANNOT_ASSIGN_OPTIMIZE } OPTIMIZE TABLE testCollapsingMT FINAL; -SELECT * FROM testCollapsingMT; +SELECT * FROM testCollapsingMT order by uid; CREATE TABLE testVersionedCMT (uid String, version UInt32, is_deleted UInt8, sign Int8) ENGINE = VersionedCollapsingMergeTree(sign, version) Order by (uid) SETTINGS clean_deleted_rows='Always'; INSERT INTO testVersionedCMT (*) VALUES ('d1', 1, 1, 1); OPTIMIZE TABLE testVersionedCMT FINAL CLEANUP; -- { serverError CANNOT_ASSIGN_OPTIMIZE } OPTIMIZE TABLE testVersionedCMT FINAL; -SELECT * FROM testVersionedCMT; +SELECT * FROM 
testVersionedCMT order by uid; diff --git a/tests/queries/0_stateless/02494_query_cache_case_agnostic_matching.sql b/tests/queries/0_stateless/02494_query_cache_case_agnostic_matching.sql index 9440a1fd9c0..9f26d7759de 100644 --- a/tests/queries/0_stateless/02494_query_cache_case_agnostic_matching.sql +++ b/tests/queries/0_stateless/02494_query_cache_case_agnostic_matching.sql @@ -1,8 +1,6 @@ -- Tags: no-parallel -- Tag no-parallel: Messes with internal cache -SET allow_experimental_query_cache = true; - -- Start with empty query cache (QC) and query log SYSTEM DROP QUERY CACHE; DROP TABLE system.query_log SYNC; diff --git a/tests/queries/0_stateless/02494_query_cache_compression.reference b/tests/queries/0_stateless/02494_query_cache_compression.reference new file mode 100644 index 00000000000..922ab65540a --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_compression.reference @@ -0,0 +1,4804 @@ +-- insert with enabled compression +abc […"+abc" repeated…] +def […"+def" repeated…] +jkl […"+jkl" repeated…] +-- read from cache +abc […"+abc" repeated; the excerpt ends inside this 4804-line reference file…]
+abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def 
+def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +-- insert with disabled compression +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc 
+abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl 
+jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +-- read from cache +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc 
+abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl 
+jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl diff --git a/tests/queries/0_stateless/02494_query_cache_compression.sql b/tests/queries/0_stateless/02494_query_cache_compression.sql new file mode 100644 index 00000000000..3d17deebd94 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_compression.sql @@ -0,0 +1,35 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +SYSTEM DROP QUERY CACHE; +DROP TABLE IF EXISTS t; + +-- Create test table with lots of rows +CREATE TABLE t(c String) ENGINE=MergeTree ORDER BY c; +INSERT INTO t SELECT multiIf(n = 0, 'abc', n = 1, 'def', n = 2, 'abc', n = 3, 'jkl', '') FROM (SELECT number % 4 AS n FROM numbers(1200)); +OPTIMIZE TABLE t FINAL; + +-- Run a query which stores its *compressed* result in the query cache +SELECT '-- insert with enabled compression'; +SELECT * FROM t ORDER BY c +SETTINGS use_query_cache = true, query_cache_compress_entries = true; + +-- Run again to check that no bad things happen and that the result is as expected +SELECT '-- read from cache'; +SELECT * FROM t ORDER BY c +SETTINGS use_query_cache = true; + +SYSTEM DROP QUERY CACHE; + +-- Run a query which stores its *uncompressed* result in the query cache +SELECT '-- insert with disabled compression'; +SELECT * FROM t ORDER BY c +SETTINGS use_query_cache = true, query_cache_compress_entries = false; + +-- Run again to check that no bad things happen and that the result is as expected +SELECT '-- read from cache'; +SELECT * FROM t ORDER BY c +SETTINGS use_query_cache = true; + +DROP TABLE t; +SYSTEM DROP QUERY CACHE; diff --git a/tests/queries/0_stateless/02494_query_cache_drop_cache.sql b/tests/queries/0_stateless/02494_query_cache_drop_cache.sql index 1f61472fcb0..bc2e7f442fc 100644 --- a/tests/queries/0_stateless/02494_query_cache_drop_cache.sql +++ b/tests/queries/0_stateless/02494_query_cache_drop_cache.sql @@ -1,7 +1,8 @@ -- Tags: no-parallel -- Tag no-parallel: Messes with internal cache -SET allow_experimental_query_cache = true; +-- (it's silly to use the statement that is tested below, but we have to assume other tests cluttered the query cache) +SYSTEM DROP QUERY CACHE; -- Cache query result in query cache SELECT 1 SETTINGS use_query_cache = true; diff --git a/tests/queries/0_stateless/02494_query_cache_eligible_queries.sql b/tests/queries/0_stateless/02494_query_cache_eligible_queries.sql index b4bc9e2c258..5c45ee8aedd 100644 --- a/tests/queries/0_stateless/02494_query_cache_eligible_queries.sql +++ b/tests/queries/0_stateless/02494_query_cache_eligible_queries.sql @@ -1,8 +1,6 @@ -- Tags: no-parallel -- Tag no-parallel: Messes with internal cache -SET allow_experimental_query_cache = true; - SYSTEM DROP QUERY CACHE; DROP TABLE IF EXISTS eligible_test; DROP TABLE IF EXISTS eligible_test2; diff --git a/tests/queries/0_stateless/02494_query_cache_events.sql b/tests/queries/0_stateless/02494_query_cache_events.sql index d775467d525..900b68f5eb2 100644 --- a/tests/queries/0_stateless/02494_query_cache_events.sql +++ b/tests/queries/0_stateless/02494_query_cache_events.sql @@ -1,8 +1,6 @@ -- Tags: no-parallel -- Tag no-parallel: Messes with internal cache -SET allow_experimental_query_cache = true; - -- Start with empty query cache (QC) and query log SYSTEM DROP QUERY CACHE; DROP TABLE system.query_log SYNC; diff --git a/tests/queries/0_stateless/02494_query_cache_exception_handling.sql b/tests/queries/0_stateless/02494_query_cache_exception_handling.sql index
4d686d81ed3..70a443cc75c 100644 --- a/tests/queries/0_stateless/02494_query_cache_exception_handling.sql +++ b/tests/queries/0_stateless/02494_query_cache_exception_handling.sql @@ -1,8 +1,6 @@ -- Tags: no-parallel -- Tag no-parallel: Messes with internal cache -SET allow_experimental_query_cache = true; - SYSTEM DROP QUERY CACHE; -- If an exception is thrown during query execution, no entry must be created in the query cache diff --git a/tests/queries/0_stateless/02494_query_cache_explain.reference b/tests/queries/0_stateless/02494_query_cache_explain.reference index ecc965ac391..690e75bca7c 100644 --- a/tests/queries/0_stateless/02494_query_cache_explain.reference +++ b/tests/queries/0_stateless/02494_query_cache_explain.reference @@ -1,9 +1,9 @@ 1 1 -Expression ((Projection + Before ORDER BY)) +Expression ((Project names + (Projection + Change column names to column identifiers))) Limit (preliminary LIMIT (without OFFSET)) ReadFromStorage (SystemNumbers) -Expression ((Projection + Before ORDER BY)) +Expression ((Project names + (Projection + Change column names to column identifiers))) Limit (preliminary LIMIT (without OFFSET)) ReadFromStorage (SystemNumbers) (Expression) diff --git a/tests/queries/0_stateless/02494_query_cache_explain.sql b/tests/queries/0_stateless/02494_query_cache_explain.sql index 67717efde13..d12938181c2 100644 --- a/tests/queries/0_stateless/02494_query_cache_explain.sql +++ b/tests/queries/0_stateless/02494_query_cache_explain.sql @@ -1,7 +1,7 @@ -- Tags: no-parallel -- Tag no-parallel: Messes with internal cache -SET allow_experimental_query_cache = true; +SET allow_experimental_analyzer = 1; SYSTEM DROP QUERY CACHE; diff --git a/tests/queries/0_stateless/02494_query_cache_min_query_duration.sql b/tests/queries/0_stateless/02494_query_cache_min_query_duration.sql index 7d759c86130..ee0b73f43e2 100644 --- a/tests/queries/0_stateless/02494_query_cache_min_query_duration.sql +++ b/tests/queries/0_stateless/02494_query_cache_min_query_duration.sql @@ -1,8 +1,6 @@ -- Tags: no-parallel -- Tag no-parallel: Messes with internal cache -SET allow_experimental_query_cache = true; - SYSTEM DROP QUERY CACHE; -- This creates an entry in the query cache ... 
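For orientation, a minimal sketch (not part of this diff) of how the minimum-duration threshold exercised by the 02494_query_cache_min_query_duration test gates cache insertion; it assumes, as in current ClickHouse, that query_cache_min_query_duration is specified in milliseconds:

SYSTEM DROP QUERY CACHE;
-- The query finishes in far less than 10 seconds, so no cache entry is created.
SELECT 1 SETTINGS use_query_cache = true, query_cache_min_query_duration = 10000;
SELECT count(*) FROM system.query_cache; -- expect 0
-- Without the threshold, the first run already stores the result.
SELECT 1 SETTINGS use_query_cache = true;
SELECT count(*) FROM system.query_cache; -- expect 1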
diff --git a/tests/queries/0_stateless/02494_query_cache_min_query_runs.sql b/tests/queries/0_stateless/02494_query_cache_min_query_runs.sql index 2401beeab13..ffdf12169e3 100644 --- a/tests/queries/0_stateless/02494_query_cache_min_query_runs.sql +++ b/tests/queries/0_stateless/02494_query_cache_min_query_runs.sql @@ -1,8 +1,6 @@ -- Tags: no-parallel -- Tag no-parallel: Messes with internal cache -SET allow_experimental_query_cache = true; - SYSTEM DROP QUERY CACHE; -- Cache the query after the 1st query invocation diff --git a/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.sql b/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.sql index 534d63aa427..045b7258a34 100644 --- a/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.sql +++ b/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.sql @@ -1,8 +1,6 @@ -- Tags: no-parallel -- Tag no-parallel: Messes with internal cache -SET allow_experimental_query_cache = true; - SYSTEM DROP QUERY CACHE; -- rand() is non-deterministic; with default settings, no entry should be created in the query cache diff --git a/tests/queries/0_stateless/02494_query_cache_normalize_ast.sql b/tests/queries/0_stateless/02494_query_cache_normalize_ast.sql index 7e3cd273312..5fd09eb935b 100644 --- a/tests/queries/0_stateless/02494_query_cache_normalize_ast.sql +++ b/tests/queries/0_stateless/02494_query_cache_normalize_ast.sql @@ -1,8 +1,6 @@ -- Tags: no-parallel -- Tag no-parallel: Messes with internal cache -SET allow_experimental_query_cache = true; - -- Start with empty query cache (QC) and query log. SYSTEM DROP QUERY CACHE; DROP TABLE system.query_log SYNC; diff --git a/tests/queries/0_stateless/02494_query_cache_passive_usage.sql b/tests/queries/0_stateless/02494_query_cache_passive_usage.sql index 8f1e3972b6d..6143b5f7083 100644 --- a/tests/queries/0_stateless/02494_query_cache_passive_usage.sql +++ b/tests/queries/0_stateless/02494_query_cache_passive_usage.sql @@ -1,8 +1,6 @@ -- Tags: no-parallel -- Tag no-parallel: Messes with internal cache -SET allow_experimental_query_cache = true; - -- Start with empty query cache (QC).
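Worth spelling out what "passive" usage means here: taking part in the query cache without ever writing to it. A minimal sketch, assuming the enable_writes_to_query_cache and enable_reads_from_query_cache settings of current ClickHouse (both default to true):

-- A normal run writes the entry.
SELECT 1 SETTINGS use_query_cache = true;
-- A passive run may read existing entries but never inserts new ones.
SELECT 1 SETTINGS use_query_cache = true, enable_writes_to_query_cache = false;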
SYSTEM DROP QUERY CACHE; diff --git a/tests/queries/0_stateless/02494_query_cache_secrets.sql b/tests/queries/0_stateless/02494_query_cache_secrets.sql index 99a972b003c..66427df8f10 100644 --- a/tests/queries/0_stateless/02494_query_cache_secrets.sql +++ b/tests/queries/0_stateless/02494_query_cache_secrets.sql @@ -2,8 +2,6 @@ -- Tag no-fasttest: Depends on OpenSSL -- Tag no-parallel: Messes with internal cache -SET allow_experimental_query_cache = true; - SYSTEM DROP QUERY CACHE; -- Cache a result of a query with secret in the query cache diff --git a/tests/queries/0_stateless/00972_live_view_select_1.reference b/tests/queries/0_stateless/02494_query_cache_sparse_columns.reference similarity index 100% rename from tests/queries/0_stateless/00972_live_view_select_1.reference rename to tests/queries/0_stateless/02494_query_cache_sparse_columns.reference diff --git a/tests/queries/0_stateless/02494_query_cache_sparse_columns.sql b/tests/queries/0_stateless/02494_query_cache_sparse_columns.sql new file mode 100644 index 00000000000..4344d139d60 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_sparse_columns.sql @@ -0,0 +1,22 @@ +-- Tags: no-parallel + +DROP TABLE IF EXISTS t_cache_sparse; +SYSTEM DROP QUERY CACHE; + +CREATE TABLE t_cache_sparse (id UInt64, v UInt64) +ENGINE = MergeTree ORDER BY id +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9; + +SYSTEM STOP MERGES t_cache_sparse; + +INSERT INTO t_cache_sparse SELECT number, number FROM numbers(10000); +INSERT INTO t_cache_sparse SELECT number, 0 FROM numbers(10000); + +SET use_query_cache = 1; +SET max_threads = 1; + +SELECT v FROM t_cache_sparse FORMAT Null; +SELECT v FROM t_cache_sparse FORMAT Null; +SELECT count() FROM system.query_cache WHERE query LIKE 'SELECT v FROM t_cache_sparse%'; + +DROP TABLE t_cache_sparse; diff --git a/tests/queries/0_stateless/02494_query_cache_squash_partial_results.reference b/tests/queries/0_stateless/02494_query_cache_squash_partial_results.reference new file mode 100644 index 00000000000..2c4fa587dfc --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_squash_partial_results.reference @@ -0,0 +1,276 @@ +-- insert with enabled squashing +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +-- read from cache +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +-- insert with disabled squashing +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +-- read from cache +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +abc +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +def +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +ghi +jkl 
+jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl +jkl diff --git a/tests/queries/0_stateless/02494_query_cache_squash_partial_results.sql b/tests/queries/0_stateless/02494_query_cache_squash_partial_results.sql new file mode 100644 index 00000000000..391cc7f7e1a --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_squash_partial_results.sql @@ -0,0 +1,51 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +SYSTEM DROP QUERY CACHE; +DROP TABLE IF EXISTS t; + +-- Create test table with "many" rows +CREATE TABLE t(c String) ENGINE=MergeTree ORDER BY c; +SYSTEM STOP MERGES t; -- retain multiple parts to make the SELECT process multiple chunks +INSERT INTO t values ('abc') ('def') ('ghi') ('jkl'); +INSERT INTO t values ('abc') ('def') ('ghi') ('jkl'); +INSERT INTO t values ('abc') ('def') ('ghi') ('jkl'); +INSERT INTO t values ('abc') ('def') ('ghi') ('jkl'); +INSERT INTO t values ('abc') ('def') ('ghi') ('jkl'); +INSERT INTO t values ('abc') ('def') ('ghi') ('jkl'); +INSERT INTO t values ('abc') ('def') ('ghi') ('jkl'); +INSERT INTO t values ('abc') ('def') ('ghi') ('jkl'); +INSERT INTO t values ('abc') ('def') ('ghi') ('jkl'); +INSERT INTO t values ('abc') ('def') ('ghi') ('jkl'); +INSERT INTO t values ('abc') ('def') ('ghi') ('jkl'); +INSERT INTO t values ('abc') ('def') ('ghi') ('jkl'); +INSERT INTO t values ('abc') ('def') ('ghi') ('jkl'); +INSERT INTO t values ('abc') ('def') ('ghi') ('jkl'); +INSERT INTO t values ('abc') ('def') ('ghi') ('jkl'); +INSERT INTO t values ('abc') ('def') ('ghi') ('jkl'); +INSERT INTO t values ('abc') ('def') ('ghi') ('jkl'); + +-- Run query which reads multiple chunks (small max_block_size), cache result in query cache, force squashing of partial results +SELECT '-- insert with enabled squashing'; +SELECT * FROM t ORDER BY c +SETTINGS max_block_size = 3, use_query_cache = true, query_cache_squash_partial_results = true; + +-- Run again to check that no bad things happen and that the result is as expected +SELECT '-- read from cache'; +SELECT * FROM t ORDER BY c +SETTINGS max_block_size = 3, use_query_cache = true; + +SYSTEM DROP QUERY CACHE; + +-- Run query which reads multiple chunks (small max_block_size), cache result in query cache, but **disable** squashing of partial results +SELECT '-- insert with disabled squashing'; +SELECT * FROM t ORDER BY c +SETTINGS max_block_size = 3, use_query_cache = true, query_cache_squash_partial_results = false; + +-- Run again to check that no bad things happen and that the result is as expected +SELECT '-- read from cache'; +SELECT * FROM t ORDER BY c +SETTINGS max_block_size = 3, use_query_cache = true; + +DROP TABLE t; +SYSTEM DROP QUERY CACHE; diff --git a/tests/queries/0_stateless/02494_query_cache_totals_extremes.reference b/tests/queries/0_stateless/02494_query_cache_totals_extremes.reference new file mode 100644 index 00000000000..2f6e5e7bd87 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_totals_extremes.reference @@ -0,0 +1,43 @@ +1st run: +1 8 +2 2 + +0 10 +2nd run: +1 8 +2 2 + +0 10 +1 +--- +1st run: +1 8 +2 2 + +1 2 +2 8 +2nd run: +1 8 +2 2 + +1 2 +2 8 +1 +--- +1st run: +1 8 +2 2 + +0 10 + +1 2 +2 8 +2nd run: +1 8 +2 2 + +0 10 + +1 2 +2 8 +1 diff --git a/tests/queries/0_stateless/02494_query_cache_totals_extremes.sql b/tests/queries/0_stateless/02494_query_cache_totals_extremes.sql new file mode 100644 index 00000000000..8fde4a668c8 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_totals_extremes.sql @@ -0,0 +1,46 @@ 
+-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +SYSTEM DROP QUERY CACHE; +DROP TABLE IF EXISTS tbl; + +CREATE TABLE tbl (key UInt64, agg UInt64) ENGINE = MergeTree ORDER BY key; +INSERT INTO tbl VALUES (1, 3), (2, 2), (1, 4), (1, 1); + +-- A query with totals calculation. The result should be written into / read from the query cache. +-- Check that both queries produce the same result and that a query cache entry exists. +SELECT '1st run:'; +SELECT key, sum(agg) FROM tbl GROUP BY key WITH totals ORDER BY key SETTINGS use_query_cache = 1; +SELECT '2nd run:'; +SELECT key, sum(agg) FROM tbl GROUP BY key WITH totals ORDER BY key SETTINGS use_query_cache = 1; + +SELECT count(*) FROM system.query_cache; + +SELECT '---'; + +SYSTEM DROP QUERY CACHE; + +-- A query with extremes calculation. The result should be written into / read from the query cache. +-- Check that both queries produce the same result. +SELECT '1st run:'; +SELECT key, sum(agg) FROM tbl GROUP BY key ORDER BY key SETTINGS use_query_cache = 1, extremes = 1; +SELECT '2nd run:'; +SELECT key, sum(agg) FROM tbl GROUP BY key ORDER BY key SETTINGS use_query_cache = 1, extremes = 1; + +SELECT count(*) FROM system.query_cache; + +SELECT '---'; + +SYSTEM DROP QUERY CACHE; + +-- A query with totals and extremes calculation. The result should be written into / read from the query cache. +-- Check that both queries produce the same result. +SELECT '1st run:'; +SELECT key, sum(agg) FROM tbl GROUP BY key WITH totals ORDER BY key SETTINGS use_query_cache = 1, extremes = 1; +SELECT '2nd run:'; +SELECT key, sum(agg) FROM tbl GROUP BY key WITH totals ORDER BY key SETTINGS use_query_cache = 1, extremes = 1; + +SELECT count(*) FROM system.query_cache; +DROP TABLE IF EXISTS tbl; + +SYSTEM DROP QUERY CACHE; diff --git a/tests/queries/0_stateless/02494_query_cache_ttl_long.sql b/tests/queries/0_stateless/02494_query_cache_ttl_long.sql index 135ddf2195c..acaf34ee81d 100644 --- a/tests/queries/0_stateless/02494_query_cache_ttl_long.sql +++ b/tests/queries/0_stateless/02494_query_cache_ttl_long.sql @@ -3,8 +3,6 @@ -- Tag long: Test runtime is > 6 sec -- Tag no-parallel: Messes with internal cache -SET allow_experimental_query_cache = true; - SYSTEM DROP QUERY CACHE; -- Cache query result into query cache with a TTL of 3 sec diff --git a/tests/queries/0_stateless/02494_query_cache_user_quotas.reference b/tests/queries/0_stateless/02494_query_cache_user_quotas.reference new file mode 100644 index 00000000000..ea1f0e76c95 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_user_quotas.reference @@ -0,0 +1,10 @@ +Run SELECT with quota that current user may use only 1 byte in the query cache 1 +Expect no entries in the query cache 0 +Run SELECT again but w/o quota 1 +Expect one entry in the query cache 1 +--- +Run SELECT which writes its result in the query cache 1 +Run another SELECT with quota that current user may write only 1 entry in the query cache 1 +Expect one entry in the query cache 1 +Run another SELECT w/o quota 1 +Expect two entries in the query cache 2 diff --git a/tests/queries/0_stateless/02494_query_cache_user_quotas.sql b/tests/queries/0_stateless/02494_query_cache_user_quotas.sql new file mode 100644 index 00000000000..123c9d21101 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_user_quotas.sql @@ -0,0 +1,29 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +-- Tests per-user quotas of the query cache. 
Settings 'query_cache_max_size_in_bytes' and 'query_cache_max_entries' are actually supposed to +-- be used in a settings profile, together with a readonly constraint. For simplicity, test both settings stand-alone in a stateless test +-- instead of an integration test - the relevant logic is still covered either way (see the profile sketch below). + +SYSTEM DROP QUERY CACHE; + +SET query_cache_max_size_in_bytes = 1; +SELECT 'Run SELECT with quota that current user may use only 1 byte in the query cache', 1 SETTINGS use_query_cache = true; +SELECT 'Expect no entries in the query cache', count(*) FROM system.query_cache; + +SET query_cache_max_size_in_bytes = DEFAULT; +SELECT 'Run SELECT again but w/o quota', 1 SETTINGS use_query_cache = true; +SELECT 'Expect one entry in the query cache', count(*) FROM system.query_cache; + +SELECT '---'; +SYSTEM DROP QUERY CACHE; + +SELECT 'Run SELECT which writes its result in the query cache', 1 SETTINGS use_query_cache = true; +SET query_cache_max_entries = 1; +SELECT 'Run another SELECT with quota that current user may write only 1 entry in the query cache', 1 SETTINGS use_query_cache = true; +SELECT 'Expect one entry in the query cache', count(*) FROM system.query_cache; +SET query_cache_max_entries = DEFAULT; +SELECT 'Run another SELECT w/o quota', 1 SETTINGS use_query_cache = true; +SELECT 'Expect two entries in the query cache', count(*) FROM system.query_cache; + +SYSTEM DROP QUERY CACHE; diff --git a/tests/queries/0_stateless/02494_zero_copy_and_projection_and_mutation_work_together.sql b/tests/queries/0_stateless/02494_zero_copy_and_projection_and_mutation_work_together.sql index 7a51d86dd30..98427874160 100644 --- a/tests/queries/0_stateless/02494_zero_copy_and_projection_and_mutation_work_together.sql +++ b/tests/queries/0_stateless/02494_zero_copy_and_projection_and_mutation_work_together.sql @@ -72,8 +72,8 @@ SYSTEM SYNC REPLICA wikistat2; -- Such a condition will lead to successful queries.
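The per-user quota settings exercised above are, per the test's comment, meant to live in a settings profile behind a readonly constraint. A minimal sketch of that setup via SQL-driven access control; the profile name, user name, and limit values are hypothetical, and the exact CREATE SETTINGS PROFILE syntax should be checked against the server version:

CREATE SETTINGS PROFILE query_cache_quotas
SETTINGS query_cache_max_size_in_bytes = 10000 READONLY,
         query_cache_max_entries = 100 READONLY
TO some_user;
-- some_user can now use the query cache but cannot raise these limits.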
SELECT 0 FROM numbers(5) WHERE sleepEachRow(1) = 1; -select sum(hits), count() from wikistat1 GROUP BY project, subproject, path settings allow_experimental_projection_optimization = 1, force_optimize_projection = 1; -select sum(hits), count() from wikistat2 GROUP BY project, subproject, path settings allow_experimental_projection_optimization = 1, force_optimize_projection = 1; +select sum(hits), count() from wikistat1 GROUP BY project, subproject, path settings optimize_use_projections = 1, force_optimize_projection = 1; +select sum(hits), count() from wikistat2 GROUP BY project, subproject, path settings optimize_use_projections = 1, force_optimize_projection = 1; DROP TABLE wikistat1; DROP TABLE wikistat2; diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference index 598ff1a490d..ca1384fd177 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference @@ -1,21 +1,13 @@ -- Disabled query_plan_remove_redundant_sorting -- ORDER BY clauses in subqueries are untouched Expression (Projection) -Header: number UInt64 Sorting (Sorting for ORDER BY) - Header: number UInt64 Expression ((Before ORDER BY + Projection)) - Header: number UInt64 Sorting (Sorting for ORDER BY) - Header: number UInt64 Expression ((Before ORDER BY + Projection)) - Header: number UInt64 Sorting (Sorting for ORDER BY) - Header: number UInt64 Expression (Before ORDER BY) - Header: number UInt64 ReadFromStorage (SystemNumbers) - Header: number UInt64 -- Enabled query_plan_remove_redundant_sorting -- ORDER BY removes ORDER BY clauses in subqueries -- query @@ -34,13 +26,9 @@ FROM ORDER BY number ASC -- explain Expression (Projection) -Header: number UInt64 Sorting (Sorting for ORDER BY) - Header: number UInt64 Expression ((Before ORDER BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY))))) - Header: number UInt64 ReadFromStorage (SystemNumbers) - Header: number UInt64 -- execute 0 1 @@ -62,19 +50,12 @@ FROM ORDER BY number ASC -- explain Expression (Projection) -Header: number UInt64 Sorting (Sorting for ORDER BY) - Header: number UInt64 Expression ((Before ORDER BY + Projection)) - Header: number UInt64 Filling - Header: number UInt64 Sorting (Sorting for ORDER BY) - Header: number UInt64 Expression ((Before ORDER BY + (Projection + Before ORDER BY))) - Header: number UInt64 ReadFromStorage (SystemNumbers) - Header: number UInt64 -- execute 0 1 @@ -97,21 +78,13 @@ FROM ORDER BY number ASC -- explain Expression (Projection) -Header: number UInt64 Sorting (Sorting for ORDER BY) - Header: number UInt64 Expression ((Before ORDER BY + Projection)) - Header: number UInt64 LimitBy - Header: number UInt64 Expression (Before LIMIT BY) - Header: number UInt64 Sorting (Sorting for ORDER BY) - Header: number UInt64 Expression ((Before ORDER BY + (Projection + Before ORDER BY))) - Header: number UInt64 ReadFromStorage (SystemNumbers) - Header: number UInt64 -- execute 0 1 @@ -142,27 +115,15 @@ FROM ) AS t2 -- explain Expression ((Projection + Before ORDER BY)) -Header: number UInt64 - t2.number UInt64 Join (JOIN FillRightFirst) - Header: number UInt64 - t2.number UInt64 Expression ((Before JOIN + Projection)) - Header: number UInt64 Sorting (Sorting for ORDER BY) - Header: number UInt64 Expression ((Before ORDER BY + (Projection + Before ORDER BY))) - Header: number UInt64 ReadFromStorage (SystemNumbers) - Header: number UInt64 Expression 
((Joined actions + (Rename joined columns + Projection))) - Header: t2.number UInt64 Sorting (Sorting for ORDER BY) - Header: number UInt64 Expression ((Before ORDER BY + (Projection + Before ORDER BY))) - Header: number UInt64 ReadFromStorage (SystemNumbers) - Header: number UInt64 -- execute 0 2 0 1 @@ -200,25 +161,13 @@ FROM ORDER BY t1.number, t2.number -- explain Expression (Projection) -Header: number UInt64 - t2.number UInt64 Sorting (Sorting for ORDER BY) - Header: number UInt64 - t2.number UInt64 Expression (Before ORDER BY) - Header: number UInt64 - t2.number UInt64 Join (JOIN FillRightFirst) - Header: number UInt64 - t2.number UInt64 Expression ((Before JOIN + (Projection + (Before ORDER BY + (Projection + Before ORDER BY))))) - Header: number UInt64 ReadFromStorage (SystemNumbers) - Header: number UInt64 Expression ((Joined actions + (Rename joined columns + (Projection + (Before ORDER BY + (Projection + Before ORDER BY)))))) - Header: t2.number UInt64 ReadFromStorage (SystemNumbers) - Header: number UInt64 -- execute 0 0 0 1 @@ -246,14 +195,9 @@ FROM GROUP BY number -- explain Expression ((Projection + Before ORDER BY)) -Header: sum(number) UInt64 Aggregating - Header: number UInt64 - sum(number) UInt64 Expression ((Before GROUP BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY))))) - Header: number UInt64 ReadFromStorage (SystemNumbers) - Header: number UInt64 -- execute 0 2 @@ -275,13 +219,9 @@ FROM GROUP BY number -- explain Expression ((Projection + Before ORDER BY)) -Header: number UInt64 Aggregating - Header: number UInt64 Expression ((Before GROUP BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY))))) - Header: number UInt64 ReadFromStorage (SystemNumbers) - Header: number UInt64 -- execute 0 2 @@ -297,13 +237,9 @@ FROM ) -- explain Expression ((Projection + Before ORDER BY)) -Header: sum(number) UInt64 Aggregating - Header: sum(number) UInt64 Expression ((Before GROUP BY + (Projection + Before ORDER BY))) - Header: number UInt64 ReadFromStorage (SystemNumbers) - Header: number UInt64 -- execute 45 -- check that optimization is applied recursively to subqueries as well @@ -324,18 +260,11 @@ FROM ORDER BY a ASC -- explain Expression (Projection) -Header: a UInt64 Sorting (Sorting for ORDER BY) - Header: a UInt64 Expression ((Before ORDER BY + (Projection + Before ORDER BY))) - Header: a UInt64 Aggregating - Header: number UInt64 - sum(number) UInt64 Expression ((Before GROUP BY + (Projection + Before ORDER BY))) - Header: number UInt64 ReadFromStorage (SystemNumbers) - Header: number UInt64 -- execute 0 1 @@ -357,17 +286,11 @@ FROM ORDER BY a ASC -- explain Expression (Projection) -Header: a UInt64 Sorting (Sorting for ORDER BY) - Header: a UInt64 Expression ((Before ORDER BY + (Projection + Before ORDER BY))) - Header: a UInt64 Aggregating - Header: number UInt64 Expression ((Before GROUP BY + (Projection + Before ORDER BY))) - Header: number UInt64 ReadFromStorage (SystemNumbers) - Header: number UInt64 -- execute 0 1 @@ -394,15 +317,10 @@ FROM WHERE a > 0 -- explain Expression ((Projection + (Before ORDER BY + ))) -Header: a UInt64 Aggregating - Header: number UInt64 Filter - Header: number UInt64 Filter (( + (Before GROUP BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY)))))) - Header: number UInt64 ReadFromStorage (SystemNumbers) - Header: number UInt64 -- execute 2 1 @@ -424,17 +342,11 @@ FROM ORDER BY number ASC -- explain Expression (Projection) -Header: number UInt64 Sorting (Sorting for ORDER BY) - Header: 
number UInt64 Expression ((Before ORDER BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY))))) - Header: number UInt64 Aggregating - Header: number UInt64 Expression (Before GROUP BY) - Header: number UInt64 ReadFromStorage (SystemNumbers) - Header: number UInt64 -- execute 0 1 @@ -459,28 +371,16 @@ FROM ) -- explain Expression ((Projection + Before ORDER BY)) -Header: toTypeName(sum(v)) String - sum(v) Float64 Aggregating - Header: sum(v) Float64 Expression ((Before GROUP BY + Projection)) - Header: v Float64 Sorting (Sorting for ORDER BY) - Header: v Float64 Union - Header: v Float64 Expression ((Before ORDER BY + (Conversion before UNION + (Projection + Before ORDER BY)))) - Header: v Float64 ReadFromStorage (SystemOne) - Header: dummy UInt8 Expression (( + (Conversion before UNION + (Projection + Before ORDER BY)))) - Header: v Float64 ReadFromStorage (SystemOne) - Header: dummy UInt8 Expression (( + (Conversion before UNION + (Projection + Before ORDER BY)))) - Header: v Float64 ReadFromStorage (SystemOne) - Header: dummy UInt8 -- execute Float64 9007199254740994 -- sum() with Nullable(Floats) depends on order, -> sorting is not removed here @@ -503,28 +403,16 @@ FROM ) -- explain Expression ((Projection + Before ORDER BY)) -Header: toTypeName(sum(v)) String - sum(v) Nullable(Float64) Aggregating - Header: sum(v) Nullable(Float64) Expression ((Before GROUP BY + Projection)) - Header: v Nullable(Float64) Sorting (Sorting for ORDER BY) - Header: v Nullable(Float64) Union - Header: v Nullable(Float64) Expression ((Before ORDER BY + (Conversion before UNION + (Projection + Before ORDER BY)))) - Header: v Nullable(Float64) ReadFromStorage (SystemOne) - Header: dummy UInt8 Expression (( + (Conversion before UNION + (Projection + Before ORDER BY)))) - Header: v Nullable(Float64) ReadFromStorage (SystemOne) - Header: dummy UInt8 Expression (( + (Conversion before UNION + (Projection + Before ORDER BY)))) - Header: v Nullable(Float64) ReadFromStorage (SystemOne) - Header: dummy UInt8 -- execute Nullable(Float64) 9007199254740994 -- sumIf() with Floats depends on order, -> sorting is not removed here @@ -547,29 +435,16 @@ FROM ) -- explain Expression ((Projection + Before ORDER BY)) -Header: toTypeName(sumIf(v, greater(v, 0))) String - sumIf(v, greater(v, 0)) Float64 Aggregating - Header: sumIf(v, greater(v, 0)) Float64 Expression ((Before GROUP BY + Projection)) - Header: v Float64 - greater(v, 0) UInt8 Sorting (Sorting for ORDER BY) - Header: v Float64 Union - Header: v Float64 Expression ((Before ORDER BY + (Conversion before UNION + (Projection + Before ORDER BY)))) - Header: v Float64 ReadFromStorage (SystemOne) - Header: dummy UInt8 Expression (( + (Conversion before UNION + (Projection + Before ORDER BY)))) - Header: v Float64 ReadFromStorage (SystemOne) - Header: dummy UInt8 Expression (( + (Conversion before UNION + (Projection + Before ORDER BY)))) - Header: v Float64 ReadFromStorage (SystemOne) - Header: dummy UInt8 -- execute Float64 9007199254740994 -- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function @@ -587,22 +462,12 @@ FROM ORDER BY number ASC -- explain Expression (Projection) -Header: number UInt64 - neighbor(number, 2) UInt64 Sorting (Sorting for ORDER BY) - Header: number UInt64 - neighbor(number, 2) UInt64 Expression (Before ORDER BY) - Header: number UInt64 - neighbor(number, 2) UInt64 Expression (Projection) - Header: number UInt64 Sorting (Sorting for ORDER BY) - 
Header: number UInt64 Expression (Before ORDER BY) - Header: number UInt64 ReadFromStorage (SystemNumbers) - Header: number UInt64 -- execute 0 0 1 0 @@ -627,19 +492,11 @@ FROM ) -- explain Expression (Projection) -Header: number UInt64 - plus(number, 2) UInt64 Expression (Before ORDER BY) - Header: number UInt64 - plus(number, 2) UInt64 Expression (Projection) - Header: number UInt64 Sorting (Sorting for ORDER BY) - Header: number UInt64 Expression (Before ORDER BY) - Header: number UInt64 ReadFromStorage (SystemNumbers) - Header: number UInt64 -- execute 9 11 8 10 diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.sh b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh index 45d8e188824..8b529c26d93 100755 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.sh +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh @@ -4,8 +4,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -DISABLE_OPTIMIZATION="SET query_plan_remove_redundant_sorting=0;SET optimize_duplicate_order_by_and_distinct=0" -ENABLE_OPTIMIZATION="SET query_plan_remove_redundant_sorting=1;SET optimize_duplicate_order_by_and_distinct=0" +if [ -z ${ENABLE_ANALYZER+x} ]; then + ENABLE_ANALYZER=0 +fi + +DISABLE_OPTIMIZATION="SET allow_experimental_analyzer=$ENABLE_ANALYZER;SET query_plan_remove_redundant_sorting=0;SET optimize_duplicate_order_by_and_distinct=0" +ENABLE_OPTIMIZATION="SET allow_experimental_analyzer=$ENABLE_ANALYZER;SET query_plan_remove_redundant_sorting=1;SET optimize_duplicate_order_by_and_distinct=0" echo "-- Disabled query_plan_remove_redundant_sorting" echo "-- ORDER BY clauses in subqueries are untouched" @@ -22,13 +26,13 @@ FROM ORDER BY number DESC ) ORDER BY number ASC" -$CLICKHOUSE_CLIENT -nq "$DISABLE_OPTIMIZATION;EXPLAIN header=1 $query" +$CLICKHOUSE_CLIENT -nq "$DISABLE_OPTIMIZATION;EXPLAIN $query" function run_query { echo "-- query" echo "$1" echo "-- explain" - $CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;EXPLAIN header=1 $1" + $CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;EXPLAIN $1" echo "-- execute" $CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;$1" } diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference new file mode 100644 index 00000000000..ddc89a72821 --- /dev/null +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference @@ -0,0 +1,523 @@ +-- Disabled query_plan_remove_redundant_sorting +-- ORDER BY clauses in subqueries are untouched +Expression (Project names) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names)))) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names)))) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + Change column names to column identifiers))) + ReadFromStorage (SystemNumbers) +-- Enabled query_plan_remove_redundant_sorting +-- ORDER BY removes ORDER BY clauses in subqueries +-- query +SELECT * +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number ASC + ) + ORDER BY number DESC +) +ORDER BY number ASC +-- explain +Expression (Project names) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before 
ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))))))) + ReadFromStorage (SystemNumbers) +-- execute +0 +1 +2 +-- ORDER BY cannot remove ORDER BY in subquery WITH FILL +-- query +SELECT * +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number DESC + ) + ORDER BY number ASC WITH FILL STEP 1 +) +ORDER BY number ASC +-- explain +Expression (Project names) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names)))) + Filling + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))) + ReadFromStorage (SystemNumbers) +-- execute +0 +1 +2 +-- ORDER BY cannot remove ORDER BY in subquery with LIMIT BY +-- query +SELECT * +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number DESC + ) + ORDER BY number ASC + LIMIT 1 BY number +) +ORDER BY number ASC +-- explain +Expression (Project names) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names)))) + LimitBy + Expression (Before LIMIT BY) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))) + ReadFromStorage (SystemNumbers) +-- execute +0 +1 +2 +-- CROSS JOIN with subqueries, nor ORDER BY nor GROUP BY in main query -> only ORDER BY clauses in most inner subqueries will be removed +-- query +SELECT * +FROM +( + SELECT number + FROM + ( + SELECT number + FROM numbers(3) + ORDER BY number DESC + ) + ORDER BY number ASC +) AS t1, +( + SELECT number + FROM + ( + SELECT number + FROM numbers(3) + ORDER BY number ASC + ) + ORDER BY number DESC +) AS t2 +-- explain +Expression ((Project names + (Projection + DROP unused columns after JOIN))) + Join (JOIN FillRightFirst) + Expression ((Change column names to column identifiers + Project names)) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))) + ReadFromStorage (SystemNumbers) + Expression ((Change column names to column identifiers + Project names)) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))) + ReadFromStorage (SystemNumbers) +-- execute +0 2 +0 1 +0 0 +1 2 +1 1 +1 0 +2 2 +2 1 +2 0 +-- CROSS JOIN with subqueries, ORDER BY in main query -> all ORDER BY clauses will be removed in subqueries +-- query +SELECT * +FROM +( + SELECT number + FROM + ( + SELECT number + FROM numbers(3) + ORDER BY number DESC + ) + ORDER BY number ASC +) AS t1, +( + SELECT number + FROM + ( + SELECT number + FROM numbers(3) + ORDER BY number ASC + ) + ORDER BY number DESC +) AS t2 +ORDER BY t1.number, t2.number +-- explain +Expression (Project names) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + DROP unused columns after JOIN))) + Join (JOIN FillRightFirst) + Expression ((Change column names to 
column identifiers + (Project names + (Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))))) + ReadFromStorage (SystemNumbers) + Expression ((Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))))) + ReadFromStorage (SystemNumbers) +-- execute +0 0 +0 1 +0 2 +1 0 +1 1 +1 2 +2 0 +2 1 +2 2 +-- GROUP BY with aggregation function which does NOT depend on order -> eliminate ORDER BY(s) in _all_ subqueries +-- query +SELECT sum(number) +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number ASC + ) + ORDER BY number DESC +) +GROUP BY number +-- explain +Expression ((Project names + Projection)) + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers)))))))))) + ReadFromStorage (SystemNumbers) +-- execute +0 +2 +1 +-- GROUP BY with aggregation function which depends on order -> keep ORDER BY in first subquery, and eliminate in second subquery +-- query +SELECT any(number) +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number ASC + ) + ORDER BY number DESC +) +GROUP BY number +-- explain +Expression ((Project names + Projection)) + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + Project names))) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))) + ReadFromStorage (SystemNumbers) +-- execute +0 +2 +1 +-- query with aggregation function but w/o GROUP BY -> remove sorting +-- query +SELECT sum(number) +FROM +( + SELECT * + FROM numbers(10) + ORDER BY number DESC +) +-- explain +Expression ((Project names + Projection)) + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers)))))) + ReadFromStorage (SystemNumbers) +-- execute +45 +-- check that optimization is applied recursively to subqueries as well +-- GROUP BY with aggregation function which does NOT depend on order -> eliminate ORDER BY in most inner subquery here +-- query +SELECT a +FROM +( + SELECT sum(number) AS a + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number ASC + ) + GROUP BY number +) +ORDER BY a ASC +-- explain +Expression (Project names) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + Projection))))) + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers)))))) + ReadFromStorage (SystemNumbers) +-- execute +0 +1 +2 +-- GROUP BY with aggregation function which depends on order -> ORDER BY in subquery is kept due to the aggregation function +-- query +SELECT a +FROM +( + SELECT any(number) AS a + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number ASC + ) + GROUP BY number +) +ORDER BY a ASC +-- explain 
+Expression (Project names) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + Projection))))) + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + Project names))) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + Change column names to column identifiers))) + ReadFromStorage (SystemNumbers) +-- execute +0 +1 +2 +-- Check that optimization works for subqueries as well - main query has neither ORDER BY nor GROUP BY +-- query +SELECT a +FROM +( + SELECT any(number) AS a + FROM + ( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number DESC + ) + ORDER BY number ASC + ) + GROUP BY number +) +WHERE a > 0 +-- explain +Expression ((Project names + Projection)) + Filter ((WHERE + (Change column names to column identifiers + (Project names + Projection)))) + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + Project names))) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + Change column names to column identifiers))))))) + ReadFromStorage (SystemNumbers) +-- execute +2 +1 +-- GROUP BY in most inner query makes execution parallelized, and removing inner sorting steps will keep it that way. But we need to correctly update the data streams' sorting properties after removing sorting steps +-- query +SELECT * +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + GROUP BY number + ORDER BY number ASC + ) + ORDER BY number ASC +) +ORDER BY number ASC +-- explain +Expression (Project names) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Before ORDER BY + Projection)))))))))) + Aggregating + Expression ((Before GROUP BY + Change column names to column identifiers)) + ReadFromStorage (SystemNumbers) +-- execute +0 +1 +2 +-- sum() with Floats depends on order, -> sorting is not removed here +-- query +SELECT + toTypeName(sum(v)), + sum(v) +FROM +( + SELECT v + FROM + ( + SELECT CAST('9007199254740992', 'Float64') AS v + UNION ALL + SELECT CAST('1', 'Float64') AS v + UNION ALL + SELECT CAST('1', 'Float64') AS v + ) + ORDER BY v ASC +) +-- explain +Expression ((Project names + Projection)) + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + Project names))) + Sorting (Sorting for ORDER BY) + Union + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Conversion before UNION + (Project names + (Projection + Change column names to column identifiers))))))) + ReadFromStorage (SystemOne) + Expression (( + ( + ( + (Conversion before UNION + (Project names + (Projection + Change column names to column identifiers))))))) + ReadFromStorage (SystemOne) + Expression (( + ( + ( + (Conversion before UNION + (Project names + (Projection + Change column names to column identifiers))))))) + ReadFromStorage (SystemOne) +-- execute +Float64 9007199254740994 +-- sum() with Nullable(Floats) depends on order, -> sorting is not removed here +-- query +SELECT + toTypeName(sum(v)), + sum(v) +FROM +( + SELECT v + FROM + ( + SELECT '9007199254740992'::Nullable(Float64) AS v + UNION ALL + SELECT '1'::Nullable(Float64) AS v + UNION ALL 
+ SELECT '1'::Nullable(Float64) AS v + ) + ORDER BY v ASC +) +-- explain +Expression ((Project names + Projection)) + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + Project names))) + Sorting (Sorting for ORDER BY) + Union + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Conversion before UNION + (Project names + (Projection + Change column names to column identifiers))))))) + ReadFromStorage (SystemOne) + Expression (( + ( + ( + (Conversion before UNION + (Project names + (Projection + Change column names to column identifiers))))))) + ReadFromStorage (SystemOne) + Expression (( + ( + ( + (Conversion before UNION + (Project names + (Projection + Change column names to column identifiers))))))) + ReadFromStorage (SystemOne) +-- execute +Nullable(Float64) 9007199254740994 +-- sumIf() with Floats depends on order, -> sorting is not removed here +-- query +SELECT + toTypeName(sumIf(v, v > 0)), + sumIf(v, v > 0) +FROM +( + SELECT v + FROM + ( + SELECT CAST('9007199254740992', 'Float64') AS v + UNION ALL + SELECT CAST('1', 'Float64') AS v + UNION ALL + SELECT CAST('1', 'Float64') AS v + ) + ORDER BY v ASC +) +-- explain +Expression ((Project names + Projection)) + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + Project names))) + Sorting (Sorting for ORDER BY) + Union + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Conversion before UNION + (Project names + (Projection + Change column names to column identifiers))))))) + ReadFromStorage (SystemOne) + Expression (( + ( + ( + (Conversion before UNION + (Project names + (Projection + Change column names to column identifiers))))))) + ReadFromStorage (SystemOne) + Expression (( + ( + ( + (Conversion before UNION + (Project names + (Projection + Change column names to column identifiers))))))) + ReadFromStorage (SystemOne) +-- execute +Float64 9007199254740994 +-- disable common optimization to prevent functions from being lifted up (liftUpFunctions optimization), needed for testing with a stateful function +-- neighbor() as a stateful function prevents removing inner ORDER BY since its result depends on order +-- query +SELECT + number, + neighbor(number, 2) +FROM +( + SELECT * + FROM numbers(10) + ORDER BY number DESC +) +ORDER BY number ASC +-- explain +Expression (Project names) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Expression (Projection) + Expression (Change column names to column identifiers) + Expression (Project names) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Expression (Projection) + Expression (Change column names to column identifiers) + ReadFromStorage (SystemNumbers) +-- execute +0 0 +1 0 +2 0 +3 1 +4 2 +5 3 +6 4 +7 5 +8 6 +9 7 +-- non-stateful function does _not_ prevent removing inner ORDER BY +-- query +SELECT + number, + plus(number, 2) +FROM +( + SELECT * + FROM numbers(10) + ORDER BY number DESC +) +-- explain +Expression (Project names) + Expression (Projection) + Expression (Change column names to column identifiers) + Expression (Project names) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Expression (Projection) + Expression (Change column names to column identifiers) + ReadFromStorage (SystemNumbers) +-- execute +9 11 +8 10 +7 9 +6 8 +5 7 +4 6 +3 5 +2 4 +1 3 +0 2 diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.sh 
b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.sh new file mode 100755 index 00000000000..3913609fdd2 --- /dev/null +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# shellcheck source=./02496_remove_redundant_sorting.sh +ENABLE_ANALYZER=1 . "$CURDIR"/02496_remove_redundant_sorting.sh diff --git a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference index c6265e195c4..a1a653361ee 100644 --- a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference +++ b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference @@ -37,7 +37,7 @@ QUERY id: 0 JOIN TREE TABLE id: 7, table_name: system.numbers LIMIT - CONSTANT id: 17, constant_value: UInt64_10, constant_value_type: UInt8 + CONSTANT id: 17, constant_value: UInt64_10, constant_value_type: UInt64 google google google @@ -80,7 +80,7 @@ QUERY id: 0 JOIN TREE TABLE id: 9, table_name: system.numbers LIMIT - CONSTANT id: 19, constant_value: UInt64_10, constant_value_type: UInt8 + CONSTANT id: 19, constant_value: UInt64_10, constant_value_type: UInt64 other1 other1 google1 @@ -124,7 +124,7 @@ QUERY id: 0 JOIN TREE TABLE id: 9, table_name: system.numbers LIMIT - CONSTANT id: 20, constant_value: UInt64_10, constant_value_type: UInt8 + CONSTANT id: 20, constant_value: UInt64_10, constant_value_type: UInt64 google1 google1 google1 @@ -171,7 +171,7 @@ QUERY id: 0 JOIN TREE TABLE id: 11, table_name: system.numbers LIMIT - CONSTANT id: 22, constant_value: UInt64_10, constant_value_type: UInt8 + CONSTANT id: 22, constant_value: UInt64_10, constant_value_type: UInt64 google google google @@ -225,7 +225,7 @@ QUERY id: 0 JOIN TREE TABLE id: 12, table_name: system.numbers LIMIT - CONSTANT id: 22, constant_value: UInt64_10, constant_value_type: UInt8 + CONSTANT id: 22, constant_value: UInt64_10, constant_value_type: UInt64 other other google @@ -276,7 +276,7 @@ QUERY id: 0 JOIN TREE TABLE id: 10, table_name: system.numbers LIMIT - CONSTANT id: 20, constant_value: UInt64_10, constant_value_type: UInt8 + CONSTANT id: 20, constant_value: UInt64_10, constant_value_type: UInt64 google google google google google google @@ -343,7 +343,7 @@ QUERY id: 0 JOIN TREE TABLE id: 9, table_name: system.numbers LIMIT - CONSTANT id: 19, constant_value: UInt64_10, constant_value_type: UInt8 + CONSTANT id: 19, constant_value: UInt64_10, constant_value_type: UInt64 other other other other google google @@ -404,17 +404,7 @@ QUERY id: 0 JOIN TREE TABLE id: 7, table_name: system.numbers LIMIT - CONSTANT id: 17, constant_value: UInt64_10, constant_value_type: UInt8 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N + CONSTANT id: 17, constant_value: UInt64_10, constant_value_type: UInt64 SELECT transform(number, [NULL], _CAST([\'google\', \'censor.net\', \'yahoo\'], \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\'), _CAST(\'other\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\')) FROM ( @@ -424,56 +414,38 @@ FROM ) QUERY id: 0 PROJECTION COLUMNS - transform(number, [NULL], [\'google\', \'censor.net\', \'yahoo\'], \'other\') Nullable(Nothing) + transform(number, [NULL], [\'google\', \'censor.net\', \'yahoo\'], \'other\') String PROJECTION LIST id: 1, nodes: 1 - FUNCTION id: 2, 
function_name: transform, function_type: ordinary, result_type: Nullable(Nothing) + FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String ARGUMENTS - LIST id: 3, nodes: 4 - COLUMN id: 4, column_name: number, result_type: Nullable(Nothing), source_id: 5 - CONSTANT id: 6, constant_value: Array_[NULL], constant_value_type: Array(Nullable(Nothing)) - CONSTANT id: 7, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String) - CONSTANT id: 8, constant_value: \'other\', constant_value_type: String + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: transform, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4) + ARGUMENTS + LIST id: 5, nodes: 4 + COLUMN id: 6, column_name: number, result_type: Nullable(Nothing), source_id: 7 + CONSTANT id: 8, constant_value: Array_[NULL], constant_value_type: Array(Nullable(Nothing)) + FUNCTION id: 9, function_name: _CAST, function_type: ordinary, result_type: Array(Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4)) + ARGUMENTS + LIST id: 10, nodes: 2 + CONSTANT id: 11, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String) + CONSTANT id: 12, constant_value: \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\', constant_value_type: String + FUNCTION id: 13, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4) + ARGUMENTS + LIST id: 14, nodes: 2 + CONSTANT id: 15, constant_value: \'other\', constant_value_type: String + CONSTANT id: 16, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\', constant_value_type: String JOIN TREE - QUERY id: 5, is_subquery: 1 + QUERY id: 7, is_subquery: 1 PROJECTION COLUMNS number Nullable(Nothing) PROJECTION - LIST id: 9, nodes: 1 - CONSTANT id: 10, constant_value: NULL, constant_value_type: Nullable(Nothing) + LIST id: 17, nodes: 1 + CONSTANT id: 18, constant_value: NULL, constant_value_type: Nullable(Nothing) JOIN TREE - TABLE id: 11, table_name: system.numbers + TABLE id: 19, table_name: system.numbers LIMIT - CONSTANT id: 12, constant_value: UInt64_10, constant_value_type: UInt8 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -SELECT transform(number, NULL, _CAST([\'google\', \'censor.net\', \'yahoo\'], \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\'), _CAST(\'other\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\')) -FROM system.numbers -LIMIT 10 -QUERY id: 0 - PROJECTION COLUMNS - transform(number, NULL, [\'google\', \'censor.net\', \'yahoo\'], \'other\') Nullable(Nothing) - PROJECTION - LIST id: 1, nodes: 1 - FUNCTION id: 2, function_name: transform, function_type: ordinary, result_type: Nullable(Nothing) - ARGUMENTS - LIST id: 3, nodes: 4 - COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5 - CONSTANT id: 6, constant_value: NULL, constant_value_type: Nullable(Nothing) - CONSTANT id: 7, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String) - CONSTANT id: 8, constant_value: \'other\', constant_value_type: String - JOIN TREE - TABLE id: 5, table_name: system.numbers - LIMIT - CONSTANT id: 9, constant_value: UInt64_10, constant_value_type: UInt8 + CONSTANT id: 20, constant_value: UInt64_10, 
constant_value_type: UInt64 other other google @@ -502,7 +474,7 @@ QUERY id: 0 JOIN TREE TABLE id: 5, table_name: system.numbers LIMIT - CONSTANT id: 9, constant_value: UInt64_10, constant_value_type: UInt8 + CONSTANT id: 9, constant_value: UInt64_10, constant_value_type: UInt64 google google google @@ -534,4 +506,4 @@ QUERY id: 0 JOIN TREE TABLE id: 7, table_name: system.numbers LIMIT - CONSTANT id: 11, constant_value: UInt64_10, constant_value_type: UInt8 + CONSTANT id: 11, constant_value: UInt64_10, constant_value_type: UInt64 diff --git a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.sql b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.sql index c23046c7b20..492d42cb6bc 100644 --- a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.sql +++ b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.sql @@ -33,13 +33,13 @@ SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') EXPLAIN SYNTAX SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') as value, value FROM system.numbers LIMIT 10; EXPLAIN QUERY TREE run_passes = 1 SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') as value, value FROM system.numbers LIMIT 10; -SELECT transform(number, [NULL], ['google', 'censor.net', 'yahoo'], 'other') FROM (SELECT NULL as number FROM system.numbers LIMIT 10); +SELECT transform(number, [NULL], ['google', 'censor.net', 'yahoo'], 'other') FROM (SELECT NULL as number FROM system.numbers LIMIT 10); -- { serverError 36 } EXPLAIN SYNTAX SELECT transform(number, [NULL], ['google', 'censor.net', 'yahoo'], 'other') FROM (SELECT NULL as number FROM system.numbers LIMIT 10); EXPLAIN QUERY TREE run_passes = 1 SELECT transform(number, [NULL], ['google', 'censor.net', 'yahoo'], 'other') FROM (SELECT NULL as number FROM system.numbers LIMIT 10); -SELECT transform(number, NULL, ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; -EXPLAIN SYNTAX SELECT transform(number, NULL, ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; -EXPLAIN QUERY TREE run_passes = 1 SELECT transform(number, NULL, ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; +SELECT transform(number, NULL, ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; -- { serverError 43 } +EXPLAIN SYNTAX SELECT transform(number, NULL, ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; -- { serverError 43 } +EXPLAIN QUERY TREE run_passes = 1 SELECT transform(number, NULL, ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; -- { serverError 43 } SET optimize_if_transform_strings_to_enum = 0; diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.reference b/tests/queries/0_stateless/02497_storage_file_reader_selection.reference index 8da37e4219c..680eb502aaa 100644 --- a/tests/queries/0_stateless/02497_storage_file_reader_selection.reference +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.reference @@ -1,10 +1,4 @@ -key -foo -bar 1 0 -key -foo -bar 0 1 diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh index 4d9336bc1a0..20bde68718d 100755 --- a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh @@ -4,24 +4,13 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck 
source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -DATA_FILE=$USER_FILES_PATH/test_02497_storage_file_reader.data +DATA_FILE=test_02497_$CLICKHOUSE_TEST_UNIQUE_NAME.tsv echo -e 'key\nfoo\nbar' > $DATA_FILE -QUERY_ID=$RANDOM -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02497_storage_file_reader.data', 'TSV', 's String')" \ - --query_id $QUERY_ID +$CLICKHOUSE_LOCAL --storage_file_read_method=mmap --print-profile-events -q "SELECT * FROM file($DATA_FILE) FORMAT Null" 2>&1 | grep -F -c "CreatedReadBufferMMap" +$CLICKHOUSE_LOCAL --storage_file_read_method=mmap --print-profile-events -q "SELECT * FROM file($DATA_FILE) FORMAT Null" 2>&1 | grep -F -c "CreatedReadBufferOrdinary" -$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" -$CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferMMap']) FROM system.query_log WHERE query_id='$QUERY_ID'" -$CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferOrdinary']) FROM system.query_log WHERE query_id='$QUERY_ID'" +$CLICKHOUSE_LOCAL --storage_file_read_method=pread --print-profile-events -q "SELECT * FROM file($DATA_FILE) FORMAT Null" 2>&1 | grep -F -c "CreatedReadBufferMMap" +$CLICKHOUSE_LOCAL --storage_file_read_method=pread --print-profile-events -q "SELECT * FROM file($DATA_FILE) FORMAT Null" 2>&1 | grep -F -c "CreatedReadBufferOrdinary" -QUERY_ID=$RANDOM -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02497_storage_file_reader.data', 'TSV', 's String')" \ - --query_id $QUERY_ID \ - --storage_file_read_method=pread - -$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" -$CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferMMap']) FROM system.query_log WHERE query_id='$QUERY_ID'" -$CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferOrdinary']) FROM system.query_log WHERE query_id='$QUERY_ID'" +rm $DATA_FILE diff --git a/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.reference b/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.reference new file mode 100644 index 00000000000..d0cf9ff680b --- /dev/null +++ b/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.reference @@ -0,0 +1,371 @@ +-- { echoOn } + +-- basic tests + +-- expected output: {'age':'31','name':'neymar','nationality':'brazil','team':'psg'} +WITH + extractKeyValuePairs('name:neymar, age:31 team:psg,nationality:brazil') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{'age':'31','name':'neymar','nationality':'brazil','team':'psg'} +-- keys and values starting with number, underscore and other special characters +-- expected output: {'$nationality':'@brazil','1name':'neymar','4ge':'31','_team':'_psg'} +WITH + extractKeyValuePairs('1name:neymar, 4ge:31 _team:_psg,$nationality:@brazil') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{'$nationality':'@brazil','1name':'neymar','4ge':'31','_team':'_psg'} +-- only special characters +-- expected output: {'#':'#','$':'$','@':'@','_':'_'} +WITH + extractKeyValuePairs('_:_, @:@ #:#,$:$') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{'#':'#','$':'$','@':'@','_':'_'} +-- special (not control) characters in the middle 
of elements +-- expected output: {'age':'3!','name':'ney!mar','nationality':'br4z!l','t&am':'@psg'} +WITH + extractKeyValuePairs('name:ney!mar, age:3! t&am:@psg,nationality:br4z!l') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{'age':'3!','name':'ney!mar','nationality':'br4z!l','t&am':'@psg'} +-- non-standard escape characters (i.e not \n, \r, \t and etc), back-slash should be preserved +-- expected output: {'amount\\z':'$5\\h','currency':'\\$USD'} +WITH + extractKeyValuePairs('currency:\$USD, amount\z:$5\h') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{'amount\\z':'$5\\h','currency':'\\$USD'} +-- invalid escape sequence at the end of file should be ignored +-- expected output: {'key':'invalid_escape_sequence','valid_key':'valid_value'} +WITH + extractKeyValuePairsWithEscaping('valid_key:valid_value key:invalid_escape_sequence\\', ':', ' ', '"') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{'key':'invalid_escape_sequence','valid_key':'valid_value'} +-- standard escape sequences are covered by unit tests + +-- simple quoting +-- expected output: {'age':'31','name':'neymar','team':'psg'} +WITH + extractKeyValuePairs('name:"neymar", "age":31 "team":"psg"') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{'age':'31','name':'neymar','team':'psg'} +-- empty values +-- expected output: {'age':'','name':'','nationality':''} +WITH + extractKeyValuePairs('name:"", age: , nationality:') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{'age':'','name':'','nationality':''} +-- empty keys +-- empty keys are not allowed, thus empty output is expected +WITH + extractKeyValuePairs('"":abc, :def') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{} +-- semi-colon as pair delimiter +-- expected output: {'age':'31','anotherkey':'anothervalue','name':'neymar','random_key':'value_with_comma,still_part_of_value:still_part_of_value','team':'psg'} +WITH + extractKeyValuePairs('name:neymar;age:31;team:psg;random_key:value_with_comma,still_part_of_value:still_part_of_value;anotherkey:anothervalue', ':', ';') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{'age':'31','anotherkey':'anothervalue','name':'neymar','random_key':'value_with_comma,still_part_of_value:still_part_of_value','team':'psg'} +-- both comma and semi-colon as pair delimiters +-- expected output: {'age':'31','last_key':'last_value','name':'neymar','nationality':'brazil','team':'psg'} +WITH + extractKeyValuePairs('name:neymar;age:31;team:psg;nationality:brazil,last_key:last_value', ':', ';,') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{'age':'31','last_key':'last_value','name':'neymar','nationality':'brazil','team':'psg'} +-- single quote as quoting character +-- expected output: {'age':'31','last_key':'last_value','name':'neymar','nationality':'brazil','team':'psg'} +WITH + extractKeyValuePairs('name:\'neymar\';\'age\':31;team:psg;nationality:brazil,last_key:last_value', ':', ';,', 
'\'') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{'age':'31','last_key':'last_value','name':'neymar','nationality':'brazil','team':'psg'} +-- NO ESCAPING TESTS +-- expected output: {'age':'31','name':'neymar','nationality':'brazil','team':'psg'} +WITH + extractKeyValuePairs('name:neymar, age:31 team:psg,nationality:brazil', ':', ', ', '"') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{'age':'31','name':'neymar','nationality':'brazil','team':'psg'} +-- special (not control) characters in the middle of elements +-- expected output: {'age':'3!','name':'ney!mar','nationality':'br4z!l','t&am':'@psg'} +WITH + extractKeyValuePairs('name:ney!mar, age:3! t&am:@psg,nationality:br4z!l', ':', ', ', '"') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{'age':'3!','name':'ney!mar','nationality':'br4z!l','t&am':'@psg'} +-- non-standard escape characters (i.e not \n, \r, \t and etc), it should accept everything +-- expected output: {'amount\\z':'$5\\h','currency':'\\$USD'} +WITH + extractKeyValuePairs('currency:\$USD, amount\z:$5\h', ':', ', ', '"') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{'amount\\z':'$5\\h','currency':'\\$USD'} +-- standard escape sequences, it should return it as it is +-- expected output: {'key1':'header\nbody','key2':'start_of_text\tend_of_text'} +WITH + extractKeyValuePairs('key1:header\nbody key2:start_of_text\tend_of_text', ':', ', ', '"') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{'key1':'header\nbody','key2':'start_of_text\tend_of_text'} +-- standard escape sequences are covered by unit tests + +-- simple quoting +-- expected output: {'age':'31','name':'neymar','team':'psg'} +WITH + extractKeyValuePairs('name:"neymar", "age":31 "team":"psg"', ':', ', ', '"') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{'age':'31','name':'neymar','team':'psg'} +-- empty values +-- expected output: {'age':'','name':'','nationality':''} +WITH + extractKeyValuePairs('name:"", age: , nationality:', ':', ', ', '"') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{'age':'','name':'','nationality':''} +-- empty keys +-- empty keys are not allowed, thus empty output is expected +WITH + extractKeyValuePairs('"":abc, :def', ':', ', ', '"') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{} +-- semi-colon as pair delimiter +-- expected output: {'age':'31','name':'neymar','nationality':'brazil','team':'psg'} +WITH + extractKeyValuePairs('name:neymar;age:31;team:psg;nationality:brazil', ':', ';', '"') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{'age':'31','name':'neymar','nationality':'brazil','team':'psg'} +-- both comma and semi-colon as pair delimiters +-- expected output: {'age':'31','last_key':'last_value','name':'neymar','nationality':'brazil','team':'psg'} +WITH + 
extractKeyValuePairs('name:neymar;age:31;team:psg;nationality:brazil,last_key:last_value', ':', ';,', '"') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{'age':'31','last_key':'last_value','name':'neymar','nationality':'brazil','team':'psg'} +-- single quote as quoting character +-- expected output: {'age':'31','last_key':'last_value','name':'neymar','nationality':'brazil','team':'psg'} +WITH + extractKeyValuePairs('name:\'neymar\';\'age\':31;team:psg;nationality:brazil,last_key:last_value', ':', ';,', '\'') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{'age':'31','last_key':'last_value','name':'neymar','nationality':'brazil','team':'psg'} +-- { echoOn } + +SET extract_kvp_max_pairs_per_row = 2; +-- Should be allowed because it no longer exceeds the max number of pairs +-- expected output: {'key1':'value1','key2':'value2'} +WITH + extractKeyValuePairs('key1:value1,key2:value2') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{'key1':'value1','key2':'value2'} +SET extract_kvp_max_pairs_per_row = 0; +-- Should be allowed because max pairs per row is set to 0 (unlimited) +-- expected output: {'key1':'value1','key2':'value2'} +WITH + extractKeyValuePairs('key1:value1,key2:value2') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{'key1':'value1','key2':'value2'} +-- should not fail because pair delimiters contains 8 characters, which is within the limit +WITH + extractKeyValuePairs('not_important', ':', '12345678', '\'') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{} +-- key value delimiter should be considered valid part of value +WITH + extractKeyValuePairs('formula=1+2=3 argument1=1 argument2=2 result=3, char="=" char2== string="foo=bar"', '=') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{'argument1':'1','argument2':'2','char':'=','char2':'=','formula':'1+2=3','result':'3','string':'foo=bar'} +-- check str_to_map alias (it is case-insensitive) +WITH + sTr_tO_mAp('name:neymar, age:31 team:psg,nationality:brazil') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{'age':'31','name':'neymar','nationality':'brazil','team':'psg'} +-- check mapFromString alias +WITH + mapFromString('name:neymar, age:31 team:psg,nationality:brazil') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; +{'age':'31','name':'neymar','nationality':'brazil','team':'psg'} diff --git a/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.sql b/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.sql new file mode 100644 index 00000000000..804ff4ce880 --- /dev/null +++ b/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.sql @@ -0,0 +1,506 @@ +-- { echoOn } + +-- basic tests + +-- expected output: {'age':'31','name':'neymar','nationality':'brazil','team':'psg'} +WITH + extractKeyValuePairs('name:neymar, age:31 team:psg,nationality:brazil') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), 
arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; + +-- keys and values starting with number, underscore and other special characters +-- expected output: {'$nationality':'@brazil','1name':'neymar','4ge':'31','_team':'_psg'} +WITH + extractKeyValuePairs('1name:neymar, 4ge:31 _team:_psg,$nationality:@brazil') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; + +-- only special characters +-- expected output: {'#':'#','$':'$','@':'@','_':'_'} +WITH + extractKeyValuePairs('_:_, @:@ #:#,$:$') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; + +-- special (not control) characters in the middle of elements +-- expected output: {'age':'3!','name':'ney!mar','nationality':'br4z!l','t&am':'@psg'} +WITH + extractKeyValuePairs('name:ney!mar, age:3! t&am:@psg,nationality:br4z!l') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; + +-- non-standard escape characters (i.e not \n, \r, \t and etc), back-slash should be preserved +-- expected output: {'amount\\z':'$5\\h','currency':'\\$USD'} +WITH + extractKeyValuePairs('currency:\$USD, amount\z:$5\h') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; + +-- invalid escape sequence at the end of file should be ignored +-- expected output: {'key':'invalid_escape_sequence','valid_key':'valid_value'} +WITH + extractKeyValuePairsWithEscaping('valid_key:valid_value key:invalid_escape_sequence\\', ':', ' ', '"') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; + +-- standard escape sequences are covered by unit tests + +-- simple quoting +-- expected output: {'age':'31','name':'neymar','team':'psg'} +WITH + extractKeyValuePairs('name:"neymar", "age":31 "team":"psg"') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; + +-- empty values +-- expected output: {'age':'','name':'','nationality':''} +WITH + extractKeyValuePairs('name:"", age: , nationality:') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; + +-- empty keys +-- empty keys are not allowed, thus empty output is expected +WITH + extractKeyValuePairs('"":abc, :def') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; + +-- semi-colon as pair delimiter +-- expected output: {'age':'31','anotherkey':'anothervalue','name':'neymar','random_key':'value_with_comma,still_part_of_value:still_part_of_value','team':'psg'} +WITH + extractKeyValuePairs('name:neymar;age:31;team:psg;random_key:value_with_comma,still_part_of_value:still_part_of_value;anotherkey:anothervalue', ':', ';') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; + +-- both comma and semi-colon as pair delimiters +-- expected output: {'age':'31','last_key':'last_value','name':'neymar','nationality':'brazil','team':'psg'} +WITH + extractKeyValuePairs('name:neymar;age:31;team:psg;nationality:brazil,last_key:last_value', ':', ';,') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 
'Map(String,String)' + ) AS x +SELECT + x; + +-- single quote as quoting character +-- expected output: {'age':'31','last_key':'last_value','name':'neymar','nationality':'brazil','team':'psg'} +WITH + extractKeyValuePairs('name:\'neymar\';\'age\':31;team:psg;nationality:brazil,last_key:last_value', ':', ';,', '\'') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; + +-- NO ESCAPING TESTS +-- expected output: {'age':'31','name':'neymar','nationality':'brazil','team':'psg'} +WITH + extractKeyValuePairs('name:neymar, age:31 team:psg,nationality:brazil', ':', ', ', '"') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; + +-- special (not control) characters in the middle of elements +-- expected output: {'age':'3!','name':'ney!mar','nationality':'br4z!l','t&am':'@psg'} +WITH + extractKeyValuePairs('name:ney!mar, age:3! t&am:@psg,nationality:br4z!l', ':', ', ', '"') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; + +-- non-standard escape characters (i.e not \n, \r, \t and etc), it should accept everything +-- expected output: {'amount\\z':'$5\\h','currency':'\\$USD'} +WITH + extractKeyValuePairs('currency:\$USD, amount\z:$5\h', ':', ', ', '"') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; + +-- standard escape sequences, it should return it as it is +-- expected output: {'key1':'header\nbody','key2':'start_of_text\tend_of_text'} +WITH + extractKeyValuePairs('key1:header\nbody key2:start_of_text\tend_of_text', ':', ', ', '"') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; + +-- standard escape sequences are covered by unit tests + +-- simple quoting +-- expected output: {'age':'31','name':'neymar','team':'psg'} +WITH + extractKeyValuePairs('name:"neymar", "age":31 "team":"psg"', ':', ', ', '"') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; + +-- empty values +-- expected output: {'age':'','name':'','nationality':''} +WITH + extractKeyValuePairs('name:"", age: , nationality:', ':', ', ', '"') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; + +-- empty keys +-- empty keys are not allowed, thus empty output is expected +WITH + extractKeyValuePairs('"":abc, :def', ':', ', ', '"') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; + +-- semi-colon as pair delimiter +-- expected output: {'age':'31','name':'neymar','nationality':'brazil','team':'psg'} +WITH + extractKeyValuePairs('name:neymar;age:31;team:psg;nationality:brazil', ':', ';', '"') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; + +-- both comma and semi-colon as pair delimiters +-- expected output: {'age':'31','last_key':'last_value','name':'neymar','nationality':'brazil','team':'psg'} +WITH + extractKeyValuePairs('name:neymar;age:31;team:psg;nationality:brazil,last_key:last_value', ':', ';,', '"') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x 
+SELECT + x; + +-- single quote as quoting character +-- expected output: {'age':'31','last_key':'last_value','name':'neymar','nationality':'brazil','team':'psg'} +WITH + extractKeyValuePairs('name:\'neymar\';\'age\':31;team:psg;nationality:brazil,last_key:last_value', ':', ';,', '\'') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; + +-- { echoOff } + +-- cross parameter validation tests +-- should fail because key value delimiter conflicts with pair delimiters +WITH + extractKeyValuePairs('not_important', ':', ',:', '\'') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; -- {serverError BAD_ARGUMENTS} + +-- should fail because key value delimiter conflicts with quoting characters +WITH + extractKeyValuePairs('not_important', ':', ',', '\':') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; -- {serverError BAD_ARGUMENTS} + +-- should fail because pair delimiters conflicts with quoting characters +WITH + extractKeyValuePairs('not_important', ':', ',', ',') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; -- {serverError BAD_ARGUMENTS} + +-- should fail because data_column argument must be of type String +WITH + extractKeyValuePairs([1, 2]) AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} + +-- should fail because key_value_delimiter argument must be of type String +WITH + extractKeyValuePairs('', [1, 2]) AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} + +-- should fail because pair_delimiters argument must be of type String +WITH + extractKeyValuePairs('', ':', [1, 2]) AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} + +-- should fail because quoting_character argument must be of type String +WITH + extractKeyValuePairs('', ':', ' ', [1, 2]) AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} + +-- should fail because pair delimiters can contain at most 8 characters +WITH + extractKeyValuePairs('not_important', ':', '123456789', '\'') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; -- {serverError BAD_ARGUMENTS} + +-- should fail because no argument has been provided +WITH + extractKeyValuePairs() AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} + +-- should fail because one extra / non-existent argument has been provided +WITH + extractKeyValuePairs('a', ':', ',', '"', '') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} + +-- Should fail because it exceeds the max number of pairs +SET extract_kvp_max_pairs_per_row = 1; +WITH + 
extractKeyValuePairs('key1:value1,key2:value2') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; -- {serverError LIMIT_EXCEEDED} + +-- { echoOn } + +SET extract_kvp_max_pairs_per_row = 2; +-- Should be allowed because it no longer exceeds the max number of pairs +-- expected output: {'key1':'value1','key2':'value2'} +WITH + extractKeyValuePairs('key1:value1,key2:value2') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; + +SET extract_kvp_max_pairs_per_row = 0; +-- Should be allowed because max pairs per row is set to 0 (unlimited) +-- expected output: {'key1':'value1','key2':'value2'} +WITH + extractKeyValuePairs('key1:value1,key2:value2') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; + +-- should not fail because pair delimiters contains 8 characters, which is within the limit +WITH + extractKeyValuePairs('not_important', ':', '12345678', '\'') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; + +-- key value delimiter should be considered valid part of value +WITH + extractKeyValuePairs('formula=1+2=3 argument1=1 argument2=2 result=3, char="=" char2== string="foo=bar"', '=') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; + +-- check str_to_map alias (it is case-insensitive) +WITH + sTr_tO_mAp('name:neymar, age:31 team:psg,nationality:brazil') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; + +-- check mapFromString alias +WITH + mapFromString('name:neymar, age:31 team:psg,nationality:brazil') AS s_map, + CAST( + arrayMap( + (x) -> (x, s_map[x]), arraySort(mapKeys(s_map)) + ), + 'Map(String,String)' + ) AS x +SELECT + x; diff --git a/tests/queries/0_stateless/02500_remove_redundant_distinct.reference b/tests/queries/0_stateless/02500_remove_redundant_distinct.reference index 32ddab4886c..2e049dbc936 100644 --- a/tests/queries/0_stateless/02500_remove_redundant_distinct.reference +++ b/tests/queries/0_stateless/02500_remove_redundant_distinct.reference @@ -464,3 +464,16 @@ Expression ((Projection + (Before ORDER BY + (Projection + Before ORDER BY)))) 1 0 +-- DISTINCT COUNT() with GROUP BY => do _not_ remove DISTINCT +-- query +select distinct count() from numbers(10) group by number +-- explain +Expression (Projection) + Distinct + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + Aggregating + Expression (Before GROUP BY) + ReadFromStorage (SystemNumbers) +-- execute +1 diff --git a/tests/queries/0_stateless/02500_remove_redundant_distinct.sh b/tests/queries/0_stateless/02500_remove_redundant_distinct.sh index 879cc776fe1..41744cc59f9 100755 --- a/tests/queries/0_stateless/02500_remove_redundant_distinct.sh +++ b/tests/queries/0_stateless/02500_remove_redundant_distinct.sh @@ -4,9 +4,13 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh +if [ -z ${ENABLE_ANALYZER+x} ]; then + ENABLE_ANALYZER=0 +fi + OPTIMIZATION_SETTING="query_plan_remove_redundant_distinct" -DISABLE_OPTIMIZATION="SET $OPTIMIZATION_SETTING=0;SET optimize_duplicate_order_by_and_distinct=0" -ENABLE_OPTIMIZATION="SET $OPTIMIZATION_SETTING=1;SET optimize_duplicate_order_by_and_distinct=0" +DISABLE_OPTIMIZATION="set allow_experimental_analyzer=$ENABLE_ANALYZER;SET $OPTIMIZATION_SETTING=0;SET optimize_duplicate_order_by_and_distinct=0" +ENABLE_OPTIMIZATION="set allow_experimental_analyzer=$ENABLE_ANALYZER;SET $OPTIMIZATION_SETTING=1;SET optimize_duplicate_order_by_and_distinct=0" echo "-- Disabled $OPTIMIZATION_SETTING" query="SELECT DISTINCT * @@ -256,3 +260,7 @@ FROM GROUP BY a WITH TOTALS )" run_query "$query" + +echo "-- DISTINCT COUNT() with GROUP BY => do _not_ remove DISTINCT" +query="select distinct count() from numbers(10) group by number" +run_query "$query" diff --git a/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference b/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference new file mode 100644 index 00000000000..c9301c1f0a3 --- /dev/null +++ b/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference @@ -0,0 +1,481 @@ +-- Disabled query_plan_remove_redundant_distinct +Expression (Project names) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + (Change column names to column identifiers + Project names))) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + (Change column names to column identifiers + Project names))) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromStorage (SystemNumbers) +-- Enabled query_plan_remove_redundant_distinct +-- DISTINCT is only in most inner subquery +-- query +SELECT DISTINCT * +FROM +( + SELECT DISTINCT * + FROM + ( + SELECT DISTINCT * + FROM numbers(3) + ) +) +-- explain +Expression ((Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + (Change column names to column identifiers + Project names))))))) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromStorage (SystemNumbers) +-- execute +0 +1 +2 +-- do _not_ remove DISTINCT after UNION +-- query +SELECT DISTINCT number FROM +( + (SELECT DISTINCT number FROM numbers(1)) + UNION ALL + (SELECT DISTINCT number FROM numbers(2)) +) +ORDER BY number +-- explain +Expression (Project names) + Distinct (DISTINCT) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Distinct (Preliminary DISTINCT) + Union + Expression ((Projection + (Change column names to column identifiers + Project names))) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromStorage (SystemNumbers) + Expression (( + ( + Project names))) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromStorage (SystemNumbers) +-- execute +0 +1 +-- do _not_ remove DISTINCT after JOIN +-- query +SELECT DISTINCT * +FROM +( + SELECT DISTINCT number AS n + FROM numbers(2) +) as x, +( + SELECT DISTINCT number AS n + FROM numbers(2) +) as y +-- explain +Expression (Project names) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + 
DROP unused columns after JOIN)) + Join (JOIN FillRightFirst) + Expression ((Change column names to column identifiers + Project names)) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromStorage (SystemNumbers) + Expression ((Change column names to column identifiers + Project names)) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromStorage (SystemNumbers) +-- execute +0 0 +0 1 +1 0 +1 1 +-- DISTINCT duplicates with several columns +-- query +SELECT DISTINCT * +FROM +( + SELECT DISTINCT * + FROM + ( + SELECT DISTINCT number as a, 2*number as b + FROM numbers(3) + ) +) +-- explain +Expression ((Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + (Change column names to column identifiers + Project names))))))) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromStorage (SystemNumbers) +-- execute +0 0 +1 2 +2 4 +-- DISTINCT duplicates with constant columns +-- query +SELECT DISTINCT 2, a, b +FROM +( + SELECT DISTINCT a, b + FROM + ( + SELECT DISTINCT 1, number as a, 2*number as b + FROM numbers(3) + ) +) +-- explain +Expression ((Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + (Change column names to column identifiers + Project names))))))) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromStorage (SystemNumbers) +-- execute +2 0 0 +2 1 2 +2 2 4 +-- ARRAY JOIN: do _not_ remove outer DISTINCT because new rows are generated between inner and outer DISTINCTs +-- query +SELECT DISTINCT * +FROM +( + SELECT DISTINCT * + FROM VALUES('Hello', 'World', 'Goodbye') +) AS words +ARRAY JOIN [0, 1] AS arr +-- explain +Expression (Project names) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression (Projection) + ArrayJoin (ARRAY JOIN) + Expression ((DROP unused columns before ARRAY JOIN + (ARRAY JOIN actions + (Change column names to column identifiers + Project names)))) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromStorage (Values) +-- execute +Hello +World +Goodbye +-- WITH FILL: do _not_ remove outer DISTINCT because new rows are generated between inner and outer DISTINCTs +-- query +SELECT DISTINCT * +FROM +( + SELECT DISTINCT * + FROM values('id UInt8', 0, 2) + ORDER BY id ASC WITH FILL +) +-- explain +Expression (Project names) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + (Change column names to column identifiers + Project names))) + Filling + Distinct (DISTINCT) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromStorage (Values) +-- execute +0 +1 +2 +-- WHERE with arrayJoin(): do _not_ remove outer DISTINCT because new rows are generated between inner and outer DISTINCTs +-- query +SELECT DISTINCT * +FROM +( + SELECT DISTINCT ['Istanbul', 'Berlin', 'Bensheim'] AS cities +) +WHERE arrayJoin(cities) IN ['Berlin', 'Bensheim'] +-- explain +Expression (Project names) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression (Projection) + Filter ((WHERE + (Change 
column names to column identifiers + Project names))) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromStorage (SystemOne) +-- execute +['Istanbul','Berlin','Bensheim'] +-- GROUP BY before DISTINCT with on the same columns => remove DISTINCT +-- query +SELECT DISTINCT a +FROM +( + SELECT + a, + sum(b) AS c + FROM + ( + SELECT + x.number AS a, + y.number AS b + FROM numbers(3) AS x, numbers(3, 3) AS y + ) + GROUP BY a +) +-- explain +Expression ((Project names + (Projection + (Change column names to column identifiers + (Project names + Projection))))) + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) + Join (JOIN FillRightFirst) + Expression (Change column names to column identifiers) + ReadFromStorage (SystemNumbers) + Expression (Change column names to column identifiers) + ReadFromStorage (SystemNumbers) +-- execute +0 +2 +1 +-- GROUP BY before DISTINCT with on different columns => do _not_ remove DISTINCT +-- query +SELECT DISTINCT c +FROM +( + SELECT + a, + sum(b) AS c + FROM + ( + SELECT + x.number AS a, + y.number AS b + FROM numbers(3) AS x, numbers(3, 3) AS y + ) + GROUP BY a +) +-- explain +Expression (Project names) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + (Change column names to column identifiers + (Project names + Projection)))) + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) + Join (JOIN FillRightFirst) + Expression (Change column names to column identifiers) + ReadFromStorage (SystemNumbers) + Expression (Change column names to column identifiers) + ReadFromStorage (SystemNumbers) +-- execute +12 +-- GROUP BY WITH ROLLUP before DISTINCT with on different columns => do _not_ remove DISTINCT +-- query +SELECT DISTINCT c +FROM +( + SELECT + a, + sum(b) AS c + FROM + ( + SELECT + x.number AS a, + y.number AS b + FROM numbers(3) AS x, numbers(3, 3) AS y + ) + GROUP BY a WITH ROLLUP +) +-- explain +Expression (Project names) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + (Change column names to column identifiers + (Project names + Projection)))) + Rollup + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) + Join (JOIN FillRightFirst) + Expression (Change column names to column identifiers) + ReadFromStorage (SystemNumbers) + Expression (Change column names to column identifiers) + ReadFromStorage (SystemNumbers) +-- execute +12 +36 +-- GROUP BY WITH ROLLUP before DISTINCT with on the same columns => remove DISTINCT +-- query +SELECT DISTINCT a +FROM +( + SELECT + a, + sum(b) AS c + FROM + ( + SELECT + x.number AS a, + y.number AS b + FROM numbers(3) AS x, numbers(3, 3) AS y + ) + GROUP BY a WITH ROLLUP +) +-- explain +Expression ((Project names + (Projection + (Change column names to column identifiers + (Project names + Projection))))) + Rollup + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) + Join (JOIN FillRightFirst) + Expression (Change column names to column identifiers) + ReadFromStorage (SystemNumbers) + Expression (Change column names to column identifiers) + ReadFromStorage 
(SystemNumbers) +-- execute +0 +2 +1 +0 +-- GROUP BY WITH CUBE before DISTINCT with on different columns => do _not_ remove DISTINCT +-- query +SELECT DISTINCT c +FROM +( + SELECT + a, + sum(b) AS c + FROM + ( + SELECT + x.number AS a, + y.number AS b + FROM numbers(3) AS x, numbers(3, 3) AS y + ) + GROUP BY a WITH CUBE +) +-- explain +Expression (Project names) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + (Change column names to column identifiers + (Project names + Projection)))) + Cube + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) + Join (JOIN FillRightFirst) + Expression (Change column names to column identifiers) + ReadFromStorage (SystemNumbers) + Expression (Change column names to column identifiers) + ReadFromStorage (SystemNumbers) +-- execute +12 +36 +-- GROUP BY WITH CUBE before DISTINCT with on the same columns => remove DISTINCT +-- query +SELECT DISTINCT a +FROM +( + SELECT + a, + sum(b) AS c + FROM + ( + SELECT + x.number AS a, + y.number AS b + FROM numbers(3) AS x, numbers(3, 3) AS y + ) + GROUP BY a WITH CUBE +) +-- explain +Expression ((Project names + (Projection + (Change column names to column identifiers + (Project names + Projection))))) + Cube + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) + Join (JOIN FillRightFirst) + Expression (Change column names to column identifiers) + ReadFromStorage (SystemNumbers) + Expression (Change column names to column identifiers) + ReadFromStorage (SystemNumbers) +-- execute +0 +2 +1 +0 +-- GROUP BY WITH TOTALS before DISTINCT with on different columns => do _not_ remove DISTINCT +-- query +SELECT DISTINCT c +FROM +( + SELECT + a, + sum(b) AS c + FROM + ( + SELECT + x.number AS a, + y.number AS b + FROM numbers(3) AS x, numbers(3, 3) AS y + ) + GROUP BY a WITH TOTALS +) +-- explain +Expression (Project names) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + (Change column names to column identifiers + (Project names + Projection)))) + TotalsHaving + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) + Join (JOIN FillRightFirst) + Expression (Change column names to column identifiers) + ReadFromStorage (SystemNumbers) + Expression (Change column names to column identifiers) + ReadFromStorage (SystemNumbers) +-- execute +12 + +36 +-- GROUP BY WITH TOTALS before DISTINCT with on the same columns => remove DISTINCT +-- query +SELECT DISTINCT a +FROM +( + SELECT + a, + sum(b) AS c + FROM + ( + SELECT + x.number AS a, + y.number AS b + FROM numbers(3) AS x, numbers(3, 3) AS y + ) + GROUP BY a WITH TOTALS +) +-- explain +Expression ((Project names + (Projection + (Change column names to column identifiers + (Project names + Projection))))) + TotalsHaving + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + DROP unused columns after JOIN))))) + Join (JOIN FillRightFirst) + Expression (Change column names to column identifiers) + ReadFromStorage (SystemNumbers) + Expression (Change column names to column identifiers) + ReadFromStorage (SystemNumbers) +-- execute +0 +2 +1 + +0 +-- DISTINCT COUNT() with GROUP BY => do _not_ remove DISTINCT +-- query +select distinct count() 
from numbers(10) group by number +-- explain +Expression (Project names) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression (Projection) + Aggregating + Expression ((Before GROUP BY + Change column names to column identifiers)) + ReadFromStorage (SystemNumbers) +-- execute +1 diff --git a/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.sh b/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.sh new file mode 100755 index 00000000000..b979980d6c4 --- /dev/null +++ b/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# shellcheck source=./02500_remove_redundant_distinct.sh +ENABLE_ANALYZER=1 . "$CURDIR"/02500_remove_redundant_distinct.sh diff --git a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh index 918adc12de6..ed66c36b823 100755 --- a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh +++ b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh @@ -33,5 +33,5 @@ select count() from system.filesystem_cache_log where query_id = '$query_id' AND ${CLICKHOUSE_CLIENT} --multiline --multiquery -q " select count() from ttt; -drop table ttt no delay; +drop table ttt sync; " diff --git a/tests/queries/0_stateless/02504_regexp_dictionary_table_source.reference b/tests/queries/0_stateless/02504_regexp_dictionary_table_source.reference index 28b98cfabf3..4e72cf4ce37 100644 --- a/tests/queries/0_stateless/02504_regexp_dictionary_table_source.reference +++ b/tests/queries/0_stateless/02504_regexp_dictionary_table_source.reference @@ -1,5 +1,11 @@ +1 0 Linux/(\\d+[\\.\\d]*).+tlinux ['version','name'] ['\\1','TencentOS'] +2 0 (\\d+)/tclwebkit(\\d+[\\.\\d]*) ['comment','version','name'] ['test $1 and $2','$1','Android'] +3 2 33/tclwebkit ['version'] ['13'] +4 2 3[12]/tclwebkit ['version'] ['12'] +5 2 3[12]/tclwebkit ['version'] ['11'] +6 2 3[12]/tclwebkit ['version'] ['10'] ('TencentOS',101,'nothing') -('Andriod',13,'test 33 and 11.10') +('Android',13,'test 33 and 11.10') ('',NULL,'nothing') ('',0,'default') 30/tclwebkit0 @@ -17,19 +23,22 @@ 42/tclwebkit12 43/tclwebkit13 44/tclwebkit14 -('Andriod',30) -('Andriod',12) -('Andriod',12) -('Andriod',13) -('Andriod',34) -('Andriod',35) -('Andriod',36) -('Andriod',37) -('Andriod',38) -('Andriod',39) -('Andriod',40) -('Andriod',41) -('Andriod',42) -('Andriod',43) -('Andriod',44) -('Andriod1',33,'matched 3') +('Android',30) +('Android',12) +('Android',12) +('Android',13) +('Android',34) +('Android',35) +('Android',36) +('Android',37) +('Android',38) +('Android',39) +('Android',40) +('Android',41) +('Android',42) +('Android',43) +('Android',44) +('Android1',33,'matched 3') +1 0 (\\d+)/tclwebkit ['version','name'] ['$1','Android'] +2 0 33/tclwebkit ['comment','version'] ['matched 3','13'] +3 1 33/tclwebkit ['name'] ['Android1'] diff --git a/tests/queries/0_stateless/02504_regexp_dictionary_table_source.sql b/tests/queries/0_stateless/02504_regexp_dictionary_table_source.sql index a8f1fb17a45..42d7acbf057 100644 --- a/tests/queries/0_stateless/02504_regexp_dictionary_table_source.sql +++ b/tests/queries/0_stateless/02504_regexp_dictionary_table_source.sql @@ -1,7 +1,7 @@ -- Tags: use-vectorscan -DROP TABLE IF EXISTS regexp_dictionary_source_table; DROP DICTIONARY IF EXISTS regexp_dict1; 
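-- Note on the reordering in this hunk (and the matching one at the end of the
-- file): with SOURCE(CLICKHOUSE(TABLE ...)) the dictionary now depends directly
-- on its source table, so presumably the table can no longer be dropped while
-- the dictionary still references it; the cleanup is therefore reordered to
-- drop the dictionary first. A minimal sketch of the safe teardown order,
-- using the names from this test:
--   DROP DICTIONARY IF EXISTS regexp_dict1;
--   DROP TABLE IF EXISTS regexp_dictionary_source_table;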
+DROP TABLE IF EXISTS regexp_dictionary_source_table; CREATE TABLE regexp_dictionary_source_table ( @@ -15,7 +15,7 @@ CREATE TABLE regexp_dictionary_source_table -- test back reference. INSERT INTO regexp_dictionary_source_table VALUES (1, 0, 'Linux/(\d+[\.\d]*).+tlinux', ['name', 'version'], ['TencentOS', '\1']) -INSERT INTO regexp_dictionary_source_table VALUES (2, 0, '(\d+)/tclwebkit(\d+[\.\d]*)', ['name', 'version', 'comment'], ['Andriod', '$1', 'test $1 and $2']) +INSERT INTO regexp_dictionary_source_table VALUES (2, 0, '(\d+)/tclwebkit(\d+[\.\d]*)', ['name', 'version', 'comment'], ['Android', '$1', 'test $1 and $2']) INSERT INTO regexp_dictionary_source_table VALUES (3, 2, '33/tclwebkit', ['version'], ['13']) INSERT INTO regexp_dictionary_source_table VALUES (4, 2, '3[12]/tclwebkit', ['version'], ['12']) INSERT INTO regexp_dictionary_source_table VALUES (5, 2, '3[12]/tclwebkit', ['version'], ['11']) @@ -29,10 +29,11 @@ create dictionary regexp_dict1 comment String default 'nothing' ) PRIMARY KEY(regexp) -SOURCE(CLICKHOUSE(QUERY concat('select * from ', currentDatabase() , '.regexp_dictionary_source_table'))) +SOURCE(CLICKHOUSE(TABLE 'regexp_dictionary_source_table')) LIFETIME(0) -LAYOUT(regexp_tree) -SETTINGS(regexp_dict_allow_other_sources = true); +LAYOUT(regexp_tree); + +select * from dictionary(regexp_dict1); select dictGet('regexp_dict1', ('name', 'version', 'comment'), 'Linux/101.tlinux'); select dictGet('regexp_dict1', ('name', 'version', 'comment'), '33/tclwebkit11.10x'); @@ -64,14 +65,14 @@ SYSTEM RELOAD dictionary regexp_dict1; -- { serverError 489 } truncate table regexp_dictionary_source_table; INSERT INTO regexp_dictionary_source_table VALUES (1, 2, 'Linux/(\d+[\.\d]*).+tlinux', ['name', 'version'], ['TencentOS', '\1']) -INSERT INTO regexp_dictionary_source_table VALUES (2, 3, '(\d+)/tclwebkit(\d+[\.\d]*)', ['name', 'version', 'comment'], ['Andriod', '$1', 'test $1 and $2']) -INSERT INTO regexp_dictionary_source_table VALUES (3, 1, '(\d+)/tclwebkit(\d+[\.\d]*)', ['name', 'version', 'comment'], ['Andriod', '$1', 'test $1 and $2']) +INSERT INTO regexp_dictionary_source_table VALUES (2, 3, '(\d+)/tclwebkit(\d+[\.\d]*)', ['name', 'version', 'comment'], ['Android', '$1', 'test $1 and $2']) +INSERT INTO regexp_dictionary_source_table VALUES (3, 1, '(\d+)/tclwebkit(\d+[\.\d]*)', ['name', 'version', 'comment'], ['Android', '$1', 'test $1 and $2']) SYSTEM RELOAD dictionary regexp_dict1; -- { serverError 489 } -- test priority truncate table regexp_dictionary_source_table; -INSERT INTO regexp_dictionary_source_table VALUES (1, 0, '(\d+)/tclwebkit', ['name', 'version'], ['Andriod', '$1']); -INSERT INTO regexp_dictionary_source_table VALUES (3, 1, '33/tclwebkit', ['name'], ['Andriod1']); -- child has more priority than parents. +INSERT INTO regexp_dictionary_source_table VALUES (1, 0, '(\d+)/tclwebkit', ['name', 'version'], ['Android', '$1']); +INSERT INTO regexp_dictionary_source_table VALUES (3, 1, '33/tclwebkit', ['name'], ['Android1']); -- child has more priority than parents. INSERT INTO regexp_dictionary_source_table VALUES (2, 0, '33/tclwebkit', ['version', 'comment'], ['13', 'matched 3']); -- larger id has lower priority than small id. 
SYSTEM RELOAD dictionary regexp_dict1; select dictGet(regexp_dict1, ('name', 'version', 'comment'), '33/tclwebkit'); @@ -79,7 +80,8 @@ select dictGet(regexp_dict1, ('name', 'version', 'comment'), '33/tclwebkit'); truncate table regexp_dictionary_source_table; SYSTEM RELOAD dictionary regexp_dict1; -- { serverError 489 } +select * from dictionary(regexp_dict1); +DROP DICTIONARY IF EXISTS regexp_dict1; DROP TABLE IF EXISTS regexp_dictionary_source_table; DROP TABLE IF EXISTS needle_table; -DROP DICTIONARY IF EXISTS regexp_dict1; diff --git a/tests/queries/0_stateless/02504_regexp_dictionary_ua_parser.reference b/tests/queries/0_stateless/02504_regexp_dictionary_ua_parser.reference index b161b099eef..872ee1a0f33 100644 --- a/tests/queries/0_stateless/02504_regexp_dictionary_ua_parser.reference +++ b/tests/queries/0_stateless/02504_regexp_dictionary_ua_parser.reference @@ -1,793 +1,793 @@ -AppleTV Other 0.0 ATV OS X 0.0.0 -LG-M150 Firefox Mobile 68.0 Android 7.0.0 -Generic Smartphone Firefox Mobile 68.0 Android 8.0.0 -Generic Tablet Firefox Mobile 68.0 Android 8.1.0 -Generic Smartphone Firefox Mobile 68.0 Android 9.0.0 -PH-1 Chrome Mobile 77.0 Android 10.0.0 -Pixel 2 XL Chrome Mobile 77.0 Android 10.0.0 -Pixel 2 Chrome Mobile 77.0 Android 10.0.0 -Pixel 3 Facebook 240.0 Android 10.0.0 -Pixel XL Chrome Mobile WebView 77.0 Android 10.0.0 -Pixel XL Chrome Mobile 77.0 Android 10.0.0 -HTC Sensation 4G Chrome Mobile 42.0 Android 4.0.3 -Kindle Amazon Silk 73.7 Android 4.0.3 -Samsung GT-I9152 Chrome Mobile 42.0 Android 4.2.2 -Samsung GT-N5110 Chrome 76.0 Android 4.4.2 -RCT6773W22 Chrome 77.0 Android 4.4.2 -Samsung SM-T217S Chrome 77.0 Android 4.4.2 -Samsung SM-T530NU Chrome 77.0 Android 4.4.2 -TegraNote-P1640 Chrome 69.0 Android 4.4.2 -Kindle Amazon Silk 76.3 Android 4.4.3 -Samsung SM-A500H Chrome Mobile 73.0 Android 5.0.2 -Samsung SM-T357T Chrome 77.0 Android 5.0.2 -Samsung SM-T530NU Chrome 76.0 Android 5.0.2 -Samsung SM-T530NU Chrome 77.0 Android 5.0.2 -RCT6213W87DK Yandex Browser 19.4 Android 5.0.0 -Samsung SM-N900T Facebook 229.0 Android 5.0.0 -Generic Smartphone Chrome Mobile WebView 70.0 Android 5.1.1 -Kindle Amazon Silk 76.3 Android 5.1.1 -AFTT Chrome Mobile WebView 70.0 Android 5.1.1 -Kindle Amazon Silk 76.3 Android 5.1.1 -Kindle Amazon Silk 76.3 Android 5.1.1 -Kindle Amazon Silk 71.2 Android 5.1.1 -Kindle Amazon Silk 76.3 Android 5.1.1 -Kindle Amazon Silk 76.3 Android 5.1.1 -Kindle Amazon Silk 76.3 Android 5.1.1 -Kindle Amazon Silk 76.3 Android 5.1.1 -Kindle Amazon Silk 77.1 Android 5.1.1 -LG-AS330 Chrome Mobile 77.0 Android 5.1.1 -LGL43AL Chrome Mobile 77.0 Android 5.1.1 -Samsung SM-G530R7 Samsung Internet 9.2 Android 5.1.1 -Samsung SM-T377P Samsung Internet 10.1 Android 5.1.1 -Samsung SM-T900 Samsung Internet 10.1 Android 5.1.1 -Samsung SM-T337A Chrome 69.0 Android 5.1.1 -Samsung SM-G360T1 Chrome Mobile 67.0 Android 5.1.1 -Samsung SM-J320FN Chrome Mobile 74.0 Android 5.1.1 -SM-T280 Chrome 74.0 Android 5.1.1 -Samsung SM-T330NU Chrome 71.0 Android 5.1.1 -SM-T670 Chrome 76.0 Android 5.1.1 -SM-T670 Chrome 77.0 Android 5.1.1 -Vodafone Smart ultra 6 Chrome Mobile WebView 74.0 Android 5.1.1 -BLU Advance 5.0 Chrome Mobile 66.0 Android 5.1.0 -HTC Desire 626s Chrome Mobile 77.0 Android 5.1.0 -HUAWEI LUA-L22 Chrome Mobile 50.0 Android 5.1.0 -NX16A11264 Chrome 77.0 Android 5.1.0 -XT1526 Chrome Mobile 73.0 Android 5.1.0 -Oppo CPH1613 Chrome Mobile 77.0 Android 6.0.1 -LG-M153 Chrome Mobile WebView 55.0 Android 6.0.1 -LG-M153 Chrome Mobile 77.0 Android 6.0.1 -LGLS676 Chrome Mobile 77.0 Android 6.0.1 -N9136 
Chrome Mobile 74.0 Android 6.0.1 -Asus Nexus 7 Chrome 44.0 Android 6.0.1 -Samsung SM-G900I Samsung Internet 10.1 Android 6.0.1 -Samsung SM-G900P Samsung Internet 7.2 Android 6.0.1 -Samsung SM-J700M Samsung Internet 10.1 Android 6.0.1 -Samsung SM-S327VL Samsung Internet 10.1 Android 6.0.1 -Samsung SM-T377A Chrome 77.0 Android 6.0.1 -Samsung SM-G532M Chrome Mobile 55.0 Android 6.0.1 -Samsung SM-G532M Facebook 240.0 Android 6.0.1 -Samsung SM-G532M Chrome Mobile 77.0 Android 6.0.1 -Samsung SM-G550T Chrome Mobile 76.0 Android 6.0.1 -Samsung SM-G550T Chrome Mobile 77.0 Android 6.0.1 -Samsung SM-G550T1 Chrome Mobile 76.0 Android 6.0.1 -Samsung SM-G900V Chrome Mobile 73.0 Android 6.0.1 -Samsung SM-G920A Chrome Mobile 77.0 Android 6.0.1 -Samsung SM-J327P Chrome Mobile 77.0 Android 6.0.1 -Samsung SM-N910S Chrome Mobile 75.0 Android 6.0.1 -Samsung SM-N920V Chrome Mobile 76.0 Android 6.0.1 -Samsung SM-T350 Chrome 59.0 Android 6.0.1 -Samsung SM-T560NU Chrome 77.0 Android 6.0.1 -SM-T800 Chrome 77.0 Android 6.0.1 -XT1254 Chrome Mobile 77.0 Android 6.0.1 -Z798BL Chrome Mobile 67.0 Android 6.0.1 -Z799VL Chrome Mobile WebView 45.0 Android 6.0.1 -5010X Chrome Mobile 76.0 Android 6.0.0 -Huawei CAM-L21 Chrome Mobile 77.0 Android 6.0.0 -F3313 Chrome Mobile 77.0 Android 6.0.0 -RCT6603W47M7 Chrome 77.0 Android 6.0.0 -5049Z Chrome Mobile 56.0 Android 7.0.0 -Asus A002A Chrome Mobile 77.0 Android 7.0.0 -Alcatel_5044C Chrome Mobile 77.0 Android 7.0.0 -Astra Young Pro Chrome Mobile WebView 59.0 Android 7.0.0 -Infinix X571 Chrome Mobile 77.0 Android 7.0.0 -LG-H872 Chrome Mobile 64.0 Android 7.0.0 -LG-K425 Chrome Mobile 55.0 Android 7.0.0 -LG-LS777 Chrome Mobile 77.0 Android 7.0.0 -LG-M210 Chrome Mobile 77.0 Android 7.0.0 -LG-M430 Chrome Mobile 77.0 Android 7.0.0 -LG-TP260 Chrome Mobile WebView 64.0 Android 7.0.0 -LG-TP260 Chrome Mobile 77.0 Android 7.0.0 -LG-TP450 Chrome Mobile 64.0 Android 7.0.0 -LG-V521 Chrome 75.0 Android 7.0.0 -LG-V521 Chrome 77.0 Android 7.0.0 -LGMP260 Chrome Mobile 58.0 Android 7.0.0 -LGMS210 Chrome Mobile 55.0 Android 7.0.0 -LGMS210 Chrome Mobile 77.0 Android 7.0.0 -P00I Chrome 77.0 Android 7.0.0 -RS988 Chrome Mobile 77.0 Android 7.0.0 -Samsung SM-J701F Samsung Internet 10.1 Android 7.0.0 -Samsung SM-J710F Samsung Internet 10.1 Android 7.0.0 -Samsung SM-N920T Samsung Internet 9.2 Android 7.0.0 -Samsung SM-G920A Chrome Mobile 77.0 Android 7.0.0 -Samsung SM-G920P Flipboard 4.2 Android 7.0.0 -Samsung SM-G920V Chrome Mobile 76.0 Android 7.0.0 -Samsung SM-G928V Chrome Mobile 77.0 Android 7.0.0 -Samsung SM-G950U Chrome Mobile 77.0 Android 7.0.0 -Samsung SM-G955U Chrome Mobile 77.0 Android 7.0.0 -Samsung SM-J327T Chrome Mobile 74.0 Android 7.0.0 -Samsung SM-J327T Chrome Mobile 77.0 Android 7.0.0 -Samsung SM-J327T1 Chrome Mobile 64.0 Android 7.0.0 -Samsung SM-J327T1 Chrome Mobile 75.0 Android 7.0.0 -Samsung SM-J327T1 Chrome Mobile 77.0 Android 7.0.0 -Samsung SM-N9208 Chrome Mobile 73.0 Android 7.0.0 -Samsung SM-N920P Chrome Mobile 74.0 Android 7.0.0 -Samsung SM-N920T Chrome Mobile 77.0 Android 7.0.0 -SM-T585 Chrome 77.0 Android 7.0.0 -SM-T810 Chrome 75.0 Android 7.0.0 -SM-T810 Chrome 76.0 Android 7.0.0 -SM-T810 Chrome 77.0 Android 7.0.0 -SM-T813 Chrome 76.0 Android 7.0.0 -SM-T813 Chrome 76.0 Android 7.0.0 -Trekstor ST1009X Chrome 75.0 Android 7.0.0 -XT1663 Chrome Mobile 77.0 Android 7.0.0 -Generic Smartphone Chrome Mobile 58.0 Android 7.0.0 -A574BL Chrome Mobile WebView 77.0 Android 7.1.1 -A574BL Chrome Mobile 77.0 Android 7.1.1 -Oppo CPH1729 Facebook 240.0 Android 7.1.1 -3632A Chrome Mobile 74.0 
Android 7.1.1 -General Mobile 4G Dual Chrome Mobile 77.0 Android 7.1.1 -Moto E (4) Plus Chrome Mobile WebView 76.0 Android 7.1.1 -Moto E (4) Chrome Mobile 70.0 Android 7.1.1 -Moto E (4) Chrome Mobile 76.0 Android 7.1.1 -Moto E (4) Chrome Mobile 77.0 Android 7.1.1 -Moto E (4) Chrome Mobile 77.0 Android 7.1.1 -NX591J Chrome Mobile 77.0 Android 7.1.1 -REVVLPLUS C3701A Chrome Mobile 64.0 Android 7.1.1 -Samsung SM-J320A Samsung Internet 10.1 Android 7.1.1 -Samsung SM-T550 Samsung Internet 10.1 Android 7.1.1 -Samsung SM-T377A Chrome 64.0 Android 7.1.1 -Samsung SM-J250F Chrome Mobile 76.0 Android 7.1.1 -Samsung SM-J700T Chrome Mobile 77.0 Android 7.1.1 -SM-T350 Chrome 77.0 Android 7.1.1 -Samsung SM-T377T Chrome 77.0 Android 7.1.1 -Samsung SM-T550 Chrome 69.0 Android 7.1.1 -SM-T550 Chrome 77.0 Android 7.1.1 -Samsung SM-T560NU Chrome 77.0 Android 7.1.1 -X20 Chrome Mobile WebView 52.0 Android 7.1.1 -Z851M Chrome Mobile 58.0 Android 7.1.1 -Z899VL Chrome Mobile WebView 74.0 Android 7.1.1 -Z982 Chrome Mobile WebView 75.0 Android 7.1.1 -Z982 Chrome Mobile 77.0 Android 7.1.1 -Generic Smartphone Chrome Mobile WebView 70.0 Android 7.1.2 -AFTKMST12 Chrome Mobile WebView 70.0 Android 7.1.2 -Kindle Amazon Silk 76.3 Android 7.1.2 -AFTMM Chrome Mobile WebView 70.0 Android 7.1.2 -AFTN Chrome Mobile WebView 70.0 Android 7.1.2 -KFKAWI Chrome Mobile WebView 59.0 Android 7.1.2 -Kindle Amazon Silk 76.3 Android 7.1.2 -Kindle Amazon Silk 76.3 Android 7.1.2 -LG-SP200 Chrome Mobile 75.0 Android 7.1.2 -LG-SP200 Chrome Mobile 76.0 Android 7.1.2 -LM-X210(G) Chrome Mobile 76.0 Android 7.1.2 -LM-X210 Chrome Mobile 76.0 Android 7.1.2 -RCT6973W43R Chrome 77.0 Android 7.1.2 -XiaoMi Redmi 4 Chrome Mobile 77.0 Android 7.1.2 -Generic Smartphone Chrome Mobile WebView 76.0 Android 8.0.0 -Asus Z01FD Chrome Mobile 77.0 Android 8.0.0 -Huawei AUM-L29 Chrome Mobile 77.0 Android 8.0.0 -BRAVIA 4K GB Chrome Mobile WebView 77.0 Android 8.0.0 -CMR-W09 Chrome 77.0 Android 8.0.0 -EVA-AL00 Chrome Mobile 77.0 Android 8.0.0 -G3223 Chrome Mobile 77.0 Android 8.0.0 -LG-H910 Chrome Mobile 77.0 Android 8.0.0 -LG-H931 Chrome Mobile 76.0 Android 8.0.0 -LG-H932 Chrome Mobile 77.0 Android 8.0.0 -Samsung SM-A520F Samsung Internet 10.1 Android 8.0.0 -Samsung SM-G891A Samsung Internet 8.2 Android 8.0.0 -Samsung SM-G935T Samsung Internet 10.1 Android 8.0.0 -Samsung SM-G955U Samsung Internet 10.1 Android 8.0.0 -Samsung SM-J337T Samsung Internet 9.2 Android 8.0.0 -Samsung SM-J737P Samsung Internet 10.1 Android 8.0.0 -Samsung SM-N950F Samsung Internet 10.1 Android 8.0.0 -Samsung SM-G891A Chrome Mobile 72.0 Android 8.0.0 -Samsung SM-G935A Chrome Mobile 76.0 Android 8.0.0 -Samsung SM-A720F Chrome Mobile 77.0 Android 8.0.0 -Samsung SM-G570F Facebook 231.0 Android 8.0.0 -Samsung SM-G570Y Chrome Mobile 77.0 Android 8.0.0 -Samsung SM-G930T Chrome Mobile WebView 77.0 Android 8.0.0 -Samsung SM-G930V Chrome Mobile 64.0 Android 8.0.0 -Samsung SM-G930VL Chrome Mobile 74.0 Android 8.0.0 -Samsung SM-G935F Chrome Mobile 75.0 Android 8.0.0 -Samsung SM-G935P Chrome Mobile 77.0 Android 8.0.0 -Samsung SM-G935T Facebook 240.0 Android 8.0.0 -Samsung SM-G935T Chrome Mobile 77.0 Android 8.0.0 -Samsung SM-G950U Chrome Mobile 77.0 Android 8.0.0 -Samsung SM-G955U Chrome Mobile 74.0 Android 8.0.0 -Samsung SM-G955U Chrome Mobile 77.0 Android 8.0.0 -Samsung SM-J330G Chrome Mobile 77.0 Android 8.0.0 -Samsung SM-J337T Chrome Mobile 77.0 Android 8.0.0 -Samsung SM-J737A Chrome Mobile 77.0 Android 8.0.0 -Samsung SM-J737T1 Chrome Mobile 66.0 Android 8.0.0 -Samsung SM-J737T1 Chrome Mobile 77.0 
Android 8.0.0 -Samsung SM-N950F Chrome Mobile 66.0 Android 8.0.0 -Samsung SM-N950U Chrome Mobile 76.0 Android 8.0.0 -Samsung SM-N950U Chrome Mobile 77.0 Android 8.0.0 -Samsung SM-N950U1 Chrome Mobile 77.0 Android 8.0.0 -Samsung SM-S367VL Chrome Mobile 77.0 Android 8.0.0 -VS995 Chrome Mobile 77.0 Android 8.0.0 -XT1635-02 Chrome Mobile 77.0 Android 8.0.0 -moto e5 play Chrome Mobile 76.0 Android 8.0.0 -moto e5 play Chrome Mobile 77.0 Android 8.0.0 -moto e5 supra Chrome Mobile 76.0 Android 8.0.0 -moto g(6) Chrome Mobile 77.0 Android 8.0.0 -5041C Chrome Mobile 77.0 Android 8.1.0 -6062W Chrome Mobile 77.0 Android 8.1.0 -A502DL Chrome Mobile 67.0 Android 8.1.0 -A502DL Chrome Mobile 76.0 Android 8.1.0 -Huawei BKK-LX2 Chrome Mobile 76.0 Android 8.1.0 -C4 Chrome Mobile 70.0 Android 8.1.0 -3310A Chrome Mobile 77.0 Android 8.1.0 -Infinix X604 Chrome Mobile 64.0 Android 8.1.0 -Joy 1 Chrome Mobile 77.0 Android 8.1.0 -LAVA LE9820 Chrome Mobile 77.0 Android 8.1.0 -LG-Q710AL Chrome Mobile 77.0 Android 8.1.0 -LM-Q610(FGN) Chrome Mobile 77.0 Android 8.1.0 -LM-Q710(FGN) Facebook 235.0 Android 8.1.0 -LM-Q710(FGN) Chrome Mobile 70.0 Android 8.1.0 -LM-Q710(FGN) Chrome Mobile 76.0 Android 8.1.0 -LM-Q710(FGN) Chrome Mobile 76.0 Android 8.1.0 -LM-Q710(FGN) Chrome Mobile 77.0 Android 8.1.0 -LM-V405 Chrome Mobile 77.0 Android 8.1.0 -LM-X210(G) UC Browser 11.6 Android 8.1.0 -LM-X210(G) Chrome Mobile 70.0 Android 8.1.0 -LM-X210(G) Chrome Mobile 72.0 Android 8.1.0 -LM-X210(G) Chrome Mobile 77.0 Android 8.1.0 -LM-X212(G) Chrome Mobile 77.0 Android 8.1.0 -LM-X220 Chrome Mobile 70.0 Android 8.1.0 -LM-X220 Chrome Mobile 76.0 Android 8.1.0 -LM-X220PM Chrome Mobile WebView 77.0 Android 8.1.0 -LM-X410(FG) Chrome Mobile 70.0 Android 8.1.0 -LM-X410(FG) Chrome Mobile 76.0 Android 8.1.0 -LM-X410(FG) Chrome Mobile 77.0 Android 8.1.0 -LM-X410.FGN Chrome Mobile 68.0 Android 8.1.0 -LML414DL Chrome Mobile 76.0 Android 8.1.0 -LML713DL Chrome Mobile 77.0 Android 8.1.0 -Moto G (5S) Plus Chrome Mobile 77.0 Android 8.1.0 -HTC One Chrome Mobile WebView 70.0 Android 8.1.0 -RCT6873W42BMF8KC Chrome Mobile 77.0 Android 8.1.0 -REVVL 2 Chrome Mobile 67.0 Android 8.1.0 -REVVL 2 Chrome Mobile 76.0 Android 8.1.0 -Samsung SM-J727T Samsung Internet 10.1 Android 8.1.0 -Samsung SM-J727T1 Samsung Internet 9.4 Android 8.1.0 -Samsung SM-J727T1 Samsung Internet 10.1 Android 8.1.0 -Samsung SM-T580 Samsung Internet 9.4 Android 8.1.0 -Samsung SM-J727A Facebook 240.0 Android 8.1.0 -Samsung SM-G610F Chrome Mobile 77.0 Android 8.1.0 -Samsung SM-J260T1 Chrome Mobile 76.0 Android 8.1.0 -Samsung SM-J260T1 Chrome Mobile 76.0 Android 8.1.0 -Samsung SM-J260T1 Chrome Mobile 77.0 Android 8.1.0 -Samsung SM-J410F Chrome Mobile 77.0 Android 8.1.0 -Samsung SM-J727P Chrome Mobile 68.0 Android 8.1.0 -Samsung SM-J727T Chrome Mobile 66.0 Android 8.1.0 -Samsung SM-J727T1 Chrome Mobile 76.0 Android 8.1.0 -Samsung SM-J727T1 Chrome Mobile 77.0 Android 8.1.0 -Samsung SM-J727T1 Chrome Mobile 77.0 Android 8.1.0 -Samsung SM-J727V Chrome Mobile 70.0 Android 8.1.0 -Samsung SM-J727V Chrome Mobile 77.0 Android 8.1.0 -SM-P580 Chrome 77.0 Android 8.1.0 -SM-T380 Chrome 75.0 Android 8.1.0 -SM-T580 Edge Mobile 42.0 Android 8.1.0 -SM-T580 Chrome 76.0 Android 8.1.0 -SM-T580 Chrome 76.0 Android 8.1.0 -SM-T580 Chrome 77.0 Android 8.1.0 -Samsung SM-T837T Chrome 77.0 Android 8.1.0 -TECNO CF8 Facebook 239.0 Android 8.1.0 -V1818CA Chrome Mobile 75.0 Android 8.1.0 -meizu C9 Chrome Mobile 68.0 Android 8.1.0 -vivo 1724 Chrome Mobile 76.0 Android 8.1.0 -vivo 1814 Chrome Mobile 77.0 Android 8.1.0 -Generic 
Smartphone DuckDuckGo Mobile 5.0 Android 9.0.0 -1825 Chrome Mobile 70.0 Android 9.0.0 -ANE-LX2 Facebook 236.0 Android 9.0.0 -BLA-A09 Chrome Mobile 77.0 Android 9.0.0 -Huawei CLT-L04 Chrome Mobile 77.0 Android 9.0.0 -Oppo CPH1911 Facebook 239.0 Android 9.0.0 -Oppo CPH1923 Chrome Mobile WebView 76.0 Android 9.0.0 -Huawei ELE-L29 Chrome Mobile 77.0 Android 9.0.0 -G8142 Chrome Mobile 77.0 Android 9.0.0 -GM1911 Chrome Mobile 76.0 Android 9.0.0 -GM1917 Chrome Mobile 77.0 Android 9.0.0 -Huawei INE-LX2 Chrome Mobile 76.0 Android 9.0.0 -LM-G710 Chrome Mobile WebView 77.0 Android 9.0.0 -LM-Q720 Chrome Mobile 77.0 Android 9.0.0 -LM-V405 Chrome Mobile WebView 77.0 Android 9.0.0 -LM-V405 Chrome Mobile 76.0 Android 9.0.0 -LM-V500N Chrome Mobile 77.0 Android 9.0.0 -LM-X420 Chrome Mobile 72.0 Android 9.0.0 -LM-X420 Chrome Mobile 77.0 Android 9.0.0 -MAR-LX1A Chrome Mobile 77.0 Android 9.0.0 -XiaoMi MI 9 Chrome Mobile 77.0 Android 9.0.0 -XiaoMi Mi A2 Chrome Mobile 77.0 Android 9.0.0 -Moto Z (2) Chrome Mobile 77.0 Android 9.0.0 -Nokia 6 Chrome Mobile 77.0 Android 9.0.0 -OnePlus ONEPLUS A6000 Chrome Mobile 77.0 Android 9.0.0 -OnePlus ONEPLUS A6003 Chrome Mobile 77.0 Android 9.0.0 -OnePlus ONEPLUS A6013 Chrome Mobile WebView 77.0 Android 9.0.0 -OnePlus ONEPLUS A6013 Chrome Mobile 74.0 Android 9.0.0 -OnePlus ONEPLUS A6013 Chrome Mobile 77.0 Android 9.0.0 -PAR-AL00 Facebook 235.0 Android 9.0.0 -Pixel 2 XL Chrome Mobile 77.0 Android 9.0.0 -Pixel 3 Chrome Mobile WebView 77.0 Android 9.0.0 -Pixel 3 Chrome Mobile 76.0 Android 9.0.0 -Pixel 3 Chrome Mobile 77.0 Android 9.0.0 -Pixel 3a XL Chrome Mobile 77.0 Android 9.0.0 -REVVLRY Chrome Mobile 73.0 Android 9.0.0 -Oppo RMX1801 Chrome Mobile 75.0 Android 9.0.0 -XiaoMi Redmi 7 Chrome Mobile 77.0 Android 9.0.0 -XiaoMi Redmi Note 7 Chrome Mobile 76.0 Android 9.0.0 -Samsung SM-A102U Samsung Internet 10.1 Android 9.0.0 -Samsung SM-A505FN Samsung Internet 10.1 Android 9.0.0 -Samsung SM-A505GN Samsung Internet 10.1 Android 9.0.0 -Samsung SM-G892U Samsung Internet 10.1 Android 9.0.0 -Samsung SM-G950U Samsung Internet 10.1 Android 9.0.0 -Samsung SM-G955F Samsung Internet 9.4 Android 9.0.0 -Samsung SM-G955U Samsung Internet 10.1 Android 9.0.0 -Samsung SM-G9600 Samsung Internet 9.4 Android 9.0.0 -Samsung SM-G960U Samsung Internet 10.1 Android 9.0.0 -Samsung SM-G965U Samsung Internet 10.1 Android 9.0.0 -Samsung SM-G970F Samsung Internet 10.1 Android 9.0.0 -Samsung SM-G970U Samsung Internet 10.1 Android 9.0.0 -Samsung SM-G973U Samsung Internet 9.4 Android 9.0.0 -Samsung SM-G973U Samsung Internet 10.1 Android 9.0.0 -Samsung SM-G975U Samsung Internet 10.1 Android 9.0.0 -Samsung SM-J415F Samsung Internet 10.1 Android 9.0.0 -Samsung SM-J730F Samsung Internet 10.1 Android 9.0.0 -Samsung SM-J737P Samsung Internet 10.1 Android 9.0.0 -Samsung SM-J737T Samsung Internet 9.0 Android 9.0.0 -Samsung SM-N950U Samsung Internet 10.1 Android 9.0.0 -Samsung SM-N960F Samsung Internet 10.1 Android 9.0.0 -Samsung SM-N960U Samsung Internet 10.1 Android 9.0.0 -Samsung SM-N960U1 Samsung Internet 9.2 Android 9.0.0 -Samsung SM-N970U Samsung Internet 10.1 Android 9.0.0 -Samsung SM-N975U Samsung Internet 10.1 Android 9.0.0 -Samsung SM-N975U1 Samsung Internet 10.1 Android 9.0.0 -Samsung SM-T510 Samsung Internet 10.1 Android 9.0.0 -Samsung SM-T720 Samsung Internet 10.1 Android 9.0.0 -SHIELD Android TV Chrome Mobile WebView 77.0 Android 9.0.0 -Samsung SM-A102U Chrome Mobile 72.0 Android 9.0.0 -Samsung SM-A102U Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-A105M Facebook 237.0 Android 9.0.0 -Samsung SM-A205G Chrome 
Mobile 77.0 Android 9.0.0 -Samsung SM-A205U Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-A505F Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-A530F Facebook 240.0 Android 9.0.0 -Samsung SM-A530N Chrome Mobile WebView 77.0 Android 9.0.0 -Samsung SM-A600T Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-A605F Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-A920F Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-G892A Chrome Mobile 74.0 Android 9.0.0 -Samsung SM-G950F Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-G950U Chrome Mobile WebView 77.0 Android 9.0.0 -Samsung SM-G950U Chrome Mobile 71.0 Android 9.0.0 -Samsung SM-G950U Chrome Mobile 76.0 Android 9.0.0 -Samsung SM-G950U Chrome Mobile 76.0 Android 9.0.0 -Samsung SM-G950U Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-G950U1 Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-G955F Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-G955U Facebook 240.0 Android 9.0.0 -Samsung SM-G955U Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-G9600 Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-G960U Facebook 233.0 Android 9.0.0 -Samsung SM-G960U Chrome Mobile WebView 77.0 Android 9.0.0 -Samsung SM-G960U Chrome Mobile 71.0 Android 9.0.0 -Samsung SM-G960U Chrome Mobile 74.0 Android 9.0.0 -Samsung SM-G960U Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-G960U1 Facebook 240.0 Android 9.0.0 -Samsung SM-G960U1 Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-G965F Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-G965U Chrome Mobile 74.0 Android 9.0.0 -Samsung SM-G965U Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-G965U Chrome Mobile 79.0 Android 9.0.0 -Samsung SM-G965U1 Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-G970U Facebook 240.0 Android 9.0.0 -Samsung SM-G970U Chrome Mobile 75.0 Android 9.0.0 -Samsung SM-G970U Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-G970U1 Chrome Mobile WebView 77.0 Android 9.0.0 -Samsung SM-G973U Chrome Mobile 74.0 Android 9.0.0 -Samsung SM-G973U Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-G973U1 Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-G975U Chrome Mobile 75.0 Android 9.0.0 -Samsung SM-G975U Chrome Mobile 76.0 Android 9.0.0 -Samsung SM-G975U Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-G975U1 Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-J260A Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-J337P Chrome Mobile 76.0 Android 9.0.0 -Samsung SM-J600FN Chrome Mobile 75.0 Android 9.0.0 -Samsung SM-J600G Facebook 238.0 Android 9.0.0 -Samsung SM-J730F Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-J737A Chrome Mobile WebView 77.0 Android 9.0.0 -Samsung SM-J737A Chrome Mobile 74.0 Android 9.0.0 -Samsung SM-J737V Pinterest 0.0 Android 9.0.0 -Samsung SM-J737V Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-J810M Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-N950U Facebook 240.0 Android 9.0.0 -Samsung SM-N950U Chrome Mobile 72.0 Android 9.0.0 -Samsung SM-N950U Chrome Mobile 75.0 Android 9.0.0 -Samsung SM-N950U Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-N950U Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-N960F Chrome Mobile 76.0 Android 9.0.0 -Samsung SM-N960U Facebook 240.0 Android 9.0.0 -Samsung SM-N960U Chrome Mobile WebView 77.0 Android 9.0.0 -Samsung SM-N960U Chrome Mobile 74.0 Android 9.0.0 -Samsung SM-N960U Chrome Mobile 75.0 Android 9.0.0 -Samsung SM-N960U Chrome Mobile 76.0 Android 9.0.0 -Samsung SM-N960U Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-N960U1 Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-N975U Chrome Mobile WebView 77.0 Android 9.0.0 -Samsung SM-N975U Chrome Mobile WebView 77.0 Android 9.0.0 -Samsung SM-N975U Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-N976V 
Facebook 240.0 Android 9.0.0 -Samsung SM-S367VL Chrome Mobile 77.0 Android 9.0.0 -Samsung SM-S767VL Chrome Mobile 76.0 Android 9.0.0 -Samsung SM-T597P Chrome 77.0 Android 9.0.0 -SM-T720 Chrome 77.0 Android 9.0.0 -TECNO KC8 Chrome Mobile 77.0 Android 9.0.0 -Huawei VOG-L29 Chrome Mobile 77.0 Android 9.0.0 -cp3705A Chrome Mobile 74.0 Android 9.0.0 -moto g(6) Chrome Mobile WebView 77.0 Android 9.0.0 -moto g(6) play Chrome Mobile 77.0 Android 9.0.0 -moto g(7) play Facebook 235.0 Android 9.0.0 -moto g(7) play Chrome Mobile 70.0 Android 9.0.0 -moto g(7) power Chrome Mobile 75.0 Android 9.0.0 -moto g(7) power Chrome Mobile 77.0 Android 9.0.0 -moto z4 Chrome Mobile 73.0 Android 9.0.0 -moto z4 Chrome Mobile 77.0 Android 9.0.0 -Samsung GT-P3113 Android 4.1 Android 4.1.1 -Samsung GT-I8160 Android 4.1 Android 4.1.2 -Asus Nexus 7 Android 4.2 Android 4.2.2 -Samsung SM-E500H Android 4.4 Android 4.4.0 -LGMS550 Chrome Mobile WebView 43.0 Android 6.0.1 -Samsung SM-J737T1 Chrome Mobile WebView 43.0 Android 6.0.1 -TECNO CA6 Opera Mobile 5.3 Android 7.0.0 -XiaoMi Redmi 5A MiuiBrowser 9.5 Android 7.1.2 -Oppo CPH1911 Chrome Mobile WebView 70.0 Android 9.0.0 -vivo 1904 Opera Mobile 44.1 Android 9.0.0 -Mac Firefox 68.0 Mac OS X 10.11.0 -Mac Firefox 69.0 Mac OS X 10.13.0 -Mac Firefox 67.0 Mac OS X 10.14.0 -Mac Firefox 68.0 Mac OS X 10.14.0 -Mac Firefox 69.0 Mac OS X 10.14.0 -Mac Firefox 70.0 Mac OS X 10.14.0 -Mac Chrome 76.0 Mac OS X 10.10.5 -Mac Chrome 77.0 Mac OS X 10.10.5 -Mac Safari 10.1 Mac OS X 10.10.5 -Mac Chrome 76.0 Mac OS X 10.11.4 -Mac Chrome 72.0 Mac OS X 10.11.6 -Mac Chrome 76.0 Mac OS X 10.11.6 -Mac Chrome 76.0 Mac OS X 10.11.6 -Mac Chrome 77.0 Mac OS X 10.11.6 -Mac Safari 9.1 Mac OS X 10.11.6 -Mac Safari 10.0 Mac OS X 10.11.6 -Mac Safari 11.1 Mac OS X 10.11.6 -Mac Chrome 77.0 Mac OS X 10.12.1 -Mac Safari 10.0 Mac OS X 10.12.3 -Mac Chrome 75.0 Mac OS X 10.12.6 -Mac Chrome 76.0 Mac OS X 10.12.6 -Mac Chrome 76.0 Mac OS X 10.12.6 -Mac Chrome 77.0 Mac OS X 10.12.6 -Mac Safari 12.1 Mac OS X 10.12.6 -Mac Safari 11.0 Mac OS X 10.13.0 -Mac Chrome 77.0 Mac OS X 10.13.1 -Mac Chrome 77.0 Mac OS X 10.13.2 -Mac Chrome 76.0 Mac OS X 10.13.4 -Mac Chrome 76.0 Mac OS X 10.13.4 -Mac Chrome 76.0 Mac OS X 10.13.5 -Mac Chrome 75.0 Mac OS X 10.13.6 -Mac Chrome 76.0 Mac OS X 10.13.6 -Mac Chrome 77.0 Mac OS X 10.13.6 -Mac Safari 12.0 Mac OS X 10.13.6 -Mac Safari 12.1 Mac OS X 10.13.6 -Mac Safari 12.1 Mac OS X 10.13.6 -Mac Safari 13.0 Mac OS X 10.13.6 -Mac Safari 13.0 Mac OS X 10.13.6 -Mac Chrome 75.0 Mac OS X 10.14.0 -Mac Chrome 76.0 Mac OS X 10.14.0 -Mac Chrome 77.0 Mac OS X 10.14.0 -Mac Chrome 77.0 Mac OS X 10.14.1 -Mac Chrome 76.0 Mac OS X 10.14.2 -Mac Chrome 69.0 Mac OS X 10.14.3 -Mac Safari 12.0 Mac OS X 10.14.3 -Mac Chrome 75.0 Mac OS X 10.14.4 -Mac Chrome 77.0 Mac OS X 10.14.4 -Mac Safari 12.1 Mac OS X 10.14.4 -Mac Chrome 76.0 Mac OS X 10.14.5 -Mac Chrome 77.0 Mac OS X 10.14.5 -Mac Safari 12.1 Mac OS X 10.14.5 -Mac Chrome 75.0 Mac OS X 10.14.6 -Mac Chrome 76.0 Mac OS X 10.14.6 -Mac Chrome 76.0 Mac OS X 10.14.6 -Mac Chrome 77.0 Mac OS X 10.14.6 -Mac Chrome 77.0 Mac OS X 10.14.6 -Mac Safari 12.1 Mac OS X 10.14.6 -Mac Safari 13.0 Mac OS X 10.14.6 -Mac Chrome 65.0 Mac OS X 10.9.5 -Mac Chrome 66.0 Mac OS X 10.9.5 -Mac Chrome 67.0 Mac OS X 10.9.5 -PlayStation 4 Apple Mail 605.1 Other 0.0.0 -Samsung SMART-TV Safari 3.0 Tizen 3.0.0 -Samsung SMART-TV Samsung Internet 2.0 Tizen 3.0.0 -Samsung SMART-TV Samsung Internet 2.1 Tizen 4.0.0 -Samsung SMART-TV Samsung Internet 2.2 Tizen 5.0.0 -Other Edge 17.17134 Windows 10.0.0 -Other 
Edge 18.17763 Windows 10.0.0 -Other Chrome 77.0 Windows 10.0.0 -Other Maxthon 5.2 Windows 10.0.0 -Other Chrome 73.1 Windows 10.0.0 -Other Chrome 76.0 Windows 10.0.0 -Other Opera 63.0 Windows 10.0.0 -Other Chrome 77.0 Windows 10.0.0 -Other Chrome 77.0 Windows 10.0.0 -Other Coc Coc 82.0 Windows 10.0.0 -Other IE 11.0 Windows 10.0.0 -Other Firefox 59.0 Windows 10.0.0 -Other Firefox 60.0 Windows 10.0.0 -Other Edge 15.15063 Windows 10.0.0 -Other Edge 16.16299 Windows 10.0.0 -Other Edge 17.17134 Windows 10.0.0 -Other Edge 18.17763 Windows 10.0.0 -Other Chrome 65.0 Windows 10.0.0 -Other Chrome 70.0 Windows 10.0.0 -Other Edge 18.18362 Windows 10.0.0 -Other Edge 18.18995 Windows 10.0.0 -Other Edge 18.19493 Windows 10.0.0 -Other Chrome 70.0 Windows 10.0.0 -Other Chrome 71.0 Windows 10.0.0 -Other Chrome 73.0 Windows 10.0.0 -Other Chrome 74.0 Windows 10.0.0 -Other Chrome 75.0 Windows 10.0.0 -Other Chrome 76.0 Windows 10.0.0 -Other Vivaldi 2.7 Windows 10.0.0 -Other Chrome 76.0 Windows 10.0.0 -Other Opera 63.0 Windows 10.0.0 -Other Chrome 77.0 Windows 10.0.0 -Other Chrome 77.0 Windows 10.0.0 -Other Edge 79.0 Windows 10.0.0 -Other Edge 18.18362 Windows 10.0.0 -Other Edge 18.18363 Windows 10.0.0 -Other Edge 18.18362 Windows 10.0.0 -Other Firefox 61.0 Windows 10.0.0 -Other Firefox 63.0 Windows 10.0.0 -Other Firefox 67.0 Windows 10.0.0 -Other Firefox 68.0 Windows 10.0.0 -Other Firefox 69.0 Windows 10.0.0 -Other Firefox 69.0 Windows 10.0.0 -Other Chrome 49.0 Windows XP.0.0 -Other Chrome 49.0 Windows Vista.0.0 -Other Chrome 49.0 Windows Vista.0.0 -Other Chrome 76.0 Windows 7.0.0 -Other Chrome 77.0 Windows 7.0.0 -Other Chrome 77.0 Windows 7.0.0 -Other Coc Coc 80.0 Windows 7.0.0 -Other Coc Coc 82.0 Windows 7.0.0 -Other IE 11.0 Windows 7.0.0 -Other Chrome 67.0 Windows 7.0.0 -Other Chrome 70.0 Windows 7.0.0 -Other Chrome 72.0 Windows 7.0.0 -Other Chrome 74.0 Windows 7.0.0 -Other Chrome 75.0 Windows 7.0.0 -Other Chrome 76.0 Windows 7.0.0 -Other Chrome 76.0 Windows 7.0.0 -Other Chrome 77.0 Windows 7.0.0 -Other Waterfox 56.2 Windows 7.0.0 -Other Firefox 60.0 Windows 7.0.0 -Other Firefox 63.0 Windows 7.0.0 -Other Firefox 68.0 Windows 7.0.0 -Other Firefox 69.0 Windows 7.0.0 -Other Firefox 69.0 Windows 7.0.0 -Other Chrome 77.0 Windows 8.0.0 -Other Firefox 69.0 Windows 8.0.0 -Other Chrome 77.0 Windows 8.1.0 -Other IE 11.0 Windows RT 8.1.0 -Other IE 11.0 Windows 8.1.0 -Other IE 11.0 Windows 8.1.0 -Other Chrome 63.0 Windows 8.1.0 -Other Chrome 64.0 Windows 8.1.0 -Other Chrome 76.0 Windows 8.1.0 -Other Chrome 76.0 Windows 8.1.0 -Other Chrome 77.0 Windows 8.1.0 -Other Firefox 69.0 Windows 8.1.0 -Other Firefox 69.0 Windows 8.1.0 -Other Chrome 72.0 Windows 10.0.0 -Other Chrome 77.0 Chrome OS 12371.75.0 -Other Chrome 76.0 Chrome OS 12239.92.0 -Other Chrome 69.0 Chrome OS 10895.78.0 -Other Chrome 70.0 Chrome OS 11021.81.0 -Other Chrome 74.0 Chrome OS 11895.118.0 -Other Chrome 76.0 Chrome OS 12239.92.0 -Other Chrome 76.0 Chrome OS 12239.92.1 -Other Chrome 76.0 Chrome OS 12239.92.4 -Other Chrome 77.0 Chrome OS 12371.46.0 -Other Chrome 77.0 Chrome OS 12371.65.0 -Other Chrome 75.0 Linux 0.0.0 -Other Chrome 77.0 Linux 0.0.0 -Other Samsung Internet 10.1 Linux 0.0.0 -Other Chrome 66.0 Linux 0.0.0 -Other Chrome 66.0 Linux 0.0.0 -Other Chrome 66.0 Linux 0.0.0 -Other Chrome 66.0 Linux 0.0.0 -Other Chrome 66.0 Linux 0.0.0 -Other Firefox 65.0 Ubuntu 0.0.0 -Other Firefox 66.0 Ubuntu 0.0.0 -Other Firefox 67.0 Ubuntu 0.0.0 -iPad Google 22.0 iOS 10.3.3 -iPad Chrome Mobile iOS 71.0 iOS 10.3.3 -iPad Firefox iOS 14.0 iOS 10.3.3 -iPad Mobile Safari 
UI/WKWebView 0.0 iOS 10.3.3 -iPad Facebook 240.0 iOS 10.3.3 -iPad Mobile Safari 10.0 iOS 10.3.3 -iPad Mobile Safari 10.0 iOS 10.3.4 -iPad Chrome Mobile iOS 76.0 iOS 11.1.0 -iPad Chrome Mobile iOS 76.0 iOS 11.1.2 -iPad Mobile Safari 11.0 iOS 11.2.1 -iPad Mobile Safari 11.0 iOS 11.2.2 -iPad Mobile Safari 11.0 iOS 11.2.6 -iPad Mobile Safari 11.0 iOS 11.3.0 -iPad Mobile Safari 11.0 iOS 11.4.0 -iPad Mobile Safari UI/WKWebView 0.0 iOS 11.4.1 -iPad Mobile Safari 11.0 iOS 11.4.1 -iPad Google 83.0 iOS 12.0.0 -iPad Mobile Safari 12.0 iOS 12.0.0 -iPad Chrome Mobile iOS 75.0 iOS 12.1.0 -iPad Chrome Mobile iOS 76.0 iOS 12.1.0 -iPad Mobile Safari UI/WKWebView 0.0 iOS 12.1.0 -iPad Mobile Safari 12.0 iOS 12.1.0 -iPad Mobile Safari 12.0 iOS 12.1.1 -iPad Google 48.0 iOS 12.1.4 -iPad Mobile Safari UI/WKWebView 0.0 iOS 12.1.4 -iPad Mobile Safari 12.0 iOS 12.1.4 -iPad Chrome Mobile iOS 76.0 iOS 12.2.0 -iPad Mobile Safari UI/WKWebView 0.0 iOS 12.2.0 -iPad Mobile Safari 12.1 iOS 12.2.0 -iPad Chrome Mobile iOS 77.0 iOS 12.3.0 -iPad Google 83.0 iOS 12.3.0 -iPad Mobile Safari 12.1 iOS 12.3.0 -iPad Mobile Safari UI/WKWebView 0.0 iOS 12.3.1 -iPad Mobile Safari 12.1 iOS 12.3.1 -iPad Chrome Mobile iOS 76.0 iOS 12.4.0 -iPad Chrome Mobile iOS 76.0 iOS 12.4.0 -iPad Chrome Mobile iOS 77.0 iOS 12.4.0 -iPad Chrome Mobile iOS 77.0 iOS 12.4.0 -iPad Chrome Mobile iOS 77.0 iOS 12.4.0 -iPad Google 74.0 iOS 12.4.0 -iPad Google 83.0 iOS 12.4.0 -iPad Mobile Safari UI/WKWebView 0.0 iOS 12.4.0 -iPad Mobile Safari 12.1 iOS 12.4.0 -iPad Chrome Mobile iOS 67.0 iOS 12.4.1 -iPad Firefox iOS 19.0 iOS 12.4.1 -iPad Mobile Safari UI/WKWebView 0.0 iOS 12.4.1 -iPad Facebook 0.0 iOS 12.4.1 -iPad Facebook 0.0 iOS 12.4.1 -iPad Facebook 0.0 iOS 12.4.1 -iPad Facebook 0.0 iOS 12.4.1 -iPad Mobile Safari 12.1 iOS 12.4.1 -iPad Mobile Safari 6.0 iOS 6.1.3 -iPad Mobile Safari 8.0 iOS 8.0.0 -iPad Mobile Safari 8.0 iOS 8.2.0 -iPad Google 23.1 iOS 8.4.0 -iPad Mobile Safari 9.0 iOS 9.3.2 -iPad Mobile Safari 9.0 iOS 9.3.5 -iPhone Mobile Safari 10.0 iOS 10.2.0 -iPhone Facebook 0.0 iOS 10.3.3 -iPhone Google 68.0 iOS 10.3.4 -iPhone Mobile Safari 10.0 iOS 10.3.4 -iPhone Mobile Safari 11.0 iOS 11.0.3 -iPhone Mobile Safari 11.0 iOS 11.1.1 -iPhone Mobile Safari 11.0 iOS 11.1.2 -iPhone Mobile Safari 11.0 iOS 11.2.1 -iPhone Facebook 207.0 iOS 11.2.6 -iPhone Chrome Mobile iOS 76.0 iOS 11.3.0 -iPhone Facebook 0.0 iOS 11.3.0 -iPhone Mobile Safari 11.0 iOS 11.3.0 -iPhone Google 83.0 iOS 11.4.0 -iPhone Mobile Safari 11.0 iOS 11.4.0 -iPhone Google 74.1 iOS 11.4.1 -iPhone Mobile Safari 11.0 iOS 11.4.1 -iPhone Mobile Safari 12.0 iOS 12.0.0 -iPhone Mobile Safari 12.0 iOS 12.1.0 -iPhone Mobile Safari 12.0 iOS 12.1.1 -iPhone Google 74.1 iOS 12.1.2 -iPhone Facebook 0.0 iOS 12.1.2 -iPhone Mobile Safari 12.0 iOS 12.1.2 -iPhone Mobile Safari 12.0 iOS 12.1.3 -iPhone Google 74.1 iOS 12.1.4 -iPhone Mobile Safari 12.0 iOS 12.1.4 -iPhone Chrome Mobile iOS 72.0 iOS 12.2.0 -iPhone Chrome Mobile iOS 76.0 iOS 12.2.0 -iPhone Chrome Mobile iOS 77.0 iOS 12.2.0 -iPhone Facebook 0.0 iOS 12.2.0 -iPhone Facebook 0.0 iOS 12.2.0 -iPhone Mobile Safari 12.1 iOS 12.2.0 -iPhone Chrome Mobile iOS 77.0 iOS 12.3.0 -iPhone Google 83.0 iOS 12.3.0 -iPhone Mobile Safari 12.1 iOS 12.3.0 -iPhone Google 79.0 iOS 12.3.1 -iPhone Mobile Safari UI/WKWebView 0.0 iOS 12.3.1 -iPhone DuckDuckGo Mobile 7.0 iOS 12.3.1 -iPhone Facebook 0.0 iOS 12.3.1 -iPhone Facebook 0.0 iOS 12.3.1 -iPhone Facebook 0.0 iOS 12.3.1 -iPhone Mobile Safari 12.1 iOS 12.3.1 -iPhone Mobile Safari 12.1 iOS 12.3.2 -iPhone Chrome Mobile iOS 69.0 iOS 
12.4.0 -iPhone Chrome Mobile iOS 73.0 iOS 12.4.0 -iPhone Chrome Mobile iOS 75.0 iOS 12.4.0 -iPhone Chrome Mobile iOS 76.0 iOS 12.4.0 -iPhone Chrome Mobile iOS 77.0 iOS 12.4.0 -iPhone Chrome Mobile iOS 77.0 iOS 12.4.0 -iPhone Google 81.0 iOS 12.4.0 -iPhone Google 82.1 iOS 12.4.0 -iPhone Google 83.0 iOS 12.4.0 -iPhone Facebook 0.0 iOS 12.4.0 -iPhone Facebook 0.0 iOS 12.4.0 -iPhone Facebook 0.0 iOS 12.4.0 -iPhone Facebook 0.0 iOS 12.4.0 -iPhone Mobile Safari 12.1 iOS 12.4.0 -iPhone Google 74.1 iOS 12.4.1 -iPhone Mobile Safari UI/WKWebView 0.0 iOS 12.4.1 -iPhone Instagram 89.0 iOS 12.4.1 -iPhone Facebook 240.0 iOS 12.4.1 -iPhone Facebook 0.0 iOS 12.4.1 -iPhone Facebook 0.0 iOS 12.4.1 -iPhone Facebook 0.0 iOS 12.4.1 -iPhone Facebook 0.0 iOS 12.4.1 -iPhone Facebook 0.0 iOS 12.4.1 -iPhone Facebook 0.0 iOS 12.4.1 -iPhone Facebook 0.0 iOS 12.4.1 -iPhone Facebook 0.0 iOS 12.4.1 -iPhone Facebook 0.0 iOS 12.4.1 -iPhone Facebook 0.0 iOS 12.4.1 -iPhone Facebook 0.0 iOS 12.4.1 -iPhone Facebook 0.0 iOS 12.4.1 -iPhone Facebook 0.0 iOS 12.4.1 -iPhone Facebook 0.0 iOS 12.4.1 -iPhone Facebook 0.0 iOS 12.4.1 -iPhone Facebook 0.0 iOS 12.4.1 -iPhone Facebook 0.0 iOS 12.4.1 -iPhone Facebook 0.0 iOS 12.4.1 -iPhone Facebook 0.0 iOS 12.4.1 -iPhone Facebook 0.0 iOS 12.4.1 -iPhone Facebook 0.0 iOS 12.4.1 -iPhone Mobile Safari 12.1 iOS 12.4.1 -iPhone Mobile Safari 12.4 iOS 12.4.1 -iPhone Mobile Safari UI/WKWebView 0.0 iOS 12.4.2 -iPhone Mobile Safari 12.1 iOS 12.4.2 -iPhone Chrome Mobile iOS 77.0 iOS 13.0.0 -iPhone Facebook 0.0 iOS 13.0.0 -iPhone Facebook 0.0 iOS 13.0.0 -iPhone Facebook 0.0 iOS 13.0.0 -iPhone Facebook 0.0 iOS 13.0.0 -iPhone Facebook 0.0 iOS 13.0.0 -iPhone Facebook 0.0 iOS 13.0.0 -iPhone Mobile Safari 13.0 iOS 13.0.0 -iPhone Chrome Mobile iOS 76.0 iOS 13.1.0 -iPhone Chrome Mobile iOS 77.0 iOS 13.1.0 -iPhone Chrome Mobile iOS 77.0 iOS 13.1.0 -iPhone Firefox iOS 8.1 iOS 13.1.0 -iPhone Google 83.0 iOS 13.1.0 -iPhone Mobile Safari UI/WKWebView 0.0 iOS 13.1.0 -iPhone DuckDuckGo Mobile 7.0 iOS 13.1.0 -iPhone Facebook 0.0 iOS 13.1.0 -iPhone Facebook 0.0 iOS 13.1.0 -iPhone Facebook 0.0 iOS 13.1.0 -iPhone Facebook 0.0 iOS 13.1.0 -iPhone Facebook 0.0 iOS 13.1.0 -iPhone Mobile Safari 13.0 iOS 13.1.0 -iPhone Mobile Safari UI/WKWebView 0.0 iOS 13.1.1 -iPhone Facebook 0.0 iOS 13.1.1 -iPhone Facebook 0.0 iOS 13.1.1 -iPhone Facebook 0.0 iOS 13.1.1 -iPhone Facebook 0.0 iOS 13.1.1 -iPhone Mobile Safari 13.0 iOS 13.1.1 -iPhone Mobile Safari UI/WKWebView 0.0 iOS 13.1.2 -iPhone Facebook 0.0 iOS 13.1.2 -iPhone Mobile Safari 13.0 iOS 13.1.2 +AppleCoreMedia/1.0.0.12B466 (Apple TV; U; CPU OS 8_1_3 like Mac OS X; en_us) AppleTV Other 0.0 ATV OS X 0.0.0 +Mozilla/5.0 (Android 7.0; Mobile; LG-M150; rv:68.0) Gecko/68.0 Firefox/68.0 LG-M150 Firefox Mobile 68.0 Android 7.0.0 +Mozilla/5.0 (Android 8.0.0; Mobile; rv:68.0) Gecko/68.0 Firefox/68.0 Generic Smartphone Firefox Mobile 68.0 Android 8.0.0 +Mozilla/5.0 (Android 8.1.0; Tablet; rv:68.0) Gecko/68.0 Firefox/68.0 Generic Tablet Firefox Mobile 68.0 Android 8.1.0 +Mozilla/5.0 (Android 9; Mobile; rv:68.0) Gecko/68.0 Firefox/68.0 Generic Smartphone Firefox Mobile 68.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 10; PH-1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 PH-1 Chrome Mobile 77.0 Android 10.0.0 +Mozilla/5.0 (Linux; Android 10; Pixel 2 XL) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Pixel 2 XL Chrome Mobile 77.0 Android 10.0.0 +Mozilla/5.0 (Linux; Android 10; Pixel 2) AppleWebKit/537.36 (KHTML, like Gecko) 
Chrome/77.0.3865.92 Mobile Safari/537.36 Pixel 2 Chrome Mobile 77.0 Android 10.0.0 +Mozilla/5.0 (Linux; Android 10; Pixel 3 Build/QP1A.190711.020.C3; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/240.0.0.38.121;] Pixel 3 Facebook 240.0 Android 10.0.0 +Mozilla/5.0 (Linux; Android 10; Pixel XL Build/QP1A.190711.020; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36 Pixel XL Chrome Mobile WebView 77.0 Android 10.0.0 +Mozilla/5.0 (Linux; Android 10; Pixel XL) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Pixel XL Chrome Mobile 77.0 Android 10.0.0 +Mozilla/5.0 (Linux; Android 4.0.3; HTC Sensation 4G Build/IML74K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.111 Mobile Safari/537.36 HTC Sensation 4G Chrome Mobile 42.0 Android 4.0.3 +Mozilla/5.0 (Linux; Android 4.0.3; KFTT) AppleWebKit/537.36 (KHTML, like Gecko) Silk/73.7.5 like Chrome/73.0.3683.90 Safari/537.36 Kindle Amazon Silk 73.7 Android 4.0.3 +Mozilla/5.0 (Linux; Android 4.2.2; GT-I9152 Build/JDQ39) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.111 Mobile Safari/537.36 Samsung GT-I9152 Chrome Mobile 42.0 Android 4.2.2 +Mozilla/5.0 (Linux; Android 4.4.2; GT-N5110) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36 Samsung GT-N5110 Chrome 76.0 Android 4.4.2 +Mozilla/5.0 (Linux; Android 4.4.2; RCT6773W22) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Safari/537.36 RCT6773W22 Chrome 77.0 Android 4.4.2 +Mozilla/5.0 (Linux; Android 4.4.2; SM-T217S) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Safari/537.36 Samsung SM-T217S Chrome 77.0 Android 4.4.2 +Mozilla/5.0 (Linux; Android 4.4.2; SM-T530NU) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Safari/537.36 Samsung SM-T530NU Chrome 77.0 Android 4.4.2 +Mozilla/5.0 (Linux; Android 4.4.2; TegraNote-P1640 Build/KOT49H; en-us) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36 TegraNote-P1640 Chrome 69.0 Android 4.4.2 +Mozilla/5.0 (Linux; Android 4.4.3; KFTHWI) AppleWebKit/537.36 (KHTML, like Gecko) Silk/76.3.6 like Chrome/76.0.3809.132 Safari/537.36 Kindle Amazon Silk 76.3 Android 4.4.3 +Mozilla/5.0 (Linux; Android 5.0.2; SM-A500H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.90 Mobile Safari/537.36 Samsung SM-A500H Chrome Mobile 73.0 Android 5.0.2 +Mozilla/5.0 (Linux; Android 5.0.2; SM-T357T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Safari/537.36 Samsung SM-T357T Chrome 77.0 Android 5.0.2 +Mozilla/5.0 (Linux; Android 5.0.2; SM-T530NU) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36 Samsung SM-T530NU Chrome 76.0 Android 5.0.2 +Mozilla/5.0 (Linux; Android 5.0.2; SM-T530NU) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Safari/537.36 Samsung SM-T530NU Chrome 77.0 Android 5.0.2 +Mozilla/5.0 (Linux; Android 5.0; RCT6213W87DK) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 YaBrowser/19.4.1.454.01 Safari/537.36 RCT6213W87DK Yandex Browser 19.4 Android 5.0.0 +Mozilla/5.0 (Linux; Android 5.0; SM-N900T Build/LRX21V; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/229.0.0.35.117;] Samsung SM-N900T Facebook 229.0 Android 5.0.0 +Mozilla/5.0 (Linux; Android 5.1.1) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Focus/4.4.1 Chrome/70.0.3538.110 Mobile Safari/537.36 Generic Smartphone Chrome Mobile WebView 70.0 Android 
5.1.1 +Mozilla/5.0 (Linux; Android 5.1.1; AFTB) AppleWebKit/537.36 (KHTML, like Gecko) Silk/76.3.16 like Chrome/76.0.3809.132 Safari/537.36 Kindle Amazon Silk 76.3 Android 5.1.1 +Mozilla/5.0 (Linux; Android 5.1.1; AFTT Build/LVY48F; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/70.0.3538.110 Mobile Safari/537.36;dailymotion-player-sdk-android 0.1.26 AFTT Chrome Mobile WebView 70.0 Android 5.1.1 +Mozilla/5.0 (Linux; Android 5.1.1; AFTT) AppleWebKit/537.36 (KHTML, like Gecko) Silk/76.3.16 like Chrome/76.0.3809.132 Safari/537.36 Kindle Amazon Silk 76.3 Android 5.1.1 +Mozilla/5.0 (Linux; Android 5.1.1; KFAUWI) AppleWebKit/537.36 (KHTML, like Gecko) Silk/76.3.6 like Chrome/76.0.3809.132 Safari/537.36 Kindle Amazon Silk 76.3 Android 5.1.1 +Mozilla/5.0 (Linux; Android 5.1.1; KFDOWI) AppleWebKit/537.36 (KHTML, like Gecko) Silk/71.2.4 like Chrome/71.0.3578.98 Safari/537.36 Kindle Amazon Silk 71.2 Android 5.1.1 +Mozilla/5.0 (Linux; Android 5.1.1; KFDOWI) AppleWebKit/537.36 (KHTML, like Gecko) Silk/76.3.6 like Chrome/76.0.3809.132 Safari/537.36 Kindle Amazon Silk 76.3 Android 5.1.1 +Mozilla/5.0 (Linux; Android 5.1.1; KFFOWI) AppleWebKit/537.36 (KHTML, like Gecko) Silk/76.3.6 like Chrome/76.0.3809.132 Safari/537.36 Kindle Amazon Silk 76.3 Android 5.1.1 +Mozilla/5.0 (Linux; Android 5.1.1; KFGIWI) AppleWebKit/537.36 (KHTML, like Gecko) Silk/76.3.6 like Chrome/76.0.3809.132 Safari/537.36 Kindle Amazon Silk 76.3 Android 5.1.1 +Mozilla/5.0 (Linux; Android 5.1.1; KFSUWI) AppleWebKit/537.36 (KHTML, like Gecko) Silk/76.3.6 like Chrome/76.0.3809.132 Safari/537.36 Kindle Amazon Silk 76.3 Android 5.1.1 +Mozilla/5.0 (Linux; Android 5.1.1; KFSUWI) AppleWebKit/537.36 (KHTML, like Gecko) Silk/77.1.127 like Chrome/77.0.3865.92 Safari/537.36 Kindle Amazon Silk 77.1 Android 5.1.1 +Mozilla/5.0 (Linux; Android 5.1.1; LG-AS330) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 LG-AS330 Chrome Mobile 77.0 Android 5.1.1 +Mozilla/5.0 (Linux; Android 5.1.1; LGL43AL) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 LGL43AL Chrome Mobile 77.0 Android 5.1.1 +Mozilla/5.0 (Linux; Android 5.1.1; SAMSUNG SM-G530R7 Build/LMY47X) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/9.2 Chrome/67.0.3396.87 Mobile Safari/537.36 Samsung SM-G530R7 Samsung Internet 9.2 Android 5.1.1 +Mozilla/5.0 (Linux; Android 5.1.1; SAMSUNG SM-T377P) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Safari/537.36 Samsung SM-T377P Samsung Internet 10.1 Android 5.1.1 +Mozilla/5.0 (Linux; Android 5.1.1; SAMSUNG SM-T900) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Safari/537.36 Samsung SM-T900 Samsung Internet 10.1 Android 5.1.1 +Mozilla/5.0 (Linux; Android 5.1.1; SAMSUNG-SM-T337A Build/LMY47X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36 Samsung SM-T337A Chrome 69.0 Android 5.1.1 +Mozilla/5.0 (Linux; Android 5.1.1; SM-G360T1 Build/LMY47X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.81 Mobile Safari/537.36 Samsung SM-G360T1 Chrome Mobile 67.0 Android 5.1.1 +Mozilla/5.0 (Linux; Android 5.1.1; SM-J320FN) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Mobile Safari/537.36 Samsung SM-J320FN Chrome Mobile 74.0 Android 5.1.1 +Mozilla/5.0 (Linux; Android 5.1.1; SM-T280) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Safari/537.36 SM-T280 Chrome 74.0 Android 5.1.1 +Mozilla/5.0 (Linux; Android 5.1.1; SM-T330NU) AppleWebKit/537.36 (KHTML, like Gecko) 
Chrome/71.0.3578.98 Safari/537.36 Samsung SM-T330NU Chrome 71.0 Android 5.1.1 +Mozilla/5.0 (Linux; Android 5.1.1; SM-T670) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36 SM-T670 Chrome 76.0 Android 5.1.1 +Mozilla/5.0 (Linux; Android 5.1.1; SM-T670) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Safari/537.36 SM-T670 Chrome 77.0 Android 5.1.1 +Mozilla/5.0 (Linux; Android 5.1.1; Vodafone Smart ultra 6 Build/LMY47V; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/74.0.3729.136 Mobile Safari/537.36 Vodafone Smart ultra 6 Chrome Mobile WebView 74.0 Android 5.1.1 +Mozilla/5.0 (Linux; Android 5.1; BLU Advance 5.0 Build/LMY47I) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.158 Mobile Safari/537.36 BLU Advance 5.0 Chrome Mobile 66.0 Android 5.1.0 +Mozilla/5.0 (Linux; Android 5.1; HTC Desire 626s) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 HTC Desire 626s Chrome Mobile 77.0 Android 5.1.0 +Mozilla/5.0 (Linux; Android 5.1; HUAWEI LUA-L22 Build/HUAWEILUA-L22) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.89 Mobile Safari/537.36 HUAWEI LUA-L22 Chrome Mobile 50.0 Android 5.1.0 +Mozilla/5.0 (Linux; Android 5.1; NX16A11264) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Safari/537.36 NX16A11264 Chrome 77.0 Android 5.1.0 +Mozilla/5.0 (Linux; Android 5.1; XT1526) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.90 Mobile Safari/537.36 XT1526 Chrome Mobile 73.0 Android 5.1.0 +Mozilla/5.0 (Linux; Android 6.0.1; CPH1613) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Oppo CPH1613 Chrome Mobile 77.0 Android 6.0.1 +Mozilla/5.0 (Linux; Android 6.0.1; LG-M153 Build/MXB48T; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/55.0.2883.91 Mobile Safari/537.36 LG-M153 Chrome Mobile WebView 55.0 Android 6.0.1 +Mozilla/5.0 (Linux; Android 6.0.1; LG-M153) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 LG-M153 Chrome Mobile 77.0 Android 6.0.1 +Mozilla/5.0 (Linux; Android 6.0.1; LGLS676) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 LGLS676 Chrome Mobile 77.0 Android 6.0.1 +Mozilla/5.0 (Linux; Android 6.0.1; N9136) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Mobile Safari/537.36 N9136 Chrome Mobile 74.0 Android 6.0.1 +Mozilla/5.0 (Linux; Android 6.0.1; Nexus 7 Build/MOB30X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.133 Safari/537.36 Asus Nexus 7 Chrome 44.0 Android 6.0.1 +Mozilla/5.0 (Linux; Android 6.0.1; SAMSUNG SM-G900I) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-G900I Samsung Internet 10.1 Android 6.0.1 +Mozilla/5.0 (Linux; Android 6.0.1; SAMSUNG SM-G900P Build/MMB29M) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/7.2 Chrome/59.0.3071.125 Mobile Safari/537.36 Samsung SM-G900P Samsung Internet 7.2 Android 6.0.1 +Mozilla/5.0 (Linux; Android 6.0.1; SAMSUNG SM-J700M) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-J700M Samsung Internet 10.1 Android 6.0.1 +Mozilla/5.0 (Linux; Android 6.0.1; SAMSUNG SM-S327VL) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-S327VL Samsung Internet 10.1 Android 6.0.1 +Mozilla/5.0 (Linux; Android 6.0.1; SAMSUNG-SM-T377A) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Safari/537.36 Samsung SM-T377A Chrome 77.0 Android 
6.0.1 +Mozilla/5.0 (Linux; Android 6.0.1; SM-G532M Build/MMB29T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.91 Mobile Safari/537.36 Samsung SM-G532M Chrome Mobile 55.0 Android 6.0.1 +Mozilla/5.0 (Linux; Android 6.0.1; SM-G532M Build/MMB29T; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/75.0.3770.101 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/240.0.0.38.121;] Samsung SM-G532M Facebook 240.0 Android 6.0.1 +Mozilla/5.0 (Linux; Android 6.0.1; SM-G532M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G532M Chrome Mobile 77.0 Android 6.0.1 +Mozilla/5.0 (Linux; Android 6.0.1; SM-G550T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Mobile Safari/537.36 Samsung SM-G550T Chrome Mobile 76.0 Android 6.0.1 +Mozilla/5.0 (Linux; Android 6.0.1; SM-G550T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G550T Chrome Mobile 77.0 Android 6.0.1 +Mozilla/5.0 (Linux; Android 6.0.1; SM-G550T1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Mobile Safari/537.36 Samsung SM-G550T1 Chrome Mobile 76.0 Android 6.0.1 +Mozilla/5.0 (Linux; Android 6.0.1; SM-G900V) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Mobile Safari/537.36 Samsung SM-G900V Chrome Mobile 73.0 Android 6.0.1 +Mozilla/5.0 (Linux; Android 6.0.1; SM-G920A) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G920A Chrome Mobile 77.0 Android 6.0.1 +Mozilla/5.0 (Linux; Android 6.0.1; SM-J327P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-J327P Chrome Mobile 77.0 Android 6.0.1 +Mozilla/5.0 (Linux; Android 6.0.1; SM-N910S) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.143 Mobile Safari/537.36 Samsung SM-N910S Chrome Mobile 75.0 Android 6.0.1 +Mozilla/5.0 (Linux; Android 6.0.1; SM-N920V) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.89 Mobile Safari/537.36 Samsung SM-N920V Chrome Mobile 76.0 Android 6.0.1 +Mozilla/5.0 (Linux; Android 6.0.1; SM-T350 Build/MMB29M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.125 Safari/537.36 Samsung SM-T350 Chrome 59.0 Android 6.0.1 +Mozilla/5.0 (Linux; Android 6.0.1; SM-T560NU) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Safari/537.36 Samsung SM-T560NU Chrome 77.0 Android 6.0.1 +Mozilla/5.0 (Linux; Android 6.0.1; SM-T800) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Safari/537.36 SM-T800 Chrome 77.0 Android 6.0.1 +Mozilla/5.0 (Linux; Android 6.0.1; XT1254) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 XT1254 Chrome Mobile 77.0 Android 6.0.1 +Mozilla/5.0 (Linux; Android 6.0.1; Z798BL Build/MMB29M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Mobile Safari/537.36 Z798BL Chrome Mobile 67.0 Android 6.0.1 +Mozilla/5.0 (Linux; Android 6.0.1; Z799VL Build/MMB29M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/45.0.2454.95 Mobile Safari/537.36 Z799VL Chrome Mobile WebView 45.0 Android 6.0.1 +Mozilla/5.0 (Linux; Android 6.0; 5010X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.89 Mobile Safari/537.36 5010X Chrome Mobile 76.0 Android 6.0.0 +Mozilla/5.0 (Linux; Android 6.0; CAM-L21) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Huawei CAM-L21 Chrome Mobile 77.0 Android 6.0.0 +Mozilla/5.0 (Linux; Android 6.0; F3313) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 F3313 Chrome Mobile 77.0 Android 6.0.0 
+Mozilla/5.0 (Linux; Android 6.0; RCT6603W47M7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Safari/537.36 RCT6603W47M7 Chrome 77.0 Android 6.0.0 +Mozilla/5.0 (Linux; Android 7.0; 5049Z Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Mobile Safari/537.36 5049Z Chrome Mobile 56.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; ASUS_A002A) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Asus A002A Chrome Mobile 77.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; Alcatel_5044C) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Alcatel_5044C Chrome Mobile 77.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; Astra Young Pro Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/59.0.3071.125 Mobile Safari/537.36 Astra Young Pro Chrome Mobile WebView 59.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; Infinix X571) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Infinix X571 Chrome Mobile 77.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; LG-H872 Build/NRD90U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.123 Mobile Safari/537.36 LG-H872 Chrome Mobile 64.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; LG-K425 Build/NRD90U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.91 Mobile Safari/537.36 LG-K425 Chrome Mobile 55.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; LG-LS777) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 LG-LS777 Chrome Mobile 77.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; LG-M210) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 LG-M210 Chrome Mobile 77.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; LG-M430) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 LG-M430 Chrome Mobile 77.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; LG-TP260 Build/NRD90U; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/64.0.3282.137 Mobile Safari/537.36;dailymotion-player-sdk-android 0.1.31 LG-TP260 Chrome Mobile WebView 64.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; LG-TP260) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 LG-TP260 Chrome Mobile 77.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; LG-TP450 Build/NRD90U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.137 Mobile Safari/537.36 LG-TP450 Chrome Mobile 64.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; LG-V521) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.143 Safari/537.36 LG-V521 Chrome 75.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; LG-V521) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Safari/537.36 LG-V521 Chrome 77.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; LGMP260 Build/NRD90U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.83 Mobile Safari/537.36 LGMP260 Chrome Mobile 58.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; LGMS210 Build/NRD90U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.91 Mobile Safari/537.36 LGMS210 Chrome Mobile 55.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; LGMS210) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 LGMS210 Chrome Mobile 77.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; P00I) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Safari/537.36 P00I Chrome 77.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; RS988) AppleWebKit/537.36 (KHTML, like Gecko) 
Chrome/77.0.3865.92 Mobile Safari/537.36 RS988 Chrome Mobile 77.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; SAMSUNG SM-J701F) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-J701F Samsung Internet 10.1 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; SAMSUNG SM-J710F) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-J710F Samsung Internet 10.1 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; SAMSUNG SM-N920T Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/9.2 Chrome/67.0.3396.87 Mobile Safari/537.36 Samsung SM-N920T Samsung Internet 9.2 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; SAMSUNG-SM-G920A) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G920A Chrome Mobile 77.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; SM-G920P Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36 Flipboard/4.2.23/4722,4.2.23.4722 Samsung SM-G920P Flipboard 4.2 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; SM-G920V) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Mobile Safari/537.36 Samsung SM-G920V Chrome Mobile 76.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; SM-G928V) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G928V Chrome Mobile 77.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; SM-G950U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G950U Chrome Mobile 77.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; SM-G955U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G955U Chrome Mobile 77.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; SM-J327T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Mobile Safari/537.36 Samsung SM-J327T Chrome Mobile 74.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; SM-J327T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-J327T Chrome Mobile 77.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; SM-J327T1 Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.137 Mobile Safari/537.36 Samsung SM-J327T1 Chrome Mobile 64.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; SM-J327T1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.101 Mobile Safari/537.36 Samsung SM-J327T1 Chrome Mobile 75.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; SM-J327T1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-J327T1 Chrome Mobile 77.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; SM-N9208) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.90 Mobile Safari/537.36 Samsung SM-N9208 Chrome Mobile 73.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; SM-N920P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Mobile Safari/537.36 Samsung SM-N920P Chrome Mobile 74.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; SM-N920T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-N920T Chrome Mobile 77.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; SM-T585) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Safari/537.36 SM-T585 Chrome 77.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; SM-T810) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.143 Safari/537.36 SM-T810 Chrome 75.0 Android 7.0.0 +Mozilla/5.0 (Linux; 
Android 7.0; SM-T810) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.111 Safari/537.36 SM-T810 Chrome 76.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; SM-T810) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Safari/537.36 SM-T810 Chrome 77.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; SM-T813) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.111 Safari/537.36 SM-T813 Chrome 76.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; SM-T813) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36 SM-T813 Chrome 76.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; ST1009X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.143 Safari/537.36 Trekstor ST1009X Chrome 75.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0; XT1663) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 XT1663 Chrome Mobile 77.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.0;) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.96 Mobile Safari/537.36 Generic Smartphone Chrome Mobile 58.0 Android 7.0.0 +Mozilla/5.0 (Linux; Android 7.1.1; A574BL Build/NMF26F; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36;dailymotion-player-sdk-android 0.1.31 A574BL Chrome Mobile WebView 77.0 Android 7.1.1 +Mozilla/5.0 (Linux; Android 7.1.1; A574BL) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 A574BL Chrome Mobile 77.0 Android 7.1.1 +Mozilla/5.0 (Linux; Android 7.1.1; CPH1729 Build/N6F26Q; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/240.0.0.38.121;] Oppo CPH1729 Facebook 240.0 Android 7.1.1 +Mozilla/5.0 (Linux; Android 7.1.1; Coolpad 3632A) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Mobile Safari/537.36 3632A Chrome Mobile 74.0 Android 7.1.1 +Mozilla/5.0 (Linux; Android 7.1.1; General Mobile 4G Dual) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 General Mobile 4G Dual Chrome Mobile 77.0 Android 7.1.1 +Mozilla/5.0 (Linux; Android 7.1.1; Moto E (4) Plus Build/NCRS26.58-44-20; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/76.0.3809.111 Mobile Safari/537.36;dailymotion-player-sdk-android 0.1.31 Moto E (4) Plus Chrome Mobile WebView 76.0 Android 7.1.1 +Mozilla/5.0 (Linux; Android 7.1.1; Moto E (4)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.80 Mobile Safari/537.36 Moto E (4) Chrome Mobile 70.0 Android 7.1.1 +Mozilla/5.0 (Linux; Android 7.1.1; Moto E (4)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Mobile Safari/537.36 Moto E (4) Chrome Mobile 76.0 Android 7.1.1 +Mozilla/5.0 (Linux; Android 7.1.1; Moto E (4)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.73 Mobile Safari/537.36 Moto E (4) Chrome Mobile 77.0 Android 7.1.1 +Mozilla/5.0 (Linux; Android 7.1.1; Moto E (4)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Moto E (4) Chrome Mobile 77.0 Android 7.1.1 +Mozilla/5.0 (Linux; Android 7.1.1; NX591J) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 NX591J Chrome Mobile 77.0 Android 7.1.1 +Mozilla/5.0 (Linux; Android 7.1.1; REVVLPLUS C3701A Build/143.54.190611.3701A-TMO) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.137 Mobile Safari/537.36 REVVLPLUS C3701A Chrome Mobile 64.0 Android 7.1.1 +Mozilla/5.0 (Linux; Android 7.1.1; SAMSUNG SM-J320A) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile 
Safari/537.36 Samsung SM-J320A Samsung Internet 10.1 Android 7.1.1 +Mozilla/5.0 (Linux; Android 7.1.1; SAMSUNG SM-T550) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Safari/537.36 Samsung SM-T550 Samsung Internet 10.1 Android 7.1.1 +Mozilla/5.0 (Linux; Android 7.1.1; SAMSUNG-SM-T377A Build/NMF26X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.137 Safari/537.36 Samsung SM-T377A Chrome 64.0 Android 7.1.1 +Mozilla/5.0 (Linux; Android 7.1.1; SM-J250F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Mobile Safari/537.36 Samsung SM-J250F Chrome Mobile 76.0 Android 7.1.1 +Mozilla/5.0 (Linux; Android 7.1.1; SM-J700T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-J700T Chrome Mobile 77.0 Android 7.1.1 +Mozilla/5.0 (Linux; Android 7.1.1; SM-T350) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Safari/537.36 SM-T350 Chrome 77.0 Android 7.1.1 +Mozilla/5.0 (Linux; Android 7.1.1; SM-T377T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.73 Safari/537.36 Samsung SM-T377T Chrome 77.0 Android 7.1.1 +Mozilla/5.0 (Linux; Android 7.1.1; SM-T550 Build/NMF26X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36 Samsung SM-T550 Chrome 69.0 Android 7.1.1 +Mozilla/5.0 (Linux; Android 7.1.1; SM-T550) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Safari/537.36 SM-T550 Chrome 77.0 Android 7.1.1 +Mozilla/5.0 (Linux; Android 7.1.1; SM-T560NU) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Safari/537.36 Samsung SM-T560NU Chrome 77.0 Android 7.1.1 +Mozilla/5.0 (Linux; Android 7.1.1; X20 Build/N6F26Q; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/52.0.2743.100 Mobile Safari/537.36 X20 Chrome Mobile WebView 52.0 Android 7.1.1 +Mozilla/5.0 (Linux; Android 7.1.1; Z851M Build/NMF26V) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.83 Mobile Safari/537.36 Z851M Chrome Mobile 58.0 Android 7.1.1 +Mozilla/5.0 (Linux; Android 7.1.1; Z899VL Build/NMF26V; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/74.0.3729.136 Mobile Safari/537.36;dailymotion-player-sdk-android 0.1.31 Z899VL Chrome Mobile WebView 74.0 Android 7.1.1 +Mozilla/5.0 (Linux; Android 7.1.1; Z982 Build/NMF26V; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/75.0.3770.143 Mobile Safari/537.36;dailymotion-player-sdk-android 0.1.31 Z982 Chrome Mobile WebView 75.0 Android 7.1.1 +Mozilla/5.0 (Linux; Android 7.1.1; Z982) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Z982 Chrome Mobile 77.0 Android 7.1.1 +Mozilla/5.0 (Linux; Android 7.1.2) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Focus/4.4.1 Chrome/70.0.3538.110 Mobile Safari/537.36 Generic Smartphone Chrome Mobile WebView 70.0 Android 7.1.2 +Mozilla/5.0 (Linux; Android 7.1.2; AFTKMST12 Build/NS6265; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/70.0.3538.110 Mobile Safari/537.36;dailymotion-player-sdk-android 0.1.26 AFTKMST12 Chrome Mobile WebView 70.0 Android 7.1.2 +Mozilla/5.0 (Linux; Android 7.1.2; AFTKMST12) AppleWebKit/537.36 (KHTML, like Gecko) Silk/76.3.16 like Chrome/76.0.3809.132 Safari/537.36 Kindle Amazon Silk 76.3 Android 7.1.2 +Mozilla/5.0 (Linux; Android 7.1.2; AFTMM Build/NS6265; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/70.0.3538.110 Mobile Safari/537.36;dailymotion-player-sdk-android 0.1.26 AFTMM Chrome Mobile WebView 70.0 Android 7.1.2 +Mozilla/5.0 (Linux; Android 7.1.2; AFTN Build/NS6265; wv) AppleWebKit/537.36 
(KHTML, like Gecko) Version/4.0 Chrome/70.0.3538.110 Mobile Safari/537.36;dailymotion-player-sdk-android 0.1.26 AFTN Chrome Mobile WebView 70.0 Android 7.1.2 +Mozilla/5.0 (Linux; Android 7.1.2; KFKAWI Build/NS6301; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/59.0.3071.125 Safari/537.36 KFKAWI Chrome Mobile WebView 59.0 Android 7.1.2 +Mozilla/5.0 (Linux; Android 7.1.2; KFKAWI) AppleWebKit/537.36 (KHTML, like Gecko) Silk/76.3.6 like Chrome/76.0.3809.132 Safari/537.36 Kindle Amazon Silk 76.3 Android 7.1.2 +Mozilla/5.0 (Linux; Android 7.1.2; KFMUWI) AppleWebKit/537.36 (KHTML, like Gecko) Silk/76.3.6 like Chrome/76.0.3809.132 Safari/537.36 Kindle Amazon Silk 76.3 Android 7.1.2 +Mozilla/5.0 (Linux; Android 7.1.2; LG-SP200) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.143 Mobile Safari/537.36 LG-SP200 Chrome Mobile 75.0 Android 7.1.2 +Mozilla/5.0 (Linux; Android 7.1.2; LG-SP200) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Mobile Safari/537.36 LG-SP200 Chrome Mobile 76.0 Android 7.1.2 +Mozilla/5.0 (Linux; Android 7.1.2; LM-X210(G)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.111 Mobile Safari/537.36 LM-X210(G) Chrome Mobile 76.0 Android 7.1.2 +Mozilla/5.0 (Linux; Android 7.1.2; LM-X210) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Mobile Safari/537.36 LM-X210 Chrome Mobile 76.0 Android 7.1.2 +Mozilla/5.0 (Linux; Android 7.1.2; RCT6973W43R) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Safari/537.36 RCT6973W43R Chrome 77.0 Android 7.1.2 +Mozilla/5.0 (Linux; Android 7.1.2; Redmi 4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 XiaoMi Redmi 4 Chrome Mobile 77.0 Android 7.1.2 +Mozilla/5.0 (Linux; Android 8.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/76.0.3809.132 Mobile Safari/537.36 Generic Smartphone Chrome Mobile WebView 76.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; ASUS_Z01FD) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Asus Z01FD Chrome Mobile 77.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; AUM-L29) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Huawei AUM-L29 Chrome Mobile 77.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; BRAVIA 4K GB Build/OPR2.170623.027.S25; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36;dailymotion-player-sdk-android 0.1.31 BRAVIA 4K GB Chrome Mobile WebView 77.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; CMR-W09) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Safari/537.36 CMR-W09 Chrome 77.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; EVA-AL00) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.73 Mobile Safari/537.36 EVA-AL00 Chrome Mobile 77.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; G3223) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 G3223 Chrome Mobile 77.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; LG-H910) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 LG-H910 Chrome Mobile 77.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; LG-H931) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Mobile Safari/537.36 LG-H931 Chrome Mobile 76.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; LG-H932) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 LG-H932 Chrome Mobile 77.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SAMSUNG SM-A520F) 
AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-A520F Samsung Internet 10.1 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SAMSUNG SM-G891A Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/8.2 Chrome/63.0.3239.111 Mobile Safari/537.36 Samsung SM-G891A Samsung Internet 8.2 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SAMSUNG SM-G935T) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-G935T Samsung Internet 10.1 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SAMSUNG SM-G955U) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-G955U Samsung Internet 10.1 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SAMSUNG SM-J337T Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/9.2 Chrome/67.0.3396.87 Mobile Safari/537.36 Samsung SM-J337T Samsung Internet 9.2 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SAMSUNG SM-J737P) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-J737P Samsung Internet 10.1 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SAMSUNG SM-N950F) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-N950F Samsung Internet 10.1 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SAMSUNG-SM-G891A) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Mobile Safari/537.36 Samsung SM-G891A Chrome Mobile 72.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SAMSUNG-SM-G935A) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Mobile Safari/537.36 Samsung SM-G935A Chrome Mobile 76.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SM-A720F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-A720F Chrome Mobile 77.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SM-G570F Build/R16NW; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/73.0.3683.90 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/231.0.0.39.113;] Samsung SM-G570F Facebook 231.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SM-G570Y) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G570Y Chrome Mobile 77.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SM-G930T Build/R16NW; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36;dailymotion-player-sdk-android 0.1.31 Samsung SM-G930T Chrome Mobile WebView 77.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SM-G930V Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.137 Mobile Safari/537.36 Samsung SM-G930V Chrome Mobile 64.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SM-G930VL) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Mobile Safari/537.36 Samsung SM-G930VL Chrome Mobile 74.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SM-G935F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.143 Mobile Safari/537.36 Samsung SM-G935F Chrome Mobile 75.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SM-G935P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G935P Chrome Mobile 77.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SM-G935T Build/R16NW; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/240.0.0.38.121;] Samsung SM-G935T 
Facebook 240.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SM-G935T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G935T Chrome Mobile 77.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SM-G950U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G950U Chrome Mobile 77.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SM-G955U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.112 Mobile Safari/537.36 Samsung SM-G955U Chrome Mobile 74.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SM-G955U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G955U Chrome Mobile 77.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SM-J330G) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-J330G Chrome Mobile 77.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SM-J337T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-J337T Chrome Mobile 77.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SM-J737A) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-J737A Chrome Mobile 77.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SM-J737T1 Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.126 Mobile Safari/537.36 Samsung SM-J737T1 Chrome Mobile 66.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SM-J737T1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-J737T1 Chrome Mobile 77.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SM-N950F Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.126 Mobile Safari/537.36 Samsung SM-N950F Chrome Mobile 66.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SM-N950U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Mobile Safari/537.36 Samsung SM-N950U Chrome Mobile 76.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SM-N950U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-N950U Chrome Mobile 77.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SM-N950U1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-N950U1 Chrome Mobile 77.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; SM-S367VL Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 OPT/1.22.80 Samsung SM-S367VL Chrome Mobile 77.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; VS995) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 VS995 Chrome Mobile 77.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; XT1635-02) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 XT1635-02 Chrome Mobile 77.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; moto e5 play) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.111 Mobile Safari/537.36 moto e5 play Chrome Mobile 76.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; moto e5 play) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 moto e5 play Chrome Mobile 77.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; moto e5 supra) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Mobile Safari/537.36 moto e5 supra Chrome Mobile 76.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.0.0; moto g(6)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 moto g(6) 
Chrome Mobile 77.0 Android 8.0.0 +Mozilla/5.0 (Linux; Android 8.1.0; 5041C) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 5041C Chrome Mobile 77.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; 6062W) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 6062W Chrome Mobile 77.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; A502DL Build/OPM1.171019.011) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Mobile Safari/537.36 A502DL Chrome Mobile 67.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; A502DL) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Mobile Safari/537.36 A502DL Chrome Mobile 76.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; BKK-LX2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Mobile Safari/537.36 Huawei BKK-LX2 Chrome Mobile 76.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; C4 Build/OPM2.171019.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Mobile Safari/537.36 C4 Chrome Mobile 70.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; Coolpad 3310A Build/3310A.SPRINT.190213.0S) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 3310A Chrome Mobile 77.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; Infinix X604 Build/O11019) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.137 Mobile Safari/537.36 Infinix X604 Chrome Mobile 64.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; Joy 1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Joy 1 Chrome Mobile 77.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; LAVA LE9820) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 LAVA LE9820 Chrome Mobile 77.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; LG-Q710AL) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 LG-Q710AL Chrome Mobile 77.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; LM-Q610(FGN)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 LM-Q610(FGN) Chrome Mobile 77.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; LM-Q710(FGN) Build/OPM1.171019.019; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/235.0.0.38.118;] LM-Q710(FGN) Facebook 235.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; LM-Q710(FGN)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Mobile Safari/537.36 LM-Q710(FGN) Chrome Mobile 70.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; LM-Q710(FGN)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.111 Mobile Safari/537.36 LM-Q710(FGN) Chrome Mobile 76.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; LM-Q710(FGN)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Mobile Safari/537.36 LM-Q710(FGN) Chrome Mobile 76.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; LM-Q710(FGN)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 LM-Q710(FGN) Chrome Mobile 77.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; LM-V405) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 LM-V405 Chrome Mobile 77.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; LM-X210(G) Build/OPM1.171019.026; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36 agentweb/4.0.2 UCBrowser/11.6.4.950 LM-X210(G) UC Browser 11.6 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; LM-X210(G)) 
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Mobile Safari/537.36 LM-X210(G) Chrome Mobile 70.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; LM-X210(G)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.105 Mobile Safari/537.36 LM-X210(G) Chrome Mobile 72.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; LM-X210(G)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 LM-X210(G) Chrome Mobile 77.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; LM-X212(G)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 LM-X212(G) Chrome Mobile 77.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; LM-X220) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Mobile Safari/537.36 LM-X220 Chrome Mobile 70.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; LM-X220) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Mobile Safari/537.36 LM-X220 Chrome Mobile 76.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; LM-X220PM Build/O11019; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36;dailymotion-player-sdk-android 0.1.31 LM-X220PM Chrome Mobile WebView 77.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; LM-X410(FG)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Mobile Safari/537.36 LM-X410(FG) Chrome Mobile 70.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; LM-X410(FG)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.111 Mobile Safari/537.36 LM-X410(FG) Chrome Mobile 76.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; LM-X410(FG)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 LM-X410(FG) Chrome Mobile 77.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; LM-X410.FGN Build/OPM1.171019.019) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.91 Mobile Safari/537.36 LM-X410.FGN Chrome Mobile 68.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; LML414DL) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Mobile Safari/537.36 LML414DL Chrome Mobile 76.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; LML713DL) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 LML713DL Chrome Mobile 77.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; Moto G (5S) Plus) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Moto G (5S) Plus Chrome Mobile 77.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; One) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/70.0.3538.110 Mobile Safari/537.36/TansoDL HTC One Chrome Mobile WebView 70.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; RCT6873W42BMF8KC Build/O11019) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 RCT6873W42BMF8KC Chrome Mobile 77.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; REVVL 2 Build/OPM1.171019.011) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Mobile Safari/537.36 REVVL 2 Chrome Mobile 67.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; REVVL 2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Mobile Safari/537.36 REVVL 2 Chrome Mobile 76.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; SAMSUNG SM-J727T) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-J727T Samsung Internet 10.1 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; SAMSUNG SM-J727T1 Build/M1AJQ) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/9.4 
Chrome/67.0.3396.87 Mobile Safari/537.36 Samsung SM-J727T1 Samsung Internet 9.4 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; SAMSUNG SM-J727T1) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-J727T1 Samsung Internet 10.1 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; SAMSUNG SM-T580 Build/M1AJQ) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/9.4 Chrome/67.0.3396.87 Safari/537.36 Samsung SM-T580 Samsung Internet 9.4 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; SAMSUNG-SM-J727A Build/M1AJQ; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/240.0.0.38.121;] Samsung SM-J727A Facebook 240.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; SM-G610F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G610F Chrome Mobile 77.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; SM-J260T1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.111 Mobile Safari/537.36 Samsung SM-J260T1 Chrome Mobile 76.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; SM-J260T1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Mobile Safari/537.36 Samsung SM-J260T1 Chrome Mobile 76.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; SM-J260T1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-J260T1 Chrome Mobile 77.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; SM-J410F Build/M1AJB) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-J410F Chrome Mobile 77.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; SM-J727P Build/M1AJQ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.91 Mobile Safari/537.36 Samsung SM-J727P Chrome Mobile 68.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; SM-J727T Build/M1AJQ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.126 Mobile Safari/537.36 Samsung SM-J727T Chrome Mobile 66.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; SM-J727T1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.111 Mobile Safari/537.36 Samsung SM-J727T1 Chrome Mobile 76.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; SM-J727T1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.73 Mobile Safari/537.36 Samsung SM-J727T1 Chrome Mobile 77.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; SM-J727T1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-J727T1 Chrome Mobile 77.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; SM-J727V) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Mobile Safari/537.36 Samsung SM-J727V Chrome Mobile 70.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; SM-J727V) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-J727V Chrome Mobile 77.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; SM-P580) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Safari/537.36 SM-P580 Chrome 77.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; SM-T380) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.143 Safari/537.36 SM-T380 Chrome 75.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; SM-T580) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.90 Safari/537.36 EdgA/42.0.2.3928 SM-T580 Edge Mobile 42.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; SM-T580) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36 SM-T580 Chrome 76.0 Android 8.1.0 
+Mozilla/5.0 (Linux; Android 8.1.0; SM-T580) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.89 Safari/537.36 SM-T580 Chrome 76.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; SM-T580) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Safari/537.36 SM-T580 Chrome 77.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; SM-T837T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Safari/537.36 Samsung SM-T837T Chrome 77.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; TECNO CF8 Build/O11019; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/239.0.0.41.152;] TECNO CF8 Facebook 239.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; V1818CA) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.101 Mobile Safari/537.36 V1818CA Chrome Mobile 75.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; meizu C9 Build/OPM2.171019.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.91 Mobile Safari/537.36 meizu C9 Chrome Mobile 68.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; vivo 1724) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Mobile Safari/537.36 vivo 1724 Chrome Mobile 76.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 8.1.0; vivo 1814) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 vivo 1814 Chrome Mobile 77.0 Android 8.1.0 +Mozilla/5.0 (Linux; Android 9) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36 DuckDuckGo/5 Generic Smartphone DuckDuckGo Mobile 5.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; 1825) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Mobile Safari/537.36 1825 Chrome Mobile 70.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; ANE-LX2 Build/HUAWEIANE-L22; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/76.0.3809.132 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/236.0.0.40.117;] ANE-LX2 Facebook 236.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; BLA-A09) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 BLA-A09 Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; CLT-L04) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Huawei CLT-L04 Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; CPH1911 Build/PPR1.180610.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/239.0.0.41.152;] Oppo CPH1911 Facebook 239.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; CPH1923 Build/PPR1.180610.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/76.0.3809.111 Mobile Safari/537.36 Oppo CPH1923 Chrome Mobile WebView 76.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; ELE-L29) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Huawei ELE-L29 Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; G8142) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 G8142 Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; GM1911) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.111 Mobile Safari/537.36 GM1911 Chrome Mobile 76.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; GM1917) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 GM1917 Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; INE-LX2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.111 Mobile Safari/537.36 Huawei INE-LX2 Chrome Mobile 
76.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; LM-G710 Build/PKQ1.181105.001; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36 LM-G710 Chrome Mobile WebView 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; LM-Q720) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 LM-Q720 Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; LM-V405 Build/PKQ1.190202.001; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36;dailymotion-player-sdk-android 0.1.15 LM-V405 Chrome Mobile WebView 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; LM-V405) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.111 Mobile Safari/537.36 LM-V405 Chrome Mobile 76.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; LM-V500N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 LM-V500N Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; LM-X420) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Mobile Safari/537.36 LM-X420 Chrome Mobile 72.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; LM-X420) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 LM-X420 Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; MAR-LX1A) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 MAR-LX1A Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; MI 9) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 XiaoMi MI 9 Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; Mi A2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 XiaoMi Mi A2 Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; Moto Z (2)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Moto Z (2) Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; Nokia 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Nokia 6 Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; ONEPLUS A6000) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 OnePlus ONEPLUS A6000 Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; ONEPLUS A6003) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 OnePlus ONEPLUS A6003 Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; ONEPLUS A6013 Build/PKQ1.180716.001; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36 OnePlus ONEPLUS A6013 Chrome Mobile WebView 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; ONEPLUS A6013) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Mobile Safari/537.36 OnePlus ONEPLUS A6013 Chrome Mobile 74.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; ONEPLUS A6013) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 OnePlus ONEPLUS A6013 Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; PAR-AL00 Build/HUAWEIPAR-AL00; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/235.0.0.38.118;] PAR-AL00 Facebook 235.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; Pixel 2 XL) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Pixel 2 XL Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; Pixel 3 Build/PQ1A.190105.004; wv) AppleWebKit/537.36 
(KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36 Pixel 3 Chrome Mobile WebView 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; Pixel 3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.111 Mobile Safari/537.36 Pixel 3 Chrome Mobile 76.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; Pixel 3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Pixel 3 Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; Pixel 3a XL) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Pixel 3a XL Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; REVVLRY ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.90 Mobile Safari/537.36 REVVLRY Chrome Mobile 73.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; RMX1801) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.101 Mobile Safari/537.36 Oppo RMX1801 Chrome Mobile 75.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; Redmi 7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 XiaoMi Redmi 7 Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; Redmi Note 7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.89 Mobile Safari/537.36 XiaoMi Redmi Note 7 Chrome Mobile 76.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-A102U) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-A102U Samsung Internet 10.1 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-A505FN) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-A505FN Samsung Internet 10.1 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-A505GN) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-A505GN Samsung Internet 10.1 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-G892U) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-G892U Samsung Internet 10.1 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-G950U) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-G950U Samsung Internet 10.1 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-G955F Build/PPR1.180610.011) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/9.4 Chrome/67.0.3396.87 Mobile Safari/537.36 Samsung SM-G955F Samsung Internet 9.4 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-G955U) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-G955U Samsung Internet 10.1 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-G9600 Build/PPR1.180610.011) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/9.4 Chrome/67.0.3396.87 Mobile Safari/537.36 Samsung SM-G9600 Samsung Internet 9.4 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-G960U) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-G960U Samsung Internet 10.1 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-G965U) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-G965U Samsung Internet 10.1 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-G970F) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-G970F Samsung Internet 
10.1 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-G970U) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-G970U Samsung Internet 10.1 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-G973U Build/PPR1.180610.011) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/9.4 Chrome/67.0.3396.87 Mobile Safari/537.36 Samsung SM-G973U Samsung Internet 9.4 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-G973U) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-G973U Samsung Internet 10.1 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-G975U) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-G975U Samsung Internet 10.1 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-J415F) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-J415F Samsung Internet 10.1 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-J730F) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-J730F Samsung Internet 10.1 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-J737P) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-J737P Samsung Internet 10.1 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-J737T Build/PPR1.180610.011) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/9.0 Chrome/67.0.3396.87 Mobile Safari/537.36 Samsung SM-J737T Samsung Internet 9.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-N950U) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-N950U Samsung Internet 10.1 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-N960F) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-N960F Samsung Internet 10.1 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-N960U) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-N960U Samsung Internet 10.1 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-N960U1 Build/PPR1.180610.011) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/9.2 Chrome/67.0.3396.87 Mobile Safari/537.36 Samsung SM-N960U1 Samsung Internet 9.2 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-N970U) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-N970U Samsung Internet 10.1 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-N975U) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-N975U Samsung Internet 10.1 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-N975U1) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-N975U1 Samsung Internet 10.1 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-T510) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Safari/537.36 Samsung SM-T510 Samsung Internet 10.1 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-T720) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Safari/537.36 Samsung SM-T720 Samsung Internet 10.1 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SHIELD 
Android TV Build/PPR1.180610.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36;dailymotion-player-sdk-android 0.1.31 SHIELD Android TV Chrome Mobile WebView 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-A102U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Mobile Safari/537.36 Samsung SM-A102U Chrome Mobile 72.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-A102U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-A102U Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-A105M Build/PPR1.180610.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/237.0.0.44.120;] Samsung SM-A105M Facebook 237.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-A205G) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-A205G Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-A205U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-A205U Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-A505F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-A505F Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-A530F Build/PPR1.180610.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/240.0.0.38.121;] Samsung SM-A530F Facebook 240.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-A530N Build/PPR1.180610.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36;KAKAOTALK 1908560 Samsung SM-A530N Chrome Mobile WebView 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-A600T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-A600T Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-A605F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-A605F Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-A920F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-A920F Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G892A) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.136 Mobile Safari/537.36 Samsung SM-G892A Chrome Mobile 74.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G950F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G950F Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G950U Build/PPR1.180610.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36;dailymotion-player-sdk-android 0.1.31 Samsung SM-G950U Chrome Mobile WebView 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G950U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-G950U Chrome Mobile 71.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G950U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.111 Mobile Safari/537.36 Samsung SM-G950U Chrome Mobile 76.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G950U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Mobile Safari/537.36 Samsung SM-G950U Chrome Mobile 76.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G950U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile 
Safari/537.36 Samsung SM-G950U Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G950U1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G950U1 Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G955F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G955F Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G955U Build/PPR1.180610.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.73 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/240.0.0.38.121;] Samsung SM-G955U Facebook 240.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G955U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G955U Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G9600 Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G960U Build/PPR1.180610.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/73.0.3683.90 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/233.0.0.36.117;] Samsung SM-G960U Facebook 233.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G960U Build/PPR1.180610.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36;dailymotion-player-sdk-android 0.1.31 Samsung SM-G960U Chrome Mobile WebView 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G960U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.99 Mobile Safari/537.36 Samsung SM-G960U Chrome Mobile 71.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G960U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.136 Mobile Safari/537.36 Samsung SM-G960U Chrome Mobile 74.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G960U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G960U Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G960U1 Build/PPR1.180610.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/240.0.0.38.121;] Samsung SM-G960U1 Facebook 240.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G960U1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G960U1 Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G965F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G965F Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G965U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.136 Mobile Safari/537.36 Samsung SM-G965U Chrome Mobile 74.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G965U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G965U Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G965U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3921.2 Mobile Safari/537.36 Samsung SM-G965U Chrome Mobile 79.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G965U1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G965U1 Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G970U Build/PPR1.180610.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/240.0.0.38.121;] Samsung SM-G970U Facebook 240.0 Android 9.0.0 +Mozilla/5.0 (Linux; 
Android 9; SM-G970U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.101 Mobile Safari/537.36 Samsung SM-G970U Chrome Mobile 75.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G970U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G970U Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G970U1 Build/PPR1.180610.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G970U1 Chrome Mobile WebView 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G973U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.136 Mobile Safari/537.36 Samsung SM-G973U Chrome Mobile 74.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G973U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G973U Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G973U1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G973U1 Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G975U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.101 Mobile Safari/537.36 Samsung SM-G975U Chrome Mobile 75.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G975U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Mobile Safari/537.36 Samsung SM-G975U Chrome Mobile 76.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G975U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G975U Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-G975U1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-G975U1 Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-J260A) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-J260A Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-J337P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Mobile Safari/537.36 Samsung SM-J337P Chrome Mobile 76.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-J600FN) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.89 Mobile Safari/537.36 Samsung SM-J600FN Chrome Mobile 75.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-J600G Build/PPR1.180610.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.73 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/238.0.0.41.116;] Samsung SM-J600G Facebook 238.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-J730F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-J730F Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-J737A Build/PPR1.180610.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36;dailymotion-player-sdk-android 0.1.31 Samsung SM-J737A Chrome Mobile WebView 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-J737A) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.136 Mobile Safari/537.36 Samsung SM-J737A Chrome Mobile 74.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-J737V Build/PPR1.180610.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/75.0.3770.101 Mobile Safari/537.36 [Pinterest/Android] Samsung SM-J737V Pinterest 0.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-J737V) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-J737V Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; 
SM-J810M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-J810M Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-N950U Build/PPR1.180610.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/240.0.0.38.121;] Samsung SM-N950U Facebook 240.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-N950U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Mobile Safari/537.36 Samsung SM-N950U Chrome Mobile 72.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-N950U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.101 Mobile Safari/537.36 Samsung SM-N950U Chrome Mobile 75.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-N950U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.73 Mobile Safari/537.36 Samsung SM-N950U Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-N950U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-N950U Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-N960F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Mobile Safari/537.36 Samsung SM-N960F Chrome Mobile 76.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-N960U Build/PPR1.180610.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/74.0.3729.136 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/240.0.0.38.121;] Samsung SM-N960U Facebook 240.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-N960U Build/PPR1.180610.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36;dailymotion-player-sdk-android 0.1.31 Samsung SM-N960U Chrome Mobile WebView 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-N960U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Mobile Safari/537.36 Samsung SM-N960U Chrome Mobile 74.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-N960U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.143 Mobile Safari/537.36 Samsung SM-N960U Chrome Mobile 75.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-N960U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Mobile Safari/537.36 Samsung SM-N960U Chrome Mobile 76.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-N960U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-N960U Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-N960U1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-N960U1 Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-N975U Build/PPR1.180610.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.73 Mobile Safari/537.36;dailymotion-player-sdk-android 0.1.31 Samsung SM-N975U Chrome Mobile WebView 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-N975U Build/PPR1.180610.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36;dailymotion-player-sdk-android 0.1.31 Samsung SM-N975U Chrome Mobile WebView 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-N975U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-N975U Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-N976V Build/PPR1.180610.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/240.0.0.38.121;] Samsung SM-N976V Facebook 240.0 Android 9.0.0 +Mozilla/5.0 (Linux; 
Android 9; SM-S367VL) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Samsung SM-S367VL Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-S767VL) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Mobile Safari/537.36 Samsung SM-S767VL Chrome Mobile 76.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-T597P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Safari/537.36 Samsung SM-T597P Chrome 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; SM-T720) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Safari/537.36 SM-T720 Chrome 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; TECNO KC8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 TECNO KC8 Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; VOG-L29) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 Huawei VOG-L29 Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; cp3705A) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.136 Mobile Safari/537.36 cp3705A Chrome Mobile 74.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; moto g(6) Build/PPS29.118-15-11; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.92 Mobile Safari/537.36;dailymotion-player-sdk-android 0.1.31 moto g(6) Chrome Mobile WebView 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; moto g(6) play) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 moto g(6) play Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; moto g(7) play Build/PCYS29.105-134-1; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/76.0.3809.132 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/235.0.0.38.118;] moto g(7) play Facebook 235.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; moto g(7) play) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.80 Mobile Safari/537.36 moto g(7) play Chrome Mobile 70.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; moto g(7) power) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.101 Mobile Safari/537.36 moto g(7) power Chrome Mobile 75.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; moto g(7) power) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Mobile Safari/537.36 moto g(7) power Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; moto z4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.90 Mobile Safari/537.36 moto z4 Chrome Mobile 73.0 Android 9.0.0 +Mozilla/5.0 (Linux; Android 9; moto z4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.73 Mobile Safari/537.36 moto z4 Chrome Mobile 77.0 Android 9.0.0 +Mozilla/5.0 (Linux; U; Android 4.1.1; en-us; GT-P3113 Build/JRO03C) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Safari/534.30 Samsung GT-P3113 Android 4.1 Android 4.1.1 +Mozilla/5.0 (Linux; U; Android 4.1.2; ar-ae; GT-I8160 Build/JZO54K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30 Samsung GT-I8160 Android 4.1 Android 4.1.2 +Mozilla/5.0 (Linux; U; Android 4.2.2; en-us; Nexus 7 Build/JDQ39) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Safari/534.30; DailymotionEmbedSDK 1.0 Asus Nexus 7 Android 4.2 Android 4.2.2 +Mozilla/5.0 (Linux; U; Android 4.4; en-us; SM-E500H Build/JOP24G) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30 Samsung SM-E500H Android 4.4 Android 4.4.0 +Mozilla/5.0 (Linux; U; Android 6.0.1; en-us; LGMS550 Build/JOP24G) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Chrome/43.0.2357.65 Mobile 
Safari/534.30 LGMS550 Chrome Mobile WebView 43.0 Android 6.0.1 +Mozilla/5.0 (Linux; U; Android 6.0.1; en-us; SM-J737T1 Build/JOP24G) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Chrome/43.0.2357.65 Mobile Safari/534.30 Samsung SM-J737T1 Chrome Mobile WebView 43.0 Android 6.0.1 +Mozilla/5.0 (Linux; U; Android 7.0; TECNO CA6 Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/64.0.3282.137 Mobile Safari/537.36 OPR/5.3.2254.135058 TECNO CA6 Opera Mobile 5.3 Android 7.0.0 +Mozilla/5.0 (Linux; U; Android 7.1.2; id-id; Redmi 5A Build/N2G47H) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.146 Mobile Safari/537.36 XiaoMi/MiuiBrowser/9.5.6 XiaoMi Redmi 5A MiuiBrowser 9.5 Android 7.1.2 +Mozilla/5.0 (Linux; U; Android 9; in-id; CPH1911 Build/PPR1.180610.011) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/70.0.3538.80 Mobile Safari/537.36 OppoBrowser/25.6.0.0.5beta Oppo CPH1911 Chrome Mobile WebView 70.0 Android 9.0.0 +Mozilla/5.0 (Linux; U; Android 9; vivo 1904 Build/PPR1.180610.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/74.0.3729.136 Mobile Safari/537.36 OPR/44.1.2254.143214 vivo 1904 Opera Mobile 44.1 Android 9.0.0 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:68.0) Gecko/20100101 Firefox/68.0 Mac Firefox 68.0 Mac OS X 10.11.0 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:69.0) Gecko/20100101 Firefox/69.0 Mac Firefox 69.0 Mac OS X 10.13.0 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:67.0) Gecko/20100101 Firefox/67.0 Mac Firefox 67.0 Mac OS X 10.14.0 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:68.0) Gecko/20100101 Firefox/68.0 Mac Firefox 68.0 Mac OS X 10.14.0 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:69.0) Gecko/20100101 Firefox/69.0 Mac Firefox 69.0 Mac OS X 10.14.0 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:70.0) Gecko/20100101 Firefox/70.0 Mac Firefox 70.0 Mac OS X 10.14.0 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36 Mac Chrome 76.0 Mac OS X 10.10.5 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36 Mac Chrome 77.0 Mac OS X 10.10.5 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8 Mac Safari 10.1 Mac OS X 10.10.5 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36 Mac Chrome 76.0 Mac OS X 10.11.4 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36 Mac Chrome 72.0 Mac OS X 10.11.6 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36 Mac Chrome 76.0 Mac OS X 10.11.6 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36 Mac Chrome 76.0 Mac OS X 10.11.6 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36 Mac Chrome 77.0 Mac OS X 10.11.6 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/601.7.7 (KHTML, like Gecko) Version/9.1.2 Safari/601.7.7 Mac Safari 9.1 Mac OS X 10.11.6 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0.1 Safari/602.2.14 Mac Safari 10.0 Mac OS X 10.11.6 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/605.1.15 (KHTML, like Gecko) 
Version/11.1.2 Safari/605.1.15 Mac Safari 11.1 Mac OS X 10.11.6 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36 Mac Chrome 77.0 Mac OS X 10.12.1 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/602.4.8 (KHTML, like Gecko) Version/10.0.3 Safari/602.4.8 Mac Safari 10.0 Mac OS X 10.12.3 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36 Mac Chrome 75.0 Mac OS X 10.12.6 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36 Mac Chrome 76.0 Mac OS X 10.12.6 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36 Mac Chrome 76.0 Mac OS X 10.12.6 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36 Mac Chrome 77.0 Mac OS X 10.12.6 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.2 Safari/605.1.15 Mac Safari 12.1 Mac OS X 10.12.6 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Safari/604.1.38 Mac Safari 11.0 Mac OS X 10.13.0 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36 Mac Chrome 77.0 Mac OS X 10.13.1 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36 Mac Chrome 77.0 Mac OS X 10.13.2 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36 Mac Chrome 76.0 Mac OS X 10.13.4 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36 Mac Chrome 76.0 Mac OS X 10.13.4 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36 Mac Chrome 76.0 Mac OS X 10.13.5 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36 Mac Chrome 75.0 Mac OS X 10.13.6 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36 Mac Chrome 76.0 Mac OS X 10.13.6 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36 Mac Chrome 77.0 Mac OS X 10.13.6 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.3 Safari/605.1.15 Mac Safari 12.0 Mac OS X 10.13.6 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.1 Safari/605.1.15 Mac Safari 12.1 Mac OS X 10.13.6 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.2 Safari/605.1.15 Mac Safari 12.1 Mac OS X 10.13.6 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0 Safari/605.1.15 Mac Safari 13.0 Mac OS X 10.13.6 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.1 Safari/605.1.15 Mac Safari 13.0 Mac OS X 10.13.6 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36 Mac Chrome 75.0 Mac OS X 10.14.0 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) 
Chrome/76.0.3809.132 Safari/537.36 Mac Chrome 76.0 Mac OS X 10.14.0 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36 Mac Chrome 77.0 Mac OS X 10.14.0 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36 Mac Chrome 77.0 Mac OS X 10.14.1 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36 Mac Chrome 76.0 Mac OS X 10.14.2 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36 Mac Chrome 69.0 Mac OS X 10.14.3 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.3 Safari/605.1.15 Mac Safari 12.0 Mac OS X 10.14.3 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36 Mac Chrome 75.0 Mac OS X 10.14.4 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36 Mac Chrome 77.0 Mac OS X 10.14.4 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1 Safari/605.1.15 Mac Safari 12.1 Mac OS X 10.14.4 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36 Mac Chrome 76.0 Mac OS X 10.14.5 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36 Mac Chrome 77.0 Mac OS X 10.14.5 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.1 Safari/605.1.15 Mac Safari 12.1 Mac OS X 10.14.5 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36 Mac Chrome 75.0 Mac OS X 10.14.6 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36 Mac Chrome 76.0 Mac OS X 10.14.6 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36 Mac Chrome 76.0 Mac OS X 10.14.6 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.75 Safari/537.36 Mac Chrome 77.0 Mac OS X 10.14.6 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36 Mac Chrome 77.0 Mac OS X 10.14.6 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.2 Safari/605.1.15 Mac Safari 12.1 Mac OS X 10.14.6 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.1 Safari/605.1.15 Mac Safari 13.0 Mac OS X 10.14.6 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36 Mac Chrome 65.0 Mac OS X 10.9.5 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36 Mac Chrome 66.0 Mac OS X 10.9.5 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 Mac Chrome 67.0 Mac OS X 10.9.5 +Mozilla/5.0 (PlayStation 4 6.72) AppleWebKit/605.1.15 (KHTML, like Gecko) PlayStation 4 Apple Mail 605.1 Other 0.0.0 +Mozilla/5.0 (SMART-TV; LINUX; Tizen 3.0) AppleWebKit/538.1 (KHTML, like Gecko) Version/3.0 TV Safari/538.1 Samsung 
SMART-TV Safari 3.0 Tizen 3.0.0 +Mozilla/5.0 (SMART-TV; Linux; Tizen 3.0) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/2.0 Chrome/47.0.2526.69 TV safari/537.36 Samsung SMART-TV Samsung Internet 2.0 Tizen 3.0.0 +Mozilla/5.0 (SMART-TV; Linux; Tizen 4.0) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/2.1 Chrome/56.0.2924.0 TV Safari/537.36 Samsung SMART-TV Samsung Internet 2.1 Tizen 4.0.0 +Mozilla/5.0 (SMART-TV; Linux; Tizen 5.0) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/2.2 Chrome/63.0.3239.84 TV Safari/537.36 Samsung SMART-TV Samsung Internet 2.2 Tizen 5.0.0 +Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134 Other Edge 17.17134 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/18.17763 Other Edge 18.17763 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.75 Safari/537.36 Other Chrome 77.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36 Maxthon/5.2.7.5000 Other Maxthon 5.2 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.1.3683.41 Safari/537.36 Other Chrome 73.1 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36 Other Chrome 76.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36 OPR/63.0.3368.94 Other Opera 63.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.75 Safari/537.36 Other Chrome 77.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36 Other Chrome 77.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) coc_coc_browser/82.0.144 Chrome/76.0.3809.144 Safari/537.36 Other Coc Coc 82.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko Other IE 11.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; WOW64; rv:59.0) Gecko/20100101 Firefox/59.0 Other Firefox 59.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; WOW64; rv:60.0) Gecko/20100101 Firefox/60.0 Other Firefox 60.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36 Edge/15.15063 Other Edge 15.15063 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299 Other Edge 16.16299 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134 Other Edge 17.17134 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/18.17763 Other Edge 18.17763 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36 Other Chrome 65.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Other Chrome 70.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.18362 Other Edge 18.18362 Windows 
10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.18995 Other Edge 18.18995 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19493 Other Edge 18.19493 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36 Other Chrome 70.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36 Other Chrome 71.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36 Other Chrome 73.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36 Other Chrome 74.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.80 Safari/537.36 Other Chrome 75.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36 Other Chrome 76.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.110 Safari/537.36 Vivaldi/2.7.1628.30 Other Vivaldi 2.7 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36 Other Chrome 76.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36 OPR/63.0.3368.94 Other Opera 63.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.75 Safari/537.36 Other Chrome 77.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36 Other Chrome 77.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3907.0 Safari/537.36 Edg/79.0.279.0 Other Edge 79.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64; Xbox; Xbox One) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.18362 Other Edge 18.18362 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64; Xbox; Xbox One) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.18363 Other Edge 18.18363 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64; Xbox; Xbox One; WebView/3.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.18362 Other Edge 18.18362 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0 Other Firefox 61.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0 Other Firefox 63.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0 Other Firefox 67.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0 Other Firefox 68.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:69.0) Gecko/20100101 Firefox/69.0 Other Firefox 69.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 10.0; rv:69.0) Gecko/20100101 Firefox/69.0 Other Firefox 69.0 Windows 10.0.0 +Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36 Other Chrome 49.0 Windows XP.0.0 
+Mozilla/5.0 (Windows NT 6.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36 Other Chrome 49.0 Windows Vista.0.0 +Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36 Other Chrome 49.0 Windows Vista.0.0 +Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36 Other Chrome 76.0 Windows 7.0.0 +Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36 Other Chrome 77.0 Windows 7.0.0 +Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36 Other Chrome 77.0 Windows 7.0.0 +Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) coc_coc_browser/80.0.180 Chrome/74.0.3729.180 Safari/537.36 Other Coc Coc 80.0 Windows 7.0.0 +Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) coc_coc_browser/82.0.144 Chrome/76.0.3809.144 Safari/537.36 Other Coc Coc 82.0 Windows 7.0.0 +Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko Other IE 11.0 Windows 7.0.0 +Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36 Other Chrome 67.0 Windows 7.0.0 +Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36 Other Chrome 70.0 Windows 7.0.0 +Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36 Other Chrome 72.0 Windows 7.0.0 +Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36 Other Chrome 74.0 Windows 7.0.0 +Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36 Other Chrome 75.0 Windows 7.0.0 +Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36 Other Chrome 76.0 Windows 7.0.0 +Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36 Other Chrome 76.0 Windows 7.0.0 +Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36 Other Chrome 77.0 Windows 7.0.0 +Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0 Waterfox/56.2.14 Other Waterfox 56.2 Windows 7.0.0 +Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0 Other Firefox 60.0 Windows 7.0.0 +Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0 Other Firefox 63.0 Windows 7.0.0 +Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0 Other Firefox 68.0 Windows 7.0.0 +Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:69.0) Gecko/20100101 Firefox/69.0 Other Firefox 69.0 Windows 7.0.0 +Mozilla/5.0 (Windows NT 6.1; rv:69.0) Gecko/20100101 Firefox/69.0 Other Firefox 69.0 Windows 7.0.0 +Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36 Other Chrome 77.0 Windows 8.0.0 +Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:69.0) Gecko/20100101 Firefox/69.0 Other Firefox 69.0 Windows 8.0.0 +Mozilla/5.0 (Windows NT 6.3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36 Other Chrome 77.0 Windows 8.1.0 +Mozilla/5.0 (Windows NT 6.3; ARM; Trident/7.0; Touch; rv:11.0) like Gecko Other IE 11.0 Windows RT 8.1.0 +Mozilla/5.0 (Windows NT 6.3; Trident/7.0; Touch; rv:11.0) 
like Gecko Other IE 11.0 Windows 8.1.0 +Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko Other IE 11.0 Windows 8.1.0 +Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 Other Chrome 63.0 Windows 8.1.0 +Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36 Other Chrome 64.0 Windows 8.1.0 +Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36 Other Chrome 76.0 Windows 8.1.0 +Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36 Other Chrome 76.0 Windows 8.1.0 +Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36 Other Chrome 77.0 Windows 8.1.0 +Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:69.0) Gecko/20100101 Firefox/69.0 Other Firefox 69.0 Windows 8.1.0 +Mozilla/5.0 (Windows NT 6.3; rv:69.0) Gecko/20100101 Firefox/69.0 Other Firefox 69.0 Windows 8.1.0 +Mozilla/5.0 (Windows; U; Windows NT 10.0; en-US; Valve Steam GameOverlay/1568860339; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36 Other Chrome 72.0 Windows 10.0.0 +Mozilla/5.0 (X11; CrOS aarch64 12371.75.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.105 Safari/537.36 Other Chrome 77.0 Chrome OS 12371.75.0 +Mozilla/5.0 (X11; CrOS armv7l 12239.92.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.136 Safari/537.36 Other Chrome 76.0 Chrome OS 12239.92.0 +Mozilla/5.0 (X11; CrOS x86_64 10895.78.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.120 Safari/537.36 Other Chrome 69.0 Chrome OS 10895.78.0 +Mozilla/5.0 (X11; CrOS x86_64 11021.81.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36 Other Chrome 70.0 Chrome OS 11021.81.0 +Mozilla/5.0 (X11; CrOS x86_64 11895.118.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.159 Safari/537.36 Other Chrome 74.0 Chrome OS 11895.118.0 +Mozilla/5.0 (X11; CrOS x86_64 12239.92.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.136 Safari/537.36 Other Chrome 76.0 Chrome OS 12239.92.0 +Mozilla/5.0 (X11; CrOS x86_64 12239.92.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.136 Safari/537.36 Other Chrome 76.0 Chrome OS 12239.92.1 +Mozilla/5.0 (X11; CrOS x86_64 12239.92.4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.136 Safari/537.36 Other Chrome 76.0 Chrome OS 12239.92.4 +Mozilla/5.0 (X11; CrOS x86_64 12371.46.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.63 Safari/537.36 Other Chrome 77.0 Chrome OS 12371.46.0 +Mozilla/5.0 (X11; CrOS x86_64 12371.65.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.93 Safari/537.36 Other Chrome 77.0 Chrome OS 12371.65.0 +Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36 Other Chrome 75.0 Linux 0.0.0 +Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.75 Safari/537.36 Other Chrome 77.0 Linux 0.0.0 +Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/10.1 Chrome/71.0.3578.99 Safari/537.36 Other Samsung Internet 10.1 Linux 0.0.0 +Mozilla/5.0 (X11; U; U; Linux x86_64; in-id) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36 Other Chrome 66.0 Linux 0.0.0 +Mozilla/5.0 (X11; U; U; Linux x86_64; pt-pt) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36 
Other Chrome 66.0 Linux 0.0.0 +Mozilla/5.0 (X11; U; U; Linux x86_64; th-th) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36 Other Chrome 66.0 Linux 0.0.0 +Mozilla/5.0 (X11; U; U; Linux x86_64; vi-vn) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36 Other Chrome 66.0 Linux 0.0.0 +Mozilla/5.0 (X11; U; U; Linux x86_64; zh-cn) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36 Other Chrome 66.0 Linux 0.0.0 +Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:65.0) Gecko/20100101 Firefox/65.0 Other Firefox 65.0 Ubuntu 0.0.0 +Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:66.0) Gecko/20100101 Firefox/66.0 Other Firefox 66.0 Ubuntu 0.0.0 +Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:67.0) Gecko/20100101 Firefox/67.0 Other Firefox 67.0 Ubuntu 0.0.0 +Mozilla/5.0 (iPad; CPU OS 10_3_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) GSA/22.0.141836113 Mobile/14G60 Safari/600.1.4 iPad Google 22.0 iOS 10.3.3 +Mozilla/5.0 (iPad; CPU OS 10_3_3 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) CriOS/71.0.3578.89 Mobile/14G60 Safari/602.1 iPad Chrome Mobile iOS 71.0 iOS 10.3.3 +Mozilla/5.0 (iPad; CPU OS 10_3_3 like Mac OS X) AppleWebKit/603.3.8 (KHTML, like Gecko) FxiOS/14.0b12646 Mobile/14G60 Safari/603.3.8 iPad Firefox iOS 14.0 iOS 10.3.3 +Mozilla/5.0 (iPad; CPU OS 10_3_3 like Mac OS X) AppleWebKit/603.3.8 (KHTML, like Gecko) Mobile/14G60 iPad Mobile Safari UI/WKWebView 0.0 iOS 10.3.3 +Mozilla/5.0 (iPad; CPU OS 10_3_3 like Mac OS X) AppleWebKit/603.3.8 (KHTML, like Gecko) Mobile/14G60 [FBAN/FBIOS;FBAV/240.0.0.55.117;FBBV/174195427;FBDV/iPad5,3;FBMD/iPad;FBSN/iOS;FBSV/10.3.3;FBSS/2;FBID/tablet;FBLC/zh_TW;FBOP/5;FBRV/175353135;FBCR/] iPad Facebook 240.0 iOS 10.3.3 +Mozilla/5.0 (iPad; CPU OS 10_3_3 like Mac OS X) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.0 Mobile/14G60 Safari/602.1 iPad Mobile Safari 10.0 iOS 10.3.3 +Mozilla/5.0 (iPad; CPU OS 10_3_4 like Mac OS X) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.0 Mobile/14G61 Safari/602.1 iPad Mobile Safari 10.0 iOS 10.3.4 +Mozilla/5.0 (iPad; CPU OS 11_1 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) CriOS/76.0.3809.123 Mobile/15B101 Safari/604.1 iPad Chrome Mobile iOS 76.0 iOS 11.1.0 +Mozilla/5.0 (iPad; CPU OS 11_1_2 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) CriOS/76.0.3809.123 Mobile/15B202 Safari/604.1 iPad Chrome Mobile iOS 76.0 iOS 11.1.2 +Mozilla/5.0 (iPad; CPU OS 11_2_1 like Mac OS X) AppleWebKit/604.4.7 (KHTML, like Gecko) Version/11.0 Mobile/15C153 Safari/604.1 iPad Mobile Safari 11.0 iOS 11.2.1 +Mozilla/5.0 (iPad; CPU OS 11_2_2 like Mac OS X) AppleWebKit/604.4.7 (KHTML, like Gecko) Version/11.0 Mobile/15C202 Safari/604.1 iPad Mobile Safari 11.0 iOS 11.2.2 +Mozilla/5.0 (iPad; CPU OS 11_2_6 like Mac OS X) AppleWebKit/604.5.6 (KHTML, like Gecko) Version/11.0 Mobile/15D100 Safari/604.1 iPad Mobile Safari 11.0 iOS 11.2.6 +Mozilla/5.0 (iPad; CPU OS 11_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.0 Mobile/15E148 Safari/604.1 iPad Mobile Safari 11.0 iOS 11.3.0 +Mozilla/5.0 (iPad; CPU OS 11_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.0 Mobile/15E148 Safari/604.1 iPad Mobile Safari 11.0 iOS 11.4.0 +Mozilla/5.0 (iPad; CPU OS 11_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15G77 iPad Mobile Safari UI/WKWebView 0.0 iOS 11.4.1 +Mozilla/5.0 (iPad; CPU OS 11_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.0 Mobile/15E148 Safari/604.1 iPad Mobile Safari 11.0 iOS 
11.4.1 +Mozilla/5.0 (iPad; CPU OS 12_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/83.0.268992909 Mobile/15E148 Safari/605.1 iPad Google 83.0 iOS 12.0.0 +Mozilla/5.0 (iPad; CPU OS 12_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1 iPad Mobile Safari 12.0 iOS 12.0.0 +Mozilla/5.0 (iPad; CPU OS 12_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/75.0.3770.103 Mobile/15E148 Safari/605.1 iPad Chrome Mobile iOS 75.0 iOS 12.1.0 +Mozilla/5.0 (iPad; CPU OS 12_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/76.0.3809.123 Mobile/15E148 Safari/605.1 iPad Chrome Mobile iOS 76.0 iOS 12.1.0 +Mozilla/5.0 (iPad; CPU OS 12_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/16B92 iPad Mobile Safari UI/WKWebView 0.0 iOS 12.1.0 +Mozilla/5.0 (iPad; CPU OS 12_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1 iPad Mobile Safari 12.0 iOS 12.1.0 +Mozilla/5.0 (iPad; CPU OS 12_1_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1 iPad Mobile Safari 12.0 iOS 12.1.1 +Mozilla/5.0 (iPad; CPU OS 12_1_4 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) GSA/48.0.193557427 Mobile/16D57 Safari/604.1 iPad Google 48.0 iOS 12.1.4 +Mozilla/5.0 (iPad; CPU OS 12_1_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/16D57 iPad Mobile Safari UI/WKWebView 0.0 iOS 12.1.4 +Mozilla/5.0 (iPad; CPU OS 12_1_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1 iPad Mobile Safari 12.0 iOS 12.1.4 +Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/76.0.3809.123 Mobile/15E148 Safari/605.1 iPad Chrome Mobile iOS 76.0 iOS 12.2.0 +Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 iPad Mobile Safari UI/WKWebView 0.0 iOS 12.2.0 +Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1 Mobile/15E148 Safari/604.1 iPad Mobile Safari 12.1 iOS 12.2.0 +Mozilla/5.0 (iPad; CPU OS 12_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/77.0.3865.93 Mobile/15E148 Safari/605.1 iPad Chrome Mobile iOS 77.0 iOS 12.3.0 +Mozilla/5.0 (iPad; CPU OS 12_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/83.0.268992909 Mobile/15E148 Safari/605.1 iPad Google 83.0 iOS 12.3.0 +Mozilla/5.0 (iPad; CPU OS 12_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.1 Mobile/15E148 Safari/604.1 iPad Mobile Safari 12.1 iOS 12.3.0 +Mozilla/5.0 (iPad; CPU OS 12_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 iPad Mobile Safari UI/WKWebView 0.0 iOS 12.3.1 +Mozilla/5.0 (iPad; CPU OS 12_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.1 Mobile/15E148 Safari/604.1 iPad Mobile Safari 12.1 iOS 12.3.1 +Mozilla/5.0 (iPad; CPU OS 12_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/76.0.3809.123 Mobile/15E148 Safari/605.1 iPad Chrome Mobile iOS 76.0 iOS 12.4.0 +Mozilla/5.0 (iPad; CPU OS 12_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/76.0.3809.81 Mobile/15E148 Safari/605.1 iPad Chrome Mobile iOS 76.0 iOS 12.4.0 +Mozilla/5.0 (iPad; CPU OS 12_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/77.0.3865.103 Mobile/15E148 Safari/605.1 iPad Chrome Mobile iOS 77.0 iOS 12.4.0 +Mozilla/5.0 (iPad; CPU OS 12_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) 
CriOS/77.0.3865.69 Mobile/15E148 Safari/605.1 iPad Chrome Mobile iOS 77.0 iOS 12.4.0 +Mozilla/5.0 (iPad; CPU OS 12_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/77.0.3865.93 Mobile/15E148 Safari/605.1 iPad Chrome Mobile iOS 77.0 iOS 12.4.0 +Mozilla/5.0 (iPad; CPU OS 12_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/74.0.248026584 Mobile/15E148 Safari/605.1 iPad Google 74.0 iOS 12.4.0 +Mozilla/5.0 (iPad; CPU OS 12_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/83.0.268992909 Mobile/15E148 Safari/605.1 iPad Google 83.0 iOS 12.4.0 +Mozilla/5.0 (iPad; CPU OS 12_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 iPad Mobile Safari UI/WKWebView 0.0 iOS 12.4.0 +Mozilla/5.0 (iPad; CPU OS 12_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.2 Mobile/15E148 Safari/604.1 iPad Mobile Safari 12.1 iOS 12.4.0 +Mozilla/5.0 (iPad; CPU OS 12_4_1 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) CriOS/67.0.3396.87 Mobile/16G102 Safari/604.1 iPad Chrome Mobile iOS 67.0 iOS 12.4.1 +Mozilla/5.0 (iPad; CPU OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) FxiOS/19.0b16042 Mobile/15E148 Safari/605.1.15 iPad Firefox iOS 19.0 iOS 12.4.1 +Mozilla/5.0 (iPad; CPU OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 iPad Mobile Safari UI/WKWebView 0.0 iOS 12.4.1 +Mozilla/5.0 (iPad; CPU OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPad4,7;FBMD/iPad;FBSN/iOS;FBSV/12.4.1;FBSS/2;FBID/tablet;FBLC/vi_VN;FBOP/5;FBCR/] iPad Facebook 0.0 iOS 12.4.1 +Mozilla/5.0 (iPad; CPU OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPad5,1;FBMD/iPad;FBSN/iOS;FBSV/12.4.1;FBSS/2;FBCR/;FBID/tablet;FBLC/en_US;FBOP/5] iPad Facebook 0.0 iOS 12.4.1 +Mozilla/5.0 (iPad; CPU OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPad6,11;FBMD/iPad;FBSN/iOS;FBSV/12.4.1;FBSS/2;FBID/tablet;FBLC/en_US;FBOP/5;FBCR/] iPad Facebook 0.0 iOS 12.4.1 +Mozilla/5.0 (iPad; CPU OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPad7,5;FBMD/iPad;FBSN/iOS;FBSV/12.4.1;FBSS/2;FBID/tablet;FBLC/en_US;FBOP/5;FBCR/] iPad Facebook 0.0 iOS 12.4.1 +Mozilla/5.0 (iPad; CPU OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.2 Mobile/15E148 Safari/604.1 iPad Mobile Safari 12.1 iOS 12.4.1 +Mozilla/5.0 (iPad; CPU OS 6_1_3 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10B329 Safari/8536.25 iPad Mobile Safari 6.0 iOS 6.1.3 +Mozilla/5.0 (iPad; CPU OS 8_0 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12A365 Safari/600.1.4 iPad Mobile Safari 8.0 iOS 8.0.0 +Mozilla/5.0 (iPad; CPU OS 8_2 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12D508 Safari/600.1.4 iPad Mobile Safari 8.0 iOS 8.2.0 +Mozilla/5.0 (iPad; CPU OS 8_4 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) GSA/23.1.148956103 Mobile/12H143 Safari/600.1.4 iPad Google 23.1 iOS 8.4.0 +Mozilla/5.0 (iPad; CPU OS 9_3_2 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13F69 Safari/601.1 iPad Mobile Safari 9.0 iOS 9.3.2 +Mozilla/5.0 (iPad; CPU OS 9_3_5 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13G36 Safari/601.1 iPad Mobile Safari 9.0 iOS 9.3.5 +Mozilla/5.0 (iPhone; CPU iPhone OS 10_2 like Mac OS X) AppleWebKit/602.3.12 (KHTML, like Gecko) 
Version/10.0 Mobile/14C92 Safari/602.1 iPhone Mobile Safari 10.0 iOS 10.2.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_3 like Mac OS X) AppleWebKit/603.3.8 (KHTML, like Gecko) Mobile/14G60 [FBAN/FBIOS;FBDV/iPhone7,1;FBMD/iPhone;FBSN/iOS;FBSV/10.3.3;FBSS/3;FBID/phone;FBLC/en_US;FBOP/5;FBCR/Verizon] iPhone Facebook 0.0 iOS 10.3.3 +Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_4 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) GSA/68.0.234683655 Mobile/14G61 Safari/602.1 iPhone Google 68.0 iOS 10.3.4 +Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_4 like Mac OS X) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.0 Mobile/14G61 Safari/602.1 iPhone Mobile Safari 10.0 iOS 10.3.4 +Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_3 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A432 Safari/604.1 iPhone Mobile Safari 11.0 iOS 11.0.3 +Mozilla/5.0 (iPhone; CPU iPhone OS 11_1_1 like Mac OS X) AppleWebKit/604.3.5 (KHTML, like Gecko) Version/11.0 Mobile/15B150 Safari/604.1 iPhone Mobile Safari 11.0 iOS 11.1.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 11_1_2 like Mac OS X) AppleWebKit/604.3.5 (KHTML, like Gecko) Version/11.0 Mobile/15B202 Safari/604.1 iPhone Mobile Safari 11.0 iOS 11.1.2 +Mozilla/5.0 (iPhone; CPU iPhone OS 11_2_1 like Mac OS X) AppleWebKit/604.4.7 (KHTML, like Gecko) Version/11.0 Mobile/15C153 Safari/604.1 iPhone Mobile Safari 11.0 iOS 11.2.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 11_2_6 like Mac OS X) AppleWebKit/604.5.6 (KHTML, like Gecko) Mobile/15D100 [FBAN/FBIOS;FBAV/207.0.0.48.100;FBBV/141048683;FBDV/iPhone9,3;FBMD/iPhone;FBSN/iOS;FBSV/11.2.6;FBSS/2;FBCR/SFR;FBID/phone;FBLC/fr_FR;FBOP/5;FBRV/142061404] iPhone Facebook 207.0 iOS 11.2.6 +Mozilla/5.0 (iPhone; CPU iPhone OS 11_3 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) CriOS/76.0.3809.123 Mobile/15E148 Safari/604.1 iPhone Chrome Mobile iOS 76.0 iOS 11.3.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 11_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E302 [FBAN/FBIOS;FBDV/iPhone7,2;FBMD/iPhone;FBSN/iOS;FBSV/11.3.1;FBSS/2;FBID/phone;FBLC/fr_FR;FBOP/5;FBCR/VINI] iPhone Facebook 0.0 iOS 11.3.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 11_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.0 Mobile/15E148 Safari/604.1 iPhone Mobile Safari 11.0 iOS 11.3.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 11_4 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) GSA/83.0.268992909 Mobile/15E148 Safari/604.1 iPhone Google 83.0 iOS 11.4.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 11_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.0 Mobile/15E148 Safari/604.1 iPhone Mobile Safari 11.0 iOS 11.4.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 11_4_1 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) GSA/74.1.250942683 Mobile/15G77 Safari/604.1 iPhone Google 74.1 iOS 11.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 11_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.0 Mobile/15E148 Safari/604.1 iPhone Mobile Safari 11.0 iOS 11.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1 iPhone Mobile Safari 12.0 iOS 12.0.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1 iPhone Mobile Safari 12.0 iOS 12.1.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_1_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1 iPhone Mobile Safari 12.0 iOS 12.1.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_1_2 like 
Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/74.1.250942683 Mobile/16C101 Safari/604.1 iPhone Google 74.1 iOS 12.1.2 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/16C101 [FBAN/FBIOS;FBDV/iPhone9,3;FBMD/iPhone;FBSN/iOS;FBSV/12.1.2;FBSS/2;FBCR/Free;FBID/phone;FBLC/fr_FR;FBOP/5] iPhone Facebook 0.0 iOS 12.1.2 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1 iPhone Mobile Safari 12.0 iOS 12.1.2 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_1_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1 iPhone Mobile Safari 12.0 iOS 12.1.3 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_1_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/74.1.250942683 Mobile/16D57 Safari/604.1 iPhone Google 74.1 iOS 12.1.4 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_1_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1 iPhone Mobile Safari 12.0 iOS 12.1.4 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/72.0.3626.101 Mobile/15E148 Safari/605.1 iPhone Chrome Mobile iOS 72.0 iOS 12.2.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/76.0.3809.123 Mobile/15E148 Safari/605.1 iPhone Chrome Mobile iOS 76.0 iOS 12.2.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/77.0.3865.69 Mobile/15E148 Safari/605.1 iPhone Chrome Mobile iOS 77.0 iOS 12.2.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone10,2;FBMD/iPhone;FBSN/iOS;FBSV/12.2;FBSS/3;FBID/phone;FBLC/en_US;FBOP/5;FBCR/T-Mobile] iPhone Facebook 0.0 iOS 12.2.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone10,5;FBMD/iPhone;FBSN/iOS;FBSV/12.2;FBSS/3;FBCR/AT&T;FBID/phone;FBLC/en_US;FBOP/5] iPhone Facebook 0.0 iOS 12.2.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1 Mobile/15E148 Safari/604.1 iPhone Mobile Safari 12.1 iOS 12.2.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/77.0.3865.69 Mobile/15E148 Safari/605.1 iPhone Chrome Mobile iOS 77.0 iOS 12.3.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/83.0.268992909 Mobile/15E148 Safari/605.1 iPhone Google 83.0 iOS 12.3.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.1 Mobile/15E148 Safari/604.1 iPhone Mobile Safari 12.1 iOS 12.3.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/79.0.259819395 Mobile/16F203 Safari/604.1 iPhone Google 79.0 iOS 12.3.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 iPhone Mobile Safari UI/WKWebView 0.0 iOS 12.3.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 DuckDuckGo/7 iPhone DuckDuckGo Mobile 7.0 iOS 12.3.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 
[FBAN/FBIOS;FBDV/iPhone10,6;FBMD/iPhone;FBSN/iOS;FBSV/12.3.1;FBSS/3;FBID/phone;FBLC/en_US;FBOP/5;FBCR/AT&T] iPhone Facebook 0.0 iOS 12.3.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone11,6;FBMD/iPhone;FBSN/iOS;FBSV/12.3.1;FBSS/3;FBID/phone;FBLC/en_US;FBOP/5;FBCR/AT&T] iPhone Facebook 0.0 iOS 12.3.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone9,3;FBMD/iPhone;FBSN/iOS;FBSV/12.3.1;FBSS/2;FBID/phone;FBLC/es_LA;FBOP/5;FBCR/AT&T] iPhone Facebook 0.0 iOS 12.3.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.1 Mobile/15E148 Safari/604.1 iPhone Mobile Safari 12.1 iOS 12.3.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_3_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.1 Mobile/15E148 Safari/604.1 iPhone Mobile Safari 12.1 iOS 12.3.2 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/69.0.3497.105 Mobile/15E148 Safari/605.1 iPhone Chrome Mobile iOS 69.0 iOS 12.4.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/73.0.3683.68 Mobile/15E148 Safari/605.1 iPhone Chrome Mobile iOS 73.0 iOS 12.4.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/75.0.3770.103 Mobile/15E148 Safari/605.1 iPhone Chrome Mobile iOS 75.0 iOS 12.4.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/76.0.3809.123 Mobile/15E148 Safari/605.1 iPhone Chrome Mobile iOS 76.0 iOS 12.4.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/77.0.3865.103 Mobile/15E148 Safari/605.1 iPhone Chrome Mobile iOS 77.0 iOS 12.4.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/77.0.3865.69 Mobile/15E148 Safari/605.1 iPhone Chrome Mobile iOS 77.0 iOS 12.4.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/81.0.264749124 Mobile/15E148 Safari/605.1 iPhone Google 81.0 iOS 12.4.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/82.1.267240167 Mobile/15E148 Safari/605.1 iPhone Google 82.1 iOS 12.4.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/83.0.268992909 Mobile/15E148 Safari/605.1 iPhone Google 83.0 iOS 12.4.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone10,1;FBMD/iPhone;FBSN/iOS;FBSV/12.4;FBSS/2;FBID/phone;FBLC/en_US;FBOP/5;FBCR/Verizon] iPhone Facebook 0.0 iOS 12.4.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone10,5;FBMD/iPhone;FBSN/iOS;FBSV/12.4;FBSS/3;FBID/phone;FBLC/en_US;FBOP/5;FBCR/T-Mobile] iPhone Facebook 0.0 iOS 12.4.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone11,6;FBMD/iPhone;FBSN/iOS;FBSV/12.4;FBSS/3;FBID/phone;FBLC/en_US;FBOP/5;FBCR/Sprint] iPhone Facebook 0.0 iOS 12.4.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 
[FBAN/FBIOS;FBDV/iPhone9,4;FBMD/iPhone;FBSN/iOS;FBSV/12.4;FBSS/3;FBID/phone;FBLC/en_US;FBOP/5;FBCR/T-Mobile] iPhone Facebook 0.0 iOS 12.4.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.2 Mobile/15E148 Safari/604.1 iPhone Mobile Safari 12.1 iOS 12.4.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/74.1.250942683 Mobile/16G102 Safari/604.1 iPhone Google 74.1 iOS 12.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 iPhone Mobile Safari UI/WKWebView 0.0 iOS 12.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 Instagram 89.0.0.14.100 (iPhone11,6; iOS 12_4_1; en_US; en-US; scale=3.00; gamut=normal; 1242x2688; 149781277) iPhone Instagram 89.0 iOS 12.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBAV/240.0.0.55.117;FBBV/174195427;FBDV/iPhone7,2;FBMD/iPhone;FBSN/iOS;FBSV/12.4.1;FBSS/2;FBID/phone;FBLC/es_LA;FBOP/5;FBRV/175040728;FBCR/AT&T] iPhone Facebook 240.0 iOS 12.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone10,2;FBMD/iPhone;FBSN/iOS;FBSV/12.4.1;FBSS/3;FBID/phone;FBLC/en_US;FBOP/5;FBCR/Sprint] iPhone Facebook 0.0 iOS 12.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone10,3;FBMD/iPhone;FBSN/iOS;FBSV/12.4.1;FBSS/3;FBID/phone;FBLC/en_US;FBOP/5;FBCR/T-Mobile] iPhone Facebook 0.0 iOS 12.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone10,3;FBMD/iPhone;FBSN/iOS;FBSV/12.4.1;FBSS/3;FBID/phone;FBLC/en_US;FBOP/5;FBCR/Verizon] iPhone Facebook 0.0 iOS 12.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone10,4;FBMD/iPhone;FBSN/iOS;FBSV/12.4.1;FBSS/2;FBID/phone;FBLC/es_LA;FBOP/5;FBCR/T-Mobile] iPhone Facebook 0.0 iOS 12.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone10,4;FBMD/iPhone;FBSN/iOS;FBSV/12.4.1;FBSS/2;FBID/phone;FBLC/fr_FR;FBOP/5;FBCR/AT&T] iPhone Facebook 0.0 iOS 12.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone10,5;FBMD/iPhone;FBSN/iOS;FBSV/12.4.1;FBSS/3;FBCR/T-Mobile;FBID/phone;FBLC/es_LA;FBOP/5] iPhone Facebook 0.0 iOS 12.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone10,5;FBMD/iPhone;FBSN/iOS;FBSV/12.4.1;FBSS/3;FBID/phone;FBLC/en_US;FBOP/5;FBCR/T-Mobile] iPhone Facebook 0.0 iOS 12.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone10,5;FBMD/iPhone;FBSN/iOS;FBSV/12.4.1;FBSS/3;FBID/phone;FBLC/es_LA;FBOP/5;FBCR/T-Mobile] iPhone Facebook 0.0 iOS 12.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone10,6;FBMD/iPhone;FBSN/iOS;FBSV/12.4.1;FBSS/3;FBID/phone;FBLC/en_US;FBOP/5;FBCR/AT&T] iPhone Facebook 0.0 iOS 12.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_1 like 
Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone11,2;FBMD/iPhone;FBSN/iOS;FBSV/12.4.1;FBSS/3;FBID/phone;FBLC/en_US;FBOP/5;FBCR/Verizon] iPhone Facebook 0.0 iOS 12.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone11,6;FBMD/iPhone;FBSN/iOS;FBSV/12.4.1;FBSS/3;FBID/phone;FBLC/en_US;FBOP/5;FBCR/AT&T] iPhone Facebook 0.0 iOS 12.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone11,6;FBMD/iPhone;FBSN/iOS;FBSV/12.4.1;FBSS/3;FBID/phone;FBLC/en_US;FBOP/5;FBCR/Verizon] iPhone Facebook 0.0 iOS 12.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone11,6;FBMD/iPhone;FBSN/iOS;FBSV/12.4.1;FBSS/3;FBID/phone;FBLC/fr_FR;FBOP/5;FBCR/SFR] iPhone Facebook 0.0 iOS 12.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone11,8;FBMD/iPhone;FBSN/iOS;FBSV/12.4.1;FBSS/2;FBID/phone;FBLC/en_US;FBOP/5;FBCR/AT&T] iPhone Facebook 0.0 iOS 12.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone11,8;FBMD/iPhone;FBSN/iOS;FBSV/12.4.1;FBSS/2;FBID/phone;FBLC/en_US;FBOP/5;FBCR/Verizon] iPhone Facebook 0.0 iOS 12.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone11,8;FBMD/iPhone;FBSN/iOS;FBSV/12.4.1;FBSS/2;FBID/phone;FBLC/fr_FR;FBOP/5;FBCR/Carrier] iPhone Facebook 0.0 iOS 12.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone7,2;FBMD/iPhone;FBSN/iOS;FBSV/12.4.1;FBSS/2;FBID/phone;FBLC/en_US;FBOP/5;FBCR/T-Mobile] iPhone Facebook 0.0 iOS 12.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone8,1;FBMD/iPhone;FBSN/iOS;FBSV/12.4.1;FBSS/2;FBID/phone;FBLC/en_US;FBOP/5;FBCR/MetroPCS] iPhone Facebook 0.0 iOS 12.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone9,2;FBMD/iPhone;FBSN/iOS;FBSV/12.4.1;FBSS/3;FBID/phone;FBLC/en_US;FBOP/5;FBCR/cricket] iPhone Facebook 0.0 iOS 12.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone9,4;FBMD/iPhone;FBSN/iOS;FBSV/12.4.1;FBSS/3;FBID/phone;FBLC/en_US;FBOP/5;FBCR/AT&T] iPhone Facebook 0.0 iOS 12.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone9,4;FBMD/iPhone;FBSN/iOS;FBSV/12.4.1;FBSS/3;FBID/phone;FBLC/fr_FR;FBOP/5;FBCR/T-Mobile] iPhone Facebook 0.0 iOS 12.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.2 Mobile/15E148 Safari/604.1 iPhone Mobile Safari 12.1 iOS 12.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.4.1 Mobile/15E148 Safari/605.1.15 iPhone Mobile Safari 12.4 iOS 12.4.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 12_4_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 iPhone Mobile Safari UI/WKWebView 0.0 iOS 12.4.2 +Mozilla/5.0 (iPhone; CPU iPhone OS 
12_4_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.2 Mobile/15E148 Safari/604.1 iPhone Mobile Safari 12.1 iOS 12.4.2 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/77.0.3865.69 Mobile/15E148 Safari/605.1 iPhone Chrome Mobile iOS 77.0 iOS 13.0.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone10,5;FBMD/iPhone;FBSN/iOS;FBSV/13.0;FBSS/3;FBID/phone;FBLC/en_US;FBOP/5;FBCR/AT&T] iPhone Facebook 0.0 iOS 13.0.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone10,6;FBMD/iPhone;FBSN/iOS;FBSV/13.0;FBSS/3;FBID/phone;FBLC/en_US;FBOP/5;FBCR/T-Mobile] iPhone Facebook 0.0 iOS 13.0.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone11,2;FBMD/iPhone;FBSN/iOS;FBSV/13.0;FBSS/3;FBID/phone;FBLC/en_US;FBOP/5;FBCR/Verizon] iPhone Facebook 0.0 iOS 13.0.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone11,8;FBMD/iPhone;FBSN/iOS;FBSV/13.0;FBSS/2;FBID/phone;FBLC/en_US;FBOP/5;FBCR/T-Mobile] iPhone Facebook 0.0 iOS 13.0.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone11,8;FBMD/iPhone;FBSN/iOS;FBSV/13.0;FBSS/2;FBID/phone;FBLC/en_US;FBOP/5;FBCR/Verizon] iPhone Facebook 0.0 iOS 13.0.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone9,4;FBMD/iPhone;FBSN/iOS;FBSV/13.0;FBSS/3;FBID/phone;FBLC/fr_FR;FBOP/5;FBCR/Orange France] iPhone Facebook 0.0 iOS 13.0.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0 Mobile/15E148 Safari/604.1 iPhone Mobile Safari 13.0 iOS 13.0.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/76.0.3809.123 Mobile/15E148 Safari/605.1 iPhone Chrome Mobile iOS 76.0 iOS 13.1.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/77.0.3865.69 Mobile/15E148 Safari/605.1 iPhone Chrome Mobile iOS 77.0 iOS 13.1.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/77.0.3865.93 Mobile/15E148 Safari/605.1 iPhone Chrome Mobile iOS 77.0 iOS 13.1.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) FxiOS/8.1.3 Mobile/15E148 Safari/605.1.15 iPhone Firefox iOS 8.1 iOS 13.1.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/83.0.268992909 Mobile/15E148 Safari/605.1 iPhone Google 83.0 iOS 13.1.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 iPhone Mobile Safari UI/WKWebView 0.0 iOS 13.1.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 DuckDuckGo/7 iPhone DuckDuckGo Mobile 7.0 iOS 13.1.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone10,1;FBMD/iPhone;FBSN/iOS;FBSV/13.1;FBSS/2;FBID/phone;FBLC/en_US;FBOP/5;FBCR/AT&T] iPhone Facebook 0.0 iOS 13.1.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_1 like Mac OS X) 
AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone10,6;FBMD/iPhone;FBSN/iOS;FBSV/13.1;FBSS/3;FBID/phone;FBLC/en_US;FBOP/5;FBCR/AT&T] iPhone Facebook 0.0 iOS 13.1.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone10,6;FBMD/iPhone;FBSN/iOS;FBSV/13.1;FBSS/3;FBID/phone;FBLC/en_US;FBOP/5;FBCR/T-Mobile] iPhone Facebook 0.0 iOS 13.1.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone11,8;FBMD/iPhone;FBSN/iOS;FBSV/13.1;FBSS/2;FBID/phone;FBLC/en_US;FBOP/5;FBCR/AT&T] iPhone Facebook 0.0 iOS 13.1.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone9,4;FBMD/iPhone;FBSN/iOS;FBSV/13.1;FBSS/3;FBID/phone;FBLC/es_LA;FBOP/5;FBCR/Telcel] iPhone Facebook 0.0 iOS 13.1.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.1 Mobile/15E148 Safari/604.1 iPhone Mobile Safari 13.0 iOS 13.1.0 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_1_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 iPhone Mobile Safari UI/WKWebView 0.0 iOS 13.1.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_1_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone10,1;FBMD/iPhone;FBSN/iOS;FBSV/13.1.1;FBSS/2;FBID/phone;FBLC/en_US;FBOP/5;FBCR/Union] iPhone Facebook 0.0 iOS 13.1.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_1_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone10,3;FBMD/iPhone;FBSN/iOS;FBSV/13.1.1;FBSS/3;FBID/phone;FBLC/en_US;FBOP/5;FBCR/Verizon] iPhone Facebook 0.0 iOS 13.1.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_1_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone11,6;FBMD/iPhone;FBSN/iOS;FBSV/13.1.1;FBSS/3;FBID/phone;FBLC/en_US;FBOP/5;FBCR/T-Mobile] iPhone Facebook 0.0 iOS 13.1.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_1_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone11,6;FBMD/iPhone;FBSN/iOS;FBSV/13.1.1;FBSS/3;FBID/phone;FBLC/en_US;FBOP/5;FBCR/Verizon] iPhone Facebook 0.0 iOS 13.1.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_1_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.1 Mobile/15E148 Safari/604.1 iPhone Mobile Safari 13.0 iOS 13.1.1 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 iPhone Mobile Safari UI/WKWebView 0.0 iOS 13.1.2 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 [FBAN/FBIOS;FBDV/iPhone11,2;FBMD/iPhone;FBSN/iOS;FBSV/13.1.2;FBSS/3;FBID/phone;FBLC/en_US;FBOP/5;FBCR/AT&T] iPhone Facebook 0.0 iOS 13.1.2 +Mozilla/5.0 (iPhone; CPU iPhone OS 13_1_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.1 Mobile/15E148 Safari/604.1 iPhone Mobile Safari 13.0 iOS 13.1.2 diff --git a/tests/queries/0_stateless/02504_regexp_dictionary_ua_parser.sh b/tests/queries/0_stateless/02504_regexp_dictionary_ua_parser.sh index 8e361a191d6..6e386360d60 100755 --- a/tests/queries/0_stateless/02504_regexp_dictionary_ua_parser.sh +++ b/tests/queries/0_stateless/02504_regexp_dictionary_ua_parser.sh @@ -68,13 +68,13 @@ $CLICKHOUSE_CLIENT -n --query=" insert into user_agents select ua from input('ua String') FORMAT LineAsString" < 
$CURDIR/data_ua_parser/useragents.txt $CLICKHOUSE_CLIENT -n --query=" -select device, +select ua, device, concat(tupleElement(browser, 1), ' ', tupleElement(browser, 2), '.', tupleElement(browser, 3)) as browser , concat(tupleElement(os, 1), ' ', tupleElement(os, 2), '.', tupleElement(os, 3), '.', tupleElement(os, 4)) as os from ( - select dictGet('regexp_os', ('os_replacement', 'os_v1_replacement', 'os_v2_replacement', 'os_v3_replacement'), ua) os, + select ua, dictGet('regexp_os', ('os_replacement', 'os_v1_replacement', 'os_v2_replacement', 'os_v3_replacement'), ua) os, dictGet('regexp_browser', ('family_replacement', 'v1_replacement', 'v2_replacement'), ua) as browser, - dictGet('regexp_device', 'device_replacement', ua) device from user_agents); + dictGet('regexp_device', 'device_replacement', ua) device from user_agents) order by ua; " $CLICKHOUSE_CLIENT -n --query=" diff --git a/tests/queries/0_stateless/02513_validate_data_types.sql b/tests/queries/0_stateless/02513_validate_data_types.sql index 1235d00ba79..5eb91ac7879 100644 --- a/tests/queries/0_stateless/02513_validate_data_types.sql +++ b/tests/queries/0_stateless/02513_validate_data_types.sql @@ -5,10 +5,6 @@ select CAST('{"x" : 1}', 'JSON'); -- {serverError ILLEGAL_COLUMN} desc file(nonexist.json, JSONAsObject); -- {serverError ILLEGAL_COLUMN} desc file(nonexist.json, JSONEachRow, 'x JSON'); -- {serverError ILLEGAL_COLUMN} -set allow_experimental_geo_types=0; -select CAST([[(20, 20), (50, 20), (50, 50), (20, 50)], [(30, 30), (50, 50), (50, 30)]], 'Polygon'); -- {serverError ILLEGAL_COLUMN} -desc file(nonexist.json, JSONEachRow, 'pg Polygon'); -- {serverError ILLEGAL_COLUMN} - set allow_suspicious_low_cardinality_types=0; select CAST(1000000, 'LowCardinality(UInt64)'); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} desc file(nonexist.json, JSONEachRow, 'lc LowCardinality(UInt64)'); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} @@ -16,4 +12,3 @@ desc file(nonexist.json, JSONEachRow, 'lc LowCardinality(UInt64)'); -- {serverEr set allow_suspicious_fixed_string_types=0; select CAST('', 'FixedString(1000)'); -- {serverError ILLEGAL_COLUMN} desc file(nonexist.json, JSONEachRow, 'fs FixedString(1000)'); -- {serverError ILLEGAL_COLUMN} - diff --git a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference index 0f6fa7e2e66..8b4cafc3260 100644 --- a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference +++ b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference @@ -24,26 +24,26 @@ Header: count() UInt64 Header: default.a.a2_4 String default.b.b1_0 UInt64 Join (JOIN FillRightFirst) - Header: default.a.a2_4 String - default.a.a1_1 UInt64 + Header: default.a.a1_1 UInt64 + default.a.a2_4 String default.b.b1_0 UInt64 Expression ((JOIN actions + Change column names to column identifiers)) - Header: default.a.a2_4 String - default.a.a1_1 UInt64 - ReadFromStorage (Memory) - Header: a2 String - a1 UInt64 + Header: default.a.a1_1 UInt64 + default.a.a2_4 String + ReadFromMemoryStorage + Header: a1 UInt64 + a2 String Expression ((JOIN actions + Change column names to column identifiers)) Header: default.b.b1_0 UInt64 - ReadFromStorage (Memory) + ReadFromMemoryStorage Header: b1 UInt64 Expression ((JOIN actions + Change column names to column identifiers)) Header: default.c.c1_2 UInt64 - ReadFromStorage (Memory) + ReadFromMemoryStorage Header: c1 UInt64 Expression ((JOIN actions + Change column names to column identifiers)) Header: 
default.d.d1_3 UInt64 - ReadFromStorage (Memory) + ReadFromMemoryStorage Header: d1 UInt64 EXPLAIN PLAN header = 1 SELECT a.a2, d.d2 FROM a JOIN b USING (k) JOIN c USING (k) JOIN d USING (k) @@ -52,41 +52,41 @@ Expression ((Project names + (Projection + DROP unused columns after JOIN))) Header: a2 String d2 String Join (JOIN FillRightFirst) - Header: default.a.k_2 UInt64 - default.a.a2_0 String + Header: default.a.a2_0 String + default.a.k_2 UInt64 default.d.d2_1 String Expression (DROP unused columns after JOIN) - Header: default.a.k_2 UInt64 - default.a.a2_0 String + Header: default.a.a2_0 String + default.a.k_2 UInt64 Join (JOIN FillRightFirst) - Header: default.a.k_2 UInt64 - default.a.a2_0 String + Header: default.a.a2_0 String + default.a.k_2 UInt64 Expression (DROP unused columns after JOIN) - Header: default.a.k_2 UInt64 - default.a.a2_0 String + Header: default.a.a2_0 String + default.a.k_2 UInt64 Join (JOIN FillRightFirst) - Header: default.a.k_2 UInt64 - default.a.a2_0 String + Header: default.a.a2_0 String + default.a.k_2 UInt64 Expression (Change column names to column identifiers) - Header: default.a.k_2 UInt64 - default.a.a2_0 String - ReadFromStorage (Memory) - Header: k UInt64 - a2 String + Header: default.a.a2_0 String + default.a.k_2 UInt64 + ReadFromMemoryStorage + Header: a2 String + k UInt64 Expression (Change column names to column identifiers) Header: default.b.k_3 UInt64 - ReadFromStorage (Memory) + ReadFromMemoryStorage Header: k UInt64 Expression (Change column names to column identifiers) Header: default.c.k_4 UInt64 - ReadFromStorage (Memory) + ReadFromMemoryStorage Header: k UInt64 Expression (Change column names to column identifiers) - Header: default.d.k_5 UInt64 - default.d.d2_1 String - ReadFromStorage (Memory) - Header: k UInt64 - d2 String + Header: default.d.d2_1 String + default.d.k_5 UInt64 + ReadFromMemoryStorage + Header: d2 String + k UInt64 EXPLAIN PLAN header = 1 SELECT b.bx FROM a JOIN (SELECT b1, b2 || 'x' AS bx FROM b ) AS b ON b.b1 = a.a1 @@ -123,28 +123,28 @@ Header: bx String b.bx_0 String b.b1_1 UInt64 Join (JOIN FillRightFirst) - Header: default.a.a2_6 String - default.a.a1_2 UInt64 + Header: default.a.a1_2 UInt64 + default.a.a2_6 String b.bx_0 String b.b1_1 UInt64 Expression ((JOIN actions + Change column names to column identifiers)) - Header: default.a.a2_6 String - default.a.a1_2 UInt64 - ReadFromStorage (Memory) - Header: a2 String - a1 UInt64 + Header: default.a.a1_2 UInt64 + default.a.a2_6 String + ReadFromMemoryStorage + Header: a1 UInt64 + a2 String Expression ((JOIN actions + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))) Header: b.b1_1 UInt64 b.bx_0 String - ReadFromStorage (Memory) - Header: b2 String - b1 UInt64 + ReadFromMemoryStorage + Header: b1 UInt64 + b2 String Expression ((JOIN actions + Change column names to column identifiers)) - Header: default.c.c2_5 String - default.c.c1_3 UInt64 - ReadFromStorage (Memory) - Header: c2 String - c1 UInt64 + Header: default.c.c1_3 UInt64 + default.c.c2_5 String + ReadFromMemoryStorage + Header: c1 UInt64 + c2 String Expression ((JOIN actions + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))) Header: d.d1_4 UInt64 ReadFromStorage (SystemNumbers) diff --git a/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh b/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh index 9e22089d5e1..458a5e95faa 100755 --- 
a/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh +++ b/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) CLICKHOUSE_TEST_ZOOKEEPER_PREFIX="${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}" $CLICKHOUSE_CLIENT -n --query " - DROP TABLE IF EXISTS t_async_insert_cleanup NO DELAY; + DROP TABLE IF EXISTS t_async_insert_cleanup SYNC; CREATE TABLE t_async_insert_cleanup ( KeyID UInt32 ) Engine = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/t_async_insert_cleanup', '{replica}') @@ -27,7 +27,7 @@ old_answer=$($CLICKHOUSE_CLIENT --query "SELECT count(*) FROM system.zookeeper W for i in {1..300}; do answer=$($CLICKHOUSE_CLIENT --query "SELECT count(*) FROM system.zookeeper WHERE path like '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/t_async_insert_cleanup/async_blocks%' settings allow_unrestricted_reads_from_keeper = 'true'") if [ $answer == '10' ]; then - $CLICKHOUSE_CLIENT -n --query "DROP TABLE t_async_insert_cleanup NO DELAY;" + $CLICKHOUSE_CLIENT -n --query "DROP TABLE t_async_insert_cleanup SYNC;" exit 0 fi sleep 1 @@ -36,4 +36,4 @@ done $CLICKHOUSE_CLIENT --query "SELECT count(*) FROM t_async_insert_cleanup" echo $old_answer $CLICKHOUSE_CLIENT --query "SELECT count(*) FROM system.zookeeper WHERE path like '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/t_async_insert_cleanup/async_blocks%' settings allow_unrestricted_reads_from_keeper = 'true'" -$CLICKHOUSE_CLIENT -n --query "DROP TABLE t_async_insert_cleanup NO DELAY;" +$CLICKHOUSE_CLIENT -n --query "DROP TABLE t_async_insert_cleanup SYNC;" diff --git a/tests/queries/0_stateless/02515_projections_with_totals.reference b/tests/queries/0_stateless/02515_projections_with_totals.reference index c6359cae032..f086fcb10e9 100644 --- a/tests/queries/0_stateless/02515_projections_with_totals.reference +++ b/tests/queries/0_stateless/02515_projections_with_totals.reference @@ -1,3 +1,6 @@ 0 0 +0 + +0 diff --git a/tests/queries/0_stateless/02515_projections_with_totals.sql b/tests/queries/0_stateless/02515_projections_with_totals.sql index 4d43d5381da..1e4b5c6f255 100644 --- a/tests/queries/0_stateless/02515_projections_with_totals.sql +++ b/tests/queries/0_stateless/02515_projections_with_totals.sql @@ -3,4 +3,8 @@ CREATE TABLE t (x UInt8, PROJECTION p (SELECT x GROUP BY x)) ENGINE = MergeTree INSERT INTO t VALUES (0); SET group_by_overflow_mode = 'any', max_rows_to_group_by = 1000, totals_mode = 'after_having_auto'; SELECT x FROM t GROUP BY x WITH TOTALS; + +SET optimize_aggregation_in_order=1; +SELECT x FROM t GROUP BY x WITH TOTALS; + DROP TABLE t; diff --git a/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.reference b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.reference index fd0b223f8e5..19da8828c30 100644 --- a/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.reference +++ b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.reference @@ -2,6 +2,10 @@ 1 0 +1 +1 + +1 \N 100000000000000000000 diff --git a/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql index b6e60aa2e1f..6b58d737a3e 100644 --- a/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql +++ b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql @@ -1,3 +1,5 @@ +SET allow_experimental_analyzer = 1; + SELECT * FROM ( @@ -12,7 +14,26 @@ 
INNER JOIN SELECT 1 GROUP BY 1 WITH TOTALS -) AS t2 USING (a); +) AS t2 USING (a) +SETTINGS allow_experimental_analyzer=0; + +SELECT * +FROM +( + SELECT 1 AS a +) AS t1 +INNER JOIN +( + SELECT 1 AS a + GROUP BY 1 + WITH TOTALS + UNION ALL + SELECT 1 + GROUP BY 1 + WITH TOTALS +) AS t2 USING (a) +SETTINGS allow_experimental_analyzer=1; + SELECT a FROM diff --git a/tests/queries/0_stateless/02516_projections_and_context.reference b/tests/queries/0_stateless/02516_projections_and_context.reference index e69de29bb2d..6ed281c757a 100644 --- a/tests/queries/0_stateless/02516_projections_and_context.reference +++ b/tests/queries/0_stateless/02516_projections_and_context.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/02516_projections_and_context.sql b/tests/queries/0_stateless/02516_projections_and_context.sql index a7c143c7900..2b659eafabc 100644 --- a/tests/queries/0_stateless/02516_projections_and_context.sql +++ b/tests/queries/0_stateless/02516_projections_and_context.sql @@ -1,6 +1,10 @@ DROP TABLE IF EXISTS test1__fuzz_37; CREATE TABLE test1__fuzz_37 (`i` Date) ENGINE = MergeTree ORDER BY i; insert into test1__fuzz_37 values ('2020-10-10'); +set allow_experimental_analyzer = 0; SELECT count() FROM test1__fuzz_37 GROUP BY dictHas(NULL, (dictHas(NULL, (('', materialize(NULL)), materialize(NULL))), 'KeyKey')), dictHas('test_dictionary', tuple(materialize('Ke\0'))), tuple(dictHas(NULL, (tuple('Ke\0Ke\0Ke\0Ke\0Ke\0Ke\0\0\0\0Ke\0'), materialize(NULL)))), 'test_dicti\0nary', (('', materialize(NULL)), dictHas(NULL, (dictHas(NULL, tuple(materialize(NULL))), 'KeyKeyKeyKeyKeyKeyKeyKey')), materialize(NULL)); -- { serverError BAD_ARGUMENTS } SELECT count() FROM test1__fuzz_37 GROUP BY dictHas('non_existing_dictionary', materialize('a')); -- { serverError BAD_ARGUMENTS } +set allow_experimental_analyzer = 1; +SELECT count() FROM test1__fuzz_37 GROUP BY dictHas(NULL, (dictHas(NULL, (('', materialize(NULL)), materialize(NULL))), 'KeyKey')), dictHas('test_dictionary', tuple(materialize('Ke\0'))), tuple(dictHas(NULL, (tuple('Ke\0Ke\0Ke\0Ke\0Ke\0Ke\0\0\0\0Ke\0'), materialize(NULL)))), 'test_dicti\0nary', (('', materialize(NULL)), dictHas(NULL, (dictHas(NULL, tuple(materialize(NULL))), 'KeyKeyKeyKeyKeyKeyKeyKey')), materialize(NULL)); +SELECT count() FROM test1__fuzz_37 GROUP BY dictHas('non_existing_dictionary', materialize('a')); DROP TABLE test1__fuzz_37; diff --git a/tests/queries/0_stateless/02516_projections_with_rollup.sql b/tests/queries/0_stateless/02516_projections_with_rollup.sql index e670fbb7827..038caf59264 100644 --- a/tests/queries/0_stateless/02516_projections_with_rollup.sql +++ b/tests/queries/0_stateless/02516_projections_with_rollup.sql @@ -1,3 +1,6 @@ +-- Tags: disabled +-- FIXME https://github.com/ClickHouse/ClickHouse/issues/49552 + DROP TABLE IF EXISTS video_log; DROP TABLE IF EXISTS video_log_result__fuzz_0; DROP TABLE IF EXISTS rng; diff --git a/tests/queries/0_stateless/02521_aggregation_by_partitions.sql b/tests/queries/0_stateless/02521_aggregation_by_partitions.sql index aaa1b8b9b2d..b7d4a6ee93a 100644 --- a/tests/queries/0_stateless/02521_aggregation_by_partitions.sql +++ b/tests/queries/0_stateless/02521_aggregation_by_partitions.sql @@ -3,7 +3,7 @@ set max_threads = 16; set allow_aggregate_partitions_independently = 1; set force_aggregate_partitions_independently = 1; -set allow_experimental_projection_optimization = 0; +set optimize_use_projections = 0; create table t1(a UInt32) engine=MergeTree order by tuple() partition by a % 4 settings 
index_granularity = 8192, index_granularity_bytes = 10485760; diff --git a/tests/queries/0_stateless/02521_cannot_find_column_in_projection.reference b/tests/queries/0_stateless/02521_cannot_find_column_in_projection.reference new file mode 100644 index 00000000000..2cd767c8054 --- /dev/null +++ b/tests/queries/0_stateless/02521_cannot_find_column_in_projection.reference @@ -0,0 +1 @@ +2023-01-05 10 diff --git a/tests/queries/0_stateless/02521_cannot-find-column-in-projection.sql b/tests/queries/0_stateless/02521_cannot_find_column_in_projection.sql similarity index 50% rename from tests/queries/0_stateless/02521_cannot-find-column-in-projection.sql rename to tests/queries/0_stateless/02521_cannot_find_column_in_projection.sql index 31602c5bae2..6ee8ec07178 100644 --- a/tests/queries/0_stateless/02521_cannot-find-column-in-projection.sql +++ b/tests/queries/0_stateless/02521_cannot_find_column_in_projection.sql @@ -1,3 +1,5 @@ +drop table if exists test; create table test(day Date, id UInt32) engine=MergeTree partition by day order by tuple(); insert into test select toDate('2023-01-05') AS day, number from numbers(10); -with toUInt64(id) as id_with select day, count(id_with) from test where day >= '2023-01-01' group by day limit 1000; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK } +with toUInt64(id) as id_with select day, count(id_with) from test where day >= '2023-01-01' group by day limit 1000; +drop table test; diff --git a/tests/queries/0_stateless/02524_fuzz_and_fuss_2.sql b/tests/queries/0_stateless/02524_fuzz_and_fuss_2.sql index e32238001bf..d78e8d34232 100644 --- a/tests/queries/0_stateless/02524_fuzz_and_fuss_2.sql +++ b/tests/queries/0_stateless/02524_fuzz_and_fuss_2.sql @@ -9,6 +9,6 @@ ENGINE = Memory; INSERT INTO data_a_02187 SELECT * FROM system.one -SETTINGS max_block_size = '1', min_insert_block_size_rows = '65536', min_insert_block_size_bytes = '0', max_insert_threads = '0', max_threads = '3', receive_timeout = '10', receive_data_timeout_ms = '10000', connections_with_failover_max_tries = '0', extremes = '1', use_uncompressed_cache = '0', optimize_move_to_prewhere = '1', optimize_move_to_prewhere_if_final = '0', replication_alter_partitions_sync = '2', totals_mode = 'before_having', allow_suspicious_low_cardinality_types = '1', compile_expressions = '1', min_count_to_compile_expression = '0', group_by_two_level_threshold = '100', distributed_aggregation_memory_efficient = '0', distributed_group_by_no_merge = '1', optimize_distributed_group_by_sharding_key = '1', optimize_skip_unused_shards = '1', optimize_skip_unused_shards_rewrite_in = '1', force_optimize_skip_unused_shards = '2', optimize_skip_unused_shards_nesting = '1', force_optimize_skip_unused_shards_nesting = '2', merge_tree_min_rows_for_concurrent_read = '10000', force_primary_key = '1', network_compression_method = 'ZSTD', network_zstd_compression_level = '7', log_queries = '0', log_queries_min_type = 'QUERY_FINISH', distributed_product_mode = 'local', insert_quorum = '2', insert_quorum_timeout = '0', insert_quorum_parallel = '0', select_sequential_consistency = '1', join_use_nulls = '1', any_join_distinct_right_table_keys = '1', preferred_max_column_in_block_size_bytes = '32', insert_distributed_sync = '1', insert_allow_materialized_columns = '1', use_index_for_in_with_subqueries = '1', joined_subquery_requires_alias = '0', empty_result_for_aggregation_by_empty_set = '1', allow_suspicious_codecs = '1', query_profiler_real_time_period_ns = '0', query_profiler_cpu_time_period_ns = '0', 
opentelemetry_start_trace_probability = '1', max_rows_to_read = '1000000', read_overflow_mode = 'break', max_rows_to_group_by = '10', group_by_overflow_mode = 'any', max_rows_to_sort = '100', sort_overflow_mode = 'break', max_result_rows = '10', max_execution_time = '3', max_execution_speed = '1', max_bytes_in_join = '100', join_algorithm = 'partial_merge', max_memory_usage = '1099511627776', log_query_threads = '1', send_logs_level = 'fatal', enable_optimize_predicate_expression = '1', prefer_localhost_replica = '1', optimize_read_in_order = '1', optimize_aggregation_in_order = '1', read_in_order_two_level_merge_threshold = '1', allow_introspection_functions = '1', check_query_single_value_result = '1', allow_experimental_live_view = '1', default_table_engine = 'Memory', mutations_sync = '2', convert_query_to_cnf = '0', optimize_arithmetic_operations_in_aggregate_functions = '1', optimize_duplicate_order_by_and_distinct = '0', optimize_multiif_to_if = '0', optimize_monotonous_functions_in_order_by = '1', optimize_functions_to_subcolumns = '1', optimize_using_constraints = '1', optimize_substitute_columns = '1', optimize_append_index = '1', transform_null_in = '1', allow_experimental_geo_types = '1', data_type_default_nullable = '1', cast_keep_nullable = '1', cast_ipv4_ipv6_default_on_conversion_error = '0', system_events_show_zero_values = '1', enable_global_with_statement = '1', optimize_on_insert = '0', optimize_rewrite_sum_if_to_count_if = '1', distributed_ddl_output_mode = 'throw', union_default_mode = 'ALL', optimize_aggregators_of_group_by_keys = '1', optimize_group_by_function_keys = '1', short_circuit_function_evaluation = 'enable', async_insert = '1', enable_filesystem_cache = '0', allow_deprecated_database_ordinary = '1', allow_deprecated_syntax_for_merge_tree = '1', allow_experimental_nlp_functions = '1', allow_experimental_object_type = '1', allow_experimental_map_type = '1', allow_experimental_projection_optimization = '1', input_format_null_as_default = '1', input_format_ipv4_default_on_conversion_error = '0', input_format_ipv6_default_on_conversion_error = '0', output_format_json_named_tuples_as_objects = '1', output_format_write_statistics = '0', output_format_pretty_row_numbers = '1'; +SETTINGS max_block_size = '1', min_insert_block_size_rows = '65536', min_insert_block_size_bytes = '0', max_insert_threads = '0', max_threads = '3', receive_timeout = '10', receive_data_timeout_ms = '10000', connections_with_failover_max_tries = '0', extremes = '1', use_uncompressed_cache = '0', optimize_move_to_prewhere = '1', optimize_move_to_prewhere_if_final = '0', replication_alter_partitions_sync = '2', totals_mode = 'before_having', allow_suspicious_low_cardinality_types = '1', compile_expressions = '1', min_count_to_compile_expression = '0', group_by_two_level_threshold = '100', distributed_aggregation_memory_efficient = '0', distributed_group_by_no_merge = '1', optimize_distributed_group_by_sharding_key = '1', optimize_skip_unused_shards = '1', optimize_skip_unused_shards_rewrite_in = '1', force_optimize_skip_unused_shards = '2', optimize_skip_unused_shards_nesting = '1', force_optimize_skip_unused_shards_nesting = '2', merge_tree_min_rows_for_concurrent_read = '10000', force_primary_key = '1', network_compression_method = 'ZSTD', network_zstd_compression_level = '7', log_queries = '0', log_queries_min_type = 'QUERY_FINISH', distributed_product_mode = 'local', insert_quorum = '2', insert_quorum_timeout = '0', insert_quorum_parallel = '0', select_sequential_consistency = '1', 
join_use_nulls = '1', any_join_distinct_right_table_keys = '1', preferred_max_column_in_block_size_bytes = '32', insert_distributed_sync = '1', insert_allow_materialized_columns = '1', use_index_for_in_with_subqueries = '1', joined_subquery_requires_alias = '0', empty_result_for_aggregation_by_empty_set = '1', allow_suspicious_codecs = '1', query_profiler_real_time_period_ns = '0', query_profiler_cpu_time_period_ns = '0', opentelemetry_start_trace_probability = '1', max_rows_to_read = '1000000', read_overflow_mode = 'break', max_rows_to_group_by = '10', group_by_overflow_mode = 'any', max_rows_to_sort = '100', sort_overflow_mode = 'break', max_result_rows = '10', max_execution_time = '3', max_execution_speed = '1', max_bytes_in_join = '100', join_algorithm = 'partial_merge', max_memory_usage = '1099511627776', log_query_threads = '1', send_logs_level = 'fatal', enable_optimize_predicate_expression = '1', prefer_localhost_replica = '1', optimize_read_in_order = '1', optimize_aggregation_in_order = '1', read_in_order_two_level_merge_threshold = '1', allow_introspection_functions = '1', check_query_single_value_result = '1', allow_experimental_live_view = '1', default_table_engine = 'Memory', mutations_sync = '2', convert_query_to_cnf = '0', optimize_arithmetic_operations_in_aggregate_functions = '1', optimize_duplicate_order_by_and_distinct = '0', optimize_multiif_to_if = '0', optimize_monotonous_functions_in_order_by = '1', optimize_functions_to_subcolumns = '1', optimize_using_constraints = '1', optimize_substitute_columns = '1', optimize_append_index = '1', transform_null_in = '1', data_type_default_nullable = '1', cast_keep_nullable = '1', cast_ipv4_ipv6_default_on_conversion_error = '0', system_events_show_zero_values = '1', enable_global_with_statement = '1', optimize_on_insert = '0', optimize_rewrite_sum_if_to_count_if = '1', distributed_ddl_output_mode = 'throw', union_default_mode = 'ALL', optimize_aggregators_of_group_by_keys = '1', optimize_group_by_function_keys = '1', short_circuit_function_evaluation = 'enable', async_insert = '1', enable_filesystem_cache = '0', allow_deprecated_database_ordinary = '1', allow_deprecated_syntax_for_merge_tree = '1', allow_experimental_nlp_functions = '1', allow_experimental_object_type = '1', allow_experimental_map_type = '1', optimize_use_projections = '1', input_format_null_as_default = '1', input_format_ipv4_default_on_conversion_error = '0', input_format_ipv6_default_on_conversion_error = '0', output_format_json_named_tuples_as_objects = '1', output_format_write_statistics = '0', output_format_pretty_row_numbers = '1'; DROP TABLE data_a_02187; diff --git a/tests/queries/0_stateless/02525_different_engines_in_temporary_tables.reference b/tests/queries/0_stateless/02525_different_engines_in_temporary_tables.reference new file mode 100644 index 00000000000..3d1916b29f6 --- /dev/null +++ b/tests/queries/0_stateless/02525_different_engines_in_temporary_tables.reference @@ -0,0 +1,14 @@ +1 a +2 b +3 c +0 +0 +1 a +2 b +3 c +1 a +2 b +3 c +1 a +2 b +3 c diff --git a/tests/queries/0_stateless/02525_different_engines_in_temporary_tables.sql b/tests/queries/0_stateless/02525_different_engines_in_temporary_tables.sql new file mode 100644 index 00000000000..7ebc05dfece --- /dev/null +++ b/tests/queries/0_stateless/02525_different_engines_in_temporary_tables.sql @@ -0,0 +1,66 @@ +DROP TEMPORARY TABLE IF EXISTS table_merge_tree_02525; +CREATE TEMPORARY TABLE table_merge_tree_02525 +( + id UInt64, + info String +) +ENGINE = MergeTree +ORDER BY id +PRIMARY 
KEY id; +INSERT INTO table_merge_tree_02525 VALUES (1, 'a'), (2, 'b'), (3, 'c'); +SELECT * FROM table_merge_tree_02525; +-- Check that temporary table with MergeTree is not sent to remote servers +-- The query with remote() should not fail +SELECT dummy FROM remote('127.0.0.{1,2}', system, one); +DROP TEMPORARY TABLE table_merge_tree_02525; + +DROP TEMPORARY TABLE IF EXISTS table_log_02525; +CREATE TEMPORARY TABLE table_log_02525 +( + id UInt64, + info String +) +ENGINE = Log; +INSERT INTO table_log_02525 VALUES (1, 'a'), (2, 'b'), (3, 'c'); +SELECT * FROM table_log_02525; +DROP TEMPORARY TABLE table_log_02525; + +DROP TEMPORARY TABLE IF EXISTS table_stripe_log_02525; +CREATE TEMPORARY TABLE table_stripe_log_02525 +( + id UInt64, + info String +) +ENGINE = StripeLog; +INSERT INTO table_stripe_log_02525 VALUES (1, 'a'), (2, 'b'), (3, 'c'); +SELECT * FROM table_stripe_log_02525; +DROP TEMPORARY TABLE table_stripe_log_02525; + +DROP TEMPORARY TABLE IF EXISTS table_tiny_log_02525; +CREATE TEMPORARY TABLE table_tiny_log_02525 +( + id UInt64, + info String +) +ENGINE = TinyLog; +INSERT INTO table_tiny_log_02525 VALUES (1, 'a'), (2, 'b'), (3, 'c'); +SELECT * FROM table_tiny_log_02525; +DROP TEMPORARY TABLE table_tiny_log_02525; + +DROP TEMPORARY TABLE IF EXISTS table_replicated_merge_tree_02525; +CREATE TEMPORARY TABLE table_replicated_merge_tree_02525 +( + id UInt64, + info String +) +ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_02525/table_replicated_merge_tree_02525', 'r1') +ORDER BY id +PRIMARY KEY id; -- { serverError INCORRECT_QUERY } + +DROP TEMPORARY TABLE IF EXISTS table_keeper_map_02525; +CREATE TEMPORARY TABLE table_keeper_map_02525 +( + key String, + value UInt32 +) Engine=KeeperMap('/' || currentDatabase() || '/test02525') +PRIMARY KEY(key); -- { serverError INCORRECT_QUERY } diff --git a/tests/queries/0_stateless/02534_keyed_siphash.reference b/tests/queries/0_stateless/02534_keyed_siphash.reference index 52e92f37720..3606b9a41db 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.reference +++ b/tests/queries/0_stateless/02534_keyed_siphash.reference @@ -191,6 +191,6 @@ E51B38608EF25F57 1 1 E28DBDE7FE22E41C -1CE422FEE7BD8DE20000000000000000 +1 E28DBDE7FE22E41C -1CE422FEE7BD8DE20000000000000000 +1 diff --git a/tests/queries/0_stateless/02534_keyed_siphash.sql b/tests/queries/0_stateless/02534_keyed_siphash.sql index 3c41efd7d58..9c914f586f0 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.sql +++ b/tests/queries/0_stateless/02534_keyed_siphash.sql @@ -269,6 +269,6 @@ select sipHash64Keyed(toUInt64(0), '1'); -- { serverError 48 } select sipHash128Keyed(toUInt64(0), '1'); -- { serverError 48 } select hex(sipHash64()); -select hex(sipHash128()); +SELECT hex(sipHash128()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128()) = '1CE422FEE7BD8DE20000000000000000'; select hex(sipHash64Keyed()); -select hex(sipHash128Keyed()); +SELECT hex(sipHash128Keyed()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128Keyed()) = '1CE422FEE7BD8DE20000000000000000'; diff --git a/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.reference b/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.reference new file mode 100644 index 00000000000..63610604ddd --- /dev/null +++ b/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.reference @@ -0,0 +1,266 @@ +-- { echoOn } +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY ROLLUP(number, number % 2) +ORDER BY (number, number % 2, 
val) +SETTINGS group_by_use_nulls=1; +0 0 0 +0 \N 0 +1 1 1 +1 \N 1 +2 0 2 +2 \N 2 +3 1 3 +3 \N 3 +4 0 4 +4 \N 4 +5 1 5 +5 \N 5 +6 0 6 +6 \N 6 +7 1 7 +7 \N 7 +8 0 8 +8 \N 8 +9 1 9 +9 \N 9 +\N \N 45 +set optimize_group_by_function_keys = 0; +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY ROLLUP(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; +0 0 0 +0 \N 0 +1 1 1 +1 \N 1 +2 0 2 +2 \N 2 +3 1 3 +3 \N 3 +4 0 4 +4 \N 4 +5 1 5 +5 \N 5 +6 0 6 +6 \N 6 +7 1 7 +7 \N 7 +8 0 8 +8 \N 8 +9 1 9 +9 \N 9 +\N \N 45 +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY ROLLUP(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=0; +0 0 0 +0 0 0 +0 0 45 +1 0 1 +1 1 1 +2 0 2 +2 0 2 +3 0 3 +3 1 3 +4 0 4 +4 0 4 +5 0 5 +5 1 5 +6 0 6 +6 0 6 +7 0 7 +7 1 7 +8 0 8 +8 0 8 +9 0 9 +9 1 9 +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY CUBE(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; +0 0 0 +0 \N 0 +1 1 1 +1 \N 1 +2 0 2 +2 \N 2 +3 1 3 +3 \N 3 +4 0 4 +4 \N 4 +5 1 5 +5 \N 5 +6 0 6 +6 \N 6 +7 1 7 +7 \N 7 +8 0 8 +8 \N 8 +9 1 9 +9 \N 9 +\N 0 20 +\N 1 25 +\N \N 45 +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY CUBE(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=0; +0 0 0 +0 0 0 +0 0 20 +0 0 45 +0 1 25 +1 0 1 +1 1 1 +2 0 2 +2 0 2 +3 0 3 +3 1 3 +4 0 4 +4 0 4 +5 0 5 +5 1 5 +6 0 6 +6 0 6 +7 0 7 +7 1 7 +8 0 8 +8 0 8 +9 0 9 +9 1 9 +SELECT + number, + number % 2, + sum(number) AS val +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls = 1; +0 \N 0 +1 \N 1 +2 \N 2 +3 \N 3 +4 \N 4 +5 \N 5 +6 \N 6 +7 \N 7 +8 \N 8 +9 \N 9 +\N 0 20 +\N 1 25 +SELECT + number, + number % 2, + sum(number) AS val +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls = 0; +0 0 0 +0 0 20 +0 1 25 +1 0 1 +2 0 2 +3 0 3 +4 0 4 +5 0 5 +6 0 6 +7 0 7 +8 0 8 +9 0 9 +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY ROLLUP(number, number % 2) WITH TOTALS +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; +0 0 0 +0 \N 0 +1 1 1 +1 \N 1 +2 0 2 +2 \N 2 +3 1 3 +3 \N 3 +4 0 4 +4 \N 4 +5 1 5 +5 \N 5 +6 0 6 +6 \N 6 +7 1 7 +7 \N 7 +8 0 8 +8 \N 8 +9 1 9 +9 \N 9 +\N \N 45 + +0 0 45 +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY CUBE(number, number % 2) WITH TOTALS +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; +0 0 0 +0 \N 0 +1 1 1 +1 \N 1 +2 0 2 +2 \N 2 +3 1 3 +3 \N 3 +4 0 4 +4 \N 4 +5 1 5 +5 \N 5 +6 0 6 +6 \N 6 +7 1 7 +7 \N 7 +8 0 8 +8 \N 8 +9 1 9 +9 \N 9 +\N 0 20 +\N 1 25 +\N \N 45 + +0 0 45 +SELECT + number, + number % 2, + sum(number) AS val +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY 1, tuple(val) +SETTINGS group_by_use_nulls = 1, max_bytes_before_external_sort=10; +0 \N 0 +1 \N 1 +2 \N 2 +3 \N 3 +4 \N 4 +5 \N 5 +6 \N 6 +7 \N 7 +8 \N 8 +9 \N 9 +\N 0 20 +\N 1 25 diff --git a/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.sql b/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.sql new file mode 100644 index 00000000000..a4d4f2f8bc9 --- /dev/null +++ b/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.sql @@ -0,0 +1,85 @@ +SET allow_experimental_analyzer=1; + +-- { echoOn } +SELECT number, 
number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY ROLLUP(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; + +set optimize_group_by_function_keys = 0; + +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY ROLLUP(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; + +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY ROLLUP(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=0; + +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY CUBE(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; + +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY CUBE(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=0; + +SELECT + number, + number % 2, + sum(number) AS val +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls = 1; + +SELECT + number, + number % 2, + sum(number) AS val +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls = 0; + +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY ROLLUP(number, number % 2) WITH TOTALS +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; + +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY CUBE(number, number % 2) WITH TOTALS +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; + +SELECT + number, + number % 2, + sum(number) AS val +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY 1, tuple(val) +SETTINGS group_by_use_nulls = 1, max_bytes_before_external_sort=10; diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference new file mode 100644 index 00000000000..8d0f56ba185 --- /dev/null +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference @@ -0,0 +1,173 @@ +query='SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)' with custom_key='sipHash64(x)' +filter_type='default' max_replicas=1 prefer_localhost_replica=0 +Hello +filter_type='default' max_replicas=2 prefer_localhost_replica=0 +Hello +filter_type='default' max_replicas=3 prefer_localhost_replica=0 +Hello +filter_type='range' max_replicas=1 prefer_localhost_replica=0 +Hello +filter_type='range' max_replicas=2 prefer_localhost_replica=0 +Hello +filter_type='range' max_replicas=3 prefer_localhost_replica=0 +Hello +filter_type='default' max_replicas=1 prefer_localhost_replica=1 +Hello +filter_type='default' max_replicas=2 prefer_localhost_replica=1 +Hello +filter_type='default' max_replicas=3 prefer_localhost_replica=1 +Hello +filter_type='range' max_replicas=1 prefer_localhost_replica=1 +Hello +filter_type='range' max_replicas=2 prefer_localhost_replica=1 +Hello +filter_type='range' max_replicas=3 prefer_localhost_replica=1 +Hello +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y' with custom_key='y' +filter_type='default' max_replicas=1 prefer_localhost_replica=0 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 prefer_localhost_replica=0 +0 334 +1 333 +2 333 
+filter_type='default' max_replicas=3 prefer_localhost_replica=0 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 prefer_localhost_replica=0 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 prefer_localhost_replica=0 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 prefer_localhost_replica=0 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=1 prefer_localhost_replica=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 prefer_localhost_replica=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 prefer_localhost_replica=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 prefer_localhost_replica=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 prefer_localhost_replica=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 prefer_localhost_replica=1 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y' with custom_key='cityHash64(y)' +filter_type='default' max_replicas=1 prefer_localhost_replica=0 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 prefer_localhost_replica=0 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 prefer_localhost_replica=0 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 prefer_localhost_replica=0 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 prefer_localhost_replica=0 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 prefer_localhost_replica=0 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=1 prefer_localhost_replica=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 prefer_localhost_replica=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 prefer_localhost_replica=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 prefer_localhost_replica=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 prefer_localhost_replica=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 prefer_localhost_replica=1 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y' with custom_key='cityHash64(y) + 1' +filter_type='default' max_replicas=1 prefer_localhost_replica=0 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 prefer_localhost_replica=0 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 prefer_localhost_replica=0 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 prefer_localhost_replica=0 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 prefer_localhost_replica=0 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 prefer_localhost_replica=0 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=1 prefer_localhost_replica=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 prefer_localhost_replica=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 prefer_localhost_replica=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 prefer_localhost_replica=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 prefer_localhost_replica=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 prefer_localhost_replica=1 +0 334 +1 333 +2 333 +1 diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh new file mode 100755 index 00000000000..9850406eb3a --- /dev/null +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh @@ -0,0 
+1,46 @@ +#!/usr/bin/env bash +# Tags: no-parallel, long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +function run_with_custom_key { + echo "query='$1' with custom_key='$2'" + for prefer_localhost_replica in 0 1; do + for filter_type in 'default' 'range'; do + for max_replicas in {1..3}; do + echo "filter_type='$filter_type' max_replicas=$max_replicas prefer_localhost_replica=$prefer_localhost_replica" + query="$1 SETTINGS max_parallel_replicas=$max_replicas\ + , parallel_replicas_custom_key='$2'\ + , parallel_replicas_custom_key_filter_type='$filter_type'\ + , prefer_localhost_replica=$prefer_localhost_replica" + $CLICKHOUSE_CLIENT --query="$query" + done + done + done +} + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS 02535_custom_key"; + +$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key (x String) ENGINE = MergeTree ORDER BY x"; +$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key VALUES ('Hello')"; + +run_with_custom_key "SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)" "sipHash64(x)" + +$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key" + +$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key (x String, y Int32) ENGINE = MergeTree ORDER BY cityHash64(x)" +$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key SELECT toString(number), number % 3 FROM numbers(1000)" + +function run_count_with_custom_key { + run_with_custom_key "SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y" "$1" +} + +run_count_with_custom_key "y" +run_count_with_custom_key "cityHash64(y)" +run_count_with_custom_key "cityHash64(y) + 1" + +$CLICKHOUSE_CLIENT --query="SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) as t1 JOIN 02535_custom_key USING y" --parallel_replicas_custom_key="y" --send_logs_level="trace" 2>&1 | grep -Fac "JOINs are not supported with" + +$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key" diff --git a/tests/queries/0_stateless/02536_distributed_detach_table.reference b/tests/queries/0_stateless/02536_distributed_detach_table.reference new file mode 100644 index 00000000000..f09bace4421 --- /dev/null +++ b/tests/queries/0_stateless/02536_distributed_detach_table.reference @@ -0,0 +1,2 @@ +0 0 +10 20 diff --git a/tests/queries/0_stateless/02536_distributed_detach_table.sql b/tests/queries/0_stateless/02536_distributed_detach_table.sql new file mode 100644 index 00000000000..92bee1ee544 --- /dev/null +++ b/tests/queries/0_stateless/02536_distributed_detach_table.sql @@ -0,0 +1,16 @@ +-- test detach distributed table with pending files +CREATE TABLE test_02536 (n Int8) ENGINE=MergeTree() ORDER BY tuple(); +CREATE TABLE test_dist_02536 (n Int8) ENGINE=Distributed(test_cluster_two_shards, currentDatabase(), test_02536, rand()); +SYSTEM STOP DISTRIBUTED SENDS test_dist_02536; + +INSERT INTO test_dist_02536 SELECT number FROM numbers(5) SETTINGS prefer_localhost_replica=0; +SELECT count(n), sum(n) FROM test_dist_02536; -- 0 0 + +DETACH TABLE test_dist_02536; +ATTACH TABLE test_dist_02536; + +SYSTEM FLUSH DISTRIBUTED test_dist_02536; + +SELECT count(n), sum(n) FROM test_dist_02536; -- 10 20 +DROP TABLE test_02536; +DROP TABLE test_dist_02536; diff --git a/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.reference 
b/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.reference new file mode 100644 index 00000000000..5e50b9e6cbf --- /dev/null +++ b/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.reference @@ -0,0 +1,137 @@ +** replaceAll() ** +- non-const needle, const replacement +1 Hello World l x Hexxo Worxd +2 Hello World ll x Hexo World +3 Hello World not_found x Hello World +4 Hello World [eo] x Hello World +5 Hello World . x Hello World +1 Hello World l x Hexxo Worxd +2 Hello World ll x Hexo World +3 Hello World not_found x Hello World +4 Hello World [eo] x Hello World +5 Hello World . x Hello World +- const needle, non-const replacement +1 Hello World l xx Hexxxxo Worxxd +2 Hello World l x Hexxo Worxd +3 Hello World l x Hexxo Worxd +4 Hello World l x Hexxo Worxd +5 Hello World l x Hexxo Worxd +1 Hello World l xx Hexxxxo Worxxd +2 Hello World l x Hexxo Worxd +3 Hello World l x Hexxo Worxd +4 Hello World l x Hexxo Worxd +5 Hello World l x Hexxo Worxd +- non-const needle, non-const replacement +1 Hello World l xx Hexxxxo Worxxd +2 Hello World ll x Hexo World +3 Hello World not_found x Hello World +4 Hello World [eo] x Hello World +5 Hello World . x Hello World +1 Hello World l xx Hexxxxo Worxxd +2 Hello World ll x Hexo World +3 Hello World not_found x Hello World +4 Hello World [eo] x Hello World +5 Hello World . x Hello World +** replaceOne() ** +- non-const needle, const replacement +1 Hello World l x Hexlo World +2 Hello World ll x Hexo World +3 Hello World not_found x Hello World +4 Hello World [eo] x Hello World +5 Hello World . x Hello World +1 Hello World l x Hexlo World +2 Hello World ll x Hexo World +3 Hello World not_found x Hello World +4 Hello World [eo] x Hello World +5 Hello World . x Hello World +- const needle, non-const replacement +1 Hello World l xx Hexxlo World +2 Hello World l x Hexlo World +3 Hello World l x Hexlo World +4 Hello World l x Hexlo World +5 Hello World l x Hexlo World +1 Hello World l xx Hexxlo World +2 Hello World l x Hexlo World +3 Hello World l x Hexlo World +4 Hello World l x Hexlo World +5 Hello World l x Hexlo World +- non-const needle, non-const replacement +1 Hello World l xx Hexxlo World +2 Hello World ll x Hexo World +3 Hello World not_found x Hello World +4 Hello World [eo] x Hello World +5 Hello World . x Hello World +1 Hello World l xx Hexxlo World +2 Hello World ll x Hexo World +3 Hello World not_found x Hello World +4 Hello World [eo] x Hello World +5 Hello World . x Hello World +** replaceRegexpAll() ** +- non-const needle, const replacement +1 Hello World l x Hexxo Worxd +2 Hello World ll x Hexo World +3 Hello World not_found x Hello World +4 Hello World [eo] x Hxllx Wxrld +5 Hello World . x xxxxxxxxxxx +1 Hello World l x Hexxo Worxd +2 Hello World ll x Hexo World +3 Hello World not_found x Hello World +4 Hello World [eo] x Hxllx Wxrld +5 Hello World . x xxxxxxxxxxx +- const needle, non-const replacement +1 Hello World l xx Hexxxxo Worxxd +2 Hello World l x Hexxo Worxd +3 Hello World l x Hexxo Worxd +4 Hello World l x Hexxo Worxd +5 Hello World l x Hexxo Worxd +1 Hello World l xx Hexxxxo Worxxd +2 Hello World l x Hexxo Worxd +3 Hello World l x Hexxo Worxd +4 Hello World l x Hexxo Worxd +5 Hello World l x Hexxo Worxd +- non-const needle, non-const replacement +1 Hello World l xx Hexxxxo Worxxd +2 Hello World ll x Hexo World +3 Hello World not_found x Hello World +4 Hello World [eo] x Hxllx Wxrld +5 Hello World . 
x xxxxxxxxxxx +1 Hello World l xx Hexxxxo Worxxd +2 Hello World ll x Hexo World +3 Hello World not_found x Hello World +4 Hello World [eo] x Hxllx Wxrld +5 Hello World . x xxxxxxxxxxx +** replaceRegexpOne() ** +- non-const needle, const replacement +1 Hello World l x Hexlo World +2 Hello World ll x Hexo World +3 Hello World not_found x Hello World +4 Hello World [eo] x Hxllo World +5 Hello World . x xello World +1 Hello World l x Hexlo World +2 Hello World ll x Hexo World +3 Hello World not_found x Hello World +4 Hello World [eo] x Hxllo World +5 Hello World . x xello World +- const needle, non-const replacement +1 Hello World l xx Hexxlo World +2 Hello World l x Hexlo World +3 Hello World l x Hexlo World +4 Hello World l x Hexlo World +5 Hello World l x Hexlo World +1 Hello World l xx Hexxlo World +2 Hello World l x Hexlo World +3 Hello World l x Hexlo World +4 Hello World l x Hexlo World +5 Hello World l x Hexlo World +- non-const needle, non-const replacement +1 Hello World l xx Hexxlo World +2 Hello World ll x Hexo World +3 Hello World not_found x Hello World +4 Hello World [eo] x Hxllo World +5 Hello World . x xello World +1 Hello World l xx Hexxlo World +2 Hello World ll x Hexo World +3 Hello World not_found x Hello World +4 Hello World [eo] x Hxllo World +5 Hello World . x xello World +Check that an exception is thrown if the needle is empty diff --git a/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.sql b/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.sql new file mode 100644 index 00000000000..926bde3a74b --- /dev/null +++ b/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.sql @@ -0,0 +1,100 @@ +-- Tests that functions replaceOne(), replaceAll(), replaceRegexpOne(), replaceRegexpAll() work with non-const pattern and replacement arguments + +DROP TABLE IF EXISTS test_tab; + +CREATE TABLE test_tab + (id UInt32, haystack String, needle String, replacement String) + engine = MergeTree() + ORDER BY id; + +INSERT INTO test_tab VALUES (1, 'Hello World', 'l', 'xx') (2, 'Hello World', 'll', 'x') (3, 'Hello World', 'not_found', 'x') (4, 'Hello World', '[eo]', 'x') (5, 'Hello World', '.', 'x') + + +SELECT '** replaceAll() **'; + +SELECT '- non-const needle, const replacement'; +SELECT id, haystack, needle, 'x', replaceAll(haystack, needle, 'x') FROM test_tab ORDER BY id; +SELECT id, haystack, needle, 'x', replaceAll('Hello World', needle, 'x') FROM test_tab ORDER BY id; + +SELECT '- const needle, non-const replacement'; +SELECT id, haystack, 'l', replacement, replaceAll(haystack, 'l', replacement) FROM test_tab ORDER BY id; +SELECT id, haystack, 'l', replacement, replaceAll('Hello World', 'l', replacement) FROM test_tab ORDER BY id; + +SELECT '- non-const needle, non-const replacement'; +SELECT id, haystack, needle, replacement, replaceAll(haystack, needle, replacement) FROM test_tab ORDER BY id; +SELECT id, haystack, needle, replacement, replaceAll('Hello World', needle, replacement) FROM test_tab ORDER BY id; + + +SELECT '** replaceOne() **'; + +SELECT '- non-const needle, const replacement'; +SELECT id, haystack, needle, 'x', replaceOne(haystack, needle, 'x') FROM test_tab ORDER BY id; +SELECT id, haystack, needle, 'x', replaceOne('Hello World', needle, 'x') FROM test_tab ORDER BY id; + +SELECT '- const needle, non-const replacement'; +SELECT id, haystack, 'l', replacement, replaceOne(haystack, 'l', replacement) FROM test_tab ORDER BY id; +SELECT id, haystack, 'l', replacement, replaceOne('Hello 
World', 'l', replacement) FROM test_tab ORDER BY id; + +SELECT '- non-const needle, non-const replacement'; +SELECT id, haystack, needle, replacement, replaceOne(haystack, needle, replacement) FROM test_tab ORDER BY id; +SELECT id, haystack, needle, replacement, replaceOne('Hello World', needle, replacement) FROM test_tab ORDER BY id; + +SELECT '** replaceRegexpAll() **'; + +SELECT '- non-const needle, const replacement'; +SELECT id, haystack, needle, 'x', replaceRegexpAll(haystack, needle, 'x') FROM test_tab ORDER BY id; +SELECT id, haystack, needle, 'x', replaceRegexpAll('Hello World', needle, 'x') FROM test_tab ORDER BY id; + +SELECT '- const needle, non-const replacement'; +SELECT id, haystack, 'l', replacement, replaceRegexpAll(haystack, 'l', replacement) FROM test_tab ORDER BY id; +SELECT id, haystack, 'l', replacement, replaceRegexpAll('Hello World', 'l', replacement) FROM test_tab ORDER BY id; + +SELECT '- non-const needle, non-const replacement'; +SELECT id, haystack, needle, replacement, replaceRegexpAll(haystack, needle, replacement) FROM test_tab ORDER BY id; +SELECT id, haystack, needle, replacement, replaceRegexpAll('Hello World', needle, replacement) FROM test_tab ORDER BY id; + +SELECT '** replaceRegexpOne() **'; + +SELECT '- non-const needle, const replacement'; +SELECT id, haystack, needle, 'x', replaceRegexpOne(haystack, needle, 'x') FROM test_tab ORDER BY id; +SELECT id, haystack, needle, 'x', replaceRegexpOne('Hello World', needle, 'x') FROM test_tab ORDER BY id; + +SELECT '- const needle, non-const replacement'; +SELECT id, haystack, 'l', replacement, replaceRegexpOne(haystack, 'l', replacement) FROM test_tab ORDER BY id; +SELECT id, haystack, 'l', replacement, replaceRegexpOne('Hello World', 'l', replacement) FROM test_tab ORDER BY id; + +SELECT '- non-const needle, non-const replacement'; +SELECT id, haystack, needle, replacement, replaceRegexpOne(haystack, needle, replacement) FROM test_tab ORDER BY id; +SELECT id, haystack, needle, replacement, replaceRegexpOne('Hello World', needle, replacement) FROM test_tab ORDER BY id; + +DROP TABLE IF EXISTS test_tab; + + +SELECT 'Check that an exception is thrown if the needle is empty'; + +CREATE TABLE test_tab + (id UInt32, haystack String, needle String, replacement String) + engine = MergeTree() + ORDER BY id; + +INSERT INTO test_tab VALUES (1, 'Hello World', 'l', 'x') (2, 'Hello World', '', 'y') + +-- needle: non-const, replacement: const +SELECT replaceAll(haystack, needle, 'x') FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT replaceOne(haystack, needle, 'x') FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT replaceRegexpAll(haystack, needle, 'x') FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT replaceRegexpOne(haystack, needle, 'x') FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } + +-- needle: const, replacement: non-const +SELECT replaceAll(haystack, '', replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT replaceOne(haystack, '', replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT replaceRegexpAll(haystack, '', replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT replaceRegexpOne(haystack, '', replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } + +-- needle: non-const, replacement: non-const +SELECT replaceAll(haystack, needle, replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT replaceOne(haystack, needle, replacement) FROM test_tab; -- { 
serverError ARGUMENT_OUT_OF_BOUND } +SELECT replaceRegexpAll(haystack, needle, replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT replaceRegexpOne(haystack, needle, replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } + +DROP TABLE IF EXISTS test_tab; diff --git a/tests/queries/0_stateless/02537_distributed_loosing_files_after_exception.reference b/tests/queries/0_stateless/02537_distributed_loosing_files_after_exception.reference new file mode 100644 index 00000000000..7793e91fcb6 --- /dev/null +++ b/tests/queries/0_stateless/02537_distributed_loosing_files_after_exception.reference @@ -0,0 +1,16 @@ +monitor_batch_insert=0 +1 2 +1 0 +-- { echoOn } +SELECT sum(key), count(key) FROM dist; +2 2 +SELECT sum(key), count(key) FROM underlying; +2 2 +monitor_batch_insert=1 +1 2 +1 0 +-- { echoOn } +SELECT sum(key), count(key) FROM dist; +2 2 +SELECT sum(key), count(key) FROM underlying; +2 2 diff --git a/tests/queries/0_stateless/02537_distributed_loosing_files_after_exception.sql.j2 b/tests/queries/0_stateless/02537_distributed_loosing_files_after_exception.sql.j2 new file mode 100644 index 00000000000..4f8cf1ccffe --- /dev/null +++ b/tests/queries/0_stateless/02537_distributed_loosing_files_after_exception.sql.j2 @@ -0,0 +1,32 @@ +{% for setting in [0, 1] %} +-- Testing that the distributed table doesn't lose files after inserts which contain errors + +SELECT 'monitor_batch_insert={{ setting }}'; + +DROP TABLE IF EXISTS dist; +DROP TABLE IF EXISTS underlying; + +CREATE TABLE dist (key Int) ENGINE=Distributed(test_shard_localhost, currentDatabase(), underlying) SETTINGS monitor_batch_inserts={{ setting }}; +SYSTEM STOP DISTRIBUTED SENDS dist; + +INSERT INTO dist SETTINGS prefer_localhost_replica=0, max_threads=1 VALUES (1); +INSERT INTO dist SETTINGS prefer_localhost_replica=0, max_threads=2 VALUES (1); + +SYSTEM FLUSH DISTRIBUTED dist; -- { serverError UNKNOWN_TABLE } +-- check a second time, since after the queue has been used the file may have been lost from it +SYSTEM FLUSH DISTRIBUTED dist; -- { serverError UNKNOWN_TABLE } + +SELECT is_blocked, data_files FROM system.distribution_queue WHERE database = currentDatabase() AND table = 'dist'; + +CREATE TABLE underlying (key Int) ENGINE=Memory(); +SYSTEM FLUSH DISTRIBUTED dist; + +-- all data should be flushed +SELECT is_blocked, data_files FROM system.distribution_queue WHERE database = currentDatabase() AND table = 'dist'; + +-- { echoOn } +SELECT sum(key), count(key) FROM dist; +SELECT sum(key), count(key) FROM underlying; +-- { echoOff } + +{% endfor %} diff --git a/tests/queries/0_stateless/02538_alter_rename_sequence.reference b/tests/queries/0_stateless/02538_alter_rename_sequence.reference new file mode 100644 index 00000000000..73aa1b7e8d8 --- /dev/null +++ b/tests/queries/0_stateless/02538_alter_rename_sequence.reference @@ -0,0 +1,8 @@ +1 2 3 +4 5 6 +{"column1_renamed":"1","column2_renamed":"2","column3":"3"} +{"column1_renamed":"4","column2_renamed":"5","column3":"6"} +1 2 3 +4 5 6 +{"column1_renamed":"1","column2_renamed":"2","column3":"3"} +{"column1_renamed":"4","column2_renamed":"5","column3":"6"} diff --git a/tests/queries/0_stateless/02538_alter_rename_sequence.sql b/tests/queries/0_stateless/02538_alter_rename_sequence.sql new file mode 100644 index 00000000000..d7df27dc702 --- /dev/null +++ b/tests/queries/0_stateless/02538_alter_rename_sequence.sql @@ -0,0 +1,59 @@ +DROP TABLE IF EXISTS wrong_metadata; + +CREATE TABLE wrong_metadata( + column1 UInt64, + column2 UInt64, + column3 UInt64 +) +ENGINE 
ReplicatedMergeTree('/test/{database}/tables/wrong_metadata', '1') +ORDER BY tuple(); + +INSERT INTO wrong_metadata VALUES (1, 2, 3); + +SYSTEM STOP REPLICATION QUEUES wrong_metadata; + +ALTER TABLE wrong_metadata RENAME COLUMN column1 TO column1_renamed SETTINGS replication_alter_partitions_sync = 0; + +INSERT INTO wrong_metadata VALUES (4, 5, 6); + +SELECT * FROM wrong_metadata ORDER BY column1; + +SYSTEM START REPLICATION QUEUES wrong_metadata; + +SYSTEM SYNC REPLICA wrong_metadata; + +ALTER TABLE wrong_metadata RENAME COLUMN column2 to column2_renamed SETTINGS replication_alter_partitions_sync = 2; + +SELECT * FROM wrong_metadata ORDER BY column1_renamed FORMAT JSONEachRow; + +DROP TABLE IF EXISTS wrong_metadata; + + +CREATE TABLE wrong_metadata_wide( + column1 UInt64, + column2 UInt64, + column3 UInt64 +) +ENGINE ReplicatedMergeTree('/test/{database}/tables/wrong_metadata_wide', '1') +ORDER BY tuple() +SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO wrong_metadata_wide VALUES (1, 2, 3); + +SYSTEM STOP REPLICATION QUEUES wrong_metadata_wide; + +ALTER TABLE wrong_metadata_wide RENAME COLUMN column1 TO column1_renamed SETTINGS replication_alter_partitions_sync = 0; + +INSERT INTO wrong_metadata_wide VALUES (4, 5, 6); + +SELECT * FROM wrong_metadata_wide ORDER by column1; + +SYSTEM START REPLICATION QUEUES wrong_metadata_wide; + +SYSTEM SYNC REPLICA wrong_metadata_wide; + +ALTER TABLE wrong_metadata_wide RENAME COLUMN column2 to column2_renamed SETTINGS replication_alter_partitions_sync = 2; + +SELECT * FROM wrong_metadata_wide ORDER BY column1_renamed FORMAT JSONEachRow; + +DROP TABLE IF EXISTS wrong_metadata_wide; diff --git a/tests/queries/0_stateless/02540_duplicate_primary_key.sql b/tests/queries/0_stateless/02540_duplicate_primary_key.sql index 322b6d74845..a084d76964b 100644 --- a/tests/queries/0_stateless/02540_duplicate_primary_key.sql +++ b/tests/queries/0_stateless/02540_duplicate_primary_key.sql @@ -90,16 +90,16 @@ ORDER BY (coverage, situation_name, NAME_toe, NAME_cockroach); insert into test select * from generateRandom() limit 10; -with dissonance as ( - Select cast(toStartOfInterval(coverage, INTERVAL 1 day) as Date) as flour, count() as regulation +with dissonance as ( + Select cast(toStartOfInterval(coverage, INTERVAL 1 day) as Date) as flour, count() as regulation from test - group by flour having flour >= toDate(now())-100 + group by flour having flour >= toDate(now())-100 ), -cheetah as ( - Select flour, regulation from dissonance - union distinct - Select toDate(now())-1, ifnull((select regulation from dissonance where flour = toDate(now())-1),0) as regulation -) +cheetah as ( + Select flour, regulation from dissonance + union distinct + Select toDate(now())-1, ifnull((select regulation from dissonance where flour = toDate(now())-1),0) as regulation +) Select flour, regulation from cheetah order by flour with fill step 1 limit 100 format Null; drop table test; diff --git a/tests/queries/0_stateless/02540_duplicate_primary_key2.reference b/tests/queries/0_stateless/02540_duplicate_primary_key2.reference new file mode 100644 index 00000000000..08839f6bb29 --- /dev/null +++ b/tests/queries/0_stateless/02540_duplicate_primary_key2.reference @@ -0,0 +1 @@ +200 diff --git a/tests/queries/0_stateless/02540_duplicate_primary_key2.sql b/tests/queries/0_stateless/02540_duplicate_primary_key2.sql new file mode 100644 index 00000000000..d0f02a894f2 --- /dev/null +++ b/tests/queries/0_stateless/02540_duplicate_primary_key2.sql @@ -0,0 +1,99 @@ +drop table if exists test; + 
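+-- Comment on the setting enabled next (explanation, assumed from the default server behavior):
+-- the table below wraps small fixed-size types (UInt8, Int16, Int32) in LowCardinality,
+-- and creating such columns is prohibited by default due to the expected negative
+-- impact on performance, so allow_suspicious_low_cardinality_types must be enabled.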
+set allow_suspicious_low_cardinality_types = 1; + +CREATE TABLE test +( + `timestamp` DateTime, + `latitude` Nullable(Float32) CODEC(Gorilla, ZSTD(1)), + `longitude` Nullable(Float32) CODEC(Gorilla, ZSTD(1)), + `xxxx1` LowCardinality(UInt8), + `xxxx2` LowCardinality(Nullable(Int16)), + `xxxx3` LowCardinality(Nullable(Int16)), + `xxxx4` Nullable(Int32), + `xxxx5` LowCardinality(Nullable(Int32)), + `xxxx6` Nullable(Int32), + `xxxx7` Nullable(Int32), + `xxxx8` LowCardinality(Int32), + `xxxx9` LowCardinality(Nullable(Int16)), + `xxxx10` LowCardinality(Nullable(Int16)), + `xxxx11` LowCardinality(Nullable(Int16)), + `xxxx12` LowCardinality(String), + `xxxx13` Nullable(Float32), + `xxxx14` LowCardinality(String), + `xxxx15` LowCardinality(Nullable(String)), + `xxxx16` LowCardinality(String), + `xxxx17` LowCardinality(String), + `xxxx18` FixedString(19), + `xxxx19` FixedString(17), + `xxxx20` LowCardinality(UInt8), + `xxxx21` LowCardinality(Nullable(Int16)), + `xxxx22` LowCardinality(Nullable(Int16)), + `xxxx23` LowCardinality(Nullable(Int16)), + `xxxx24` LowCardinality(Nullable(Int16)), + `xxxx25` LowCardinality(Nullable(Int16)), + `xxxx26` LowCardinality(Nullable(Int16)), + `xxxx27` Nullable(Float32), + `xxxx28` LowCardinality(Nullable(String)), + `xxxx29` LowCardinality(String), + `xxxx30` LowCardinality(String), + `xxxx31` LowCardinality(Nullable(String)), + `xxxx32` UInt64, + PROJECTION cumsum_projection_simple + ( + SELECT + xxxx1, + toStartOfInterval(timestamp, toIntervalMonth(1)), + toStartOfWeek(timestamp, 8), + toStartOfInterval(timestamp, toIntervalDay(1)), + xxxx17, + xxxx16, + xxxx14, + xxxx9, + xxxx10, + xxxx21, + xxxx22, + xxxx11, + sum(multiIf(xxxx21 IS NULL, 0, 1)), + sum(multiIf(xxxx22 IS NULL, 0, 1)), + sum(multiIf(xxxx23 IS NULL, 0, 1)), + max(toStartOfInterval(timestamp, toIntervalDay(1))), + max(CAST(CAST(toStartOfInterval(timestamp, toIntervalDay(1)), 'Nullable(DATE)'), 'Nullable(TIMESTAMP)')), + min(toStartOfInterval(timestamp, toIntervalDay(1))), + min(CAST(CAST(toStartOfInterval(timestamp, toIntervalDay(1)), 'Nullable(DATE)'), 'Nullable(TIMESTAMP)')), + count(), + sum(1), + COUNTDistinct(xxxx16), + COUNTDistinct(xxxx31), + COUNTDistinct(xxxx14), + COUNTDistinct(CAST(toStartOfInterval(timestamp, toIntervalDay(1)), 'Nullable(DATE)')) + GROUP BY + xxxx1, + toStartOfInterval(timestamp, toIntervalMonth(1)), + toStartOfWeek(timestamp, 8), + toStartOfInterval(timestamp, toIntervalDay(1)), + xxxx1, + toStartOfInterval(timestamp, toIntervalMonth(1)), + toStartOfWeek(timestamp, 8), + toStartOfInterval(timestamp, toIntervalDay(1)), + xxxx17, + xxxx16, + xxxx14, + xxxx9, + xxxx10, + xxxx21, + xxxx22, + xxxx11 + ) +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(timestamp) +ORDER BY (xxxx17, xxxx14, xxxx16, toStartOfDay(timestamp), left(xxxx19, 10), timestamp); + +INSERT INTO test SELECT * replace 1 as xxxx16 replace 1 as xxxx1 replace '2022-02-02 01:00:00' as timestamp replace 'Airtel' as xxxx14 FROM generateRandom() LIMIT 100; +INSERT INTO test SELECT * replace 1 as xxxx16 replace 1 as xxxx1 replace '2022-02-02 01:00:00' as timestamp replace 'BSNL' as xxxx14 FROM generateRandom() LIMIT 100; +INSERT INTO test SELECT * replace 1 as xxxx16 replace 1 as xxxx1 replace '2022-02-02 01:00:00' as timestamp replace 'xxx' as xxxx14 FROM generateRandom() LIMIT 100; + +select sum(1) from test where toStartOfInterval(timestamp, INTERVAL 1 day) >= TIMESTAMP '2022-02-01 01:00:00' and xxxx14 in ('Airtel', 'BSNL') and xxxx1 = 1 GROUP BY xxxx16; + +drop table test; diff --git 
a/tests/queries/0_stateless/02542_case_no_else.reference b/tests/queries/0_stateless/02542_case_no_else.reference new file mode 100644 index 00000000000..8f3fdf29168 --- /dev/null +++ b/tests/queries/0_stateless/02542_case_no_else.reference @@ -0,0 +1,3 @@ +2 +1 Z +1 Z diff --git a/tests/queries/0_stateless/02542_case_no_else.sql b/tests/queries/0_stateless/02542_case_no_else.sql new file mode 100644 index 00000000000..0c7975a750e --- /dev/null +++ b/tests/queries/0_stateless/02542_case_no_else.sql @@ -0,0 +1,14 @@ +SELECT CASE 1 WHEN 1 THEN 2 END; + +SELECT id, + CASE id + WHEN 1 THEN 'Z' + END x +FROM (SELECT 1 as id); + +SELECT id, + CASE id + WHEN 1 THEN 'Z' + ELSE 'X' + END x +FROM (SELECT 1 as id); diff --git a/tests/queries/0_stateless/02542_transform_new.reference b/tests/queries/0_stateless/02542_transform_new.reference new file mode 100644 index 00000000000..b6eaa692c41 --- /dev/null +++ b/tests/queries/0_stateless/02542_transform_new.reference @@ -0,0 +1,32 @@ +1 +1 +1 +1 +9 +9 +\N +7 +1 +9 +7 +b +b +b +b +a +a +\N +c +sep1 +80000 +80000 +sep2 +80000 +80000 +sep3 +1 +sep4 +8000 +sep5 +8000 +sep6 diff --git a/tests/queries/0_stateless/02542_transform_new.sql b/tests/queries/0_stateless/02542_transform_new.sql new file mode 100644 index 00000000000..43da0a50731 --- /dev/null +++ b/tests/queries/0_stateless/02542_transform_new.sql @@ -0,0 +1,35 @@ +select transform(2, [1,2], [9,1], materialize(null)); +select transform(2, [1,2], [9,1], materialize(7)); +select transform(2, [1,2], [9,1], null); +select transform(2, [1,2], [9,1], 7); +select transform(1, [1,2], [9,1], null); +select transform(1, [1,2], [9,1], 7); +select transform(5, [1,2], [9,1], null); +select transform(5, [1,2], [9,1], 7); +select transform(2, [1,2], [9,1]); +select transform(1, [1,2], [9,1]); +select transform(7, [1,2], [9,1]); + +select transform(2, [1,2], ['a','b'], materialize(null)); +select transform(2, [1,2], ['a','b'], materialize('c')); +select transform(2, [1,2], ['a','b'], null); +select transform(2, [1,2], ['a','b'], 'c'); +select transform(1, [1,2], ['a','b'], null); +select transform(1, [1,2], ['a','b'], 'c'); +select transform(5, [1,2], ['a','b'], null); +select transform(5, [1,2], ['a','b'], 'c'); + +select 'sep1'; +SELECT transform(number, [2], [toDecimal32(1, 1)], materialize(80000)) as x FROM numbers(2); +select 'sep2'; +SELECT transform(number, [2], [toDecimal32(1, 1)], 80000) as x FROM numbers(2); +select 'sep3'; +SELECT transform(toDecimal32(2, 1), [toDecimal32(2, 1)], [1]); +select 'sep4'; +SELECT transform(8000, [1], [toDecimal32(2, 1)]); +select 'sep5'; +SELECT transform(toDecimal32(8000,0), [1], [toDecimal32(2, 1)]); +select 'sep6'; +SELECT transform(-9223372036854775807, [-1], [toDecimal32(1024, 3)]) FROM system.numbers LIMIT 7; -- { serverError BAD_ARGUMENTS } +SELECT [NULL, NULL, NULL, NULL], transform(number, [2147483648], [toDecimal32(1, 2)]) AS x FROM numbers(257) WHERE materialize(10); -- { serverError BAD_ARGUMENTS } +SELECT transform(-2147483649, [1], [toDecimal32(1, 2)]) GROUP BY [1] WITH TOTALS; -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02542_transform_old.reference b/tests/queries/0_stateless/02542_transform_old.reference new file mode 100644 index 00000000000..d03b17d40a3 --- /dev/null +++ b/tests/queries/0_stateless/02542_transform_old.reference @@ -0,0 +1,72 @@ +google +other +yahoo +yandex +#1 +20 +21 +22 +29 +#2 +0 +1 +3 +5 +7 +8 +9 +20 +21 +29 +#3 +20 +21 +22 +29 +#4 +google +other +yahoo +yandex +#5 +0 +1 +3 +5 +7 +8 +9 +google +yahoo 
+yandex +---- +google +other +yahoo +yandex +#1 +20 +21 +22 +29 +#3 +20 +21 +22 +29 +#4 +google +other +yahoo +yandex +---- +2000 +2100 +2200 +2900 +#1 +2000 +2100 +2200 +2900 +---- diff --git a/tests/queries/0_stateless/02542_transform_old.sql b/tests/queries/0_stateless/02542_transform_old.sql new file mode 100644 index 00000000000..01a960ec367 --- /dev/null +++ b/tests/queries/0_stateless/02542_transform_old.sql @@ -0,0 +1,25 @@ +SELECT transform(number, [2, 4, 6], ['google', 'yandex', 'yahoo'], 'other') as x FROM numbers(10) GROUP BY x ORDER BY x; +SELECT '#1'; +SELECT transform(number, [2, 4, 6], [29, 20, 21], 22) as x FROM numbers(10) GROUP BY x ORDER BY x; +SELECT '#2'; +SELECT transform(number, [2, 4, 6], [29, 20, 21]) as x FROM numbers(10) GROUP BY x ORDER BY x; +SELECT '#3'; +SELECT transform(toString(number), ['2', '4', '6'], [29, 20, 21], 22) as x FROM numbers(10) GROUP BY x ORDER BY x; +SELECT '#4'; +SELECT transform(toString(number), ['2', '4', '6'], ['google', 'yandex', 'yahoo'], 'other') as x FROM numbers(10) GROUP BY x ORDER BY x; +SELECT '#5'; +SELECT transform(toString(number), ['2', '4', '6'], ['google', 'yandex', 'yahoo']) as x FROM numbers(10) GROUP BY x ORDER BY x; +SELECT '----'; +SELECT transform(number, [2, 4, 6], ['google', 'yandex', 'yahoo'], materialize('other')) as x FROM numbers(10) GROUP BY x ORDER BY x; +SELECT '#1'; +SELECT transform(number, [2, 4, 6], [29, 20, 21], materialize(22)) as x FROM numbers(10) GROUP BY x ORDER BY x; +SELECT '#3'; +SELECT transform(toString(number), ['2', '4', '6'], [29, 20, 21], materialize(22)) as x FROM numbers(10) GROUP BY x ORDER BY x; +SELECT '#4'; +SELECT transform(toString(number), ['2', '4', '6'], ['google', 'yandex', 'yahoo'], materialize('other')) as x FROM numbers(10) GROUP BY x ORDER BY x; +SELECT '----'; +SELECT transform(number, [2, 4, 6], [2900, 2000, 2100], 2200) as x FROM numbers(10) GROUP BY x ORDER BY x; +SELECT '#1'; +SELECT transform(number, [2, 4, 6], [2900, 2000, 2100], materialize(2200)) as x FROM numbers(10) GROUP BY x ORDER BY x; +SELECT '----'; +SELECT transform(number, [1], [null]) FROM system.numbers LIMIT 1; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } diff --git a/tests/queries/0_stateless/02543_alter_rename_modify_stuck.reference b/tests/queries/0_stateless/02543_alter_rename_modify_stuck.reference new file mode 100644 index 00000000000..156128e3dd2 --- /dev/null +++ b/tests/queries/0_stateless/02543_alter_rename_modify_stuck.reference @@ -0,0 +1 @@ +{"v":"1","v2":"77"} diff --git a/tests/queries/0_stateless/02543_alter_rename_modify_stuck.sh b/tests/queries/0_stateless/02543_alter_rename_modify_stuck.sh new file mode 100755 index 00000000000..adaf1846552 --- /dev/null +++ b/tests/queries/0_stateless/02543_alter_rename_modify_stuck.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS table_to_rename" + +$CLICKHOUSE_CLIENT --query="CREATE TABLE table_to_rename(v UInt64, v1 UInt64)ENGINE = MergeTree ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0" + +$CLICKHOUSE_CLIENT --query="INSERT INTO table_to_rename VALUES (1, 1)" + + +# we want to following mutations to stuck +# That is why we stop merges and wait in loops until they actually start +$CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES table_to_rename" + +$CLICKHOUSE_CLIENT --query="ALTER TABLE table_to_rename RENAME COLUMN v1 to v2" & + +counter=0 retries=60 + +I=0 +while [[ $counter -lt $retries ]]; do + I=$((I + 1)) + result=$($CLICKHOUSE_CLIENT --query "show create table table_to_rename") + if [[ $result == *"v2"* ]]; then + break; + fi + sleep 0.1 + ((++counter)) +done + + +$CLICKHOUSE_CLIENT --query="ALTER TABLE table_to_rename UPDATE v2 = 77 WHERE 1 = 1 SETTINGS mutations_sync = 2" & + +counter=0 retries=60 + +I=0 +while [[ $counter -lt $retries ]]; do + I=$((I + 1)) + result=$($CLICKHOUSE_CLIENT --query "SELECT count() from system.mutations where database='${CLICKHOUSE_DATABASE}' and table='table_to_rename'") + if [[ $result == "2" ]]; then + break; + fi + sleep 0.1 + ((++counter)) +done + + +$CLICKHOUSE_CLIENT --query="SYSTEM START MERGES table_to_rename" + +wait + +$CLICKHOUSE_CLIENT --query="SELECT * FROM table_to_rename FORMAT JSONEachRow" + + + $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS table_to_rename" diff --git a/tests/queries/0_stateless/02543_alter_update_rename_stuck.reference b/tests/queries/0_stateless/02543_alter_update_rename_stuck.reference new file mode 100644 index 00000000000..156128e3dd2 --- /dev/null +++ b/tests/queries/0_stateless/02543_alter_update_rename_stuck.reference @@ -0,0 +1 @@ +{"v":"1","v2":"77"} diff --git a/tests/queries/0_stateless/02543_alter_update_rename_stuck.sh b/tests/queries/0_stateless/02543_alter_update_rename_stuck.sh new file mode 100755 index 00000000000..e801fbedab7 --- /dev/null +++ b/tests/queries/0_stateless/02543_alter_update_rename_stuck.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS table_to_rename" + +$CLICKHOUSE_CLIENT --query="CREATE TABLE table_to_rename(v UInt64, v1 UInt64)ENGINE = MergeTree ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0" + +$CLICKHOUSE_CLIENT --query="INSERT INTO table_to_rename VALUES (1, 1)" + + +# we want to following mutations to stuck +# That is why we stop merges and wait in loops until they actually start +$CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES table_to_rename" + +$CLICKHOUSE_CLIENT --query="ALTER TABLE table_to_rename UPDATE v1 = 77 WHERE 1 = 1 SETTINGS mutations_sync = 2" & + +counter=0 retries=60 + +I=0 +while [[ $counter -lt $retries ]]; do + I=$((I + 1)) + result=$($CLICKHOUSE_CLIENT --query "SELECT count() from system.mutations where database='${CLICKHOUSE_DATABASE}' and table='table_to_rename'") + if [[ $result == "1" ]]; then + break; + fi + sleep 0.1 + ((++counter)) +done + +$CLICKHOUSE_CLIENT --query="ALTER TABLE table_to_rename RENAME COLUMN v1 to v2" & + + +# it will not introduce any flakyness +# just wait that mutation doesn't start +sleep 3 + +$CLICKHOUSE_CLIENT --query="SYSTEM START MERGES table_to_rename" + +wait + +$CLICKHOUSE_CLIENT --query="SELECT * FROM table_to_rename FORMAT JSONEachRow" + + + $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS table_to_rename" diff --git a/tests/queries/0_stateless/02552_siphash128_reference.reference b/tests/queries/0_stateless/02552_siphash128_reference.reference index a831c691ce7..452e9910660 100644 --- a/tests/queries/0_stateless/02552_siphash128_reference.reference +++ b/tests/queries/0_stateless/02552_siphash128_reference.reference @@ -126,5 +126,5 @@ E3040C00EB28F15366CA73CBD872E740 1 1 1 -1CE422FEE7BD8DE20000000000000000 -1CE422FEE7BD8DE20000000000000000 +1 +1 diff --git a/tests/queries/0_stateless/02552_siphash128_reference.sql b/tests/queries/0_stateless/02552_siphash128_reference.sql index 323561654b9..c238e51b690 100644 --- a/tests/queries/0_stateless/02552_siphash128_reference.sql +++ b/tests/queries/0_stateless/02552_siphash128_reference.sql @@ -203,5 +203,5 @@ select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, select sipHash128ReferenceKeyed((0, 0), '1'); -- { serverError 48 } select sipHash128ReferenceKeyed(toUInt64(0), '1'); -- { serverError 48 } -select hex(sipHash128Reference()); -select hex(sipHash128ReferenceKeyed()); +SELECT hex(sipHash128Reference()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128()) = '1CE422FEE7BD8DE20000000000000000'; +SELECT hex(sipHash128ReferenceKeyed()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128Keyed()) = '1CE422FEE7BD8DE20000000000000000'; diff --git a/tests/queries/0_stateless/02555_davengers_rename_chain.reference b/tests/queries/0_stateless/02555_davengers_rename_chain.reference new file mode 100644 index 00000000000..a9fc4b395e2 --- /dev/null +++ b/tests/queries/0_stateless/02555_davengers_rename_chain.reference @@ -0,0 +1,26 @@ +{"a1":"1","b1":"2","c":"3"} +~~~~~~~ +{"a1":"1","b1":"2","c":"3"} +{"a1":"4","b1":"5","c":"6"} +~~~~~~~ +{"a1":"1","b1":"2","c":"3"} +{"a1":"4","b1":"5","c":"6"} +{"a1":"7","b1":"8","c":"9"} +~~~~~~~ +{"b":"1","a":"2","c":"3"} +{"b":"4","a":"5","c":"6"} +{"b":"7","a":"8","c":"9"} +~~~~~~~ +{"a1":"1","b1":"2","c":"3"} +~~~~~~~ +{"a1":"1","b1":"2","c":"3"} +{"a1":"4","b1":"5","c":"6"} +~~~~~~~ +{"a1":"1","b1":"2","c":"3"} +{"a1":"4","b1":"5","c":"6"} +{"a1":"7","b1":"8","c":"9"} +~~~~~~~ +{"b":"1","a":"2","c":"3"} 
+{"b":"4","a":"5","c":"6"} +{"b":"7","a":"8","c":"9"} +~~~~~~~ diff --git a/tests/queries/0_stateless/02555_davengers_rename_chain.sh b/tests/queries/0_stateless/02555_davengers_rename_chain.sh new file mode 100755 index 00000000000..b23f8085fd7 --- /dev/null +++ b/tests/queries/0_stateless/02555_davengers_rename_chain.sh @@ -0,0 +1,143 @@ +#!/usr/bin/env bash +# Tags: replica +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS wrong_metadata" + +$CLICKHOUSE_CLIENT -n --query="CREATE TABLE wrong_metadata( + a UInt64, + b UInt64, + c UInt64 +) +ENGINE ReplicatedMergeTree('/test/{database}/tables/wrong_metadata', '1') +ORDER BY tuple() +SETTINGS min_bytes_for_wide_part = 0" + +$CLICKHOUSE_CLIENT --query="INSERT INTO wrong_metadata VALUES (1, 2, 3)" + + +$CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES wrong_metadata" + + +$CLICKHOUSE_CLIENT --query="ALTER TABLE wrong_metadata RENAME COLUMN a TO a1, RENAME COLUMN b to b1 SETTINGS replication_alter_partitions_sync = 0" + +counter=0 retries=60 +I=0 +while [[ $counter -lt $retries ]]; do + I=$((I + 1)) + result=$($CLICKHOUSE_CLIENT --query "SHOW CREATE TABLE wrong_metadata") + if [[ $result == *"\`a1\` UInt64"* ]]; then + break; + fi + sleep 0.1 + ((++counter)) +done + + +$CLICKHOUSE_CLIENT --query="SELECT * FROM wrong_metadata ORDER BY a1 FORMAT JSONEachRow" + +$CLICKHOUSE_CLIENT --query="SELECT '~~~~~~~'" + +$CLICKHOUSE_CLIENT --query="INSERT INTO wrong_metadata VALUES (4, 5, 6)" + + +$CLICKHOUSE_CLIENT --query="SELECT * FROM wrong_metadata ORDER BY a1 FORMAT JSONEachRow" +$CLICKHOUSE_CLIENT --query="SELECT '~~~~~~~'" + + +$CLICKHOUSE_CLIENT --query="ALTER TABLE wrong_metadata RENAME COLUMN a1 TO b, RENAME COLUMN b1 to a SETTINGS replication_alter_partitions_sync = 0" + +counter=0 retries=60 +I=0 +while [[ $counter -lt $retries ]]; do + I=$((I + 1)) + result=$($CLICKHOUSE_CLIENT --query "SELECT * FROM system.mutations WHERE table = 'wrong_metadata' AND database='${CLICKHOUSE_DATABASE}'") + if [[ $result == *"b1 TO a"* ]]; then + break; + fi + sleep 0.1 + ((++counter)) +done + +$CLICKHOUSE_CLIENT --query="INSERT INTO wrong_metadata VALUES (7, 8, 9)" + +$CLICKHOUSE_CLIENT --query="SELECT * FROM wrong_metadata ORDER by a1 FORMAT JSONEachRow" +$CLICKHOUSE_CLIENT --query="SELECT '~~~~~~~'" + +$CLICKHOUSE_CLIENT --query="SYSTEM START MERGES wrong_metadata" + +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA wrong_metadata" + +$CLICKHOUSE_CLIENT --query="SELECT * FROM wrong_metadata order by a FORMAT JSONEachRow" + +$CLICKHOUSE_CLIENT --query="SELECT '~~~~~~~'" + + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS wrong_metadata" + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS wrong_metadata_compact" + +$CLICKHOUSE_CLIENT -n --query="CREATE TABLE wrong_metadata_compact( + a UInt64, + b UInt64, + c UInt64 +) +ENGINE ReplicatedMergeTree('/test/{database}/tables/wrong_metadata_compact', '1') +ORDER BY tuple() +SETTINGS min_bytes_for_wide_part = 10000000" + +$CLICKHOUSE_CLIENT --query="INSERT INTO wrong_metadata_compact VALUES (1, 2, 3)" + +$CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES wrong_metadata_compact" + +$CLICKHOUSE_CLIENT --query="ALTER TABLE wrong_metadata_compact RENAME COLUMN a TO a1, RENAME COLUMN b to b1 SETTINGS replication_alter_partitions_sync = 0" + +counter=0 retries=60 +I=0 +while [[ $counter -lt $retries ]]; do + I=$((I + 1)) + result=$($CLICKHOUSE_CLIENT --query "SHOW CREATE TABLE wrong_metadata_compact") 
+ if [[ $result == *"\`a1\` UInt64"* ]]; then + break; + fi + sleep 0.1 + ((++counter)) +done + +$CLICKHOUSE_CLIENT --query="SELECT * FROM wrong_metadata_compact ORDER BY a1 FORMAT JSONEachRow" +$CLICKHOUSE_CLIENT --query="SELECT '~~~~~~~'" + +$CLICKHOUSE_CLIENT --query="INSERT INTO wrong_metadata_compact VALUES (4, 5, 6)" + +$CLICKHOUSE_CLIENT --query="SELECT * FROM wrong_metadata_compact ORDER BY a1 FORMAT JSONEachRow" +$CLICKHOUSE_CLIENT --query="SELECT '~~~~~~~'" + +$CLICKHOUSE_CLIENT --query="ALTER TABLE wrong_metadata_compact RENAME COLUMN a1 TO b, RENAME COLUMN b1 to a SETTINGS replication_alter_partitions_sync = 0" + +counter=0 retries=60 +I=0 +while [[ $counter -lt $retries ]]; do + I=$((I + 1)) + result=$($CLICKHOUSE_CLIENT --query "SELECT * FROM system.mutations WHERE table = 'wrong_metadata_compact' AND database='${CLICKHOUSE_DATABASE}'") + if [[ $result == *"b1 TO a"* ]]; then + break; + fi + sleep 0.1 + ((++counter)) +done + +$CLICKHOUSE_CLIENT --query="INSERT INTO wrong_metadata_compact VALUES (7, 8, 9)" + +$CLICKHOUSE_CLIENT --query="SELECT * FROM wrong_metadata_compact ORDER by a1 FORMAT JSONEachRow" +$CLICKHOUSE_CLIENT --query="SELECT '~~~~~~~'" + +$CLICKHOUSE_CLIENT --query="SYSTEM START MERGES wrong_metadata_compact" + +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA wrong_metadata_compact" + +$CLICKHOUSE_CLIENT --query="SELECT * FROM wrong_metadata_compact order by a FORMAT JSONEachRow" +$CLICKHOUSE_CLIENT --query="SELECT '~~~~~~~'" + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS wrong_metadata_compact" diff --git a/tests/queries/0_stateless/02559_nested_multiple_levels_default.reference b/tests/queries/0_stateless/02559_nested_multiple_levels_default.reference index b0214e0e7c7..9ed0fb620a4 100644 --- a/tests/queries/0_stateless/02559_nested_multiple_levels_default.reference +++ b/tests/queries/0_stateless/02559_nested_multiple_levels_default.reference @@ -1,6 +1,4 @@ data_compact Compact [[]] -data_memory InMemory -[[]] data_wide Wide [[]] diff --git a/tests/queries/0_stateless/02559_nested_multiple_levels_default.sql b/tests/queries/0_stateless/02559_nested_multiple_levels_default.sql index 156af5c7784..9dcdab82acb 100644 --- a/tests/queries/0_stateless/02559_nested_multiple_levels_default.sql +++ b/tests/queries/0_stateless/02559_nested_multiple_levels_default.sql @@ -10,26 +10,12 @@ CREATE TABLE data_compact ) ENGINE = MergeTree() ORDER BY tuple() -SETTINGS min_rows_for_compact_part=0, min_bytes_for_compact_part=0, min_rows_for_wide_part=100, min_bytes_for_wide_part=1e9; +SETTINGS min_rows_for_wide_part=100, min_bytes_for_wide_part=1e9; INSERT INTO data_compact VALUES ([0]); ALTER TABLE data_compact ADD COLUMN root.nested_array Array(Array(UInt8)); SELECT table, part_type FROM system.parts WHERE table = 'data_compact' AND database = currentDatabase(); SELECT root.nested_array FROM data_compact; --- memory -DROP TABLE IF EXISTS data_memory; -CREATE TABLE data_memory -( - `root.array` Array(UInt8), -) -ENGINE = MergeTree() -ORDER BY tuple() -SETTINGS min_rows_for_compact_part=100, min_bytes_for_compact_part=1e9, min_rows_for_wide_part=100, min_bytes_for_wide_part=1e9, in_memory_parts_enable_wal=0; -INSERT INTO data_memory VALUES ([0]); -ALTER TABLE data_memory ADD COLUMN root.nested_array Array(Array(UInt8)); -SELECT table, part_type FROM system.parts WHERE table = 'data_memory' AND database = currentDatabase(); -SELECT root.nested_array FROM data_memory; - -- wide DROP TABLE IF EXISTS data_wide; CREATE TABLE data_wide @@ -38,7 +24,7 @@ CREATE TABLE data_wide ) 
ENGINE = MergeTree() ORDER BY tuple() -SETTINGS min_rows_for_wide_part=0, min_bytes_for_wide_part=0, min_rows_for_wide_part=0, min_bytes_for_wide_part=0; +SETTINGS min_rows_for_wide_part=0, min_bytes_for_wide_part=0; INSERT INTO data_wide VALUES ([0]); ALTER TABLE data_wide ADD COLUMN root.nested_array Array(Array(UInt8)); SELECT table, part_type FROM system.parts WHERE table = 'data_wide' AND database = currentDatabase(); diff --git a/tests/queries/0_stateless/02560_agg_state_deserialization_hash_table_crash.sql b/tests/queries/0_stateless/02560_agg_state_deserialization_hash_table_crash.sql index 5b6662faeb3..d85cacc70be 100644 --- a/tests/queries/0_stateless/02560_agg_state_deserialization_hash_table_crash.sql +++ b/tests/queries/0_stateless/02560_agg_state_deserialization_hash_table_crash.sql @@ -1,4 +1,4 @@ DROP TABLE IF EXISTS tab; create table tab (d Int64, s AggregateFunction(groupUniqArrayArray, Array(UInt64)), c SimpleAggregateFunction(groupUniqArrayArray, Array(UInt64))) engine = SummingMergeTree() order by d; -INSERT INTO tab VALUES (1, 'このコー'); -- { clientError CANNOT_ALLOCATE_MEMORY } +INSERT INTO tab VALUES (1, 'このコー'); -- { clientError 128 } DROP TABLE tab; diff --git a/tests/queries/0_stateless/02560_window_ntile.reference b/tests/queries/0_stateless/02560_window_ntile.reference index cae0586fa8c..1045fc1011a 100644 --- a/tests/queries/0_stateless/02560_window_ntile.reference +++ b/tests/queries/0_stateless/02560_window_ntile.reference @@ -22,7 +22,28 @@ select a, b, ntile(3) over (partition by a order by b rows between unbounded pre 1 7 3 1 8 3 1 9 3 -select a, b, ntile(2) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); +select a, b, ntile(3) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); +0 0 1 +0 1 1 +0 2 1 +0 3 1 +0 4 2 +0 5 2 +0 6 2 +0 7 3 +0 8 3 +0 9 3 +1 0 1 +1 1 1 +1 2 1 +1 3 1 +1 4 2 +1 5 2 +1 6 2 +1 7 3 +1 8 3 +1 9 3 +select a, b, ntile(2) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); 0 0 1 0 1 1 0 2 1 @@ -43,7 +64,7 @@ select a, b, ntile(2) over (partition by a order by b rows between unbounded pre 1 7 2 1 8 2 1 9 2 -select a, b, ntile(1) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); +select a, b, ntile(1) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); 0 0 1 0 1 1 0 2 1 @@ -64,7 +85,7 @@ select a, b, ntile(1) over (partition by a order by b rows between unbounded pre 1 7 1 1 8 1 1 9 1 -select a, b, ntile(100) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); +select a, b, ntile(100) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); 0 0 1 0 1 2 0 2 3 @@ -85,7 +106,7 @@ select a, b, ntile(100) over (partition by a order by b rows between unbounded p 1 7 8 1 8 9 1 9 10 -select a, b, ntile(65535) over (partition by a order by b rows between unbounded preceding and unbounded following) from (select 1 as a, number as b from numbers(65535)) limit 100; +select a, b, ntile(65535) over (partition by a order by b) from (select 1 as a, number as b from numbers(65535)) limit 100; 1 0 1 1 1 2 1 2 3 @@ -187,11 +208,11 @@ select 
a, b, ntile(65535) over (partition by a order by b rows between unbounded 1 98 99 1 99 100 -- Bad arguments -select a, b, ntile(3.0) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } -select a, b, ntile('2') over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } -select a, b, ntile(0) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } -select a, b, ntile(-2) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } -select a, b, ntile(b + 1) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } +select a, b, ntile(3.0) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } +select a, b, ntile('2') over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } +select a, b, ntile(0) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } +select a, b, ntile(-2) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } +select a, b, ntile(b + 1) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } -- Bad window type select a, b, ntile(2) over (partition by a) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } select a, b, ntile(2) over (partition by a order by b rows between 4 preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } diff --git a/tests/queries/0_stateless/02560_window_ntile.sql b/tests/queries/0_stateless/02560_window_ntile.sql index 4c25ecf4dd2..f2acf8fc94e 100644 --- a/tests/queries/0_stateless/02560_window_ntile.sql +++ b/tests/queries/0_stateless/02560_window_ntile.sql @@ -2,17 +2,20 @@ -- Normal cases select a, b, ntile(3) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -select a, b, ntile(2) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -select a, b, ntile(1) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -select a, b, ntile(100) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -select a, b, ntile(65535) over (partition by a order by b rows between unbounded preceding and unbounded following) from (select 1 as a, number as b from numbers(65535)) limit 100; +select a, b, ntile(3) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 
as b from numbers(20)); +select a, b, ntile(2) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); +select a, b, ntile(1) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); +select a, b, ntile(100) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); +select a, b, ntile(65535) over (partition by a order by b) from (select 1 as a, number as b from numbers(65535)) limit 100; + + -- Bad arguments -select a, b, ntile(3.0) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } -select a, b, ntile('2') over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } -select a, b, ntile(0) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } -select a, b, ntile(-2) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } -select a, b, ntile(b + 1) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } +select a, b, ntile(3.0) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } +select a, b, ntile('2') over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } +select a, b, ntile(0) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } +select a, b, ntile(-2) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } +select a, b, ntile(b + 1) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } -- Bad window type select a, b, ntile(2) over (partition by a) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } diff --git a/tests/queries/0_stateless/02561_temporary_table_grants.reference b/tests/queries/0_stateless/02561_temporary_table_grants.reference new file mode 100644 index 00000000000..b462a5a7baa --- /dev/null +++ b/tests/queries/0_stateless/02561_temporary_table_grants.reference @@ -0,0 +1,4 @@ +OK +OK +OK +OK diff --git a/tests/queries/0_stateless/02561_temporary_table_grants.sh b/tests/queries/0_stateless/02561_temporary_table_grants.sh new file mode 100755 index 00000000000..6e0c96786e8 --- /dev/null +++ b/tests/queries/0_stateless/02561_temporary_table_grants.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +set -e + +user=user_$CLICKHOUSE_TEST_UNIQUE_NAME +$CLICKHOUSE_CLIENT --query "DROP USER IF EXISTS $user" +$CLICKHOUSE_CLIENT --query "CREATE USER $user IDENTIFIED WITH PLAINTEXT_PASSWORD BY 'hello'" + +$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_memory_02561(name String)" 2>&1 | grep -F "Not enough privileges. To execute this query it's necessary to have grant CREATE TEMPORARY TABLE" > /dev/null && echo "OK" + +$CLICKHOUSE_CLIENT --query "GRANT CREATE TEMPORARY TABLE ON *.* TO $user" +$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_memory_02561(name String)" + +$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_merge_tree_02561(name String) ENGINE = MergeTree() ORDER BY name" 2>&1 | grep -F "Not enough privileges. To execute this query it's necessary to have grant CREATE ARBITRARY TEMPORARY TABLE" > /dev/null && echo "OK" + +$CLICKHOUSE_CLIENT --query "GRANT CREATE ARBITRARY TEMPORARY TABLE ON *.* TO $user" + +$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_merge_tree_02561(name String) ENGINE = MergeTree() ORDER BY name" + +$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_file_02561(name String) ENGINE = File(TabSeparated)" 2>&1 | grep -F "Not enough privileges. To execute this query it's necessary to have grant FILE" > /dev/null && echo "OK" + +$CLICKHOUSE_CLIENT --query "GRANT FILE ON *.* TO $user" + +$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_file_02561(name String) ENGINE = File(TabSeparated)" + +$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_url_02561(name String) ENGINE = URL('http://127.0.0.1:8123?query=select+12', 'RawBLOB')" 2>&1 | grep -F "Not enough privileges. To execute this query it's necessary to have grant URL" > /dev/null && echo "OK" + +$CLICKHOUSE_CLIENT --query "GRANT URL ON *.* TO $user" + +$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_url_02561(name String) ENGINE = URL('http://127.0.0.1:8123?query=select+12', 'RawBLOB')" + +$CLICKHOUSE_CLIENT --query "DROP USER $user" diff --git a/tests/queries/0_stateless/02561_temporary_table_sessions.reference b/tests/queries/0_stateless/02561_temporary_table_sessions.reference new file mode 100644 index 00000000000..b3890873523 --- /dev/null +++ b/tests/queries/0_stateless/02561_temporary_table_sessions.reference @@ -0,0 +1,7 @@ +OK +1 d +2 e +3 f +1 a +2 b +3 c diff --git a/tests/queries/0_stateless/02561_temporary_table_sessions.sh b/tests/queries/0_stateless/02561_temporary_table_sessions.sh new file mode 100755 index 00000000000..a810a48cdf3 --- /dev/null +++ b/tests/queries/0_stateless/02561_temporary_table_sessions.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +SESSION_ID_A="$RANDOM$RANDOM$RANDOM" +SESSION_ID_B="$RANDOM$RANDOM$RANDOM" + +# Create temporary table and insert in SESSION_ID_A +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${SESSION_ID_A}" -d 'CREATE TEMPORARY TABLE table_merge_tree_02561 (id UInt64, info String) ENGINE = MergeTree ORDER BY id' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${SESSION_ID_A}" -d "INSERT INTO table_merge_tree_02561 VALUES (1, 'a'), (2, 'b'), (3, 'c')" + +# Select from SESSION_ID_B +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${SESSION_ID_B}" -d "SELECT * FROM table_merge_tree_02561" | tr -d '\n' | grep -F 'UNKNOWN_TABLE' > /dev/null && echo "OK" + +# Create temporary table, insert and select in SESSION_ID_B +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${SESSION_ID_B}" -d 'CREATE TEMPORARY TABLE table_merge_tree_02561 (id UInt64, info String) ENGINE = MergeTree ORDER BY id' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${SESSION_ID_B}" -d "INSERT INTO table_merge_tree_02561 VALUES (1, 'd'), (2, 'e'), (3, 'f')" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${SESSION_ID_B}" -d "SELECT * FROM table_merge_tree_02561" + +# Select from SESSION_ID_A +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${SESSION_ID_A}" -d "SELECT * FROM table_merge_tree_02561" + +# Drop tables in both sessions +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${SESSION_ID_A}" -d "DROP TEMPORARY TABLE table_merge_tree_02561" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${SESSION_ID_B}" -d "DROP TEMPORARY TABLE table_merge_tree_02561" diff --git a/tests/queries/0_stateless/02564_date_format.reference b/tests/queries/0_stateless/02564_date_format.reference index a5f2e362cc6..38c0d39340b 100644 --- a/tests/queries/0_stateless/02564_date_format.reference +++ b/tests/queries/0_stateless/02564_date_format.reference @@ -17,7 +17,7 @@ Jan Jan 366 366 00 00 01 01 -33 00 +January January \n \n AM AM AM diff --git a/tests/queries/0_stateless/02564_query_id_header.reference b/tests/queries/0_stateless/02564_query_id_header.reference index 413e8929f36..fa56fc23e3e 100644 --- a/tests/queries/0_stateless/02564_query_id_header.reference +++ b/tests/queries/0_stateless/02564_query_id_header.reference @@ -20,3 +20,7 @@ DROP TABLE t_query_id_header < Content-Type: text/plain; charset=UTF-8 < X-ClickHouse-Query-Id: query_id < X-ClickHouse-Timezone: timezone +BAD SQL +< Content-Type: text/plain; charset=UTF-8 +< X-ClickHouse-Query-Id: query_id +< X-ClickHouse-Timezone: timezone diff --git a/tests/queries/0_stateless/02564_query_id_header.sh b/tests/queries/0_stateless/02564_query_id_header.sh index 67ddbcfcc46..7184422a030 100755 --- a/tests/queries/0_stateless/02564_query_id_header.sh +++ b/tests/queries/0_stateless/02564_query_id_header.sh @@ -28,3 +28,4 @@ run_and_check_headers "INSERT INTO t_query_id_header VALUES (1)" run_and_check_headers "EXISTS TABLE t_query_id_header" run_and_check_headers "SELECT * FROM t_query_id_header" run_and_check_headers "DROP TABLE t_query_id_header" +run_and_check_headers "BAD SQL" diff --git a/tests/queries/0_stateless/02565_analyzer_limit_settings.reference b/tests/queries/0_stateless/02565_analyzer_limit_settings.reference index 6f23097612e..87e9f407cc8 100644 --- a/tests/queries/0_stateless/02565_analyzer_limit_settings.reference +++ b/tests/queries/0_stateless/02565_analyzer_limit_settings.reference @@ -62,7 +62,6 @@ SELECT * FROM numbers(10); SELECT * FROM numbers(10) LIMIT 3 OFFSET 2; 3 4 -5 SELECT * FROM numbers(10) 
LIMIT 5 OFFSET 2; 3 4 diff --git a/tests/queries/0_stateless/02566_ipv4_ipv6_binary_formats.reference b/tests/queries/0_stateless/02566_ipv4_ipv6_binary_formats.reference new file mode 100644 index 00000000000..a3d8a33f757 --- /dev/null +++ b/tests/queries/0_stateless/02566_ipv4_ipv6_binary_formats.reference @@ -0,0 +1,18 @@ +CapnProto +2001:db8:11a3:9d7:1f34:8a2e:7a0:765d 127.0.0.1 +Avro +2001:db8:11a3:9d7:1f34:8a2e:7a0:765d 127.0.0.1 +Arrow +2001:db8:11a3:9d7:1f34:8a2e:7a0:765d 127.0.0.1 +Parquet +ipv6 Nullable(FixedString(16)) +ipv4 Nullable(UInt32) +2001:db8:11a3:9d7:1f34:8a2e:7a0:765d 127.0.0.1 +ORC +ipv6 Nullable(String) +ipv4 Nullable(Int32) +2001:db8:11a3:9d7:1f34:8a2e:7a0:765d 127.0.0.1 +BSONEachRow +2001:db8:11a3:9d7:1f34:8a2e:7a0:765d 127.0.0.1 +MsgPack +2001:db8:11a3:9d7:1f34:8a2e:7a0:765d 127.0.0.1 diff --git a/tests/queries/0_stateless/02566_ipv4_ipv6_binary_formats.sh b/tests/queries/0_stateless/02566_ipv4_ipv6_binary_formats.sh new file mode 100755 index 00000000000..d27a2f9fcbb --- /dev/null +++ b/tests/queries/0_stateless/02566_ipv4_ipv6_binary_formats.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +echo "CapnProto" +${CLICKHOUSE_LOCAL} -q "select '2001:db8:11a3:9d7:1f34:8a2e:7a0:765d'::IPv6 as ipv6, '127.0.0.1'::IPv4 as ipv4 format CapnProto settings format_schema='$CURDIR/format_schemas/02566_ipv4_ipv6:Message'" > 02566_ipv4_ipv6_data.capnp +${CLICKHOUSE_LOCAL} -q "select * from file(02566_ipv4_ipv6_data.capnp, auto, 'ipv6 IPv6, ipv4 IPv4') settings format_schema='$CURDIR/format_schemas/02566_ipv4_ipv6:Message'" +rm 02566_ipv4_ipv6_data.capnp + +echo "Avro" +${CLICKHOUSE_LOCAL} -q "select '2001:db8:11a3:9d7:1f34:8a2e:7a0:765d'::IPv6 as ipv6, '127.0.0.1'::IPv4 as ipv4 format Avro" > 02566_ipv4_ipv6_data.avro +${CLICKHOUSE_LOCAL} -q "select * from file(02566_ipv4_ipv6_data.avro, auto, 'ipv6 IPv6, ipv4 IPv4')" +rm 02566_ipv4_ipv6_data.avro + +echo "Arrow" +${CLICKHOUSE_LOCAL} -q "select '2001:db8:11a3:9d7:1f34:8a2e:7a0:765d'::IPv6 as ipv6, '127.0.0.1'::IPv4 as ipv4 format Arrow" > 02566_ipv4_ipv6_data.arrow +${CLICKHOUSE_LOCAL} -q "select * from file(02566_ipv4_ipv6_data.arrow, auto, 'ipv6 IPv6, ipv4 IPv4')" +rm 02566_ipv4_ipv6_data.arrow + +echo "Parquet" +${CLICKHOUSE_LOCAL} -q "select '2001:db8:11a3:9d7:1f34:8a2e:7a0:765d'::IPv6 as ipv6, '127.0.0.1'::IPv4 as ipv4 format Parquet" > 02566_ipv4_ipv6_data.parquet +${CLICKHOUSE_LOCAL} -q "desc file(02566_ipv4_ipv6_data.parquet)" +${CLICKHOUSE_LOCAL} -q "select ipv6, toIPv4(ipv4) from file(02566_ipv4_ipv6_data.parquet, auto, 'ipv6 IPv6, ipv4 UInt32')" +rm 02566_ipv4_ipv6_data.parquet + +echo "ORC" +${CLICKHOUSE_LOCAL} -q "select '2001:db8:11a3:9d7:1f34:8a2e:7a0:765d'::IPv6 as ipv6, '127.0.0.1'::IPv4 as ipv4 format ORC" > 02566_ipv4_ipv6_data.orc +${CLICKHOUSE_LOCAL} -q "desc file(02566_ipv4_ipv6_data.orc)" +${CLICKHOUSE_LOCAL} -q "select ipv6, toIPv4(ipv4) from file(02566_ipv4_ipv6_data.orc, auto, 'ipv6 IPv6, ipv4 UInt32')" +rm 02566_ipv4_ipv6_data.orc + +echo "BSONEachRow" +${CLICKHOUSE_LOCAL} -q "select '2001:db8:11a3:9d7:1f34:8a2e:7a0:765d'::IPv6 as ipv6, '127.0.0.1'::IPv4 as ipv4 format BSONEachRow" > 02566_ipv4_ipv6_data.bson +${CLICKHOUSE_LOCAL} -q "select * from file(02566_ipv4_ipv6_data.bson, auto, 'ipv6 IPv6, ipv4 IPv4')" +rm 02566_ipv4_ipv6_data.bson + +echo "MsgPack" +${CLICKHOUSE_LOCAL} -q "select '2001:db8:11a3:9d7:1f34:8a2e:7a0:765d'::IPv6 as ipv6, 
'127.0.0.1'::IPv4 as ipv4 format MsgPack" > 02566_ipv4_ipv6_data.msgpack +${CLICKHOUSE_LOCAL} -q "select * from file(02566_ipv4_ipv6_data.msgpack, auto, 'ipv6 IPv6, ipv4 IPv4')" +rm 02566_ipv4_ipv6_data.msgpack + + diff --git a/tests/queries/0_stateless/02567_and_consistency.reference b/tests/queries/0_stateless/02567_and_consistency.reference index bcb2b5aecfb..e0014f187a8 100644 --- a/tests/queries/0_stateless/02567_and_consistency.reference +++ b/tests/queries/0_stateless/02567_and_consistency.reference @@ -6,10 +6,8 @@ true ===== true ===== -===== 1 ===== -===== allow_experimental_analyzer true #45440 diff --git a/tests/queries/0_stateless/02567_and_consistency.sql b/tests/queries/0_stateless/02567_and_consistency.sql index f02185a1a52..8ad06bd68cb 100644 --- a/tests/queries/0_stateless/02567_and_consistency.sql +++ b/tests/queries/0_stateless/02567_and_consistency.sql @@ -42,31 +42,10 @@ SETTINGS enable_optimize_predicate_expression = 0; SELECT '====='; -SELECT toBool(sin(SUM(number))) AS x -FROM -( - SELECT 1 AS number -) -GROUP BY number -HAVING 1 AND sin(sum(number)) -SETTINGS enable_optimize_predicate_expression = 1; -- { serverError 59 } - -SELECT '====='; - SELECT 1 and sin(1); SELECT '====='; -SELECT toBool(sin(SUM(number))) AS x -FROM -( - SELECT 1 AS number -) -GROUP BY number -HAVING x AND sin(1) -SETTINGS enable_optimize_predicate_expression = 0; -- { serverError 59 } - -SELECT '====='; SELECT 'allow_experimental_analyzer'; SET allow_experimental_analyzer = 1; diff --git a/tests/queries/0_stateless/02567_native_type_conversions.reference b/tests/queries/0_stateless/02567_native_type_conversions.reference new file mode 100644 index 00000000000..5c223870c11 --- /dev/null +++ b/tests/queries/0_stateless/02567_native_type_conversions.reference @@ -0,0 +1,3 @@ +1 +42 +1 diff --git a/tests/queries/0_stateless/02567_native_type_conversions.sh b/tests/queries/0_stateless/02567_native_type_conversions.sh new file mode 100755 index 00000000000..976c42f07c1 --- /dev/null +++ b/tests/queries/0_stateless/02567_native_type_conversions.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select 42::UInt8 as x format Native" | $CLICKHOUSE_LOCAL --structure="x UInt64" --input-format="Native" -q "select * from table" --input_format_native_allow_types_conversion=0 2>&1 | grep "TYPE_MISMATCH" -c + +$CLICKHOUSE_LOCAL -q "select 42::UInt8 as x format Native" | $CLICKHOUSE_LOCAL --structure="x UInt64" --input-format="Native" -q "select * from table" --input_format_native_allow_types_conversion=1 + +$CLICKHOUSE_LOCAL -q "select 'Hello' as x format Native" | $CLICKHOUSE_LOCAL --structure="x UInt64" --input-format="Native" -q "select * from table" --input_format_native_allow_types_conversion=1 2>&1 | grep 'while converting column "x" from type String to type UInt64' -c + diff --git a/tests/queries/0_stateless/02568_and_consistency.reference b/tests/queries/0_stateless/02568_and_consistency.reference new file mode 100644 index 00000000000..07a8041d0ee --- /dev/null +++ b/tests/queries/0_stateless/02568_and_consistency.reference @@ -0,0 +1,5 @@ += +1554690688 += +1554690688 += diff --git a/tests/queries/0_stateless/02568_and_consistency.sql b/tests/queries/0_stateless/02568_and_consistency.sql new file mode 100644 index 00000000000..4e76da78427 --- /dev/null +++ b/tests/queries/0_stateless/02568_and_consistency.sql @@ -0,0 +1,42 @@ +DROP TABLE IF EXISTS t1; +CREATE TABLE t1 (c0 Int32, PRIMARY KEY (c0)) ENGINE=MergeTree; +INSERT INTO t1 VALUES (1554690688); + +select '='; + +SELECT MIN(t1.c0) +FROM t1 +GROUP BY + (-sign(cos(t1.c0))) * (-max2(t1.c0, t1.c0 / t1.c0)), + t1.c0 * t1.c0, + sign(-exp(-t1.c0)) +HAVING -(-(MIN(t1.c0) + MIN(t1.c0))) AND (pow('{b' > '-657301241', log(-1004522121)) IS NOT NULL) +UNION ALL +SELECT MIN(t1.c0) +FROM t1 +GROUP BY + (-sign(cos(t1.c0))) * (-max2(t1.c0, t1.c0 / t1.c0)), + t1.c0 * t1.c0, + sign(-exp(-t1.c0)) +HAVING NOT (-(-(MIN(t1.c0) + MIN(t1.c0))) AND (pow('{b' > '-657301241', log(-1004522121)) IS NOT NULL)) +UNION ALL +SELECT MIN(t1.c0) +FROM t1 +GROUP BY + (-sign(cos(t1.c0))) * (-max2(t1.c0, t1.c0 / t1.c0)), + t1.c0 * t1.c0, + sign(-exp(-t1.c0)) +HAVING (-(-(MIN(t1.c0) + MIN(t1.c0))) AND (pow('{b' > '-657301241', log(-1004522121)) IS NOT NULL)) IS NULL +SETTINGS aggregate_functions_null_for_empty = 1, enable_optimize_predicate_expression = 0; + +select '='; + +SELECT MIN(t1.c0) +FROM t1 +GROUP BY t1.c0 +HAVING and(MIN(t1.c0) + MIN(t1.c0), 1) +SETTINGS aggregate_functions_null_for_empty = 1, enable_optimize_predicate_expression = 0; + +select '='; + +DROP TABLE IF EXISTS t1; diff --git a/tests/queries/0_stateless/02570_fallback_from_async_insert.sh b/tests/queries/0_stateless/02570_fallback_from_async_insert.sh index 9c158d6241b..d7c8944b89d 100755 --- a/tests/queries/0_stateless/02570_fallback_from_async_insert.sh +++ b/tests/queries/0_stateless/02570_fallback_from_async_insert.sh @@ -47,6 +47,7 @@ $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS" $CLICKHOUSE_CLIENT --query " SELECT 'id_' || splitByChar('_', query_id)[1] AS id FROM system.text_log WHERE query_id LIKE '%$query_id_suffix' AND message LIKE '%$message%' + ORDER BY id " $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS t_async_insert_fallback" diff --git a/tests/queries/0_stateless/02572_materialized_views_ignore_errors.reference b/tests/queries/0_stateless/02572_materialized_views_ignore_errors.reference new file mode 100644 index 00000000000..fc2e6b78122 --- /dev/null +++ b/tests/queries/0_stateless/02572_materialized_views_ignore_errors.reference @@ -0,0 +1,28 @@ +-- { echoOn } +select * from data_02572 order by key; +insert into 
data_02572 settings materialized_views_ignore_errors=1 values (2); +select * from data_02572 order by key; +2 +-- check system.query_views_log +system flush logs; +-- lower(status) to pass through clickhouse-test "exception" check +select lower(status::String), errorCodeToName(exception_code) +from system.query_views_log where + view_name = concatWithSeparator('.', currentDatabase(), 'push_to_proxy_mv_02572') and + view_target = concatWithSeparator('.', currentDatabase(), 'proxy_02572') + order by event_date, event_time +; +exceptionwhileprocessing UNKNOWN_TABLE +-- materialized_views_ignore_errors=0 +insert into data_02572 values (1); -- { serverError UNKNOWN_TABLE } +select * from data_02572 order by key; +1 +2 +create table receiver_02572 as data_02572; +insert into data_02572 values (3); +select * from data_02572 order by key; +1 +2 +3 +select * from receiver_02572 order by key; +3 diff --git a/tests/queries/0_stateless/02572_materialized_views_ignore_errors.sql b/tests/queries/0_stateless/02572_materialized_views_ignore_errors.sql new file mode 100644 index 00000000000..2d1f824b9b1 --- /dev/null +++ b/tests/queries/0_stateless/02572_materialized_views_ignore_errors.sql @@ -0,0 +1,40 @@ +set prefer_localhost_replica=1; + +drop table if exists data_02572; +drop table if exists proxy_02572; +drop table if exists push_to_proxy_mv_02572; +drop table if exists receiver_02572; + +create table data_02572 (key Int) engine=Memory(); + +create table proxy_02572 (key Int) engine=Distributed('test_shard_localhost', currentDatabase(), 'receiver_02572'); +-- ensure that insert fails +insert into proxy_02572 values (1); -- { serverError UNKNOWN_TABLE } + +-- proxy data with MV +create materialized view push_to_proxy_mv_02572 to proxy_02572 as select * from data_02572; + +-- { echoOn } +select * from data_02572 order by key; + +insert into data_02572 settings materialized_views_ignore_errors=1 values (2); +select * from data_02572 order by key; +-- check system.query_views_log +system flush logs; +-- lower(status) to pass through clickhouse-test "exception" check +select lower(status::String), errorCodeToName(exception_code) +from system.query_views_log where + view_name = concatWithSeparator('.', currentDatabase(), 'push_to_proxy_mv_02572') and + view_target = concatWithSeparator('.', currentDatabase(), 'proxy_02572') + order by event_date, event_time +; + +-- materialized_views_ignore_errors=0 +insert into data_02572 values (1); -- { serverError UNKNOWN_TABLE } +select * from data_02572 order by key; + +create table receiver_02572 as data_02572; + +insert into data_02572 values (3); +select * from data_02572 order by key; +select * from receiver_02572 order by key; diff --git a/tests/queries/0_stateless/02572_query_views_log_background_thread.reference b/tests/queries/0_stateless/02572_query_views_log_background_thread.reference new file mode 100644 index 00000000000..eeba62c5dc8 --- /dev/null +++ b/tests/queries/0_stateless/02572_query_views_log_background_thread.reference @@ -0,0 +1,19 @@ +-- { echoOn } +insert into buffer_02572 values (1); +-- ensure that the flush was not direct +select * from data_02572; +select * from copy_02572; +-- we cannot use OPTIMIZE, this will attach query context, so let's wait +select sleepEachRow(1) from numbers(3*2) format Null; +select * from data_02572; +1 +select * from copy_02572; +1 +system flush logs; +select count() > 0, lower(status::String), errorCodeToName(exception_code) + from system.query_views_log where + view_name = concatWithSeparator('.', 
currentDatabase(), 'mv_02572') and + view_target = concatWithSeparator('.', currentDatabase(), 'copy_02572') + group by 2, 3 +; +1 queryfinish OK diff --git a/tests/queries/0_stateless/02572_query_views_log_background_thread.sql b/tests/queries/0_stateless/02572_query_views_log_background_thread.sql new file mode 100644 index 00000000000..dc229412b13 --- /dev/null +++ b/tests/queries/0_stateless/02572_query_views_log_background_thread.sql @@ -0,0 +1,35 @@ +-- INSERT buffer_02572 -> data_02572 -> copy_02572 +-- ^^ +-- push to system.query_views_log + +drop table if exists buffer_02572; +drop table if exists data_02572; +drop table if exists copy_02572; +drop table if exists mv_02572; + +create table copy_02572 (key Int) engine=Memory(); +create table data_02572 (key Int) engine=Memory(); +create table buffer_02572 (key Int) engine=Buffer(currentDatabase(), data_02572, 1, + /* never flush directly; flush only from the background thread */ + /* min_time= */ 3, 3, + 1, 1e9, + 1, 1e9); +create materialized view mv_02572 to copy_02572 as select * from data_02572; + +-- { echoOn } +insert into buffer_02572 values (1); +-- ensure that the flush was not direct +select * from data_02572; +select * from copy_02572; +-- we cannot use OPTIMIZE, this will attach query context, so let's wait +select sleepEachRow(1) from numbers(3*2) format Null; +select * from data_02572; +select * from copy_02572; + +system flush logs; +select count() > 0, lower(status::String), errorCodeToName(exception_code) + from system.query_views_log where + view_name = concatWithSeparator('.', currentDatabase(), 'mv_02572') and + view_target = concatWithSeparator('.', currentDatabase(), 'copy_02572') + group by 2, 3 +; diff --git a/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.reference b/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.reference new file mode 100644 index 00000000000..029f80b46b0 --- /dev/null +++ b/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.reference @@ -0,0 +1,2 @@ +11 queryfinish OK +11 querystart OK diff --git a/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.sql b/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.sql new file mode 100644 index 00000000000..a7a74190821 --- /dev/null +++ b/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.sql @@ -0,0 +1,30 @@ +-- Tags: no-parallel, no-replicated-database +-- Tag no-parallel: due to attaching to system.query_log +-- Tag no-replicated-database: Replicated database will have extra queries + +-- Attach MV to system.query_log and check that writing to query_log does not fail + +set log_queries=1; + +drop table if exists log_proxy_02572; +drop table if exists push_to_logs_proxy_mv_02572; + +-- create log tables +system flush logs; +create table log_proxy_02572 as system.query_log engine=Distributed('test_shard_localhost', currentDatabase(), 'receiver_02572'); +create materialized view push_to_logs_proxy_mv_02572 to log_proxy_02572 as select * from system.query_log; + +select 1 format Null; +system flush logs; +system flush logs; + +drop table log_proxy_02572; +drop table push_to_logs_proxy_mv_02572; + +system flush logs; +-- lower() to pass through clickhouse-test "exception" check +select count(), lower(type::String), errorCodeToName(exception_code) + from system.query_log + where current_database = currentDatabase() + group by 2, 3 + order by 2; diff --git 
a/tests/queries/0_stateless/02575_merge_prewhere_different_default_kind.reference b/tests/queries/0_stateless/02575_merge_prewhere_different_default_kind.reference index 32db2512eab..c17e235ddad 100644 --- a/tests/queries/0_stateless/02575_merge_prewhere_different_default_kind.reference +++ b/tests/queries/0_stateless/02575_merge_prewhere_different_default_kind.reference @@ -1,12 +1,13 @@ -- { echoOn } -- for pure PREWHERE it is not addressed yet. SELECT * FROM m PREWHERE a = 'OK'; -OK 0 +OK 1970-01-01 0 SELECT * FROM m PREWHERE f = 0; -- { serverError ILLEGAL_PREWHERE } SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=0; -OK 0 +OK 1970-01-01 0 SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=1; -OK 0 +OK 1970-01-01 0 -- { echoOn } SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=1; -OK 0 +OK 1970-01-01 0 +OK 1970-01-01 0 diff --git a/tests/queries/0_stateless/02575_merge_prewhere_different_default_kind.sql b/tests/queries/0_stateless/02575_merge_prewhere_different_default_kind.sql index 0f1d582a26e..88c7923a570 100644 --- a/tests/queries/0_stateless/02575_merge_prewhere_different_default_kind.sql +++ b/tests/queries/0_stateless/02575_merge_prewhere_different_default_kind.sql @@ -6,20 +6,22 @@ DROP TABLE IF EXISTS t2; CREATE TABLE m ( - `a` String, - `f` UInt8 + a String, + date Date, + f UInt8 ) ENGINE = Merge(currentDatabase(), '^(t1|t2)$'); CREATE TABLE t1 ( a String, + date Date, f UInt8 ALIAS 0 ) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192; -INSERT INTO t1 VALUES ('OK'); +INSERT INTO t1 (a) VALUES ('OK'); -- { echoOn } -- for pure PREWHERE it is not addressed yet. @@ -32,12 +34,13 @@ SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=1; CREATE TABLE t2 ( a String, + date Date, f UInt8, ) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192; -INSERT INTO t2 VALUES ('OK', 1); +INSERT INTO t2 (a) VALUES ('OK'); -- { echoOn } SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=1; diff --git a/tests/queries/0_stateless/02577_keepermap_delete_update.reference b/tests/queries/0_stateless/02577_keepermap_delete_update.reference index 8ca8c0ca5a2..4a71f86a258 100644 --- a/tests/queries/0_stateless/02577_keepermap_delete_update.reference +++ b/tests/queries/0_stateless/02577_keepermap_delete_update.reference @@ -1,32 +1,32 @@ -1 Some string 0 -2 Some other string 0 -3 random 0 -4 random2 0 +1 Some string 0 0 +2 Some other string 0 0 +3 random 0 0 +4 random2 0 0 ----------- -3 random 0 -4 random2 0 +3 random 0 0 +4 random2 0 0 ----------- -3 random 0 +3 random 0 0 ----------- 0 ----------- -1 String 10 -2 String 20 -3 String 30 -4 String 40 +1 String 10 0 +2 String 20 0 +3 String 30 0 +4 String 40 0 ----------- -1 String 10 -2 String 20 -3 Another 30 -4 Another 40 +1 String 10 0 +2 String 20 0 +3 Another 30 1 +4 Another 40 1 ----------- -1 String 10 -2 String 20 -3 Another 30 -4 Another 40 +1 String 10 0 +2 String 20 0 +3 Another 30 1 +4 Another 40 1 ----------- -1 String 102 -2 String 202 -3 Another 302 -4 Another 402 +1 String 102 1 +2 String 202 1 +3 Another 302 2 +4 Another 402 2 ----------- diff --git a/tests/queries/0_stateless/02577_keepermap_delete_update.sql b/tests/queries/0_stateless/02577_keepermap_delete_update.sql index 199a653822c..ae80e6ead29 100644 --- a/tests/queries/0_stateless/02577_keepermap_delete_update.sql +++ b/tests/queries/0_stateless/02577_keepermap_delete_update.sql @@ -1,42 +1,44 @@ -- Tags: no-ordinary-database, no-fasttest -DROP TABLE IF EXISTS 
02661_keepermap_delete_update; +DROP TABLE IF EXISTS 02577_keepermap_delete_update; -CREATE TABLE 02661_keepermap_delete_update (key UInt64, value String, value2 UInt64) ENGINE=KeeperMap('/' || currentDatabase() || '/test02661_keepermap_delete_update') PRIMARY KEY(key); +CREATE TABLE 02577_keepermap_delete_update (key UInt64, value String, value2 UInt64) ENGINE=KeeperMap('/' || currentDatabase() || '/test02577_keepermap_delete_update') PRIMARY KEY(key); -INSERT INTO 02661_keepermap_delete_update VALUES (1, 'Some string', 0), (2, 'Some other string', 0), (3, 'random', 0), (4, 'random2', 0); +INSERT INTO 02577_keepermap_delete_update VALUES (1, 'Some string', 0), (2, 'Some other string', 0), (3, 'random', 0), (4, 'random2', 0); -SELECT * FROM 02661_keepermap_delete_update ORDER BY key; +SELECT *, _version FROM 02577_keepermap_delete_update ORDER BY key; SELECT '-----------'; -DELETE FROM 02661_keepermap_delete_update WHERE value LIKE 'Some%string'; +DELETE FROM 02577_keepermap_delete_update WHERE value LIKE 'Some%string'; -SELECT * FROM 02661_keepermap_delete_update ORDER BY key; +SELECT *, _version FROM 02577_keepermap_delete_update ORDER BY key; SELECT '-----------'; -ALTER TABLE 02661_keepermap_delete_update DELETE WHERE key >= 4; +ALTER TABLE 02577_keepermap_delete_update DELETE WHERE key >= 4; -SELECT * FROM 02661_keepermap_delete_update ORDER BY key; +SELECT *, _version FROM 02577_keepermap_delete_update ORDER BY key; SELECT '-----------'; -DELETE FROM 02661_keepermap_delete_update WHERE 1 = 1; -SELECT count() FROM 02661_keepermap_delete_update; +DELETE FROM 02577_keepermap_delete_update WHERE 1 = 1; +SELECT count() FROM 02577_keepermap_delete_update; SELECT '-----------'; -INSERT INTO 02661_keepermap_delete_update VALUES (1, 'String', 10), (2, 'String', 20), (3, 'String', 30), (4, 'String', 40); -SELECT * FROM 02661_keepermap_delete_update ORDER BY key; +INSERT INTO 02577_keepermap_delete_update VALUES (1, 'String', 10), (2, 'String', 20), (3, 'String', 30), (4, 'String', 40); +SELECT *, _version FROM 02577_keepermap_delete_update ORDER BY key; SELECT '-----------'; -ALTER TABLE 02661_keepermap_delete_update UPDATE value = 'Another' WHERE key > 2; -SELECT * FROM 02661_keepermap_delete_update ORDER BY key; +ALTER TABLE 02577_keepermap_delete_update UPDATE value = 'Another' WHERE key > 2; +SELECT *, _version FROM 02577_keepermap_delete_update ORDER BY key; SELECT '-----------'; -ALTER TABLE 02661_keepermap_delete_update UPDATE key = key * 10 WHERE 1 = 1; -- { serverError 36 } -SELECT * FROM 02661_keepermap_delete_update ORDER BY key; +ALTER TABLE 02577_keepermap_delete_update UPDATE key = key * 10 WHERE 1 = 1; -- { serverError BAD_ARGUMENTS } +SELECT *, _version FROM 02577_keepermap_delete_update ORDER BY key; SELECT '-----------'; -ALTER TABLE 02661_keepermap_delete_update UPDATE value2 = value2 * 10 + 2 WHERE value2 < 100; -SELECT * FROM 02661_keepermap_delete_update ORDER BY key; +ALTER TABLE 02577_keepermap_delete_update UPDATE value2 = value2 * 10 + 2 WHERE value2 < 100; +SELECT *, _version FROM 02577_keepermap_delete_update ORDER BY key; SELECT '-----------'; -DROP TABLE IF EXISTS 02661_keepermap_delete_update; +ALTER TABLE 02577_keepermap_delete_update ON CLUSTER test_shard_localhost UPDATE value2 = value2 * 10 + 2 WHERE value2 < 100; -- { serverError BAD_ARGUMENTS } + +DROP TABLE IF EXISTS 02577_keepermap_delete_update; diff --git a/tests/queries/0_stateless/02579_fill_empty_chunk.sql b/tests/queries/0_stateless/02579_fill_empty_chunk.sql index 14ae322d8c9..cbdbd7a9f84 100644 
--- a/tests/queries/0_stateless/02579_fill_empty_chunk.sql +++ b/tests/queries/0_stateless/02579_fill_empty_chunk.sql @@ -1,5 +1,7 @@ -- this SELECT produces empty chunk in FillingTransform +SET enable_positional_arguments = 0; + SELECT 2 AS x, arrayJoin([NULL, NULL, NULL]) diff --git a/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.reference b/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.reference new file mode 100644 index 00000000000..492b12dba56 --- /dev/null +++ b/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.reference @@ -0,0 +1,14 @@ +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 diff --git a/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.sh b/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.sh new file mode 100755 index 00000000000..89b5147f026 --- /dev/null +++ b/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='none'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='lz4'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='snappy'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='zstd'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='brotli'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='gzip'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table" + +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format ORC settings output_format_orc_compression_method='none'" | $CLICKHOUSE_LOCAL --input-format=ORC -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format ORC settings output_format_orc_compression_method='lz4'" | $CLICKHOUSE_LOCAL --input-format=ORC -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format ORC settings output_format_orc_compression_method='zstd'" | $CLICKHOUSE_LOCAL --input-format=ORC -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format ORC settings output_format_orc_compression_method='zlib'" | $CLICKHOUSE_LOCAL --input-format=ORC -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format ORC settings output_format_orc_compression_method='snappy'" | $CLICKHOUSE_LOCAL --input-format=ORC -q "select count() from table" + + +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Arrow settings output_format_arrow_compression_method='none'" | $CLICKHOUSE_LOCAL --input-format=Arrow -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Arrow settings output_format_arrow_compression_method='lz4_frame'" | 
$CLICKHOUSE_LOCAL --input-format=Arrow -q "select count() from table" +$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Arrow settings output_format_arrow_compression_method='zstd'" | $CLICKHOUSE_LOCAL --input-format=Arrow -q "select count() from table" + diff --git a/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.reference b/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.reference new file mode 100644 index 00000000000..3a92fcf283d --- /dev/null +++ b/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.reference @@ -0,0 +1,9 @@ +40000 +all_1_1_0 +all_2_2_0 +all_3_3_0 +all_4_4_0 +5000 all_1_1_0_9 +5000 all_2_2_0_9 +5000 all_3_3_0_9 +5000 all_4_4_0_9 diff --git a/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.sql b/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.sql new file mode 100644 index 00000000000..92e372d0cdb --- /dev/null +++ b/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.sql @@ -0,0 +1,27 @@ +-- Tags: long, no-debug, no-tsan, no-asan, no-ubsan, no-msan + +DROP TABLE IF EXISTS 02581_trips; + +CREATE TABLE 02581_trips(id UInt32, description String, id2 UInt32, PRIMARY KEY id) ENGINE=MergeTree ORDER BY id; + +-- Make multiple parts +INSERT INTO 02581_trips SELECT number, '', number FROM numbers(10000); +INSERT INTO 02581_trips SELECT number+10000000, '', number FROM numbers(10000); +INSERT INTO 02581_trips SELECT number+20000000, '', number FROM numbers(10000); +INSERT INTO 02581_trips SELECT number+30000000, '', number FROM numbers(10000); + +SELECT count() from 02581_trips WHERE description = ''; + +SELECT name FROM system.parts WHERE database=currentDatabase() AND table = '02581_trips' AND active ORDER BY name; + +-- Start multiple mutations simultaneously +SYSTEM STOP MERGES 02581_trips; +ALTER TABLE 02581_trips UPDATE description='5' WHERE id IN (SELECT (number*10 + 5)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=0; +ALTER TABLE 02581_trips UPDATE description='6' WHERE id IN (SELECT (number*10 + 6)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=0; +ALTER TABLE 02581_trips DELETE WHERE id IN (SELECT (number*10 + 7)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=0; +ALTER TABLE 02581_trips UPDATE description='8' WHERE id IN (SELECT (number*10 + 8)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=0; +SYSTEM START MERGES 02581_trips; +DELETE FROM 02581_trips WHERE id IN (SELECT (number*10 + 9)::UInt32 FROM numbers(200000000)); +SELECT count(), _part from 02581_trips WHERE description = '' GROUP BY _part ORDER BY _part; + +DROP TABLE 02581_trips; diff --git a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.reference b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.reference new file mode 100644 index 00000000000..452e0e0801e --- /dev/null +++ b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.reference @@ -0,0 +1,58 @@ +-- { echoOn } +SELECT count(), _part FROM 02581_trips GROUP BY _part ORDER BY _part; +10000 all_1_1_0 +10000 all_2_2_0 +10000 all_3_3_0 +10000 all_4_4_0 +-- Run mutation with a 'IN big subquery' +ALTER TABLE 02581_trips UPDATE description='1' WHERE id IN (SELECT (number*10+1)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2; +SELECT count(), _part FROM 02581_trips WHERE description = '' GROUP BY _part ORDER BY _part; +9000 
all_1_1_0_5 +9000 all_2_2_0_5 +9000 all_3_3_0_5 +9000 all_4_4_0_5 +ALTER TABLE 02581_trips UPDATE description='2' WHERE id IN (SELECT (number*10+2)::UInt32 FROM numbers(10000)) SETTINGS mutations_sync=2; +SELECT count(), _part FROM 02581_trips WHERE description = '' GROUP BY _part ORDER BY _part; +8000 all_1_1_0_6 +8000 all_2_2_0_6 +8000 all_3_3_0_6 +8000 all_4_4_0_6 +-- Run mutation with `id 'IN big subquery' +ALTER TABLE 02581_trips UPDATE description='a' WHERE id IN (SELECT (number*10)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; +28000 +ALTER TABLE 02581_trips UPDATE description='a' WHERE id IN (SELECT (number*10 + 1)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2, max_rows_in_set=1000; +SELECT count() from 02581_trips WHERE description = ''; +28000 +-- Run mutation with func(`id`) IN big subquery +ALTER TABLE 02581_trips UPDATE description='b' WHERE id::UInt64 IN (SELECT (number*10 + 2)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; +28000 +-- Run mutation with non-PK `id2` IN big subquery +ALTER TABLE 02581_trips UPDATE description='c' WHERE id2 IN (SELECT (number*10 + 3)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; +24000 +-- Run mutation with PK and non-PK IN big subquery +ALTER TABLE 02581_trips UPDATE description='c' +WHERE + (id IN (SELECT (number*10 + 4)::UInt32 FROM numbers(10000000))) OR + (id2 IN (SELECT (number*10 + 4)::UInt32 FROM numbers(10000000))) +SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; +20000 +-- Run mutation with PK and non-PK IN big subquery +ALTER TABLE 02581_trips UPDATE description='c' +WHERE + (id::UInt64 IN (SELECT (number*10 + 5)::UInt32 FROM numbers(10000000))) OR + (id2::UInt64 IN (SELECT (number*10 + 5)::UInt32 FROM numbers(10000000))) +SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; +16000 +-- Run mutation with PK and non-PK IN big subquery +ALTER TABLE 02581_trips UPDATE description='c' +WHERE + (id::UInt32 IN (SELECT (number*10 + 6)::UInt32 FROM numbers(10000000))) OR + ((id2+1)::String IN (SELECT (number*10 + 6)::UInt32 FROM numbers(10000000))) +SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; +12000 diff --git a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.sql b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.sql new file mode 100644 index 00000000000..7b52a89b16f --- /dev/null +++ b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.sql @@ -0,0 +1,60 @@ +DROP TABLE IF EXISTS 02581_trips; + +CREATE TABLE 02581_trips(id UInt32, id2 UInt32, description String) ENGINE=MergeTree ORDER BY id; + +-- Make multiple parts +INSERT INTO 02581_trips SELECT number, number, '' FROM numbers(10000); +INSERT INTO 02581_trips SELECT number+10000, number+10000, '' FROM numbers(10000); +INSERT INTO 02581_trips SELECT number+20000, number+20000, '' FROM numbers(10000); +INSERT INTO 02581_trips SELECT number+30000, number+30000, '' FROM numbers(10000); + +-- { echoOn } +SELECT count(), _part FROM 02581_trips GROUP BY _part ORDER BY _part; + +-- Run mutation with a 'IN big subquery' +ALTER TABLE 02581_trips UPDATE description='1' WHERE id IN (SELECT (number*10+1)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2; +SELECT count(), _part FROM 02581_trips WHERE description = 
'' GROUP BY _part ORDER BY _part; +ALTER TABLE 02581_trips UPDATE description='2' WHERE id IN (SELECT (number*10+2)::UInt32 FROM numbers(10000)) SETTINGS mutations_sync=2; +SELECT count(), _part FROM 02581_trips WHERE description = '' GROUP BY _part ORDER BY _part; + +-- Run mutation with `id 'IN big subquery' +ALTER TABLE 02581_trips UPDATE description='a' WHERE id IN (SELECT (number*10)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; + +ALTER TABLE 02581_trips UPDATE description='a' WHERE id IN (SELECT (number*10 + 1)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2, max_rows_in_set=1000; +SELECT count() from 02581_trips WHERE description = ''; + +-- Run mutation with func(`id`) IN big subquery +ALTER TABLE 02581_trips UPDATE description='b' WHERE id::UInt64 IN (SELECT (number*10 + 2)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; + +-- Run mutation with non-PK `id2` IN big subquery +ALTER TABLE 02581_trips UPDATE description='c' WHERE id2 IN (SELECT (number*10 + 3)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; + +-- Run mutation with PK and non-PK IN big subquery +ALTER TABLE 02581_trips UPDATE description='c' +WHERE + (id IN (SELECT (number*10 + 4)::UInt32 FROM numbers(10000000))) OR + (id2 IN (SELECT (number*10 + 4)::UInt32 FROM numbers(10000000))) +SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; + +-- Run mutation with PK and non-PK IN big subquery +ALTER TABLE 02581_trips UPDATE description='c' +WHERE + (id::UInt64 IN (SELECT (number*10 + 5)::UInt32 FROM numbers(10000000))) OR + (id2::UInt64 IN (SELECT (number*10 + 5)::UInt32 FROM numbers(10000000))) +SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; + +-- Run mutation with PK and non-PK IN big subquery +ALTER TABLE 02581_trips UPDATE description='c' +WHERE + (id::UInt32 IN (SELECT (number*10 + 6)::UInt32 FROM numbers(10000000))) OR + ((id2+1)::String IN (SELECT (number*10 + 6)::UInt32 FROM numbers(10000000))) +SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; +-- { echoOff } + +DROP TABLE 02581_trips; diff --git a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.reference b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.reference new file mode 100644 index 00000000000..3a7410d925f --- /dev/null +++ b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.reference @@ -0,0 +1,12 @@ +40000 +all_1_1_0 +all_2_2_0 +all_3_3_0 +all_4_4_0 +36000 +32000 +28000 +24000 +20000 +16000 +12000 diff --git a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.sql b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.sql new file mode 100644 index 00000000000..21ff453cd8e --- /dev/null +++ b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.sql @@ -0,0 +1,57 @@ +-- Tags: long, no-debug, no-tsan, no-asan, no-ubsan, no-msan + +DROP TABLE IF EXISTS 02581_trips; + +CREATE TABLE 02581_trips(id UInt32, description String, id2 UInt32, PRIMARY KEY id) ENGINE=MergeTree ORDER BY id; + +-- Make multiple parts +INSERT INTO 02581_trips SELECT number, '', number FROM numbers(10000); +INSERT INTO 02581_trips SELECT number+10000000, '', number FROM numbers(10000); +INSERT INTO 02581_trips SELECT 
number+20000000, '', number FROM numbers(10000); +INSERT INTO 02581_trips SELECT number+30000000, '', number FROM numbers(10000); + +SELECT count() from 02581_trips WHERE description = ''; + + +SELECT name FROM system.parts WHERE database=currentDatabase() AND table = '02581_trips' AND active ORDER BY name; + +-- Run mutation with `id` IN a big subquery +ALTER TABLE 02581_trips UPDATE description='a' WHERE id IN (SELECT (number*10)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; + +ALTER TABLE 02581_trips UPDATE description='a' WHERE id IN (SELECT (number*10 + 1)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=2, max_rows_in_set=1000; +SELECT count() from 02581_trips WHERE description = ''; + +-- Run mutation with func(`id`) IN big subquery +ALTER TABLE 02581_trips UPDATE description='b' WHERE id::UInt64 IN (SELECT (number*10 + 2)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; + +-- Run mutation with non-PK `id2` IN big subquery +ALTER TABLE 02581_trips UPDATE description='c' WHERE id2 IN (SELECT (number*10 + 3)::UInt32 FROM numbers(200000000)) SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; + +-- Run mutation with PK and non-PK IN big subquery +ALTER TABLE 02581_trips UPDATE description='c' +WHERE + (id IN (SELECT (number*10 + 4)::UInt32 FROM numbers(200000000))) OR + (id2 IN (SELECT (number*10 + 4)::UInt32 FROM numbers(200000000))) +SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; + +-- Run mutation with PK and non-PK IN big subquery +ALTER TABLE 02581_trips UPDATE description='c' +WHERE + (id::UInt64 IN (SELECT (number*10 + 5)::UInt32 FROM numbers(200000000))) OR + (id2::UInt64 IN (SELECT (number*10 + 5)::UInt32 FROM numbers(200000000))) +SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; + +-- Run mutation with PK and non-PK IN big subquery +ALTER TABLE 02581_trips UPDATE description='c' +WHERE + (id::UInt32 IN (SELECT (number*10 + 6)::UInt32 FROM numbers(200000000))) OR + ((id2+1)::String IN (SELECT (number*10 + 6)::UInt32 FROM numbers(200000000))) +SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; + +DROP TABLE 02581_trips; diff --git a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_with_storage_set.reference b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_with_storage_set.reference new file mode 100644 index 00000000000..267105947b9 --- /dev/null +++ b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_with_storage_set.reference @@ -0,0 +1,7 @@ +40000 +all_1_1_0 +all_2_2_0 +all_3_3_0 +all_4_4_0 +36000 +32000 diff --git a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_with_storage_set.sql b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_with_storage_set.sql new file mode 100644 index 00000000000..9a14f78628b --- /dev/null +++ b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_with_storage_set.sql @@ -0,0 +1,32 @@ +DROP TABLE IF EXISTS 02581_trips; + +CREATE TABLE 02581_trips(id UInt32, description String, id2 UInt32, PRIMARY KEY id) ENGINE=MergeTree ORDER BY id; + +-- Make multiple parts +INSERT INTO 02581_trips SELECT number, '', number FROM numbers(10000); +INSERT INTO 02581_trips SELECT number+10000000, '', number FROM numbers(10000); +INSERT INTO 02581_trips SELECT 
number+20000000, '', number FROM numbers(10000); +INSERT INTO 02581_trips SELECT number+30000000, '', number FROM numbers(10000); + +SELECT count() from 02581_trips WHERE description = ''; + + +SELECT name FROM system.parts WHERE database=currentDatabase() AND table = '02581_trips' AND active ORDER BY name; + +CREATE TABLE 02581_set (id UInt32) ENGINE = Set; + +INSERT INTO 02581_set SELECT number*10+7 FROM numbers(10000000); + +-- Run mutation with PK `id` IN big set +ALTER TABLE 02581_trips UPDATE description='d' WHERE id IN 02581_set SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; + +INSERT INTO 02581_set SELECT number*10+8 FROM numbers(10000000); + +-- Run mutation with PK `id` IN big set after it is updated +ALTER TABLE 02581_trips UPDATE description='d' WHERE id IN 02581_set SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; + + +DROP TABLE 02581_set; +DROP TABLE 02581_trips; diff --git a/tests/queries/0_stateless/25337_width_bucket.reference b/tests/queries/0_stateless/02581_width_bucket.reference similarity index 100% rename from tests/queries/0_stateless/25337_width_bucket.reference rename to tests/queries/0_stateless/02581_width_bucket.reference diff --git a/tests/queries/0_stateless/25337_width_bucket.sql b/tests/queries/0_stateless/02581_width_bucket.sql similarity index 100% rename from tests/queries/0_stateless/25337_width_bucket.sql rename to tests/queries/0_stateless/02581_width_bucket.sql diff --git a/tests/queries/0_stateless/25339_analyzer_join_subquery_empty_column_list.reference b/tests/queries/0_stateless/02582_analyzer_join_subquery_empty_column_list.reference similarity index 100% rename from tests/queries/0_stateless/25339_analyzer_join_subquery_empty_column_list.reference rename to tests/queries/0_stateless/02582_analyzer_join_subquery_empty_column_list.reference diff --git a/tests/queries/0_stateless/25339_analyzer_join_subquery_empty_column_list.sql b/tests/queries/0_stateless/02582_analyzer_join_subquery_empty_column_list.sql similarity index 100% rename from tests/queries/0_stateless/25339_analyzer_join_subquery_empty_column_list.sql rename to tests/queries/0_stateless/02582_analyzer_join_subquery_empty_column_list.sql diff --git a/tests/queries/0_stateless/02664_async_reading_with_small_limit.reference b/tests/queries/0_stateless/02582_async_reading_with_small_limit.reference similarity index 100% rename from tests/queries/0_stateless/02664_async_reading_with_small_limit.reference rename to tests/queries/0_stateless/02582_async_reading_with_small_limit.reference diff --git a/tests/queries/0_stateless/02664_async_reading_with_small_limit.sql b/tests/queries/0_stateless/02582_async_reading_with_small_limit.sql similarity index 100% rename from tests/queries/0_stateless/02664_async_reading_with_small_limit.sql rename to tests/queries/0_stateless/02582_async_reading_with_small_limit.sql diff --git a/tests/queries/0_stateless/02670_map_literal_cast.reference b/tests/queries/0_stateless/02583_map_literal_cast.reference similarity index 100% rename from tests/queries/0_stateless/02670_map_literal_cast.reference rename to tests/queries/0_stateless/02583_map_literal_cast.reference diff --git a/tests/queries/0_stateless/02670_map_literal_cast.sql b/tests/queries/0_stateless/02583_map_literal_cast.sql similarity index 100% rename from tests/queries/0_stateless/02670_map_literal_cast.sql rename to tests/queries/0_stateless/02583_map_literal_cast.sql diff --git 
a/tests/queries/0_stateless/02584_compressor_codecs.reference b/tests/queries/0_stateless/02584_compressor_codecs.reference new file mode 100644 index 00000000000..bb0850568bb --- /dev/null +++ b/tests/queries/0_stateless/02584_compressor_codecs.reference @@ -0,0 +1,9 @@ +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02584_compressor_codecs.sh b/tests/queries/0_stateless/02584_compressor_codecs.sh new file mode 100755 index 00000000000..fad6847b792 --- /dev/null +++ b/tests/queries/0_stateless/02584_compressor_codecs.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +echo "Hello, World!" > 02584_test_data + +$CLICKHOUSE_COMPRESSOR --codec 'Delta' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out' +$CLICKHOUSE_COMPRESSOR --codec 'Delta(5)' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out' 2>&1 | grep -c "ILLEGAL_CODEC_PARAMETER"; +$CLICKHOUSE_COMPRESSOR --codec 'Delta([1,2])' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out' 2>&1 | grep -c "ILLEGAL_CODEC_PARAMETER"; +$CLICKHOUSE_COMPRESSOR --codec 'Delta(4)' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out'; + +$CLICKHOUSE_COMPRESSOR --codec 'DoubleDelta' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out' +$CLICKHOUSE_COMPRESSOR --codec 'DoubleDelta(5)' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out' 2>&1 | grep -c "ILLEGAL_CODEC_PARAMETER"; +$CLICKHOUSE_COMPRESSOR --codec 'DoubleDelta([1,2])' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out' 2>&1 | grep -c "ILLEGAL_CODEC_PARAMETER"; +$CLICKHOUSE_COMPRESSOR --codec 'DoubleDelta(4)' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out'; + +$CLICKHOUSE_COMPRESSOR --codec 'Gorilla' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out' +$CLICKHOUSE_COMPRESSOR --codec 'Gorilla(5)' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out' 2>&1 | grep -c "ILLEGAL_CODEC_PARAMETER"; +$CLICKHOUSE_COMPRESSOR --codec 'Gorilla([1,2])' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out' 2>&1 | grep -c "ILLEGAL_CODEC_PARAMETER"; +$CLICKHOUSE_COMPRESSOR --codec 'Gorilla(4)' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out'; + +$CLICKHOUSE_COMPRESSOR --codec 'FPC' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out'; +$CLICKHOUSE_COMPRESSOR --codec 'FPC(5)' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out'; +$CLICKHOUSE_COMPRESSOR --codec 'FPC(5, 1)' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out' 2>&1 | grep -c "ILLEGAL_CODEC_PARAMETER"; +$CLICKHOUSE_COMPRESSOR --codec 'FPC([1,2,3])' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out' 2>&1 | grep -c "ILLEGAL_CODEC_PARAMETER"; +$CLICKHOUSE_COMPRESSOR --codec 'FPC(5, 4)' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out'; + + +$CLICKHOUSE_COMPRESSOR --codec 'T64' --codec 'LZ4' --input '02584_test_data' --output '02584_test_out' 2>&1 | grep -c "CANNOT_COMPRESS"; + +rm 02584_test_data 02584_test_out + diff --git a/tests/queries/0_stateless/02674_range_ipv4.reference b/tests/queries/0_stateless/02584_range_ipv4.reference similarity index 100% rename from tests/queries/0_stateless/02674_range_ipv4.reference rename to tests/queries/0_stateless/02584_range_ipv4.reference diff --git a/tests/queries/0_stateless/02674_range_ipv4.sql b/tests/queries/0_stateless/02584_range_ipv4.sql 
similarity index 100% rename from tests/queries/0_stateless/02674_range_ipv4.sql rename to tests/queries/0_stateless/02584_range_ipv4.sql diff --git a/tests/queries/0_stateless/00975_live_view_create.reference b/tests/queries/0_stateless/02585_query_status_deadlock.reference similarity index 100% rename from tests/queries/0_stateless/00975_live_view_create.reference rename to tests/queries/0_stateless/02585_query_status_deadlock.reference diff --git a/tests/queries/0_stateless/02585_query_status_deadlock.sh b/tests/queries/0_stateless/02585_query_status_deadlock.sh new file mode 100755 index 00000000000..227ecb1c1b2 --- /dev/null +++ b/tests/queries/0_stateless/02585_query_status_deadlock.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +QUERY_ID="${CLICKHOUSE_DATABASE}_test_02585_query_to_kill_id_1" + +$CLICKHOUSE_CLIENT --query_id="$QUERY_ID" -n -q " +create temporary table tmp as select * from numbers(500000000); +select * from remote('127.0.0.2', 'system.numbers_mt') where number in (select * from tmp);" &> /dev/null & + +$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" + +while true +do + res=$($CLICKHOUSE_CLIENT -q "select query, event_time from system.query_log where query_id = '$QUERY_ID' and query like 'select%' limit 1") + if [ -n "$res" ]; then + break + fi + sleep 1 +done + +$CLICKHOUSE_CLIENT -q "kill query where query_id = '$QUERY_ID' sync" &> /dev/null + diff --git a/tests/queries/0_stateless/02587_csv_big_numbers_inference.reference b/tests/queries/0_stateless/02587_csv_big_numbers_inference.reference new file mode 100644 index 00000000000..5b38606d1fd --- /dev/null +++ b/tests/queries/0_stateless/02587_csv_big_numbers_inference.reference @@ -0,0 +1,4 @@ +c1 Nullable(Float64) +100000000000000000000 +c1 Nullable(Float64) +-100000000000000000000 diff --git a/tests/queries/0_stateless/02587_csv_big_numbers_inference.sql b/tests/queries/0_stateless/02587_csv_big_numbers_inference.sql new file mode 100644 index 00000000000..45a93034524 --- /dev/null +++ b/tests/queries/0_stateless/02587_csv_big_numbers_inference.sql @@ -0,0 +1,5 @@ +desc format('CSV', '100000000000000000000'); +select * from format('CSV', '100000000000000000000'); +desc format('CSV', '-100000000000000000000'); +select * from format('CSV', '-100000000000000000000'); + diff --git a/tests/queries/0_stateless/02588_avro_date32_and_decimals.reference b/tests/queries/0_stateless/02588_avro_date32_and_decimals.reference new file mode 100644 index 00000000000..3a2d4fcf4f7 --- /dev/null +++ b/tests/queries/0_stateless/02588_avro_date32_and_decimals.reference @@ -0,0 +1,5 @@ +Date32 1942-08-16 +Decimal(9, 4) 4242.4242 +Decimal(18, 14) 4242.4242 +Decimal(38, 34) 4242.4242 +Decimal(76, 64) 4242.4242 diff --git a/tests/queries/0_stateless/02588_avro_date32_and_decimals.sh b/tests/queries/0_stateless/02588_avro_date32_and_decimals.sh new file mode 100755 index 00000000000..57363b85f0e --- /dev/null +++ b/tests/queries/0_stateless/02588_avro_date32_and_decimals.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select toInt32(-10000)::Date32 as d format Avro" | $CLICKHOUSE_LOCAL --input-format=Avro -q "select toTypeName(d), d from table" + +$CLICKHOUSE_LOCAL -q "select 4242.4242::Decimal32(4) as d format Avro" | $CLICKHOUSE_LOCAL --input-format=Avro -q "select toTypeName(d), d from table" +$CLICKHOUSE_LOCAL -q "select 4242.4242::Decimal64(14) as d format Avro" | $CLICKHOUSE_LOCAL --input-format=Avro -q "select toTypeName(d), d from table" +$CLICKHOUSE_LOCAL -q "select 4242.4242::Decimal128(34) as d format Avro" | $CLICKHOUSE_LOCAL --input-format=Avro -q "select toTypeName(d), d from table" +$CLICKHOUSE_LOCAL -q "select 4242.4242::Decimal256(64) as d format Avro" | $CLICKHOUSE_LOCAL --input-format=Avro -q "select toTypeName(d), d from table" + diff --git a/tests/queries/0_stateless/02588_parquet_bug.reference b/tests/queries/0_stateless/02588_parquet_bug.reference new file mode 100644 index 00000000000..44de58ae5c3 --- /dev/null +++ b/tests/queries/0_stateless/02588_parquet_bug.reference @@ -0,0 +1,3 @@ +cta 224.0.90.10 1670964058771367936 64066044 NYSE cqs_pillar quote \N \N \N 82.92 1 R 82.97 2 R +1670964058771367936 +1670946478544048640 ARCA cqs_pillar diff --git a/tests/queries/0_stateless/02588_parquet_bug.sh b/tests/queries/0_stateless/02588_parquet_bug.sh new file mode 100755 index 00000000000..f7e4ecf5e4c --- /dev/null +++ b/tests/queries/0_stateless/02588_parquet_bug.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_parquet/02588_data.parquet') where exchange_ts = 1670964058771367936" +$CLICKHOUSE_LOCAL -q "select exchange_ts from file('$CURDIR/data_parquet/02588_data.parquet') where exchange_ts = 1670964058771367936" +$CLICKHOUSE_LOCAL -q "select exchange_ts, market, product from file('$CURDIR/data_parquet/02588_data.parquet') where exchange_ts = 1670946478544048640" + diff --git a/tests/queries/0_stateless/00979_live_view_watch_continuous_aggregates.reference b/tests/queries/0_stateless/02589_bson_invalid_document_size.reference similarity index 100% rename from tests/queries/0_stateless/00979_live_view_watch_continuous_aggregates.reference rename to tests/queries/0_stateless/02589_bson_invalid_document_size.reference diff --git a/tests/queries/0_stateless/02589_bson_invalid_document_size.sql b/tests/queries/0_stateless/02589_bson_invalid_document_size.sql new file mode 100644 index 00000000000..b536b8d5c92 --- /dev/null +++ b/tests/queries/0_stateless/02589_bson_invalid_document_size.sql @@ -0,0 +1,4 @@ +set input_format_parallel_parsing=1; +set max_threads=0; +select * from format(BSONEachRow, 'x UInt32', x'00000000'); -- {serverError INCORRECT_DATA} + diff --git a/tests/queries/0_stateless/00979_live_view_watch_live.reference b/tests/queries/0_stateless/02590_bson_duplicate_column.reference similarity index 100% rename from tests/queries/0_stateless/00979_live_view_watch_live.reference rename to tests/queries/0_stateless/02590_bson_duplicate_column.reference diff --git a/tests/queries/0_stateless/02590_bson_duplicate_column.sql b/tests/queries/0_stateless/02590_bson_duplicate_column.sql new file mode 100644 index 00000000000..ea70fb9ba68 --- /dev/null +++ b/tests/queries/0_stateless/02590_bson_duplicate_column.sql @@ -0,0 +1 @@ +select * from format(BSONEachRow, 'x UInt32, y UInt32', 
x'1a0000001078002a0000001078002a0000001079002a00000000'); -- {serverError INCORRECT_DATA} diff --git a/tests/queries/0_stateless/02590_interserver_mode_client_info_initial_query_start_time.reference b/tests/queries/0_stateless/02590_interserver_mode_client_info_initial_query_start_time.reference new file mode 100644 index 00000000000..fbce8ae2026 --- /dev/null +++ b/tests/queries/0_stateless/02590_interserver_mode_client_info_initial_query_start_time.reference @@ -0,0 +1,8 @@ +SELECT +3 0 0 +3 0 0 +INSERT +CHECK +1 +2 +6 0 2 diff --git a/tests/queries/0_stateless/02590_interserver_mode_client_info_initial_query_start_time.sh b/tests/queries/0_stateless/02590_interserver_mode_client_info_initial_query_start_time.sh new file mode 100755 index 00000000000..5da643bd17b --- /dev/null +++ b/tests/queries/0_stateless/02590_interserver_mode_client_info_initial_query_start_time.sh @@ -0,0 +1,67 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# Tag no-fasttest: interserver mode requires SSL +# +# Test that checks that some of the ClientInfo fields are passed correctly in inter-server mode. +# NOTE: we need a .sh test (.sql is not enough) because queries on remote nodes do not have current_database = currentDatabase() +# +# Check-style suppression: select * from system.query_log where current_database = currentDatabase(); + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +function get_query_id() { random_str 10; } + +$CLICKHOUSE_CLIENT -nm -q " + drop table if exists buf; + drop table if exists dist; + drop table if exists data; + + create table data (key Int) engine=Memory(); + create table dist as data engine=Distributed(test_cluster_interserver_secret, currentDatabase(), data, key); + create table dist_dist as data engine=Distributed(test_cluster_interserver_secret, currentDatabase(), dist, key); + system stop distributed sends dist; +" + +echo "SELECT" +query_id="$(get_query_id)" +# Initialize the connection. If other tables already use this cluster, the +# connection may have been created long ago, but that is OK for this test: +# we only compare timestamps against the current time, and there should not +# be any significant difference. 
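+# The query_log checks below print three numbers per query id: +# count() -- QueryFinish entries recorded for the query, +# countIf(initial_query_start_time_microseconds != query_start_time_microseconds) -- entries where the propagated initial query start time differs from the local query start time, +# countIf(event_time - initial_query_start_time > 3) -- entries where the initial query start time lags more than 3 seconds behind the event time. 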
+$CLICKHOUSE_CLIENT --prefer_localhost_replica=0 --query_id "$query_id" -q "select * from dist" +$CLICKHOUSE_CLIENT -nm --param_query_id "$query_id" -q " + system flush logs; + select count(), countIf(initial_query_start_time_microseconds != query_start_time_microseconds), countIf(event_time - initial_query_start_time > 3) from system.query_log where type = 'QueryFinish' and initial_query_id = {query_id:String}; +" + +sleep 6 + +query_id="$(get_query_id)" +# this query (and all subsequent) should reuse the previous connection (at least most of the time) +$CLICKHOUSE_CLIENT --prefer_localhost_replica=0 --query_id "$query_id" -q "select * from dist" + +$CLICKHOUSE_CLIENT -nm --param_query_id "$query_id" -q " + system flush logs; + select count(), countIf(initial_query_start_time_microseconds != query_start_time_microseconds), countIf(event_time - initial_query_start_time > 3) from system.query_log where type = 'QueryFinish' and initial_query_id = {query_id:String}; +" + +echo "INSERT" +query_id="$(get_query_id)" +$CLICKHOUSE_CLIENT --prefer_localhost_replica=0 --query_id "$query_id" -nm -q " + insert into dist_dist values (1),(2); + select * from data; +" + +sleep 3 +$CLICKHOUSE_CLIENT -nm --param_query_id "$query_id" -q "system flush distributed dist_dist" +sleep 1 +$CLICKHOUSE_CLIENT -nm --param_query_id "$query_id" -q "system flush distributed dist" + +echo "CHECK" +$CLICKHOUSE_CLIENT -nm --param_query_id "$query_id" -q " + select * from data order by key; + system flush logs; + select count(), countIf(initial_query_start_time_microseconds != query_start_time_microseconds), countIf(event_time - initial_query_start_time > 3) from system.query_log where type = 'QueryFinish' and initial_query_id = {query_id:String}; +" diff --git a/tests/queries/0_stateless/02591_bson_long_tuple.reference b/tests/queries/0_stateless/02591_bson_long_tuple.reference new file mode 100644 index 00000000000..98eb634721e Binary files /dev/null and b/tests/queries/0_stateless/02591_bson_long_tuple.reference differ diff --git a/tests/queries/0_stateless/02591_bson_long_tuple.sql b/tests/queries/0_stateless/02591_bson_long_tuple.sql new file mode 100644 index 00000000000..e24150c8e6d --- /dev/null +++ b/tests/queries/0_stateless/02591_bson_long_tuple.sql @@ -0,0 +1,2 @@ +select tuple(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) as x format BSONEachRow; + diff --git a/tests/queries/0_stateless/02591_protobuf_nested_arrays.reference b/tests/queries/0_stateless/02591_protobuf_nested_arrays.reference new file mode 100644 index 00000000000..ff21f7fc2c9 --- /dev/null +++ b/tests/queries/0_stateless/02591_protobuf_nested_arrays.reference @@ -0,0 +1 @@ +[[[42,42],[],[42]],[[],[42],[42,42,42,42]]] diff --git a/tests/queries/0_stateless/02591_protobuf_nested_arrays.sh b/tests/queries/0_stateless/02591_protobuf_nested_arrays.sh new file mode 100755 index 00000000000..b6714932f3a --- /dev/null +++ b/tests/queries/0_stateless/02591_protobuf_nested_arrays.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +SCHEMADIR=$CURDIR/format_schemas +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select [[[42, 42], [], [42]], [[], [42], [42, 42, 42, 42]]] as a format Protobuf settings format_schema = '$SCHEMADIR/00825_protobuf_format_array_3dim:ABC'" | $CLICKHOUSE_LOCAL --input-format Protobuf --format_schema="$SCHEMADIR/00825_protobuf_format_array_3dim:ABC" --structure="a Array(Array(Array(Int32)))" -q "select * from table" + diff --git a/tests/queries/0_stateless/02592_avro_more_types.reference b/tests/queries/0_stateless/02592_avro_more_types.reference new file mode 100644 index 00000000000..5de415be619 --- /dev/null +++ b/tests/queries/0_stateless/02592_avro_more_types.reference @@ -0,0 +1,7 @@ +c1 FixedString(16) +c2 FixedString(16) +c3 FixedString(32) +c4 FixedString(32) +c5 Map(String, Int32) +c6 Decimal(18, 2) +42 42 42 42 {42:42} 2020-01-01 00:00:00.00 diff --git a/tests/queries/0_stateless/02592_avro_more_types.sh b/tests/queries/0_stateless/02592_avro_more_types.sh new file mode 100755 index 00000000000..7b87acd5f96 --- /dev/null +++ b/tests/queries/0_stateless/02592_avro_more_types.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select 42::Int128 as c1, 42::UInt128 as c2, 42::Int256 as c3, 42::UInt256 as c4, map(42, 42) as c5, toDateTime64('2020-01-01', 2) as c6 format Avro" | $CLICKHOUSE_LOCAL --input-format Avro --table test -q "desc test" + +$CLICKHOUSE_LOCAL -q "select 42::Int128 as c1, 42::UInt128 as c2, 42::Int256 as c3, 42::UInt256 as c4, map(42, 42) as c5, toDateTime64('2020-01-01', 2) as c6 format Avro" | $CLICKHOUSE_LOCAL --structure "c1 Int128, c2 UInt128, c3 Int256, c4 UInt256, c5 Map(UInt32, UInt32), c6 DateTime64(2)" --input-format Avro --table test -q "select * from test" + + + diff --git a/tests/queries/0_stateless/02592_avro_records_with_same_names.reference b/tests/queries/0_stateless/02592_avro_records_with_same_names.reference new file mode 100644 index 00000000000..7237be8884e --- /dev/null +++ b/tests/queries/0_stateless/02592_avro_records_with_same_names.reference @@ -0,0 +1 @@ +((1,2)) ((3,4,5)) diff --git a/tests/queries/0_stateless/02592_avro_records_with_same_names.sh b/tests/queries/0_stateless/02592_avro_records_with_same_names.sh new file mode 100755 index 00000000000..92a7846d3bd --- /dev/null +++ b/tests/queries/0_stateless/02592_avro_records_with_same_names.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select tuple(tuple(1, 2))::Tuple(x Tuple(a UInt32, b UInt32)) as c1, tuple(tuple(3, 4, 5))::Tuple(x Tuple(c UInt32, d UInt32, e UInt32)) as c2 format Avro" | $CLICKHOUSE_LOCAL --input-format Avro --structure 'c1 Tuple(x Tuple(a UInt32, b UInt32)), c2 Tuple(x Tuple(c UInt32, d UInt32, e UInt32))' -q "select * from table" diff --git a/tests/queries/0_stateless/02593_bson_more_types.reference b/tests/queries/0_stateless/02593_bson_more_types.reference new file mode 100644 index 00000000000..e84b3c8efb8 --- /dev/null +++ b/tests/queries/0_stateless/02593_bson_more_types.reference @@ -0,0 +1,5 @@ +{'a\\u0000b':42} +c1 Nullable(Int32) +c2 Nullable(Int32) +c3 Map(String, Nullable(Int32)) +a b {42:42} diff --git a/tests/queries/0_stateless/02593_bson_more_types.sh b/tests/queries/0_stateless/02593_bson_more_types.sh new file mode 100755 index 00000000000..cd7b9e0aaf3 --- /dev/null +++ b/tests/queries/0_stateless/02593_bson_more_types.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_LOCAL -q "select map('a\0b', 42) as c1 format BSONEachRow" | $CLICKHOUSE_LOCAL --input-format BSONEachRow --table test --structure "c1 Map(String, UInt32)" -q "select * from test" + +$CLICKHOUSE_LOCAL -q "select 'a'::Enum8('a' = 1) as c1, 'b'::Enum16('b' = 1) as c2, map(42, 42) as c3 format BSONEachRow" | $CLICKHOUSE_LOCAL --input-format BSONEachRow --table test -q "desc test" + +$CLICKHOUSE_LOCAL -q "select 'a'::Enum8('a' = 1) as c1, 'b'::Enum16('b' = 1) as c2, map(42, 42) as c3 format BSONEachRow" | $CLICKHOUSE_LOCAL --input-format BSONEachRow --table test --structure "c1 Enum8('a' = 1), c2 Enum16('b' = 1), c3 Map(UInt32, UInt32)" -q "select * from test" + + diff --git a/tests/queries/0_stateless/02594_msgpack_more_types.reference b/tests/queries/0_stateless/02594_msgpack_more_types.reference new file mode 100644 index 00000000000..8ccf11ccdb4 --- /dev/null +++ b/tests/queries/0_stateless/02594_msgpack_more_types.reference @@ -0,0 +1,2 @@ +a b 2020-01-01 42 42 42 42 42.42 42.42 42.42 42.42 +(42,'Hello') ({42:[1,2,3]},[([(1,2),(1,2)],'Hello',[1,2,3]),([],'World',[1])]) diff --git a/tests/queries/0_stateless/02594_msgpack_more_types.sh b/tests/queries/0_stateless/02594_msgpack_more_types.sh new file mode 100755 index 00000000000..bddfb5ad829 --- /dev/null +++ b/tests/queries/0_stateless/02594_msgpack_more_types.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select 'a'::Enum8('a' = 1) as c1, 'b'::Enum16('b' = 1) as c2, '2020-01-01'::Date32 as c3, 42::Int128 as c4, 42::UInt128 as c5, 42::Int256 as c6, 42::UInt256 as c7, 42.42::Decimal32(2) as c8, 42.42::Decimal64(2) as c9, 42.42::Decimal128(2) as c10, 42.42::Decimal256(2) as c11 format MsgPack" | $CLICKHOUSE_LOCAL --input-format MsgPack --structure="c1 Enum8('a' = 1), c2 Enum16('b' = 1), c3 Date32, c4 Int128, c5 UInt128, c6 Int256, c7 UInt256, c8 Decimal32(2), c9 Decimal64(2), c10 Decimal128(2), c11 Decimal256(2)" -q "select * from table" + +$CLICKHOUSE_LOCAL -q "select tuple(42, 'Hello') as c1, tuple(map(42, [1, 2, 3]), [tuple([tuple(1, 2), tuple(1, 2)], 'Hello', [1, 2, 3]), tuple([], 'World', [1])]) as c2 format MsgPack" | $CLICKHOUSE_LOCAL --input-format MsgPack --structure="c1 Tuple(UInt32, String), c2 Tuple(Map(UInt32, Array(UInt32)), Array(Tuple(Array(Tuple(UInt32, UInt32)), String, Array(UInt32))))" -q "select * from table" + diff --git a/tests/queries/0_stateless/02595_orc_arrow_parquet_more_types.reference b/tests/queries/0_stateless/02595_orc_arrow_parquet_more_types.reference new file mode 100644 index 00000000000..dee75e5a50c --- /dev/null +++ b/tests/queries/0_stateless/02595_orc_arrow_parquet_more_types.reference @@ -0,0 +1,5 @@ +42 42 42 42 a b +42 42 42 42 a b +42 42 42 42 a b 42.42 0.0.0.0 +\N +\N diff --git a/tests/queries/0_stateless/02595_orc_arrow_parquet_more_types.sh b/tests/queries/0_stateless/02595_orc_arrow_parquet_more_types.sh new file mode 100755 index 00000000000..f2127d69f37 --- /dev/null +++ b/tests/queries/0_stateless/02595_orc_arrow_parquet_more_types.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select 42::Int128 as c1, 42::UInt128 as c2, 42::Int256 as c3, 42::UInt256 as c4, 'a'::Enum8('a' = 1) as c5, 'b'::Enum16('b' = 1) as c6 format Parquet" | $CLICKHOUSE_LOCAL --input-format Parquet --structure="c1 Int128, c2 UInt128, c3 Int256, c4 UInt256, c5 Enum8('a' = 1), c6 Enum16('b' = 1)" -q "select * from table" + +$CLICKHOUSE_LOCAL -q "select 42::Int128 as c1, 42::UInt128 as c2, 42::Int256 as c3, 42::UInt256 as c4, 'a'::Enum8('a' = 1) as c5, 'b'::Enum16('b' = 1) as c6 format Arrow" | $CLICKHOUSE_LOCAL --input-format Arrow --structure="c1 Int128, c2 UInt128, c3 Int256, c4 UInt256, c5 Enum8('a' = 1), c6 Enum16('b' = 1)" -q "select * from table" + +$CLICKHOUSE_LOCAL -q "select 42::Int128 as c1, 42::UInt128 as c2, 42::Int256 as c3, 42::UInt256 as c4, 'a'::Enum8('a' = 1) as c5, 'b'::Enum16('b' = 1) as c6, 42.42::Decimal256(2) as c7, '0.0.0.0'::IPv4 as c8 format ORC" | $CLICKHOUSE_LOCAL --input-format ORC --structure="c1 Int128, c2 UInt128, c3 Int256, c4 UInt256, c5 Enum8('a' = 1), c6 Enum16('b' = 1), c7 Decimal256(2), c8 IPv4" -q "select * from table" + +$CLICKHOUSE_LOCAL -q "select NULL::Nullable(IPv6) as x format ORC" | $CLICKHOUSE_LOCAL --input-format ORC --structure="x Nullable(IPv6)" -q "select * from table" + +$CLICKHOUSE_LOCAL -q "select NULL::Nullable(UInt256) as x format ORC" | $CLICKHOUSE_LOCAL --input-format ORC --structure="x Nullable(UInt256)" -q "select * from table" + diff --git a/tests/queries/0_stateless/02596_build_set_and_remote.reference b/tests/queries/0_stateless/02596_build_set_and_remote.reference new file mode 100644 index 00000000000..8d12196ae33 --- /dev/null +++ b/tests/queries/0_stateless/02596_build_set_and_remote.reference @@ -0,0 +1,19 @@ +-- {echoOn} +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM system.one; +1 +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one); +1 +1 +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one) GROUP BY NULL; +1 +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one) GROUP BY 1; +1 +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one) GROUP BY 'A'; +1 +SELECT 1 IN ( SELECT 1 ) FROM remote('127.0.0.{1,2}', system.one) GROUP BY dummy; +1 +SELECT 1000.0001, toUInt64(arrayJoin([NULL, 257, 65536, NULL])), arrayExists(x -> (x IN (SELECT '2.55')), [-9223372036854775808]) FROM remote('127.0.0.{1,2}', system.one) GROUP BY NULL, NULL, NULL, NULL; +1000.0001 \N 0 +1000.0001 257 0 +1000.0001 65536 0 +1000.0001 \N 0 diff --git a/tests/queries/0_stateless/02596_build_set_and_remote.sql b/tests/queries/0_stateless/02596_build_set_and_remote.sql new file mode 100644 index 00000000000..7a904344c91 --- /dev/null +++ b/tests/queries/0_stateless/02596_build_set_and_remote.sql @@ -0,0 +1,14 @@ +-- {echoOn} +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM system.one; + +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one); + +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one) GROUP BY NULL; + +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one) GROUP BY 1; + +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one) GROUP BY 'A'; + +SELECT 1 IN ( SELECT 1 ) FROM remote('127.0.0.{1,2}', system.one) GROUP BY dummy; + +SELECT 1000.0001, toUInt64(arrayJoin([NULL, 257, 65536, NULL])), 
arrayExists(x -> (x IN (SELECT '2.55')), [-9223372036854775808]) FROM remote('127.0.0.{1,2}', system.one) GROUP BY NULL, NULL, NULL, NULL; diff --git a/tests/queries/0_stateless/02661_quantile_approx.reference b/tests/queries/0_stateless/02661_quantile_approx.reference new file mode 100644 index 00000000000..8369363aa9b --- /dev/null +++ b/tests/queries/0_stateless/02661_quantile_approx.reference @@ -0,0 +1,43 @@ +-- { echoOn } +with arrayJoin([0, 1, 2, 10]) as x select quantilesGK(100, 0.5, 0.4, 0.1)(x); +[1,1,0] +with arrayJoin([0, 6, 7, 9, 10]) as x select quantileGK(100, 0.5)(x); +7 +select quantilesGK(10000, 0.25, 0.5, 0.75, 0.0, 1.0, 0, 1)(number + 1) from numbers(1000); +[250,500,750,1,1000,1,1000] +select quantilesGK(10000, 0.01, 0.1, 0.11)(number + 1) from numbers(10); +[1,1,2] +with number + 1 as col select quantilesGK(10000, 0.25, 0.5, 0.75)(col), count(col), quantilesGK(10000, 0.0, 1.0)(col), sum(col) from numbers(1000); +[250,500,750] 1000 [1,1000] 500500 +select quantilesGK(1, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000); +[1,1,1,1,1] +select quantilesGK(10, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000); +[1,156,156,296,715] +select quantilesGK(100, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000); +[93,192,251,306,770] +select quantilesGK(1000, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000); +[99,199,249,313,776] +select quantilesGK(10000, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000); +[100,200,250,314,777] +select medianGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 0; -- { serverError BAD_ARGUMENTS } +select medianGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 1; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +select quantileGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 0; -- { serverError BAD_ARGUMENTS } +select quantileGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 1; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +select medianGK(100)(number) from numbers(10); +4 +select quantileGK(100)(number) from numbers(10); +4 +select quantileGK(100, 0.5)(number) from numbers(10); +4 +select quantileGK(100, 0.5, 0.75)(number) from numbers(10); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +select quantileGK('abc', 0.5)(number) from numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select quantileGK(1.23, 0.5)(number) from numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select quantileGK(-100, 0.5)(number) from numbers(10); -- { serverError BAD_ARGUMENTS } +select quantilesGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 0; -- { serverError BAD_ARGUMENTS } +select quantilesGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 1; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +select quantilesGK(100)(number) from numbers(10); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +select quantilesGK(100, 0.5)(number) from numbers(10); +[4] +select quantilesGK('abc', 0.5, 0.75)(number) from numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select quantilesGK(1.23, 0.5, 0.75)(number) from numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select quantilesGK(-100, 0.5, 0.75)(number) from numbers(10); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02661_quantile_approx.sql 
b/tests/queries/0_stateless/02661_quantile_approx.sql new file mode 100644 index 00000000000..52c2979ad44 --- /dev/null +++ b/tests/queries/0_stateless/02661_quantile_approx.sql @@ -0,0 +1,41 @@ +set allow_experimental_analyzer = 1; + +-- { echoOn } +with arrayJoin([0, 1, 2, 10]) as x select quantilesGK(100, 0.5, 0.4, 0.1)(x); +with arrayJoin([0, 6, 7, 9, 10]) as x select quantileGK(100, 0.5)(x); + +select quantilesGK(10000, 0.25, 0.5, 0.75, 0.0, 1.0, 0, 1)(number + 1) from numbers(1000); +select quantilesGK(10000, 0.01, 0.1, 0.11)(number + 1) from numbers(10); + +with number + 1 as col select quantilesGK(10000, 0.25, 0.5, 0.75)(col), count(col), quantilesGK(10000, 0.0, 1.0)(col), sum(col) from numbers(1000); + +select quantilesGK(1, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000); +select quantilesGK(10, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000); +select quantilesGK(100, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000); +select quantilesGK(1000, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000); +select quantilesGK(10000, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000); + + +select medianGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 0; -- { serverError BAD_ARGUMENTS } +select medianGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 1; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +select quantileGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 0; -- { serverError BAD_ARGUMENTS } +select quantileGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 1; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +select medianGK(100)(number) from numbers(10); +select quantileGK(100)(number) from numbers(10); +select quantileGK(100, 0.5)(number) from numbers(10); +select quantileGK(100, 0.5, 0.75)(number) from numbers(10); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +select quantileGK('abc', 0.5)(number) from numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select quantileGK(1.23, 0.5)(number) from numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select quantileGK(-100, 0.5)(number) from numbers(10); -- { serverError BAD_ARGUMENTS } + +select quantilesGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 0; -- { serverError BAD_ARGUMENTS } +select quantilesGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 1; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +select quantilesGK(100)(number) from numbers(10); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +select quantilesGK(100, 0.5)(number) from numbers(10); +select quantilesGK('abc', 0.5, 0.75)(number) from numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select quantilesGK(1.23, 0.5, 0.75)(number) from numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select quantilesGK(-100, 0.5, 0.75)(number) from numbers(10); -- { serverError BAD_ARGUMENTS } +-- { echoOff } diff --git a/tests/queries/0_stateless/02662_first_last_value.reference b/tests/queries/0_stateless/02662_first_last_value.reference new file mode 100644 index 00000000000..0c4503d61ac --- /dev/null +++ b/tests/queries/0_stateless/02662_first_last_value.reference @@ -0,0 +1,20 @@ +-- { echo } + +-- create table +drop table if exists test; +create table test(`a` Nullable(Int32), `b` Nullable(Int32)) ENGINE = Memory; +insert into test (a,b) values (1,null), 
(2,3), (4, 5), (6,null); +-- first value +select first_value(b) from test; +3 +select first_value(b) ignore nulls from test; +3 +select first_value(b) respect nulls from test; +\N +-- last value +select last_value(b) from test; +5 +select last_value(b) ignore nulls from test; +5 +select last_value(b) respect nulls from test; +\N diff --git a/tests/queries/0_stateless/02662_first_last_value.sql b/tests/queries/0_stateless/02662_first_last_value.sql new file mode 100644 index 00000000000..126e89f5325 --- /dev/null +++ b/tests/queries/0_stateless/02662_first_last_value.sql @@ -0,0 +1,16 @@ +-- { echo } + +-- create table +drop table if exists test; +create table test(`a` Nullable(Int32), `b` Nullable(Int32)) ENGINE = Memory; +insert into test (a,b) values (1,null), (2,3), (4, 5), (6,null); + +-- first value +select first_value(b) from test; +select first_value(b) ignore nulls from test; +select first_value(b) respect nulls from test; + +-- last value +select last_value(b) from test; +select last_value(b) ignore nulls from test; +select last_value(b) respect nulls from test; diff --git a/tests/queries/0_stateless/02662_sparse_columns_mutations_1.reference b/tests/queries/0_stateless/02662_sparse_columns_mutations_1.reference new file mode 100644 index 00000000000..3f5c8b6ed1f --- /dev/null +++ b/tests/queries/0_stateless/02662_sparse_columns_mutations_1.reference @@ -0,0 +1,13 @@ +1_1_1_0 String Sparse +477 ['','foo'] +1_1_1_0_2 Nullable(String) Default +477 ['','foo'] +1_1_1_0_2 Nullable(String) Default +2_3_3_0 Nullable(String) Default +954 ['','foo'] +1_1_1_0_4 String Default +2_3_3_0_4 String Default +954 ['','foo'] +1_1_1_1_4 String Sparse +2_3_3_1_4 String Sparse +954 ['','foo'] diff --git a/tests/queries/0_stateless/02662_sparse_columns_mutations_1.sql b/tests/queries/0_stateless/02662_sparse_columns_mutations_1.sql new file mode 100644 index 00000000000..3bf37e8e62b --- /dev/null +++ b/tests/queries/0_stateless/02662_sparse_columns_mutations_1.sql @@ -0,0 +1,49 @@ +SET mutations_sync = 2; + +DROP TABLE IF EXISTS t_sparse_mutations_1; + +CREATE TABLE t_sparse_mutations_1 (key UInt8, id UInt64, s String) +ENGINE = MergeTree ORDER BY id PARTITION BY key +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9; + +INSERT INTO t_sparse_mutations_1 SELECT 1, number, if (number % 21 = 0, 'foo', '') FROM numbers (10000); + +SELECT name, type, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_1' AND column = 's' AND active +ORDER BY name; + +SELECT countIf(s = 'foo'), arraySort(groupUniqArray(s)) FROM t_sparse_mutations_1; + +ALTER TABLE t_sparse_mutations_1 MODIFY COLUMN s Nullable(String); + +SELECT name, type, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_1' AND column = 's' AND active +ORDER BY name; + +SELECT countIf(s = 'foo'), arraySort(groupUniqArray(s)) FROM t_sparse_mutations_1; + +INSERT INTO t_sparse_mutations_1 SELECT 2, number, if (number % 21 = 0, 'foo', '') FROM numbers (10000); + +SELECT name, type, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_1' AND column = 's' AND active +ORDER BY name; + +SELECT countIf(s = 'foo'), arraySort(groupUniqArray(s)) FROM t_sparse_mutations_1; + +ALTER TABLE t_sparse_mutations_1 MODIFY COLUMN s String; + +SELECT name, type, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_1' AND column = 
's' AND active +ORDER BY name; + +SELECT countIf(s = 'foo'), arraySort(groupUniqArray(s)) FROM t_sparse_mutations_1; + +OPTIMIZE TABLE t_sparse_mutations_1 FINAL; + +SELECT name, type, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_1' AND column = 's' AND active +ORDER BY name; + +SELECT countIf(s = 'foo'), arraySort(groupUniqArray(s)) FROM t_sparse_mutations_1; + +DROP TABLE t_sparse_mutations_1; diff --git a/tests/queries/0_stateless/02662_sparse_columns_mutations_2.reference b/tests/queries/0_stateless/02662_sparse_columns_mutations_2.reference new file mode 100644 index 00000000000..64eb0119982 --- /dev/null +++ b/tests/queries/0_stateless/02662_sparse_columns_mutations_2.reference @@ -0,0 +1,6 @@ +String Default +10000 49995000 +String Default +770 3848845 +String Sparse +770 3848845 diff --git a/tests/queries/0_stateless/02662_sparse_columns_mutations_2.sql b/tests/queries/0_stateless/02662_sparse_columns_mutations_2.sql new file mode 100644 index 00000000000..561bd164200 --- /dev/null +++ b/tests/queries/0_stateless/02662_sparse_columns_mutations_2.sql @@ -0,0 +1,33 @@ +SET mutations_sync = 2; + +DROP TABLE IF EXISTS t_sparse_mutations_2; + +CREATE TABLE t_sparse_mutations_2 (key UInt8, id UInt64, s String) +ENGINE = MergeTree ORDER BY id PARTITION BY key +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9; + +INSERT INTO t_sparse_mutations_2 SELECT 1, number, toString(number) FROM numbers (10000); + +SELECT type, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_2' AND column = 's' AND active +ORDER BY name; + +SELECT count(), sum(s::UInt64) FROM t_sparse_mutations_2 WHERE s != ''; + +ALTER TABLE t_sparse_mutations_2 UPDATE s = '' WHERE id % 13 != 0; + +SELECT type, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_2' AND column = 's' AND active +ORDER BY name; + +SELECT count(), sum(s::UInt64) FROM t_sparse_mutations_2 WHERE s != ''; + +OPTIMIZE TABLE t_sparse_mutations_2 FINAL; + +SELECT type, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_2' AND column = 's' AND active +ORDER BY name; + +SELECT count(), sum(s::UInt64) FROM t_sparse_mutations_2 WHERE s != ''; + +DROP TABLE t_sparse_mutations_2; diff --git a/tests/queries/0_stateless/02662_sparse_columns_mutations_3.reference b/tests/queries/0_stateless/02662_sparse_columns_mutations_3.reference new file mode 100644 index 00000000000..1501fd27fd5 --- /dev/null +++ b/tests/queries/0_stateless/02662_sparse_columns_mutations_3.reference @@ -0,0 +1,11 @@ +String Default +Tuple(UInt64, UInt64, String, String, String) Default ['1','2','3','4','5'] ['UInt64','UInt64','String','String','String'] ['Default','Default','Default','Default','Default'] +10000 0 ['1'] ['0'] [''] +Tuple(UInt64, UInt64, String, String, String) Default ['1','2','3','4','5'] ['UInt64','UInt64','String','String','String'] ['Default','Sparse','Default','Default','Sparse'] +10000 0 ['1'] ['0'] [''] +Tuple(UInt64, UInt64, UInt64, UInt64, String) Default ['1','2','3','4','5'] ['UInt64','UInt64','UInt64','UInt64','String'] ['Default','Sparse','Default','Default','Sparse'] +10000 0 10000 0 [''] +Tuple(UInt64, UInt64, UInt64, UInt64, String) Default ['1','2','3','4','5'] ['UInt64','UInt64','UInt64','UInt64','String'] ['Default','Sparse','Default','Sparse','Sparse'] +10000 0 10000 0 [''] +Tuple(Nullable(UInt64), 
Nullable(UInt64), Nullable(UInt64), Nullable(UInt64), Nullable(String)) Default ['1','1.null','2','2.null','3','3.null','4','4.null','5','5.null'] ['Nullable(UInt64)','UInt8','Nullable(UInt64)','UInt8','Nullable(UInt64)','UInt8','Nullable(UInt64)','UInt8','Nullable(String)','UInt8'] ['Default','Default','Default','Default','Default','Default','Default','Default','Default','Default'] +10000 0 10000 0 [''] diff --git a/tests/queries/0_stateless/02662_sparse_columns_mutations_3.sql b/tests/queries/0_stateless/02662_sparse_columns_mutations_3.sql new file mode 100644 index 00000000000..6e66336dcbc --- /dev/null +++ b/tests/queries/0_stateless/02662_sparse_columns_mutations_3.sql @@ -0,0 +1,85 @@ +SET mutations_sync = 2; + +DROP TABLE IF EXISTS t_sparse_mutations_3; + +CREATE TABLE t_sparse_mutations_3 (key UInt8, id UInt64, s String) +ENGINE = MergeTree ORDER BY id PARTITION BY key +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9; + +INSERT INTO t_sparse_mutations_3 SELECT 1, number, toString(tuple(1, 0, '1', '0', '')) FROM numbers (10000); + +SELECT type, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_3' AND column = 's' AND active +ORDER BY name; + +ALTER TABLE t_sparse_mutations_3 MODIFY COLUMN s Tuple(UInt64, UInt64, String, String, String); + +SELECT + type, + serialization_kind, + subcolumns.names, + subcolumns.types, + subcolumns.serializations +FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_3' AND column = 's' AND active +ORDER BY name; + +SELECT sum(s.1), sum(s.2), groupUniqArray(s.3), groupUniqArray(s.4), groupUniqArray(s.5) FROM t_sparse_mutations_3; + +OPTIMIZE TABLE t_sparse_mutations_3 FINAL; + +SELECT + type, + serialization_kind, + subcolumns.names, + subcolumns.types, + subcolumns.serializations +FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_3' AND column = 's' AND active +ORDER BY name; + +SELECT sum(s.1), sum(s.2), groupUniqArray(s.3), groupUniqArray(s.4), groupUniqArray(s.5) FROM t_sparse_mutations_3; + +ALTER TABLE t_sparse_mutations_3 MODIFY COLUMN s Tuple(UInt64, UInt64, UInt64, UInt64, String); + +SELECT + type, + serialization_kind, + subcolumns.names, + subcolumns.types, + subcolumns.serializations +FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_3' AND column = 's' AND active +ORDER BY name; + +SELECT sum(s.1), sum(s.2), sum(s.3), sum(s.4), groupUniqArray(s.5) FROM t_sparse_mutations_3; + +OPTIMIZE TABLE t_sparse_mutations_3 FINAL; + +SELECT + type, + serialization_kind, + subcolumns.names, + subcolumns.types, + subcolumns.serializations +FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_3' AND column = 's' AND active +ORDER BY name; + +SELECT sum(s.1), sum(s.2), sum(s.3), sum(s.4), groupUniqArray(s.5) FROM t_sparse_mutations_3; + +ALTER TABLE t_sparse_mutations_3 MODIFY COLUMN s Tuple(Nullable(UInt64), Nullable(UInt64), Nullable(UInt64), Nullable(UInt64), Nullable(String)); + +SELECT + type, + serialization_kind, + subcolumns.names, + subcolumns.types, + subcolumns.serializations +FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_3' AND column = 's' AND active +ORDER BY name; + +SELECT sum(s.1), sum(s.2), sum(s.3), sum(s.4), groupUniqArray(s.5) FROM t_sparse_mutations_3; + +DROP TABLE t_sparse_mutations_3; diff --git 
a/tests/queries/0_stateless/02662_sparse_columns_mutations_4.reference b/tests/queries/0_stateless/02662_sparse_columns_mutations_4.reference new file mode 100644 index 00000000000..2e24ab44f9a --- /dev/null +++ b/tests/queries/0_stateless/02662_sparse_columns_mutations_4.reference @@ -0,0 +1,2 @@ +UInt64 Sparse +String Default diff --git a/tests/queries/0_stateless/02662_sparse_columns_mutations_4.sql b/tests/queries/0_stateless/02662_sparse_columns_mutations_4.sql new file mode 100644 index 00000000000..039af658489 --- /dev/null +++ b/tests/queries/0_stateless/02662_sparse_columns_mutations_4.sql @@ -0,0 +1,21 @@ +SET mutations_sync = 2; + +DROP TABLE IF EXISTS t_sparse_mutations_4; + +CREATE TABLE t_sparse_mutations_4 (k UInt64, v UInt64) +ENGINE = MergeTree ORDER BY k +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9; + +INSERT INTO t_sparse_mutations_4 SELECT number, 0 FROM numbers(10000); + +SELECT type, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_4' AND column = 'v' AND active +ORDER BY name; + +ALTER TABLE t_sparse_mutations_4 MODIFY COLUMN v String; + +SELECT type, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_4' AND column = 'v' AND active +ORDER BY name; + +DROP TABLE t_sparse_mutations_4; diff --git a/tests/queries/0_stateless/02662_sparse_columns_mutations_5.reference b/tests/queries/0_stateless/02662_sparse_columns_mutations_5.reference new file mode 100644 index 00000000000..698d61cbb24 --- /dev/null +++ b/tests/queries/0_stateless/02662_sparse_columns_mutations_5.reference @@ -0,0 +1,2 @@ +Tuple(UInt64, UInt64) Default ['1','2'] ['UInt64','UInt64'] ['Sparse','Sparse'] +Tuple(UInt64, String) Default ['1','2'] ['UInt64','String'] ['Sparse','Default'] diff --git a/tests/queries/0_stateless/02662_sparse_columns_mutations_5.sql b/tests/queries/0_stateless/02662_sparse_columns_mutations_5.sql new file mode 100644 index 00000000000..79bac836bdc --- /dev/null +++ b/tests/queries/0_stateless/02662_sparse_columns_mutations_5.sql @@ -0,0 +1,21 @@ +SET mutations_sync = 2; + +DROP TABLE IF EXISTS t_sparse_mutations_5; + +CREATE TABLE t_sparse_mutations_5 (k UInt64, t Tuple(UInt64, UInt64)) +ENGINE = MergeTree ORDER BY k +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9; + +INSERT INTO t_sparse_mutations_5 SELECT number, (0, 0) FROM numbers(10000); + +SELECT type, serialization_kind, subcolumns.names, subcolumns.types, subcolumns.serializations FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_5' AND column = 't' AND active +ORDER BY name; + +ALTER TABLE t_sparse_mutations_5 MODIFY COLUMN t Tuple(UInt64, String); + +SELECT type, serialization_kind, subcolumns.names, subcolumns.types, subcolumns.serializations FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_mutations_5' AND column = 't' AND active +ORDER BY name; + +DROP TABLE t_sparse_mutations_5; diff --git a/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.reference b/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.reference new file mode 100644 index 00000000000..d083e178586 --- /dev/null +++ b/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.reference @@ -0,0 +1,89 @@ +1 test +3 another +QUERY id: 0 + PROJECTION COLUMNS + a Int32 + b LowCardinality(String) + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: a, 
result_type: Int32, source_id: 3 + COLUMN id: 4, column_name: b, result_type: LowCardinality(String), source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.02668_logical_optimizer + WHERE + FUNCTION id: 5, function_name: in, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 6, nodes: 2 + COLUMN id: 7, column_name: a, result_type: Int32, source_id: 3 + CONSTANT id: 8, constant_value: Tuple_(UInt64_1, UInt64_3), constant_value_type: Tuple(UInt8, UInt8) +1 test +QUERY id: 0 + PROJECTION COLUMNS + a Int32 + b LowCardinality(String) + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: a, result_type: Int32, source_id: 3 + COLUMN id: 4, column_name: b, result_type: LowCardinality(String), source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.02668_logical_optimizer + WHERE + FUNCTION id: 5, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 6, nodes: 2 + COLUMN id: 7, column_name: a, result_type: Int32, source_id: 3 + CONSTANT id: 8, constant_value: UInt64_1, constant_value_type: UInt8 +QUERY id: 0 + PROJECTION COLUMNS + a Int32 + b LowCardinality(String) + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: a, result_type: Int32, source_id: 3 + COLUMN id: 4, column_name: b, result_type: LowCardinality(String), source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.02668_logical_optimizer + WHERE + CONSTANT id: 5, constant_value: UInt64_0, constant_value_type: UInt8 +3 another +QUERY id: 0 + PROJECTION COLUMNS + a Int32 + b LowCardinality(String) + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: a, result_type: Int32, source_id: 3 + COLUMN id: 4, column_name: b, result_type: LowCardinality(String), source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.02668_logical_optimizer + WHERE + FUNCTION id: 5, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 6, nodes: 2 + FUNCTION id: 7, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 8, nodes: 2 + COLUMN id: 9, column_name: a, result_type: Int32, source_id: 3 + CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 11, function_name: equals, function_type: ordinary, result_type: LowCardinality(UInt8) + ARGUMENTS + LIST id: 12, nodes: 2 + COLUMN id: 13, column_name: b, result_type: LowCardinality(String), source_id: 3 + CONSTANT id: 14, constant_value: \'another\', constant_value_type: String +2 test2 +QUERY id: 0 + PROJECTION COLUMNS + a Int32 + b LowCardinality(String) + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: a, result_type: Int32, source_id: 3 + COLUMN id: 4, column_name: b, result_type: LowCardinality(String), source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.02668_logical_optimizer + WHERE + FUNCTION id: 5, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 6, nodes: 2 + COLUMN id: 7, column_name: a, result_type: Int32, source_id: 3 + CONSTANT id: 8, constant_value: UInt64_2, constant_value_type: UInt8 diff --git a/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.sql b/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.sql new file mode 100644 index 00000000000..f20ef412215 --- /dev/null +++ b/tests/queries/0_stateless/02668_logical_optimizer_removing_redundant_checks.sql @@ -0,0 +1,26 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS 02668_logical_optimizer; + +CREATE TABLE 
02668_logical_optimizer +(a Int32, b LowCardinality(String)) +ENGINE=Memory; + +INSERT INTO 02668_logical_optimizer VALUES (1, 'test'), (2, 'test2'), (3, 'another'); + +SET optimize_min_equality_disjunction_chain_length = 2; + +SELECT * FROM 02668_logical_optimizer WHERE a = 1 OR 3 = a OR 1 = a; +EXPLAIN QUERY TREE SELECT * FROM 02668_logical_optimizer WHERE a = 1 OR 3 = a OR 1 = a; + +SELECT * FROM 02668_logical_optimizer WHERE a = 1 OR 1 = a; +EXPLAIN QUERY TREE SELECT * FROM 02668_logical_optimizer WHERE a = 1 OR 1 = a; + +SELECT * FROM 02668_logical_optimizer WHERE a = 1 AND 2 = a; +EXPLAIN QUERY TREE SELECT * FROM 02668_logical_optimizer WHERE a = 1 AND 2 = a; + +SELECT * FROM 02668_logical_optimizer WHERE 3 = a AND b = 'another' AND a = 3; +EXPLAIN QUERY TREE SELECT * FROM 02668_logical_optimizer WHERE a = 3 AND b = 'another' AND a = 3; + +SELECT * FROM 02668_logical_optimizer WHERE a = 2 AND 2 = a; +EXPLAIN QUERY TREE SELECT * FROM 02668_logical_optimizer WHERE a = 2 AND 2 = a; diff --git a/tests/queries/0_stateless/02668_parse_datetime.reference b/tests/queries/0_stateless/02668_parse_datetime.reference new file mode 100644 index 00000000000..b7215ac3718 --- /dev/null +++ b/tests/queries/0_stateless/02668_parse_datetime.reference @@ -0,0 +1,243 @@ +-- { echoOn } +-- year +select parseDateTime('2020', '%Y', 'UTC') = toDateTime('2020-01-01', 'UTC'); +1 +-- month +select parseDateTime('02', '%m', 'UTC') = toDateTime('2000-02-01', 'UTC'); +1 +select parseDateTime('07', '%m', 'UTC') = toDateTime('2000-07-01', 'UTC'); +1 +select parseDateTime('11-', '%m-', 'UTC') = toDateTime('2000-11-01', 'UTC'); +1 +select parseDateTime('00', '%m'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('13', '%m'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('12345', '%m'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('02', '%c', 'UTC') = toDateTime('2000-02-01', 'UTC'); +1 +select parseDateTime('07', '%c', 'UTC') = toDateTime('2000-07-01', 'UTC'); +1 +select parseDateTime('11-', '%c-', 'UTC') = toDateTime('2000-11-01', 'UTC'); +1 +select parseDateTime('00', '%c'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('13', '%c'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('12345', '%c'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('jun', '%b', 'UTC') = toDateTime('2000-06-01', 'UTC'); +1 +select parseDateTime('JUN', '%b', 'UTC') = toDateTime('2000-06-01', 'UTC'); +1 +select parseDateTime('abc', '%b'); -- { serverError CANNOT_PARSE_DATETIME } +set formatdatetime_parsedatetime_m_is_month_name = 1; +select parseDateTime('may', '%M', 'UTC') = toDateTime('2000-05-01', 'UTC'); +1 +select parseDateTime('MAY', '%M', 'UTC') = toDateTime('2000-05-01', 'UTC'); +1 +select parseDateTime('september', '%M', 'UTC') = toDateTime('2000-09-01', 'UTC'); +1 +select parseDateTime('summer', '%M'); -- { serverError CANNOT_PARSE_DATETIME } +set formatdatetime_parsedatetime_m_is_month_name = 0; +select parseDateTime('08', '%M', 'UTC') = toDateTime('1970-01-01 00:08:00', 'UTC'); +1 +select parseDateTime('59', '%M', 'UTC') = toDateTime('1970-01-01 00:59:00', 'UTC'); +1 +select parseDateTime('00/', '%M/', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTime('60', '%M', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('-1', '%M', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('123456789', '%M', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +set 
formatdatetime_parsedatetime_m_is_month_name = 1; +-- day of month +select parseDateTime('07', '%d', 'UTC') = toDateTime('2000-01-07', 'UTC'); +1 +select parseDateTime('01', '%d', 'UTC') = toDateTime('2000-01-01', 'UTC'); +1 +select parseDateTime('/11', '/%d', 'UTC') = toDateTime('2000-01-11', 'UTC'); +1 +select parseDateTime('00', '%d'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('32', '%d'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('12345', '%d'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('02-31', '%m-%d'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('04-31', '%m-%d'); -- { serverError CANNOT_PARSE_DATETIME } +-- The last one is chosen if multiple months of year are supplied +select parseDateTime('01 31 20 02', '%m %d %d %m', 'UTC') = toDateTime('2000-02-20', 'UTC'); +1 +select parseDateTime('02 31 20 04', '%m %d %d %m', 'UTC') = toDateTime('2000-04-20', 'UTC'); +1 +select parseDateTime('02 31 01', '%m %d %m', 'UTC') = toDateTime('2000-01-31', 'UTC'); +1 +select parseDateTime('2000-02-29', '%Y-%m-%d', 'UTC') = toDateTime('2000-02-29', 'UTC'); +1 +select parseDateTime('2001-02-29', '%Y-%m-%d'); -- { serverError CANNOT_PARSE_DATETIME } +-- day of year +select parseDateTime('001', '%j', 'UTC') = toDateTime('2000-01-01', 'UTC'); +1 +select parseDateTime('007', '%j', 'UTC') = toDateTime('2000-01-07', 'UTC'); +1 +select parseDateTime('/031/', '/%j/', 'UTC') = toDateTime('2000-01-31', 'UTC'); +1 +select parseDateTime('032', '%j', 'UTC') = toDateTime('2000-02-01', 'UTC'); +1 +select parseDateTime('060', '%j', 'UTC') = toDateTime('2000-02-29', 'UTC'); +1 +select parseDateTime('365', '%j', 'UTC') = toDateTime('2000-12-30', 'UTC'); +1 +select parseDateTime('366', '%j', 'UTC') = toDateTime('2000-12-31', 'UTC'); +1 +select parseDateTime('1980 001', '%Y %j', 'UTC') = toDateTime('1980-01-01', 'UTC'); +1 +select parseDateTime('1980 007', '%Y %j', 'UTC') = toDateTime('1980-01-07', 'UTC'); +1 +select parseDateTime('1980 /007', '%Y /%j', 'UTC') = toDateTime('1980-01-07', 'UTC'); +1 +select parseDateTime('1980 /031/', '%Y /%j/', 'UTC') = toDateTime('1980-01-31', 'UTC'); +1 +select parseDateTime('1980 032', '%Y %j', 'UTC') = toDateTime('1980-02-01', 'UTC'); +1 +select parseDateTime('1980 060', '%Y %j', 'UTC') = toDateTime('1980-02-29', 'UTC'); +1 +select parseDateTime('1980 366', '%Y %j', 'UTC') = toDateTime('1980-12-31', 'UTC'); +1 +select parseDateTime('1981 366', '%Y %j'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('367', '%j'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('000', '%j'); -- { serverError CANNOT_PARSE_DATETIME } +-- The last one is chosen if multiple day of years are supplied.
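+-- (the rightmost %Y determines the year here: in the two queries below, day 366 fails
+-- against 2001, which is not a leap year, but succeeds against the leap year 2000)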
+select parseDateTime('2000 366 2001', '%Y %j %Y'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('2001 366 2000', '%Y %j %Y', 'UTC') = toDateTime('2000-12-31', 'UTC'); +1 +-- hour of day +select parseDateTime('07', '%H', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTime('23', '%H', 'UTC') = toDateTime('1970-01-01 23:00:00', 'UTC'); +1 +select parseDateTime('00', '%H', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTime('10', '%H', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +1 +select parseDateTime('24', '%H', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('-1', '%H', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('1234567', '%H', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('07', '%k', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTime('23', '%k', 'UTC') = toDateTime('1970-01-01 23:00:00', 'UTC'); +1 +select parseDateTime('00', '%k', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTime('10', '%k', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +1 +select parseDateTime('24', '%k', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('-1', '%k', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('1234567', '%k', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- hour of half day +select parseDateTime('07', '%h', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTime('12', '%h', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTime('01', '%h', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +1 +select parseDateTime('10', '%h', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +1 +select parseDateTime('00', '%h', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('13', '%h', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('123456789', '%h', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('07', '%I', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTime('12', '%I', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTime('01', '%I', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +1 +select parseDateTime('10', '%I', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +1 +select parseDateTime('00', '%I', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('13', '%I', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('123456789', '%I', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('07', '%l', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTime('12', '%l', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTime('01', '%l', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +1 +select parseDateTime('10', '%l', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +1 +select parseDateTime('00', '%l', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('13', '%l', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('123456789', '%l', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- half of day +select parseDateTime('07 PM', '%H %p', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTime('07 AM', '%H %p', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTime('07 pm', '%H %p', 'UTC') = toDateTime('1970-01-01 07:00:00', 
'UTC'); +1 +select parseDateTime('07 am', '%H %p', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTime('00 AM', '%H %p', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTime('00 PM', '%H %p', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTime('00 am', '%H %p', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTime('00 pm', '%H %p', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTime('01 PM', '%h %p', 'UTC') = toDateTime('1970-01-01 13:00:00', 'UTC'); +1 +select parseDateTime('01 AM', '%h %p', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +1 +select parseDateTime('06 PM', '%h %p', 'UTC') = toDateTime('1970-01-01 18:00:00', 'UTC'); +1 +select parseDateTime('06 AM', '%h %p', 'UTC') = toDateTime('1970-01-01 06:00:00', 'UTC'); +1 +select parseDateTime('12 PM', '%h %p', 'UTC') = toDateTime('1970-01-01 12:00:00', 'UTC'); +1 +select parseDateTime('12 AM', '%h %p', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +-- minute +select parseDateTime('08', '%i', 'UTC') = toDateTime('1970-01-01 00:08:00', 'UTC'); +1 +select parseDateTime('59', '%i', 'UTC') = toDateTime('1970-01-01 00:59:00', 'UTC'); +1 +select parseDateTime('00/', '%i/', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTime('60', '%i', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('-1', '%i', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('123456789', '%i', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- second +select parseDateTime('09', '%s', 'UTC') = toDateTime('1970-01-01 00:00:09', 'UTC'); +1 +select parseDateTime('58', '%s', 'UTC') = toDateTime('1970-01-01 00:00:58', 'UTC'); +1 +select parseDateTime('00/', '%s/', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTime('60', '%s', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('-1', '%s', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('123456789', '%s', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- microsecond +select parseDateTime('000000', '%f', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTime('456789', '%f', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTime('42', '%f', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); -- { serverError NOT_ENOUGH_SPACE } +select parseDateTime('12ABCD', '%f', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- mixed YMD format +select parseDateTime('2021-01-04+23:00:00.654321', '%Y-%m-%d+%H:%i:%s.%f', 'UTC') = toDateTime('2021-01-04 23:00:00', 'UTC'); +1 +select parseDateTime('2019-07-03 11:04:10.975319', '%Y-%m-%d %H:%i:%s.%f', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC'); +1 +select parseDateTime('10:04:11 03-07-2019.242424', '%s:%i:%H %d-%m-%Y.%f', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC'); +1 +-- *OrZero, *OrNull, str_to_date +select parseDateTimeOrZero('10:04:11 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC'); +1 +select parseDateTimeOrZero('10:04:11 invalid 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeOrNull('10:04:11 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC'); +1 +select parseDateTimeOrNull('10:04:11 invalid 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') IS NULL; +1 +select str_to_date('10:04:11 03-07-2019', '%s:%i:%H 
%d-%m-%Y', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC'); +1 +select sTr_To_DaTe('10:04:11 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC'); +1 +select str_to_date('10:04:11 invalid 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') IS NULL; +1 +-- Error handling +select parseDateTime('12 AM'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +select parseDateTime('12 AM', '%h %p', 'UTC', 'a fourth argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } diff --git a/tests/queries/0_stateless/02668_parse_datetime.sql b/tests/queries/0_stateless/02668_parse_datetime.sql new file mode 100644 index 00000000000..3fb4aacedbd --- /dev/null +++ b/tests/queries/0_stateless/02668_parse_datetime.sql @@ -0,0 +1,168 @@ +-- { echoOn } +-- year +select parseDateTime('2020', '%Y', 'UTC') = toDateTime('2020-01-01', 'UTC'); + +-- month +select parseDateTime('02', '%m', 'UTC') = toDateTime('2000-02-01', 'UTC'); +select parseDateTime('07', '%m', 'UTC') = toDateTime('2000-07-01', 'UTC'); +select parseDateTime('11-', '%m-', 'UTC') = toDateTime('2000-11-01', 'UTC'); +select parseDateTime('00', '%m'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('13', '%m'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('12345', '%m'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('02', '%c', 'UTC') = toDateTime('2000-02-01', 'UTC'); +select parseDateTime('07', '%c', 'UTC') = toDateTime('2000-07-01', 'UTC'); +select parseDateTime('11-', '%c-', 'UTC') = toDateTime('2000-11-01', 'UTC'); +select parseDateTime('00', '%c'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('13', '%c'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('12345', '%c'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('jun', '%b', 'UTC') = toDateTime('2000-06-01', 'UTC'); +select parseDateTime('JUN', '%b', 'UTC') = toDateTime('2000-06-01', 'UTC'); +select parseDateTime('abc', '%b'); -- { serverError CANNOT_PARSE_DATETIME } +set formatdatetime_parsedatetime_m_is_month_name = 1; +select parseDateTime('may', '%M', 'UTC') = toDateTime('2000-05-01', 'UTC'); +select parseDateTime('MAY', '%M', 'UTC') = toDateTime('2000-05-01', 'UTC'); +select parseDateTime('september', '%M', 'UTC') = toDateTime('2000-09-01', 'UTC'); +select parseDateTime('summer', '%M'); -- { serverError CANNOT_PARSE_DATETIME } +set formatdatetime_parsedatetime_m_is_month_name = 0; +select parseDateTime('08', '%M', 'UTC') = toDateTime('1970-01-01 00:08:00', 'UTC'); +select parseDateTime('59', '%M', 'UTC') = toDateTime('1970-01-01 00:59:00', 'UTC'); +select parseDateTime('00/', '%M/', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTime('60', '%M', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('-1', '%M', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('123456789', '%M', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +set formatdatetime_parsedatetime_m_is_month_name = 1; + +-- day of month +select parseDateTime('07', '%d', 'UTC') = toDateTime('2000-01-07', 'UTC'); +select parseDateTime('01', '%d', 'UTC') = toDateTime('2000-01-01', 'UTC'); +select parseDateTime('/11', '/%d', 'UTC') = toDateTime('2000-01-11', 'UTC'); +select parseDateTime('00', '%d'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('32', '%d'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('12345', '%d'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('02-31', '%m-%d'); -- { 
serverError CANNOT_PARSE_DATETIME } +select parseDateTime('04-31', '%m-%d'); -- { serverError CANNOT_PARSE_DATETIME } +-- The last one is chosen if multiple months of year are supplied +select parseDateTime('01 31 20 02', '%m %d %d %m', 'UTC') = toDateTime('2000-02-20', 'UTC'); +select parseDateTime('02 31 20 04', '%m %d %d %m', 'UTC') = toDateTime('2000-04-20', 'UTC'); +select parseDateTime('02 31 01', '%m %d %m', 'UTC') = toDateTime('2000-01-31', 'UTC'); +select parseDateTime('2000-02-29', '%Y-%m-%d', 'UTC') = toDateTime('2000-02-29', 'UTC'); +select parseDateTime('2001-02-29', '%Y-%m-%d'); -- { serverError CANNOT_PARSE_DATETIME } + +-- day of year +select parseDateTime('001', '%j', 'UTC') = toDateTime('2000-01-01', 'UTC'); +select parseDateTime('007', '%j', 'UTC') = toDateTime('2000-01-07', 'UTC'); +select parseDateTime('/031/', '/%j/', 'UTC') = toDateTime('2000-01-31', 'UTC'); +select parseDateTime('032', '%j', 'UTC') = toDateTime('2000-02-01', 'UTC'); +select parseDateTime('060', '%j', 'UTC') = toDateTime('2000-02-29', 'UTC'); +select parseDateTime('365', '%j', 'UTC') = toDateTime('2000-12-30', 'UTC'); +select parseDateTime('366', '%j', 'UTC') = toDateTime('2000-12-31', 'UTC'); +select parseDateTime('1980 001', '%Y %j', 'UTC') = toDateTime('1980-01-01', 'UTC'); +select parseDateTime('1980 007', '%Y %j', 'UTC') = toDateTime('1980-01-07', 'UTC'); +select parseDateTime('1980 /007', '%Y /%j', 'UTC') = toDateTime('1980-01-07', 'UTC'); +select parseDateTime('1980 /031/', '%Y /%j/', 'UTC') = toDateTime('1980-01-31', 'UTC'); +select parseDateTime('1980 032', '%Y %j', 'UTC') = toDateTime('1980-02-01', 'UTC'); +select parseDateTime('1980 060', '%Y %j', 'UTC') = toDateTime('1980-02-29', 'UTC'); +select parseDateTime('1980 366', '%Y %j', 'UTC') = toDateTime('1980-12-31', 'UTC'); +select parseDateTime('1981 366', '%Y %j'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('367', '%j'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('000', '%j'); -- { serverError CANNOT_PARSE_DATETIME } +-- The last one is chosen if multiple day of years are supplied.
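+-- (the rightmost %Y determines the year here: in the two queries below, day 366 fails
+-- against 2001, which is not a leap year, but succeeds against the leap year 2000)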
+select parseDateTime('2000 366 2001', '%Y %j %Y'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('2001 366 2000', '%Y %j %Y', 'UTC') = toDateTime('2000-12-31', 'UTC'); + +-- hour of day +select parseDateTime('07', '%H', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTime('23', '%H', 'UTC') = toDateTime('1970-01-01 23:00:00', 'UTC'); +select parseDateTime('00', '%H', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTime('10', '%H', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +select parseDateTime('24', '%H', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('-1', '%H', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('1234567', '%H', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('07', '%k', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTime('23', '%k', 'UTC') = toDateTime('1970-01-01 23:00:00', 'UTC'); +select parseDateTime('00', '%k', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTime('10', '%k', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +select parseDateTime('24', '%k', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('-1', '%k', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('1234567', '%k', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- hour of half day +select parseDateTime('07', '%h', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTime('12', '%h', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTime('01', '%h', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +select parseDateTime('10', '%h', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +select parseDateTime('00', '%h', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('13', '%h', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('123456789', '%h', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('07', '%I', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTime('12', '%I', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTime('01', '%I', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +select parseDateTime('10', '%I', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +select parseDateTime('00', '%I', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('13', '%I', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('123456789', '%I', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('07', '%l', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTime('12', '%l', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTime('01', '%l', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +select parseDateTime('10', '%l', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +select parseDateTime('00', '%l', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('13', '%l', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('123456789', '%l', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- half of day +select parseDateTime('07 PM', '%H %p', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTime('07 AM', '%H %p', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTime('07 pm', '%H %p', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTime('07 am', '%H %p', 'UTC') = 
toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTime('00 AM', '%H %p', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTime('00 PM', '%H %p', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTime('00 am', '%H %p', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTime('00 pm', '%H %p', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTime('01 PM', '%h %p', 'UTC') = toDateTime('1970-01-01 13:00:00', 'UTC'); +select parseDateTime('01 AM', '%h %p', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +select parseDateTime('06 PM', '%h %p', 'UTC') = toDateTime('1970-01-01 18:00:00', 'UTC'); +select parseDateTime('06 AM', '%h %p', 'UTC') = toDateTime('1970-01-01 06:00:00', 'UTC'); +select parseDateTime('12 PM', '%h %p', 'UTC') = toDateTime('1970-01-01 12:00:00', 'UTC'); +select parseDateTime('12 AM', '%h %p', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); + +-- minute +select parseDateTime('08', '%i', 'UTC') = toDateTime('1970-01-01 00:08:00', 'UTC'); +select parseDateTime('59', '%i', 'UTC') = toDateTime('1970-01-01 00:59:00', 'UTC'); +select parseDateTime('00/', '%i/', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTime('60', '%i', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('-1', '%i', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('123456789', '%i', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- second +select parseDateTime('09', '%s', 'UTC') = toDateTime('1970-01-01 00:00:09', 'UTC'); +select parseDateTime('58', '%s', 'UTC') = toDateTime('1970-01-01 00:00:58', 'UTC'); +select parseDateTime('00/', '%s/', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTime('60', '%s', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('-1', '%s', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('123456789', '%s', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- microsecond +select parseDateTime('000000', '%f', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTime('456789', '%f', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTime('42', '%f', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); -- { serverError NOT_ENOUGH_SPACE } +select parseDateTime('12ABCD', '%f', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- mixed YMD format +select parseDateTime('2021-01-04+23:00:00.654321', '%Y-%m-%d+%H:%i:%s.%f', 'UTC') = toDateTime('2021-01-04 23:00:00', 'UTC'); +select parseDateTime('2019-07-03 11:04:10.975319', '%Y-%m-%d %H:%i:%s.%f', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC'); +select parseDateTime('10:04:11 03-07-2019.242424', '%s:%i:%H %d-%m-%Y.%f', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC'); + +-- *OrZero, *OrNull, str_to_date +select parseDateTimeOrZero('10:04:11 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC'); +select parseDateTimeOrZero('10:04:11 invalid 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeOrNull('10:04:11 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC'); +select parseDateTimeOrNull('10:04:11 invalid 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') IS NULL; +select str_to_date('10:04:11 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC'); +select sTr_To_DaTe('10:04:11 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') = 
toDateTime('2019-07-03 11:04:10', 'UTC'); +select str_to_date('10:04:11 invalid 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') IS NULL; + +-- Error handling +select parseDateTime('12 AM'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +select parseDateTime('12 AM', '%h %p', 'UTC', 'a fourth argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +-- { echoOff } diff --git a/tests/queries/0_stateless/02668_parse_datetime_in_joda_syntax.reference b/tests/queries/0_stateless/02668_parse_datetime_in_joda_syntax.reference new file mode 100644 index 00000000000..9fbf105dc41 --- /dev/null +++ b/tests/queries/0_stateless/02668_parse_datetime_in_joda_syntax.reference @@ -0,0 +1,358 @@ +-- { echoOn } +-- empty +select parseDateTimeInJodaSyntax(' ', ' ', 'UTC') = toDateTime('1970-01-01', 'UTC'); +1 +-- era +select parseDateTimeInJodaSyntax('AD 1999', 'G YYYY', 'UTC') = toDateTime('1999-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('ad 1999', 'G YYYY', 'UTC') = toDateTime('1999-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('Ad 1999', 'G YYYY', 'UTC') = toDateTime('1999-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('AD 1999', 'G yyyy', 'UTC') = toDateTime('1999-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('AD 1999 2000', 'G YYYY yyyy', 'UTC') = toDateTime('2000-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('AD 1999 2000', 'G yyyy YYYY', 'UTC') = toDateTime('2000-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('AD 1999', 'G Y'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('AD 1999', 'G YY'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('AD 1999', 'G YYY'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('BC', 'G'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('AB', 'G'); -- { serverError CANNOT_PARSE_DATETIME } +-- year of era +select parseDateTimeInJodaSyntax('2106', 'YYYY', 'UTC') = toDateTime('2106-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1970', 'YYYY', 'UTC') = toDateTime('1970-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1969', 'YYYY', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('2107', 'YYYY', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('+1999', 'YYYY', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('12', 'YY', 'UTC') = toDateTime('2012-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('69', 'YY', 'UTC') = toDateTime('2069-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('70', 'YY', 'UTC') = toDateTime('1970-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('99', 'YY', 'UTC') = toDateTime('1999-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('01', 'YY', 'UTC') = toDateTime('2001-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1', 'YY', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('99 98 97', 'YY YY YY', 'UTC') = toDateTime('1997-01-01', 'UTC'); +1 +-- year +select parseDateTimeInJodaSyntax('12', 'yy', 'UTC') = toDateTime('2012-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('69', 'yy', 'UTC') = toDateTime('2069-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('70', 'yy', 'UTC') = toDateTime('1970-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('99', 'yy', 'UTC') = toDateTime('1999-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('+99', 'yy', 'UTC') = toDateTime('1999-01-01', 'UTC'); +1 +select 
parseDateTimeInJodaSyntax('+99 02', 'yy MM', 'UTC') = toDateTime('1999-02-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('10 +10', 'MM yy', 'UTC') = toDateTime('2010-10-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('10+2001', 'MMyyyy', 'UTC') = toDateTime('2001-10-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('+200110', 'yyyyMM', 'UTC') = toDateTime('2001-10-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1970', 'yyyy', 'UTC') = toDateTime('1970-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('2106', 'yyyy', 'UTC') = toDateTime('2106-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1969', 'yyyy', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('2107', 'yyyy', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- week year +select parseDateTimeInJodaSyntax('2106', 'xxxx', 'UTC') = toDateTime('2106-01-04', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1971', 'xxxx', 'UTC') = toDateTime('1971-01-04', 'UTC'); +1 +select parseDateTimeInJodaSyntax('2025', 'xxxx', 'UTC') = toDateTime('2024-12-30', 'UTC'); +1 +select parseDateTimeInJodaSyntax('12', 'xx', 'UTC') = toDateTime('2012-01-02', 'UTC'); +1 +select parseDateTimeInJodaSyntax('69', 'xx', 'UTC') = toDateTime('2068-12-31', 'UTC'); +1 +select parseDateTimeInJodaSyntax('99', 'xx', 'UTC') = toDateTime('1999-01-04', 'UTC'); +1 +select parseDateTimeInJodaSyntax('01', 'xx', 'UTC') = toDateTime('2001-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('+10', 'xx', 'UTC') = toDateTime('2010-01-04', 'UTC'); +1 +select parseDateTimeInJodaSyntax('+99 01', 'xx ww', 'UTC') = toDateTime('1999-01-04', 'UTC'); +1 +select parseDateTimeInJodaSyntax('+99 02', 'xx ww', 'UTC') = toDateTime('1999-01-11', 'UTC'); +1 +select parseDateTimeInJodaSyntax('10 +10', 'ww xx', 'UTC') = toDateTime('2010-03-08', 'UTC'); +1 +select parseDateTimeInJodaSyntax('2+10', 'wwxx', 'UTC') = toDateTime('2010-01-11', 'UTC'); +1 +select parseDateTimeInJodaSyntax('+102', 'xxM', 'UTC') = toDateTime('2010-02-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('+20102', 'xxxxM', 'UTC') = toDateTime('2010-02-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1970', 'xxxx', 'UTC'); -- { serverError VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE } +select parseDateTimeInJodaSyntax('1969', 'xxxx', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('2107', 'xxxx', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- century of era +select parseDateTimeInJodaSyntax('20', 'CC', 'UTC') = toDateTime('2000-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('21', 'CC', 'UTC') = toDateTime('2100-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('19', 'CC', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('22', 'CC', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- month +select parseDateTimeInJodaSyntax('1', 'M', 'UTC') = toDateTime('2000-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax(' 7', ' MM', 'UTC') = toDateTime('2000-07-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('11', 'M', 'UTC') = toDateTime('2000-11-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('10-', 'M-', 'UTC') = toDateTime('2000-10-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('-12-', '-M-', 'UTC') = toDateTime('2000-12-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('0', 'M', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('13', 'M', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('12345', 'M', 'UTC'); -- { serverError 
CANNOT_PARSE_DATETIME } +--- Ensure MMM and MMMM specifiers consume both short- and long-form month names +select parseDateTimeInJodaSyntax('Aug', 'MMM', 'UTC') = toDateTime('2000-08-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('AuG', 'MMM', 'UTC') = toDateTime('2000-08-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('august', 'MMM', 'UTC') = toDateTime('2000-08-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('Aug', 'MMMM', 'UTC') = toDateTime('2000-08-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('AuG', 'MMMM', 'UTC') = toDateTime('2000-08-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('august', 'MMMM', 'UTC') = toDateTime('2000-08-01', 'UTC'); +1 +--- invalid month names +select parseDateTimeInJodaSyntax('Decembr', 'MMM', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('Decembr', 'MMMM', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('Decemberary', 'MMM', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('Decemberary', 'MMMM', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('asdf', 'MMM', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('asdf', 'MMMM', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- day of month +select parseDateTimeInJodaSyntax('1', 'd', 'UTC') = toDateTime('2000-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('7 ', 'dd ', 'UTC') = toDateTime('2000-01-07', 'UTC'); +1 +select parseDateTimeInJodaSyntax('/11', '/dd', 'UTC') = toDateTime('2000-01-11', 'UTC'); +1 +select parseDateTimeInJodaSyntax('/31/', '/d/', 'UTC') = toDateTime('2000-01-31', 'UTC'); +1 +select parseDateTimeInJodaSyntax('0', 'd', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('32', 'd', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('12345', 'd', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('02-31', 'M-d', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('04-31', 'M-d', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- The last one is chosen if multiple day-of-month values are supplied.
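+--- Editorial illustration (not part of the original test set): repeated fields are applied left to right, so the last occurrence of a field wins; with format 'M d M' and input '2 31 1' the month comes from the final 'M', giving 2000-01-31, whereas resolving the month to 2 would have produced the invalid date February 31, as the 'M-d' cases above show.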
+select parseDateTimeInJodaSyntax('2 31 1', 'M d M', 'UTC') = toDateTime('2000-01-31', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1 31 20 2', 'M d d M', 'UTC') = toDateTime('2000-02-20', 'UTC'); +1 +select parseDateTimeInJodaSyntax('2 31 20 4', 'M d d M', 'UTC') = toDateTime('2000-04-20', 'UTC'); +1 +--- Leap year +select parseDateTimeInJodaSyntax('2020-02-29', 'YYYY-M-d', 'UTC') = toDateTime('2020-02-29', 'UTC'); +1 +select parseDateTimeInJodaSyntax('2001-02-29', 'YYYY-M-d', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- day of year +select parseDateTimeInJodaSyntax('1', 'D', 'UTC') = toDateTime('2000-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('7 ', 'DD ', 'UTC') = toDateTime('2000-01-07', 'UTC'); +1 +select parseDateTimeInJodaSyntax('/11', '/DD', 'UTC') = toDateTime('2000-01-11', 'UTC'); +1 +select parseDateTimeInJodaSyntax('/31/', '/DDD/', 'UTC') = toDateTime('2000-01-31', 'UTC'); +1 +select parseDateTimeInJodaSyntax('32', 'D', 'UTC') = toDateTime('2000-02-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('60', 'D', 'UTC') = toDateTime('2000-02-29', 'UTC'); +1 +select parseDateTimeInJodaSyntax('365', 'D', 'UTC') = toDateTime('2000-12-30', 'UTC'); +1 +select parseDateTimeInJodaSyntax('366', 'D', 'UTC') = toDateTime('2000-12-31', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1999 1', 'yyyy D', 'UTC') = toDateTime('1999-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1999 7 ', 'yyyy DD ', 'UTC') = toDateTime('1999-01-07', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1999 /11', 'yyyy /DD', 'UTC') = toDateTime('1999-01-11', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1999 /31/', 'yyyy /DD/', 'UTC') = toDateTime('1999-01-31', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1999 32', 'yyyy D', 'UTC') = toDateTime('1999-02-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1999 60', 'yyyy D', 'UTC') = toDateTime('1999-03-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1999 365', 'yyyy D', 'UTC') = toDateTime('1999-12-31', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1999 366', 'yyyy D', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +--- Ensure all days of year are checked against final selected year +select parseDateTimeInJodaSyntax('2001 366 2000', 'yyyy D yyyy', 'UTC') = toDateTime('2000-12-31', 'UTC'); +1 +select parseDateTimeInJodaSyntax('2000 366 2001', 'yyyy D yyyy', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('0', 'D', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('367', 'D', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- hour of day +select parseDateTimeInJodaSyntax('7', 'H', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('23', 'HH', 'UTC') = toDateTime('1970-01-01 23:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('0', 'HHH', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('10', 'HHHHHHHH', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +1 +--- invalid hour of day +select parseDateTimeInJodaSyntax('24', 'H', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('-1', 'H', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('123456789', 'H', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- clock hour of day +select parseDateTimeInJodaSyntax('7', 'k', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('24', 'kk', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select
parseDateTimeInJodaSyntax('1', 'kkk', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('10', 'kkkkkkkk', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +1 +-- invalid clock hour of day +select parseDateTimeInJodaSyntax('25', 'k', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('0', 'k', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('123456789', 'k', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- hour of half day +select parseDateTimeInJodaSyntax('7', 'K', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('11', 'KK', 'UTC') = toDateTime('1970-01-01 11:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('0', 'KKK', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('10', 'KKKKKKKK', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +1 +-- invalid hour of half day +select parseDateTimeInJodaSyntax('12', 'K', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('-1', 'K', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('123456789', 'K', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- clock hour of half day +select parseDateTimeInJodaSyntax('7', 'h', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('12', 'hh', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1', 'hhh', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('10', 'hhhhhhhh', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +1 +-- invalid clock hour of half day +select parseDateTimeInJodaSyntax('13', 'h', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('0', 'h', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('123456789', 'h', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- half of day +--- Half of day has no effect if hour or clockhour of day is provided (hour-of-day tests) +select parseDateTimeInJodaSyntax('7 PM', 'H a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('7 AM', 'H a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('7 pm', 'H a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('7 am', 'H a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('0 PM', 'H a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('0 AM', 'H a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('0 pm', 'H a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('0 am', 'H a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('7 PM', 'k a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('7 AM', 'k a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('7 pm', 'k a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('7 am', 'k a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('24 PM', 'k a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('24 AM', 'k a', 'UTC') = toDateTime('1970-01-01 00:00:00',
'UTC'); +1 +select parseDateTimeInJodaSyntax('24 pm', 'k a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('24 am', 'k a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +-- Half of day has an effect if hour or clockhour of halfday is provided +select parseDateTimeInJodaSyntax('0 PM', 'K a', 'UTC') = toDateTime('1970-01-01 12:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('0 AM', 'K a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('6 PM', 'K a', 'UTC') = toDateTime('1970-01-01 18:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('6 AM', 'K a', 'UTC') = toDateTime('1970-01-01 06:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('11 PM', 'K a', 'UTC') = toDateTime('1970-01-01 23:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('11 AM', 'K a', 'UTC') = toDateTime('1970-01-01 11:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1 PM', 'h a', 'UTC') = toDateTime('1970-01-01 13:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1 AM', 'h a', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('6 PM', 'h a', 'UTC') = toDateTime('1970-01-01 18:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('6 AM', 'h a', 'UTC') = toDateTime('1970-01-01 06:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('12 PM', 'h a', 'UTC') = toDateTime('1970-01-01 12:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('12 AM', 'h a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +-- time gives precedence to the most recent time specifier +select parseDateTimeInJodaSyntax('0 1 AM', 'H h a', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('12 1 PM', 'H h a', 'UTC') = toDateTime('1970-01-01 13:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1 AM 0', 'h a H', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1 AM 12', 'h a H', 'UTC') = toDateTime('1970-01-01 12:00:00', 'UTC'); +1 +-- minute +select parseDateTimeInJodaSyntax('8', 'm', 'UTC') = toDateTime('1970-01-01 00:08:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('59', 'mm', 'UTC') = toDateTime('1970-01-01 00:59:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('0/', 'mmm/', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('60', 'm', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('-1', 'm', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('123456789', 'm', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- second +select parseDateTimeInJodaSyntax('9', 's', 'UTC') = toDateTime('1970-01-01 00:00:09', 'UTC'); +1 +select parseDateTimeInJodaSyntax('58', 'ss', 'UTC') = toDateTime('1970-01-01 00:00:58', 'UTC'); +1 +select parseDateTimeInJodaSyntax('0/', 's/', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('60', 's', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('-1', 's', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('123456789', 's', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- integer overflow in AST Fuzzer +select parseDateTimeInJodaSyntax('19191919191919191919191919191919', 'CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- *OrZero, *OrNull +select parseDateTimeInJodaSyntaxOrZero('2001 366 2000', 'yyyy D yyyy', 'UTC') =
toDateTime('2000-12-31', 'UTC'); +1 +select parseDateTimeInJodaSyntaxOrZero('2001 invalid 366 2000', 'yyyy D yyyy', 'UTC') = toDateTime('1970-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntaxOrNull('2001 366 2000', 'yyyy D yyyy', 'UTC') = toDateTime('2000-12-31', 'UTC'); +1 +select parseDateTimeInJodaSyntaxOrNull('2001 invalid 366 2000', 'yyyy D yyyy', 'UTC') IS NULL; +1 +-- Error handling +select parseDateTimeInJodaSyntax('12 AM'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +select parseDateTimeInJodaSyntax('12 AM', 'h a', 'UTC', 'a fourth argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } diff --git a/tests/queries/0_stateless/02668_parse_datetime_in_joda_syntax.sql b/tests/queries/0_stateless/02668_parse_datetime_in_joda_syntax.sql new file mode 100644 index 00000000000..f5810d3d4c3 --- /dev/null +++ b/tests/queries/0_stateless/02668_parse_datetime_in_joda_syntax.sql @@ -0,0 +1,244 @@ +-- { echoOn } +-- empty +select parseDateTimeInJodaSyntax(' ', ' ', 'UTC') = toDateTime('1970-01-01', 'UTC'); + +-- era +select parseDateTimeInJodaSyntax('AD 1999', 'G YYYY', 'UTC') = toDateTime('1999-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('ad 1999', 'G YYYY', 'UTC') = toDateTime('1999-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('Ad 1999', 'G YYYY', 'UTC') = toDateTime('1999-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('AD 1999', 'G yyyy', 'UTC') = toDateTime('1999-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('AD 1999 2000', 'G YYYY yyyy', 'UTC') = toDateTime('2000-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('AD 1999 2000', 'G yyyy YYYY', 'UTC') = toDateTime('2000-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('AD 1999', 'G Y'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('AD 1999', 'G YY'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('AD 1999', 'G YYY'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('BC', 'G'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('AB', 'G'); -- { serverError CANNOT_PARSE_DATETIME } + +-- year of era +select parseDateTimeInJodaSyntax('2106', 'YYYY', 'UTC') = toDateTime('2106-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('1970', 'YYYY', 'UTC') = toDateTime('1970-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('1969', 'YYYY', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('2107', 'YYYY', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('+1999', 'YYYY', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +select parseDateTimeInJodaSyntax('12', 'YY', 'UTC') = toDateTime('2012-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('69', 'YY', 'UTC') = toDateTime('2069-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('70', 'YY', 'UTC') = toDateTime('1970-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('99', 'YY', 'UTC') = toDateTime('1999-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('01', 'YY', 'UTC') = toDateTime('2001-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('1', 'YY', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +select parseDateTimeInJodaSyntax('99 98 97', 'YY YY YY', 'UTC') = toDateTime('1997-01-01', 'UTC'); + +-- year +select parseDateTimeInJodaSyntax('12', 'yy', 'UTC') = toDateTime('2012-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('69', 'yy', 'UTC') = toDateTime('2069-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('70', 'yy', 'UTC') = toDateTime('1970-01-01', 'UTC'); +select 
parseDateTimeInJodaSyntax('99', 'yy', 'UTC') = toDateTime('1999-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('+99', 'yy', 'UTC') = toDateTime('1999-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('+99 02', 'yy MM', 'UTC') = toDateTime('1999-02-01', 'UTC'); +select parseDateTimeInJodaSyntax('10 +10', 'MM yy', 'UTC') = toDateTime('2010-10-01', 'UTC'); +select parseDateTimeInJodaSyntax('10+2001', 'MMyyyy', 'UTC') = toDateTime('2001-10-01', 'UTC'); +select parseDateTimeInJodaSyntax('+200110', 'yyyyMM', 'UTC') = toDateTime('2001-10-01', 'UTC'); +select parseDateTimeInJodaSyntax('1970', 'yyyy', 'UTC') = toDateTime('1970-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('2106', 'yyyy', 'UTC') = toDateTime('2106-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('1969', 'yyyy', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('2107', 'yyyy', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- week year +select parseDateTimeInJodaSyntax('2106', 'xxxx', 'UTC') = toDateTime('2106-01-04', 'UTC'); +select parseDateTimeInJodaSyntax('1971', 'xxxx', 'UTC') = toDateTime('1971-01-04', 'UTC'); +select parseDateTimeInJodaSyntax('2025', 'xxxx', 'UTC') = toDateTime('2024-12-30', 'UTC'); +select parseDateTimeInJodaSyntax('12', 'xx', 'UTC') = toDateTime('2012-01-02', 'UTC'); +select parseDateTimeInJodaSyntax('69', 'xx', 'UTC') = toDateTime('2068-12-31', 'UTC'); +select parseDateTimeInJodaSyntax('99', 'xx', 'UTC') = toDateTime('1999-01-04', 'UTC'); +select parseDateTimeInJodaSyntax('01', 'xx', 'UTC') = toDateTime('2001-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('+10', 'xx', 'UTC') = toDateTime('2010-01-04', 'UTC'); +select parseDateTimeInJodaSyntax('+99 01', 'xx ww', 'UTC') = toDateTime('1999-01-04', 'UTC'); +select parseDateTimeInJodaSyntax('+99 02', 'xx ww', 'UTC') = toDateTime('1999-01-11', 'UTC'); +select parseDateTimeInJodaSyntax('10 +10', 'ww xx', 'UTC') = toDateTime('2010-03-08', 'UTC'); +select parseDateTimeInJodaSyntax('2+10', 'wwxx', 'UTC') = toDateTime('2010-01-11', 'UTC'); +select parseDateTimeInJodaSyntax('+102', 'xxM', 'UTC') = toDateTime('2010-02-01', 'UTC'); +select parseDateTimeInJodaSyntax('+20102', 'xxxxM', 'UTC') = toDateTime('2010-02-01', 'UTC'); +select parseDateTimeInJodaSyntax('1970', 'xxxx', 'UTC'); -- { serverError VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE } +select parseDateTimeInJodaSyntax('1969', 'xxxx', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('2107', 'xxxx', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- century of era +select parseDateTimeInJodaSyntax('20', 'CC', 'UTC') = toDateTime('2000-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('21', 'CC', 'UTC') = toDateTime('2100-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('19', 'CC', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('22', 'CC', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- month +select parseDateTimeInJodaSyntax('1', 'M', 'UTC') = toDateTime('2000-01-01', 'UTC'); +select parseDateTimeInJodaSyntax(' 7', ' MM', 'UTC') = toDateTime('2000-07-01', 'UTC'); +select parseDateTimeInJodaSyntax('11', 'M', 'UTC') = toDateTime('2000-11-01', 'UTC'); +select parseDateTimeInJodaSyntax('10-', 'M-', 'UTC') = toDateTime('2000-10-01', 'UTC'); +select parseDateTimeInJodaSyntax('-12-', '-M-', 'UTC') = toDateTime('2000-12-01', 'UTC'); +select parseDateTimeInJodaSyntax('0', 'M', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('13', 'M', 'UTC'); -- { serverError 
CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('12345', 'M', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +--- Ensure MMM and MMMM specifiers consume both short- and long-form month names +select parseDateTimeInJodaSyntax('Aug', 'MMM', 'UTC') = toDateTime('2000-08-01', 'UTC'); +select parseDateTimeInJodaSyntax('AuG', 'MMM', 'UTC') = toDateTime('2000-08-01', 'UTC'); +select parseDateTimeInJodaSyntax('august', 'MMM', 'UTC') = toDateTime('2000-08-01', 'UTC'); +select parseDateTimeInJodaSyntax('Aug', 'MMMM', 'UTC') = toDateTime('2000-08-01', 'UTC'); +select parseDateTimeInJodaSyntax('AuG', 'MMMM', 'UTC') = toDateTime('2000-08-01', 'UTC'); +select parseDateTimeInJodaSyntax('august', 'MMMM', 'UTC') = toDateTime('2000-08-01', 'UTC'); +--- invalid month names +select parseDateTimeInJodaSyntax('Decembr', 'MMM', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('Decembr', 'MMMM', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('Decemberary', 'MMM', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('Decemberary', 'MMMM', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('asdf', 'MMM', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('asdf', 'MMMM', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- day of month +select parseDateTimeInJodaSyntax('1', 'd', 'UTC') = toDateTime('2000-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('7 ', 'dd ', 'UTC') = toDateTime('2000-01-07', 'UTC'); +select parseDateTimeInJodaSyntax('/11', '/dd', 'UTC') = toDateTime('2000-01-11', 'UTC'); +select parseDateTimeInJodaSyntax('/31/', '/d/', 'UTC') = toDateTime('2000-01-31', 'UTC'); +select parseDateTimeInJodaSyntax('0', 'd', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('32', 'd', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('12345', 'd', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('02-31', 'M-d', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('04-31', 'M-d', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- The last one is chosen if multiple day-of-month values are supplied.
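+--- Editorial illustration (not part of the original test set): repeated fields are applied left to right, so the last occurrence of a field wins; with format 'M d M' and input '2 31 1' the month comes from the final 'M', giving 2000-01-31, whereas resolving the month to 2 would have produced the invalid date February 31, as the 'M-d' cases above show.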
+select parseDateTimeInJodaSyntax('2 31 1', 'M d M', 'UTC') = toDateTime('2000-01-31', 'UTC'); +select parseDateTimeInJodaSyntax('1 31 20 2', 'M d d M', 'UTC') = toDateTime('2000-02-20', 'UTC'); +select parseDateTimeInJodaSyntax('2 31 20 4', 'M d d M', 'UTC') = toDateTime('2000-04-20', 'UTC'); +--- Leap year +select parseDateTimeInJodaSyntax('2020-02-29', 'YYYY-M-d', 'UTC') = toDateTime('2020-02-29', 'UTC'); +select parseDateTimeInJodaSyntax('2001-02-29', 'YYYY-M-d', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- day of year +select parseDateTimeInJodaSyntax('1', 'D', 'UTC') = toDateTime('2000-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('7 ', 'DD ', 'UTC') = toDateTime('2000-01-07', 'UTC'); +select parseDateTimeInJodaSyntax('/11', '/DD', 'UTC') = toDateTime('2000-01-11', 'UTC'); +select parseDateTimeInJodaSyntax('/31/', '/DDD/', 'UTC') = toDateTime('2000-01-31', 'UTC'); +select parseDateTimeInJodaSyntax('32', 'D', 'UTC') = toDateTime('2000-02-01', 'UTC'); +select parseDateTimeInJodaSyntax('60', 'D', 'UTC') = toDateTime('2000-02-29', 'UTC'); +select parseDateTimeInJodaSyntax('365', 'D', 'UTC') = toDateTime('2000-12-30', 'UTC'); +select parseDateTimeInJodaSyntax('366', 'D', 'UTC') = toDateTime('2000-12-31', 'UTC'); +select parseDateTimeInJodaSyntax('1999 1', 'yyyy D', 'UTC') = toDateTime('1999-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('1999 7 ', 'yyyy DD ', 'UTC') = toDateTime('1999-01-07', 'UTC'); +select parseDateTimeInJodaSyntax('1999 /11', 'yyyy /DD', 'UTC') = toDateTime('1999-01-11', 'UTC'); +select parseDateTimeInJodaSyntax('1999 /31/', 'yyyy /DD/', 'UTC') = toDateTime('1999-01-31', 'UTC'); +select parseDateTimeInJodaSyntax('1999 32', 'yyyy D', 'UTC') = toDateTime('1999-02-01', 'UTC'); +select parseDateTimeInJodaSyntax('1999 60', 'yyyy D', 'UTC') = toDateTime('1999-03-01', 'UTC'); +select parseDateTimeInJodaSyntax('1999 365', 'yyyy D', 'UTC') = toDateTime('1999-12-31', 'UTC'); +select parseDateTimeInJodaSyntax('1999 366', 'yyyy D', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +--- Ensure all days of year are checked against final selected year +select parseDateTimeInJodaSyntax('2001 366 2000', 'yyyy D yyyy', 'UTC') = toDateTime('2000-12-31', 'UTC'); +select parseDateTimeInJodaSyntax('2000 366 2001', 'yyyy D yyyy', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('0', 'D', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('367', 'D', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- hour of day +select parseDateTimeInJodaSyntax('7', 'H', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('23', 'HH', 'UTC') = toDateTime('1970-01-01 23:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('0', 'HHH', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('10', 'HHHHHHHH', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +--- invalid hour of day +select parseDateTimeInJodaSyntax('24', 'H', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('-1', 'H', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('123456789', 'H', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- clock hour of day +select parseDateTimeInJodaSyntax('7', 'k', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('24', 'kk', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('1', 'kkk', 'UTC') = toDateTime('1970-01-01 01:00:00',
'UTC'); +select parseDateTimeInJodaSyntax('10', 'kkkkkkkk', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +-- invalid clock hour of day +select parseDateTimeInJodaSyntax('25', 'k', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('0', 'k', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('123456789', 'k', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- hour of half day +select parseDateTimeInJodaSyntax('7', 'K', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('11', 'KK', 'UTC') = toDateTime('1970-01-01 11:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('0', 'KKK', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('10', 'KKKKKKKK', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +-- invalid hour of half day +select parseDateTimeInJodaSyntax('12', 'K', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('-1', 'K', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('123456789', 'K', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- clock hour of half day +select parseDateTimeInJodaSyntax('7', 'h', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('12', 'hh', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('1', 'hhh', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('10', 'hhhhhhhh', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +-- invalid clock hour of half day +select parseDateTimeInJodaSyntax('13', 'h', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('0', 'h', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('123456789', 'h', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- half of day +--- Half of day has no effect if hour or clockhour of day is provided (hour-of-day tests) +select parseDateTimeInJodaSyntax('7 PM', 'H a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('7 AM', 'H a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('7 pm', 'H a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('7 am', 'H a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('0 PM', 'H a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('0 AM', 'H a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('0 pm', 'H a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('0 am', 'H a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('7 PM', 'k a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('7 AM', 'k a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('7 pm', 'k a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('7 am', 'k a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('24 PM', 'k a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('24 AM', 'k a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('24 pm', 'k a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('24
am', 'k a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +-- Half of day has an effect if hour or clockhour of halfday is provided +select parseDateTimeInJodaSyntax('0 PM', 'K a', 'UTC') = toDateTime('1970-01-01 12:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('0 AM', 'K a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('6 PM', 'K a', 'UTC') = toDateTime('1970-01-01 18:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('6 AM', 'K a', 'UTC') = toDateTime('1970-01-01 06:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('11 PM', 'K a', 'UTC') = toDateTime('1970-01-01 23:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('11 AM', 'K a', 'UTC') = toDateTime('1970-01-01 11:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('1 PM', 'h a', 'UTC') = toDateTime('1970-01-01 13:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('1 AM', 'h a', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('6 PM', 'h a', 'UTC') = toDateTime('1970-01-01 18:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('6 AM', 'h a', 'UTC') = toDateTime('1970-01-01 06:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('12 PM', 'h a', 'UTC') = toDateTime('1970-01-01 12:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('12 AM', 'h a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +-- time gives precedence to the most recent time specifier +select parseDateTimeInJodaSyntax('0 1 AM', 'H h a', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('12 1 PM', 'H h a', 'UTC') = toDateTime('1970-01-01 13:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('1 AM 0', 'h a H', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('1 AM 12', 'h a H', 'UTC') = toDateTime('1970-01-01 12:00:00', 'UTC'); + +-- minute +select parseDateTimeInJodaSyntax('8', 'm', 'UTC') = toDateTime('1970-01-01 00:08:00', 'UTC'); +select parseDateTimeInJodaSyntax('59', 'mm', 'UTC') = toDateTime('1970-01-01 00:59:00', 'UTC'); +select parseDateTimeInJodaSyntax('0/', 'mmm/', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('60', 'm', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('-1', 'm', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('123456789', 'm', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- second +select parseDateTimeInJodaSyntax('9', 's', 'UTC') = toDateTime('1970-01-01 00:00:09', 'UTC'); +select parseDateTimeInJodaSyntax('58', 'ss', 'UTC') = toDateTime('1970-01-01 00:00:58', 'UTC'); +select parseDateTimeInJodaSyntax('0/', 's/', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('60', 's', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('-1', 's', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('123456789', 's', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- integer overflow in AST Fuzzer +select parseDateTimeInJodaSyntax('19191919191919191919191919191919', 'CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- *OrZero, *OrNull +select parseDateTimeInJodaSyntaxOrZero('2001 366 2000', 'yyyy D yyyy', 'UTC') = toDateTime('2000-12-31', 'UTC'); +select parseDateTimeInJodaSyntaxOrZero('2001 invalid 366 2000', 'yyyy D yyyy', 'UTC') = toDateTime('1970-01-01', 'UTC'); +select parseDateTimeInJodaSyntaxOrNull('2001 366 2000', 'yyyy D yyyy',
'UTC') = toDateTime('2000-12-31', 'UTC'); +select parseDateTimeInJodaSyntaxOrNull('2001 invalid 366 2000', 'yyyy D yyyy', 'UTC') IS NULL; + +-- Error handling +select parseDateTimeInJodaSyntax('12 AM'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +select parseDateTimeInJodaSyntax('12 AM', 'h a', 'UTC', 'a fourth argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +-- { echoOff } diff --git a/tests/queries/0_stateless/02668_ulid_decoding.reference b/tests/queries/0_stateless/02668_ulid_decoding.reference new file mode 100644 index 00000000000..b48580d60bb --- /dev/null +++ b/tests/queries/0_stateless/02668_ulid_decoding.reference @@ -0,0 +1,3 @@ +1 +2023-03-27 19:16:44.000 +2023-03-27 19:16:44.000 diff --git a/tests/queries/0_stateless/02668_ulid_decoding.sql b/tests/queries/0_stateless/02668_ulid_decoding.sql new file mode 100644 index 00000000000..df94025b7b5 --- /dev/null +++ b/tests/queries/0_stateless/02668_ulid_decoding.sql @@ -0,0 +1,10 @@ +-- Tags: no-fasttest + +SELECT dateDiff('minute', ULIDStringToDateTime(generateULID()), now()) = 0; +SELECT toTimezone(ULIDStringToDateTime('01GWJWKW30MFPQJRYEAF4XFZ9E'), 'America/Costa_Rica'); +SELECT ULIDStringToDateTime('01GWJWKW30MFPQJRYEAF4XFZ9E', 'America/Costa_Rica'); +SELECT ULIDStringToDateTime('01GWJWKW30MFPQJRYEAF4XFZ9', 'America/Costa_Rica'); -- { serverError ILLEGAL_COLUMN } +SELECT ULIDStringToDateTime('01GWJWKW30MFPQJRYEAF4XFZ9E', 'America/Costa_Ric'); -- { serverError POCO_EXCEPTION } +SELECT ULIDStringToDateTime('01GWJWKW30MFPQJRYEAF4XFZ9E0'); -- { serverError ILLEGAL_COLUMN } +SELECT ULIDStringToDateTime(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT ULIDStringToDateTime(1, 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } diff --git a/tests/queries/0_stateless/02669_alter_modify_to_nullable.reference b/tests/queries/0_stateless/02669_alter_modify_to_nullable.reference new file mode 100644 index 00000000000..aff80e1d699 --- /dev/null +++ b/tests/queries/0_stateless/02669_alter_modify_to_nullable.reference @@ -0,0 +1,8 @@ +1_1_1_0 String Default +2_2_2_0 String Sparse +20000 10435 ['','bar','foo'] +1_1_1_0_3 String Default +2_2_2_0_3 Nullable(String) Default +20000 10435 ['','bar','foo'] +1_1_1_0_3 0 +2_2_2_0_3 10000 diff --git a/tests/queries/0_stateless/02669_alter_modify_to_nullable.sql b/tests/queries/0_stateless/02669_alter_modify_to_nullable.sql new file mode 100644 index 00000000000..862280fd7cd --- /dev/null +++ b/tests/queries/0_stateless/02669_alter_modify_to_nullable.sql @@ -0,0 +1,31 @@ +DROP TABLE IF EXISTS t_modify_to_nullable; + +CREATE TABLE t_modify_to_nullable (key UInt64, id UInt64, s String) +ENGINE = MergeTree ORDER BY id PARTITION BY key +SETTINGS min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 0.9; + +INSERT INTO t_modify_to_nullable SELECT 1, number, 'foo' FROM numbers(10000); +INSERT INTO t_modify_to_nullable SELECT 2, number, if (number % 23 = 0, 'bar', '') FROM numbers(10000); + +SELECT name, type, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_modify_to_nullable' AND column = 's' AND active +ORDER BY name; + +SELECT count(s), countIf(s != ''), arraySort(groupUniqArray(s)) FROM t_modify_to_nullable; + +SET mutations_sync = 2; +ALTER TABLE t_modify_to_nullable MODIFY COLUMN s Nullable(String); + +SELECT name, type, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_modify_to_nullable' AND column = 's' AND active +ORDER BY name; + +SELECT count(s), countIf(s != 
''), arraySort(groupUniqArray(s)) FROM t_modify_to_nullable; + +SYSTEM FLUSH LOGS; + +SELECT part_name, read_rows FROM system.part_log +WHERE database = currentDatabase() AND table = 't_modify_to_nullable' AND event_type = 'MutatePart' +ORDER BY part_name; + +DROP TABLE t_modify_to_nullable; diff --git a/tests/queries/0_stateless/00979_live_view_watch_live_with_subquery.reference b/tests/queries/0_stateless/02670_constant_skip_index.reference similarity index 100% rename from tests/queries/0_stateless/00979_live_view_watch_live_with_subquery.reference rename to tests/queries/0_stateless/02670_constant_skip_index.reference diff --git a/tests/queries/0_stateless/02670_constant_skip_index.sql b/tests/queries/0_stateless/02670_constant_skip_index.sql new file mode 100644 index 00000000000..97dd2ab33c9 --- /dev/null +++ b/tests/queries/0_stateless/02670_constant_skip_index.sql @@ -0,0 +1,25 @@ + +DROP TABLE IF EXISTS t_constant_index; + +CREATE TABLE t_constant_index +( + id UInt64, + INDEX t_constant_index 'foo' TYPE set(2) GRANULARITY 1 +) ENGINE = MergeTree +ORDER BY id; -- { serverError INCORRECT_QUERY } + +CREATE TABLE t_constant_index +( + id UInt64, + INDEX t_constant_index id + rand() TYPE set(2) GRANULARITY 1 +) ENGINE = MergeTree +ORDER BY id; -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t_constant_index +( + id UInt64, + INDEX t_constant_index id * 2 TYPE set(2) GRANULARITY 1 +) ENGINE = MergeTree +ORDER BY id; + +DROP TABLE t_constant_index; diff --git a/tests/queries/0_stateless/02674_and_consistency.reference b/tests/queries/0_stateless/02674_and_consistency.reference new file mode 100644 index 00000000000..e74ab1928c5 --- /dev/null +++ b/tests/queries/0_stateless/02674_and_consistency.reference @@ -0,0 +1,4 @@ +10 +#45218 +10 += diff --git a/tests/queries/0_stateless/02674_and_consistency.sql b/tests/queries/0_stateless/02674_and_consistency.sql new file mode 100644 index 00000000000..5988832ba68 --- /dev/null +++ b/tests/queries/0_stateless/02674_and_consistency.sql @@ -0,0 +1,21 @@ +SELECT SUM(number) +FROM +( + SELECT 10 AS number +) +GROUP BY number +HAVING 1 AND sin(SUMOrNull(number)) +SETTINGS enable_optimize_predicate_expression = 0; + +select '#45218'; + +SELECT SUM(number) +FROM +( + SELECT 10 AS number +) +GROUP BY cos(min2(number, number) % number) - number +HAVING ((-sign(-233841197)) IS NOT NULL) AND sin(lcm(SUM(number), SUM(number)) >= ('372497213' IS NOT NULL)) +SETTINGS aggregate_functions_null_for_empty = 1, enable_optimize_predicate_expression = 0; + +select '='; diff --git a/tests/queries/0_stateless/02674_date_int_string_json_inference.reference b/tests/queries/0_stateless/02674_date_int_string_json_inference.reference new file mode 100644 index 00000000000..2e89d6a15a4 --- /dev/null +++ b/tests/queries/0_stateless/02674_date_int_string_json_inference.reference @@ -0,0 +1 @@ +x Nullable(String) diff --git a/tests/queries/0_stateless/02674_date_int_string_json_inference.sql b/tests/queries/0_stateless/02674_date_int_string_json_inference.sql new file mode 100644 index 00000000000..21abf763cbf --- /dev/null +++ b/tests/queries/0_stateless/02674_date_int_string_json_inference.sql @@ -0,0 +1,2 @@ +desc format(JSONEachRow, '{"x" : "2020-01-01"}, {"x" : "1000"}') + diff --git a/tests/queries/0_stateless/02674_trivial_count_analyzer.reference b/tests/queries/0_stateless/02674_trivial_count_analyzer.reference new file mode 100644 index 00000000000..05feadb58a0 --- /dev/null +++ b/tests/queries/0_stateless/02674_trivial_count_analyzer.reference @@ -0,0 +1,47 @@ +-- 
{ echoOn } +set allow_experimental_analyzer=1; +set optimize_trivial_count_query=1; +create table m3(a Int64, b UInt64) Engine=MergeTree order by tuple(); +select count() from m3; +0 +insert into m3 values (0,0); +insert into m3 values (-1,1); +select trimBoth(explain) from (explain select count() from m3) where explain like '%ReadFromPreparedSource (Optimized trivial count)%'; +ReadFromPreparedSource (Optimized trivial count) +select count() from m3; +2 +select count(*) from m3; +2 +select count(a) from m3; +2 +select count(b) from m3; +2 +select count() + 1 from m3; +3 +drop table m3; +-- checking queries with FINAL +create table replacing_m3(a Int64, b UInt64) Engine=ReplacingMergeTree() order by (a, b); +SYSTEM STOP MERGES replacing_m3; +select count() from replacing_m3; +0 +insert into replacing_m3 values (0,0); +insert into replacing_m3 values (0,0); +insert into replacing_m3 values (-1,1); +insert into replacing_m3 values (-2,2); +select trimBoth(explain) from (explain select count() from replacing_m3) where explain like '%ReadFromPreparedSource (Optimized trivial count)%'; +ReadFromPreparedSource (Optimized trivial count) +select count() from replacing_m3; +4 +select count(*) from replacing_m3; +4 +select count(a) from replacing_m3; +4 +select count(b) from replacing_m3; +4 +select count() from replacing_m3 FINAL; +3 +select count(a) from replacing_m3 FINAL; +3 +select count(b) from replacing_m3 FINAL; +3 +drop table replacing_m3; diff --git a/tests/queries/0_stateless/02674_trivial_count_analyzer.sql b/tests/queries/0_stateless/02674_trivial_count_analyzer.sql new file mode 100644 index 00000000000..988d1b9ba92 --- /dev/null +++ b/tests/queries/0_stateless/02674_trivial_count_analyzer.sql @@ -0,0 +1,45 @@ +drop table if exists m3; +drop table if exists replacing_m3; + +-- { echoOn } +set allow_experimental_analyzer=1; +set optimize_trivial_count_query=1; + +create table m3(a Int64, b UInt64) Engine=MergeTree order by tuple(); + +select count() from m3; + +insert into m3 values (0,0); +insert into m3 values (-1,1); + +select trimBoth(explain) from (explain select count() from m3) where explain like '%ReadFromPreparedSource (Optimized trivial count)%'; +select count() from m3; +select count(*) from m3; +select count(a) from m3; +select count(b) from m3; +select count() + 1 from m3; + +drop table m3; + +-- checking queries with FINAL +create table replacing_m3(a Int64, b UInt64) Engine=ReplacingMergeTree() order by (a, b); +SYSTEM STOP MERGES replacing_m3; + +select count() from replacing_m3; + +insert into replacing_m3 values (0,0); +insert into replacing_m3 values (0,0); +insert into replacing_m3 values (-1,1); +insert into replacing_m3 values (-2,2); + +select trimBoth(explain) from (explain select count() from replacing_m3) where explain like '%ReadFromPreparedSource (Optimized trivial count)%'; +select count() from replacing_m3; +select count(*) from replacing_m3; +select count(a) from replacing_m3; +select count(b) from replacing_m3; + +select count() from replacing_m3 FINAL; +select count(a) from replacing_m3 FINAL; +select count(b) from replacing_m3 FINAL; + +drop table replacing_m3; diff --git a/tests/queries/0_stateless/02675_grant_query_formatting.reference b/tests/queries/0_stateless/02675_grant_query_formatting.reference new file mode 100644 index 00000000000..8cbf028e742 --- /dev/null +++ b/tests/queries/0_stateless/02675_grant_query_formatting.reference @@ -0,0 +1 @@ +GRANT SELECT ON *.* TO A WITH REPLACE OPTION diff --git 
a/tests/queries/0_stateless/02675_grant_query_formatting.sh b/tests/queries/0_stateless/02675_grant_query_formatting.sh new file mode 100755 index 00000000000..76c953913a9 --- /dev/null +++ b/tests/queries/0_stateless/02675_grant_query_formatting.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -e + +format="$CLICKHOUSE_FORMAT --oneline" + +echo "grant select on *.* to A with replace option" | $format diff --git a/tests/queries/0_stateless/01920_async_drain_connections.reference b/tests/queries/0_stateless/02675_is_ipv6_function_fix.reference similarity index 50% rename from tests/queries/0_stateless/01920_async_drain_connections.reference rename to tests/queries/0_stateless/02675_is_ipv6_function_fix.reference index aa47d0d46d4..573541ac970 100644 --- a/tests/queries/0_stateless/01920_async_drain_connections.reference +++ b/tests/queries/0_stateless/02675_is_ipv6_function_fix.reference @@ -1,2 +1 @@ 0 -0 diff --git a/tests/queries/0_stateless/02675_is_ipv6_function_fix.sql b/tests/queries/0_stateless/02675_is_ipv6_function_fix.sql new file mode 100644 index 00000000000..c28b4a5dc2d --- /dev/null +++ b/tests/queries/0_stateless/02675_is_ipv6_function_fix.sql @@ -0,0 +1 @@ +SELECT isIPv6String('1234::1234:'); \ No newline at end of file diff --git a/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.reference b/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.reference new file mode 100644 index 00000000000..ecdb62c5cb5 --- /dev/null +++ b/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.reference @@ -0,0 +1,33 @@ +Expression ((Project names + (Projection + ))) +Header: t1.id UInt64 + t1.value String + t2.value String +Actions: INPUT : 0 -> t1.id_0 UInt64 : 0 + INPUT : 1 -> t1.value_1 String : 1 + INPUT : 2 -> t2.value_2 String : 2 + ALIAS t1.id_0 :: 0 -> t1.id UInt64 : 3 + ALIAS t1.value_1 :: 1 -> t1.value String : 0 + ALIAS t2.value_2 :: 2 -> t2.value String : 1 +Positions: 3 0 1 + FilledJoin (Filled JOIN) + Header: t1.id_0 UInt64 + t1.value_1 String + t2.value_2 String + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: t1.id_0 UInt64 + t1.value_1 String + Filter column: equals(t1.id_0, 0_UInt8) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 0_UInt8 UInt8 : 2 + ALIAS id :: 0 -> t1.id_0 UInt64 : 3 + ALIAS value :: 1 -> t1.value_1 String : 0 + FUNCTION equals(t1.id_0 : 3, 0_UInt8 :: 2) -> equals(t1.id_0, 0_UInt8) UInt8 : 1 + Positions: 1 3 0 + ReadFromMergeTree (default.test_table) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +0 Value JoinValue diff --git a/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.sql b/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.sql new file mode 100644 index 00000000000..930127497ae --- /dev/null +++ b/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.sql @@ -0,0 +1,28 @@ +SET allow_experimental_analyzer = 1; +SET single_join_prefer_left_table = 0; +SET optimize_move_to_prewhere = 0; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=MergeTree ORDER BY id; + +INSERT INTO test_table VALUES (0, 'Value'); + +DROP TABLE IF EXISTS test_table_join; +CREATE TABLE test_table_join +( + id UInt64, + value String +) ENGINE = Join(All, inner, id); + +INSERT INTO 
test_table_join VALUES (0, 'JoinValue'); + +EXPLAIN header = 1, actions = 1 SELECT t1.id, t1.value, t2.value FROM test_table AS t1 INNER JOIN test_table_join AS t2 ON t1.id = t2.id WHERE t1.id = 0; + +SELECT t1.id, t1.value, t2.value FROM test_table AS t1 INNER JOIN test_table_join AS t2 ON t1.id = t2.id WHERE t1.id = 0; + +DROP TABLE test_table_join; +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.reference b/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.reference new file mode 100644 index 00000000000..3f34d5e2c79 --- /dev/null +++ b/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.reference @@ -0,0 +1,23 @@ +INSERT TO S3 + [ 0 ] S3CompleteMultipartUpload: 1 + [ 0 ] S3CreateMultipartUpload: 1 + [ 0 ] S3HeadObject: 2 + [ 0 ] S3ReadRequestsCount: 2 + [ 0 ] S3UploadPart: 1 + [ 0 ] S3WriteRequestsCount: 3 +CHECK WITH query_log +QueryFinish S3CreateMultipartUpload 1 S3UploadPart 1 S3CompleteMultipartUpload 1 S3PutObject 0 +CREATE +INSERT + [ 0 ] FileOpen: 8 +READ +INSERT and READ INSERT + [ 0 ] FileOpen: 8 + [ 0 ] FileOpen: 8 +DROP +CHECK with query_log +QueryFinish INSERT INTO times SELECT now() + INTERVAL 1 day SETTINGS optimize_on_insert = 0; FileOpen 8 +QueryFinish SELECT \'1\', min(t) FROM times; FileOpen 0 +QueryFinish INSERT INTO times SELECT now() + INTERVAL 2 day SETTINGS optimize_on_insert = 0; FileOpen 8 +QueryFinish SELECT \'2\', min(t) FROM times; FileOpen 0 +QueryFinish INSERT INTO times SELECT now() + INTERVAL 3 day SETTINGS optimize_on_insert = 0; FileOpen 8 diff --git a/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh b/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh new file mode 100755 index 00000000000..adc9525ef81 --- /dev/null +++ b/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# Tag no-fasttest: needs s3 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +echo "INSERT TO S3" +$CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -nq " +INSERT INTO TABLE FUNCTION s3('http://localhost:11111/test/profile_events.csv', 'test', 'testtest', 'CSV', 'number UInt64') SELECT number FROM numbers(1000000) SETTINGS s3_max_single_part_upload_size = 10, s3_truncate_on_insert = 1; +" 2>&1 | grep -o -e '\ \[\ .*\ \]\ S3.*:\ .*\ ' | grep -v 'Microseconds' | sort + +echo "CHECK WITH query_log" +$CLICKHOUSE_CLIENT -nq " +SYSTEM FLUSH LOGS; +SELECT type, + 'S3CreateMultipartUpload', ProfileEvents['S3CreateMultipartUpload'], + 'S3UploadPart', ProfileEvents['S3UploadPart'], + 'S3CompleteMultipartUpload', ProfileEvents['S3CompleteMultipartUpload'], + 'S3PutObject', ProfileEvents['S3PutObject'] +FROM system.query_log +WHERE query LIKE '%profile_events.csv%' +AND type = 'QueryFinish' +AND current_database = currentDatabase() +ORDER BY query_start_time DESC; +" + +echo "CREATE" +$CLICKHOUSE_CLIENT -nq " +DROP TABLE IF EXISTS times; +CREATE TABLE times (t DateTime) ENGINE MergeTree ORDER BY t + SETTINGS + storage_policy='default', + min_rows_for_wide_part = 1000000, + min_bytes_for_wide_part = 1000000, + ratio_of_defaults_for_sparse_serialization=1.0; +" + +echo "INSERT" +$CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -nq " +INSERT INTO times SELECT now() + INTERVAL 1 day SETTINGS optimize_on_insert = 0; +" 2>&1 | grep -o -e '\ \[\ .*\ \]\ FileOpen:\ .*\ ' + +echo "READ" +$CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -nq " +SELECT '1', min(t) FROM times; +" 2>&1 | grep -o -e '\ \[\ .*\ \]\ FileOpen:\ .*\ ' + +echo "INSERT and READ INSERT" +$CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -nq " +INSERT INTO times SELECT now() + INTERVAL 2 day SETTINGS optimize_on_insert = 0; +SELECT '2', min(t) FROM times; +INSERT INTO times SELECT now() + INTERVAL 3 day SETTINGS optimize_on_insert = 0; +" 2>&1 | grep -o -e '\ \[\ .*\ \]\ FileOpen:\ .*\ ' + +echo "DROP" +$CLICKHOUSE_CLIENT -nq " +DROP TABLE times; +" + +echo "CHECK with query_log" +$CLICKHOUSE_CLIENT -nq " +SYSTEM FLUSH LOGS; +SELECT type, + query, + 'FileOpen', ProfileEvents['FileOpen'] +FROM system.query_log +WHERE current_database = currentDatabase() +AND ( query LIKE '%SELECT % FROM times%' OR query LIKE '%INSERT INTO times%' ) +AND type = 'QueryFinish' +ORDER BY query_start_time_microseconds ASC, query DESC; +" diff --git a/tests/queries/0_stateless/02675_sparse_columns_clear_column.reference b/tests/queries/0_stateless/02675_sparse_columns_clear_column.reference new file mode 100644 index 00000000000..56fa4a9ebea --- /dev/null +++ b/tests/queries/0_stateless/02675_sparse_columns_clear_column.reference @@ -0,0 +1,6 @@ +arr Default +v Sparse +arr Default +arr Default +v Sparse +0 [] diff --git a/tests/queries/0_stateless/02675_sparse_columns_clear_column.sql b/tests/queries/0_stateless/02675_sparse_columns_clear_column.sql new file mode 100644 index 00000000000..781030ef7b4 --- /dev/null +++ b/tests/queries/0_stateless/02675_sparse_columns_clear_column.sql @@ -0,0 +1,34 @@ +DROP TABLE IF EXISTS t_sparse_columns_clear; + +CREATE TABLE t_sparse_columns_clear (arr Array(UInt64), v UInt64) +ENGINE = MergeTree ORDER BY tuple() +SETTINGS + ratio_of_defaults_for_sparse_serialization = 0.9, + min_bytes_for_wide_part=0; + +INSERT INTO t_sparse_columns_clear SELECT [number], 0 FROM numbers(1000); + +SELECT column, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 
't_sparse_columns_clear' AND active +ORDER BY column; + +SET mutations_sync = 2; +SET alter_sync = 2; + +ALTER TABLE t_sparse_columns_clear CLEAR COLUMN v; + +SELECT column, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_columns_clear' AND active +ORDER BY column; + +OPTIMIZE TABLE t_sparse_columns_clear FINAL; + +SELECT column, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_columns_clear' AND active +ORDER BY column; + +DROP TABLE t_sparse_columns_clear SYNC; + +SYSTEM FLUSH LOGS; + +SELECT count(), groupArray(message) FROM system.text_log WHERE logger_name LIKE '%' || currentDatabase() || '.t_sparse_columns_clear' || '%' AND level = 'Error'; diff --git a/tests/queries/0_stateless/02676_analyzer_limit_offset.reference b/tests/queries/0_stateless/02676_analyzer_limit_offset.reference new file mode 100644 index 00000000000..96483268d43 --- /dev/null +++ b/tests/queries/0_stateless/02676_analyzer_limit_offset.reference @@ -0,0 +1,63 @@ +0 +1 +2 +3 +4 +15 +15 +16 +16 +17 +30 +30 +31 +31 +32 +102 +103 +104 +105 +105 +106 +107 +108 +109 +105 +106 +107 +108 +109 +60 +60 +61 +61 +62 +62 +63 +63 +64 +64 +60 +35 +35 +36 +36 +37 +37 +38 +38 +39 +39 +105 +106 +107 +108 +109 +12 +13 +13 +14 +14 +15 +15 +16 diff --git a/tests/queries/0_stateless/02676_analyzer_limit_offset.sql b/tests/queries/0_stateless/02676_analyzer_limit_offset.sql new file mode 100644 index 00000000000..39c6b85f088 --- /dev/null +++ b/tests/queries/0_stateless/02676_analyzer_limit_offset.sql @@ -0,0 +1,34 @@ +set allow_experimental_analyzer=1; + +DROP TABLE IF EXISTS test; +CREATE TABLE test (i UInt64) Engine = MergeTree() order by i; +INSERT INTO test SELECT number FROM numbers(100); +INSERT INTO test SELECT number FROM numbers(10,100); +OPTIMIZE TABLE test FINAL; + +-- Only set limit +SET limit = 5; +SELECT * FROM test; -- 5 rows +SELECT * FROM test OFFSET 20; -- 5 rows +SELECT * FROM (SELECT i FROM test LIMIT 10 OFFSET 50) TMP; -- 5 rows +SELECT * FROM test LIMIT 4 OFFSET 192; -- 4 rows +SELECT * FROM test LIMIT 10 OFFSET 195; -- 5 rows + +-- Only set offset +SET limit = 0; +SET offset = 195; +SELECT * FROM test; -- 5 rows +SELECT * FROM test OFFSET 20; -- no result +SELECT * FROM test LIMIT 100; -- no result +SET offset = 10; +SELECT * FROM test LIMIT 20 OFFSET 100; -- 10 rows +SELECT * FROM test LIMIT 11 OFFSET 100; -- 1 row + +-- offset and limit together +SET limit = 10; +SELECT * FROM test LIMIT 50 OFFSET 50; -- 10 rows +SELECT * FROM test LIMIT 50 OFFSET 190; -- 0 rows +SELECT * FROM test LIMIT 50 OFFSET 185; -- 5 rows +SELECT * FROM test LIMIT 18 OFFSET 5; -- 8 rows + +DROP TABLE test; diff --git a/tests/queries/0_stateless/02676_distinct_reading_in_order_analyzer.reference b/tests/queries/0_stateless/02676_distinct_reading_in_order_analyzer.reference new file mode 100644 index 00000000000..016202cfb66 --- /dev/null +++ b/tests/queries/0_stateless/02676_distinct_reading_in_order_analyzer.reference @@ -0,0 +1 @@ +MergeTreeInOrder diff --git a/tests/queries/0_stateless/02676_distinct_reading_in_order_analyzer.sql b/tests/queries/0_stateless/02676_distinct_reading_in_order_analyzer.sql new file mode 100644 index 00000000000..f00c1322e1d --- /dev/null +++ b/tests/queries/0_stateless/02676_distinct_reading_in_order_analyzer.sql @@ -0,0 +1,8 @@ +drop table if exists t; + +set allow_experimental_analyzer=1; + +create table t (a UInt64, b UInt64) engine=MergeTree() order by (a); +insert into t select number % 2,
number from numbers(10); + +select splitByChar(' ', trimBoth(explain))[1] from (explain pipeline select distinct a from t) where explain like '%MergeTreeInOrder%'; diff --git a/tests/queries/0_stateless/02676_kafka_murmur_hash.reference b/tests/queries/0_stateless/02676_kafka_murmur_hash.reference new file mode 100644 index 00000000000..43f8440a5fc --- /dev/null +++ b/tests/queries/0_stateless/02676_kafka_murmur_hash.reference @@ -0,0 +1,5 @@ +1173551340 +1357151166 +1161502112 +661178819 +2088585677 diff --git a/tests/queries/0_stateless/02676_kafka_murmur_hash.sql b/tests/queries/0_stateless/02676_kafka_murmur_hash.sql new file mode 100644 index 00000000000..d2847b757e2 --- /dev/null +++ b/tests/queries/0_stateless/02676_kafka_murmur_hash.sql @@ -0,0 +1,8 @@ +-- Tests are taken from: https://github.com/apache/kafka/blob/139f7709bd3f5926901a21e55043388728ccca78/clients/src/test/java/org/apache/kafka/common/utils/UtilsTest.java#L93 +-- and the reference is generated with: https://pastila.nl/?06465d36/87f8ab2c9f6501c54f1c0879a13c8626 + +SELECT kafkaMurmurHash('21'); +SELECT kafkaMurmurHash('foobar'); +SELECT kafkaMurmurHash('a-little-bit-long-string'); +SELECT kafkaMurmurHash('a-little-bit-longer-string'); +SELECT kafkaMurmurHash('lkjh234lh9fiuh90y23oiuhsafujhadof229phr9h19h89h8'); diff --git a/tests/queries/0_stateless/02676_optimize_old_parts_replicated.reference b/tests/queries/0_stateless/02676_optimize_old_parts_replicated.reference new file mode 100644 index 00000000000..0f3c482f188 --- /dev/null +++ b/tests/queries/0_stateless/02676_optimize_old_parts_replicated.reference @@ -0,0 +1,7 @@ +Without merge +3 +With merge replicated any part range +1 +With merge replicated partition only +1 +1 diff --git a/tests/queries/0_stateless/02676_optimize_old_parts_replicated.sh b/tests/queries/0_stateless/02676_optimize_old_parts_replicated.sh new file mode 100755 index 00000000000..2202a349c56 --- /dev/null +++ b/tests/queries/0_stateless/02676_optimize_old_parts_replicated.sh @@ -0,0 +1,67 @@ +#!/usr/bin/env bash +# Tags: long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Wait for the number of parts in table $1 to become $2. +# Print the changed value; if there are no changes for $3 seconds, print the initial value.
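+# Illustrative usage note (editorial; the arguments mirror the first call below): +# wait_for_number_of_parts 'test_without_merge' 1 10 +# polls system.parts once per second for at most 10 attempts and echoes the active part count as soon as it reaches 1; on timeout it echoes the last observed count.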
+wait_for_number_of_parts() { + for _ in `seq $3` + do + sleep 1 + res=`$CLICKHOUSE_CLIENT -q "SELECT count(*) FROM system.parts WHERE database = currentDatabase() AND table='$1' AND active"` + if [ "$res" -eq "$2" ] + then + echo "$res" + return + fi + done + echo "$res" +} + +$CLICKHOUSE_CLIENT -nmq " +DROP TABLE IF EXISTS test_without_merge; +DROP TABLE IF EXISTS test_replicated; + +SELECT 'Without merge'; + +CREATE TABLE test_without_merge (i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test02676_without_merge', 'node') ORDER BY i SETTINGS merge_selecting_sleep_ms=1000; +INSERT INTO test_without_merge SELECT 1; +INSERT INTO test_without_merge SELECT 2; +INSERT INTO test_without_merge SELECT 3;" + +wait_for_number_of_parts 'test_without_merge' 1 10 + +$CLICKHOUSE_CLIENT -nmq " +DROP TABLE test_without_merge; + +SELECT 'With merge replicated any part range'; + +CREATE TABLE test_replicated (i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test02676', 'node') ORDER BY i +SETTINGS min_age_to_force_merge_seconds=1, merge_selecting_sleep_ms=1000, min_age_to_force_merge_on_partition_only=false; +INSERT INTO test_replicated SELECT 1; +INSERT INTO test_replicated SELECT 2; +INSERT INTO test_replicated SELECT 3;" + +wait_for_number_of_parts 'test_replicated' 1 100 + +$CLICKHOUSE_CLIENT -nmq " +DROP TABLE test_replicated; + +SELECT 'With merge replicated partition only'; + +CREATE TABLE test_replicated (i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test02676_partition_only', 'node') ORDER BY i +SETTINGS min_age_to_force_merge_seconds=1, merge_selecting_sleep_ms=1000, min_age_to_force_merge_on_partition_only=true; +INSERT INTO test_replicated SELECT 1; +INSERT INTO test_replicated SELECT 2; +INSERT INTO test_replicated SELECT 3;" + +wait_for_number_of_parts 'test_replicated' 1 100 + +$CLICKHOUSE_CLIENT -nmq " +SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; -- Sleep for 9 seconds and verify that we keep the old part because it's the only one +SELECT (now() - modification_time) > 5 FROM system.parts WHERE database = currentDatabase() AND table='test_replicated' AND active; + +DROP TABLE test_replicated;" diff --git a/tests/queries/0_stateless/02676_to_decimal_string.reference b/tests/queries/0_stateless/02676_to_decimal_string.reference new file mode 100644 index 00000000000..4c27ee5b528 --- /dev/null +++ b/tests/queries/0_stateless/02676_to_decimal_string.reference @@ -0,0 +1,21 @@ +2.00000000000000000000000000000000000000000000000000000000000000000000000000000 +2.12 +-2.00000000000000000000000000000000000000000000000000000000000000000000000000000 +-2.12 +2.987600000000000033395508580724708735942840576171875000000000 +2.15 +-2.987600000000000033395508580724708735942840576171875000000000 +-2.15 +64.1230010986 +64.2340000000 +-64.1230010986 +-64.2340000000 +-32.345 +32.34500000000000000000000000000000000000000000000000000000000000000000000000000 +32.46 +-64.5671232345 +128.78932312332132985464 +-128.78932312332132985464 +128.78932312332132985464000000000000000000000000000000000000000000000000000000000 +128.7893231233 +-128.78932312332132985464123123789323123321329854600000000000000000000000000000000 diff --git a/tests/queries/0_stateless/02676_to_decimal_string.sql b/tests/queries/0_stateless/02676_to_decimal_string.sql new file mode 100644 index 00000000000..563d60c62c7 --- /dev/null +++ b/tests/queries/0_stateless/02676_to_decimal_string.sql @@ -0,0 +1,35 @@ +-- Regular types +SELECT toDecimalString(2, 77); -- more digits 
required than exist +SELECT toDecimalString(2.123456, 2); -- rounding +SELECT toDecimalString(-2, 77); -- more digits required than exist +SELECT toDecimalString(-2.123456, 2); -- rounding + +SELECT toDecimalString(2.9876, 60); -- more digits required than exist (60 is the Float maximum; the literal is Float64 by default) +SELECT toDecimalString(2.1456, 2); -- rounding +SELECT toDecimalString(-2.9876, 60); -- more digits required than exist +SELECT toDecimalString(-2.1456, 2); -- rounding + +-- Float32 and Float64 tests. No sense to test big float precision: the result will be a mess anyway. +SELECT toDecimalString(64.123::Float32, 10); +SELECT toDecimalString(64.234::Float64, 10); +SELECT toDecimalString(-64.123::Float32, 10); +SELECT toDecimalString(-64.234::Float64, 10); + +-- Decimals +SELECT toDecimalString(-32.345::Decimal32(3), 3); +SELECT toDecimalString(32.345::Decimal32(3), 77); -- more digits required than exist +SELECT toDecimalString(32.456::Decimal32(3), 2); -- rounding +SELECT toDecimalString('-64.5671232345'::Decimal64(10), 10); +SELECT toDecimalString('128.78932312332132985464'::Decimal128(20), 20); +SELECT toDecimalString('-128.78932312332132985464123123'::Decimal128(26), 20); -- rounding +SELECT toDecimalString('128.78932312332132985464'::Decimal128(20), 77); -- more digits required than exist +SELECT toDecimalString('128.789323123321329854641231237893231233213298546'::Decimal256(45), 10); -- rounding +SELECT toDecimalString('-128.789323123321329854641231237893231233213298546'::Decimal256(45), 77); -- more digits required than exist + +-- Max number of decimal fractional digits is defined as 77 for Int/UInt/Decimal and 60 for Float. +-- Values above these limits shall be rejected. +SELECT toDecimalString('32.32'::Float32, 61); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER} +SELECT toDecimalString('64.64'::Float64, 61); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER} +SELECT toDecimalString('88'::UInt8, 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER} +SELECT toDecimalString('646464'::Int256, 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER} +SELECT toDecimalString('-128.789323123321329854641231237893231233213298546'::Decimal256(45), 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER} diff --git a/tests/queries/0_stateless/02676_trailing_commas.reference b/tests/queries/0_stateless/02676_trailing_commas.reference new file mode 100644 index 00000000000..76d173ca23e --- /dev/null +++ b/tests/queries/0_stateless/02676_trailing_commas.reference @@ -0,0 +1,5 @@ +1 +1 +1 +1 2 0 +1 diff --git a/tests/queries/0_stateless/02676_trailing_commas.sql b/tests/queries/0_stateless/02676_trailing_commas.sql new file mode 100644 index 00000000000..048405c4d20 --- /dev/null +++ b/tests/queries/0_stateless/02676_trailing_commas.sql @@ -0,0 +1,5 @@ +SELECT 1,; +SELECT 1, FROM numbers(1); +WITH 1 as a SELECT a, FROM numbers(1); +WITH 1 as from SELECT from, from + from, from in [0], FROM numbers(1); +SELECT n, FROM (SELECT 1 AS n); diff --git a/tests/queries/0_stateless/02677_analyzer_bitmap_has_any.reference b/tests/queries/0_stateless/02677_analyzer_bitmap_has_any.reference new file mode 100644 index 00000000000..16d7e43ecb3 --- /dev/null +++ b/tests/queries/0_stateless/02677_analyzer_bitmap_has_any.reference @@ -0,0 +1,4 @@ +1 0 +-------------- +-------------- +1 0 diff --git a/tests/queries/0_stateless/02677_analyzer_bitmap_has_any.sql b/tests/queries/0_stateless/02677_analyzer_bitmap_has_any.sql new file mode 100644 index 00000000000..f0f9845d91d --- /dev/null +++ 
b/tests/queries/0_stateless/02677_analyzer_bitmap_has_any.sql @@ -0,0 +1,36 @@ +SELECT + bitmapHasAny(bitmapBuild([toUInt8(1)]), ( + SELECT groupBitmapState(toUInt8(1)) + )) has1, + bitmapHasAny(bitmapBuild([toUInt64(1)]), ( + SELECT groupBitmapState(toUInt64(2)) + )) has2; + +SELECT '--------------'; + +SELECT * +FROM +( + SELECT + bitmapHasAny(bitmapBuild([toUInt8(1)]), ( + SELECT groupBitmapState(toUInt8(1)) + )) has1, + bitmapHasAny(bitmapBuild([toUInt64(1)]), ( + SELECT groupBitmapState(toUInt64(2)) + )) has2 +) SETTINGS allow_experimental_analyzer = 0; -- { serverError 43 } + +SELECT '--------------'; + +SELECT * +FROM +( + SELECT + bitmapHasAny(bitmapBuild([toUInt8(1)]), ( + SELECT groupBitmapState(toUInt8(1)) + )) has1, + bitmapHasAny(bitmapBuild([toUInt64(1)]), ( + SELECT groupBitmapState(toUInt64(2)) + )) has2 +) SETTINGS allow_experimental_analyzer = 1; + diff --git a/tests/queries/0_stateless/02677_analyzer_compound_expressions.reference b/tests/queries/0_stateless/02677_analyzer_compound_expressions.reference new file mode 100644 index 00000000000..96c10398c90 --- /dev/null +++ b/tests/queries/0_stateless/02677_analyzer_compound_expressions.reference @@ -0,0 +1,18 @@ +a b +a b +1 a b +1 a b +3 +3 +[0,0,1] +[0,0,1] +3 [2,0,1] +3 [2,0,1] +['foo','bar'] [1,2] +['foo','bar'] [1,2] +['foo','bar'] [1,2] +['foo','bar'] [(1,2),(3,4)] [1,3] [2,4] +['foo','bar'] [(1,2),(3,4)] [1,3] [2,4] +['foo','bar'] [(1,2),(3,4)] [1,3] [2,4] +['foo','bar'] [(1,2),(3,4)] [1,3] [2,4] +3 diff --git a/tests/queries/0_stateless/02677_analyzer_compound_expressions.sql b/tests/queries/0_stateless/02677_analyzer_compound_expressions.sql new file mode 100644 index 00000000000..6b7fdab8993 --- /dev/null +++ b/tests/queries/0_stateless/02677_analyzer_compound_expressions.sql @@ -0,0 +1,44 @@ +SET allow_experimental_analyzer = 1; + +WITH ('a', 'b')::Tuple(c1 String, c2 String) AS t +SELECT t.c1, t.c2; + +WITH materialize(('a', 'b')::Tuple(c1 String, c2 String)) AS t +SELECT t.c1, t.c2; + +WITH (1, ('a', 'b'))::Tuple(c1 UInt64, t1 Tuple(c1 String, c2 String)) AS t +SELECT t.c1, t.t1.c1, t.t1.c2; + +WITH materialize((1, ('a', 'b'))::Tuple(c1 UInt64, t1 Tuple(c1 String, c2 String))) AS t +SELECT t.c1, t.t1.c1, t.t1.c2; + +WITH [1, 2, 3] AS arr SELECT arr.size0; +WITH materialize([1, 2, 3]) AS arr SELECT arr.size0; + +WITH [1, 2, NULL] AS arr SELECT arr.null; +WITH materialize([1, 2, NULL]) AS arr SELECT arr.null; + +WITH [[1, 2], [], [3]] AS arr SELECT arr.size0, arr.size1; +WITH materialize([[1, 2], [], [3]]) AS arr SELECT arr.size0, arr.size1; + +WITH map('foo', 1, 'bar', 2) AS m SELECT m.keys, m.values; +WITH materialize(map('foo', 1, 'bar', 2)) AS m SELECT m.keys, m.values; +WITH map('foo', 1, 'bar', 2) AS m SELECT m.*; + +WITH map('foo', (1, 2), 'bar', (3, 4))::Map(String, Tuple(a UInt64, b UInt64)) AS m +SELECT m.keys, m.values, m.values.a, m.values.b; + +WITH materialize(map('foo', (1, 2), 'bar', (3, 4))::Map(String, Tuple(a UInt64, b UInt64))) AS m +SELECT m.keys, m.values, m.values.a, m.values.b; + +WITH map('foo', (1, 2), 'bar', (3, 4))::Map(String, Tuple(a UInt64, b UInt64)) AS m +SELECT m.keys, m.values, m.values.*; + +WITH materialize(map('foo', (1, 2), 'bar', (3, 4))::Map(String, Tuple(a UInt64, b UInt64))) AS m +SELECT m.keys, m.values, m.values.*; + +WITH [1, 2, 3] AS arr SELECT arr.*; -- { serverError UNSUPPORTED_METHOD } + +SELECT getSubcolumn([1, 2, 3], 'size0'); +SELECT getSubcolumn([1, 2, 3], materialize('size0')); -- { serverError ILLEGAL_COLUMN } +SELECT getSubcolumn([1, 2, 3], 'aaa'); -- { 
serverError ILLEGAL_COLUMN } diff --git a/tests/queries/0_stateless/02677_decode_url_component.reference b/tests/queries/0_stateless/02677_decode_url_component.reference new file mode 100644 index 00000000000..5f88856dc1c --- /dev/null +++ b/tests/queries/0_stateless/02677_decode_url_component.reference @@ -0,0 +1,2 @@ +%D0%BA%D0%BB%D0%B8%D0%BA%D1%85%D0%B0%D1%83%D1%81 1 +1 diff --git a/tests/queries/0_stateless/02677_decode_url_component.sql b/tests/queries/0_stateless/02677_decode_url_component.sql new file mode 100644 index 00000000000..68345b5de16 --- /dev/null +++ b/tests/queries/0_stateless/02677_decode_url_component.sql @@ -0,0 +1,5 @@ +SELECT + encodeURLComponent('кликхаус') AS encoded, + decodeURLComponent(encoded) = 'кликхаус' AS expected_EQ; + +SELECT DISTINCT decodeURLComponent(encodeURLComponent(randomString(100) AS x)) = x FROM numbers(100000); diff --git a/tests/queries/0_stateless/02677_get_subcolumn_array_of_tuples.reference b/tests/queries/0_stateless/02677_get_subcolumn_array_of_tuples.reference new file mode 100644 index 00000000000..f2700561f4e --- /dev/null +++ b/tests/queries/0_stateless/02677_get_subcolumn_array_of_tuples.reference @@ -0,0 +1,3 @@ +[42] +['foo'] +1 diff --git a/tests/queries/0_stateless/02677_get_subcolumn_array_of_tuples.sql b/tests/queries/0_stateless/02677_get_subcolumn_array_of_tuples.sql new file mode 100644 index 00000000000..5779821afaa --- /dev/null +++ b/tests/queries/0_stateless/02677_get_subcolumn_array_of_tuples.sql @@ -0,0 +1,13 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS t_get_subcolumn; + +CREATE TABLE t_get_subcolumn (id UInt64, n Nested(u UInt64, s String)) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_get_subcolumn VALUES (1, [42], ['foo']); + +SELECT getSubcolumn(n, 'u') FROM t_get_subcolumn; +SELECT getSubcolumn(n, 's') FROM t_get_subcolumn; +SELECT getSubcolumn(n, 'size0') FROM t_get_subcolumn; + +DROP TABLE t_get_subcolumn; diff --git a/tests/queries/0_stateless/02677_grace_hash_limit_race.reference b/tests/queries/0_stateless/02677_grace_hash_limit_race.reference new file mode 100644 index 00000000000..83b33d238da --- /dev/null +++ b/tests/queries/0_stateless/02677_grace_hash_limit_race.reference @@ -0,0 +1 @@ +1000 diff --git a/tests/queries/0_stateless/02677_grace_hash_limit_race.sql b/tests/queries/0_stateless/02677_grace_hash_limit_race.sql new file mode 100644 index 00000000000..55262ab2455 --- /dev/null +++ b/tests/queries/0_stateless/02677_grace_hash_limit_race.sql @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS test_grace_hash; + +CREATE TABLE test_grace_hash (id UInt32, value UInt64) ENGINE = MergeTree ORDER BY id; + +INSERT INTO test_grace_hash SELECT number, number % 100 = 0 FROM numbers(100000); + +SET join_algorithm = 'grace_hash'; + +SELECT count() FROM ( + SELECT f.id FROM test_grace_hash AS f + LEFT JOIN test_grace_hash AS d + ON f.id = d.id + LIMIT 1000 +); + +DROP TABLE test_grace_hash; diff --git a/tests/queries/0_stateless/01071_live_view_detach_dependency.reference b/tests/queries/0_stateless/02678_explain_pipeline_graph_with_projection.reference similarity index 100% rename from tests/queries/0_stateless/01071_live_view_detach_dependency.reference rename to tests/queries/0_stateless/02678_explain_pipeline_graph_with_projection.reference diff --git a/tests/queries/0_stateless/02678_explain_pipeline_graph_with_projection.sql b/tests/queries/0_stateless/02678_explain_pipeline_graph_with_projection.sql new file mode 100644 index 00000000000..e8b7405d602 --- /dev/null +++ 
b/tests/queries/0_stateless/02678_explain_pipeline_graph_with_projection.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS t1; +CREATE TABLE t1(ID UInt64, name String) engine=MergeTree order by ID; + +insert into t1(ID, name) values (1, 'abc'), (2, 'bbb'); + +-- The returned node order is uncertain +explain pipeline graph=1 select count(ID) from t1 FORMAT Null; +explain pipeline graph=1 select sum(1) from t1 FORMAT Null; +explain pipeline graph=1 select min(ID) from t1 FORMAT Null; +explain pipeline graph=1 select max(ID) from t1 FORMAT Null; + +DROP TABLE t1; diff --git a/tests/queries/0_stateless/02679_explain_merge_tree_prewhere_row_policy.reference b/tests/queries/0_stateless/02679_explain_merge_tree_prewhere_row_policy.reference new file mode 100644 index 00000000000..2fe98ea1682 --- /dev/null +++ b/tests/queries/0_stateless/02679_explain_merge_tree_prewhere_row_policy.reference @@ -0,0 +1,56 @@ +Expression ((Projection + Before ORDER BY)) +Header: id UInt64 + value String +Actions: INPUT :: 0 -> id UInt64 : 0 + INPUT :: 1 -> value String : 1 +Positions: 0 1 + ReadFromMergeTree (default.test_table) + Header: id UInt64 + value String + ReadType: Default + Parts: 0 + Granules: 0 + Prewhere info + Need filter: 1 + Prewhere filter + Prewhere filter column: equals(id, 5) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + COLUMN Const(UInt8) -> 5 UInt8 : 1 + FUNCTION equals(id : 0, 5 :: 1) -> equals(id, 5) UInt8 : 2 + Positions: 2 0 + Row level filter + Row level filter column: greaterOrEquals(id, 5) + Actions: INPUT : 0 -> id UInt64 : 0 + COLUMN Const(UInt8) -> 5 UInt8 : 1 + FUNCTION greaterOrEquals(id : 0, 5 :: 1) -> greaterOrEquals(id, 5) UInt8 : 2 + Positions: 2 0 +Expression ((Project names + (Projection + Change column names to column identifiers))) +Header: id UInt64 + value String +Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> default.test_table.id_0 UInt64 : 2 + ALIAS value :: 1 -> default.test_table.value_1 String : 0 + ALIAS default.test_table.id_0 :: 2 -> id UInt64 : 1 + ALIAS default.test_table.value_1 :: 0 -> value String : 2 +Positions: 1 2 + ReadFromMergeTree (default.test_table) + Header: id UInt64 + value String + ReadType: Default + Parts: 0 + Granules: 0 + Prewhere info + Need filter: 1 + Prewhere filter + Prewhere filter column: equals(id, 5_UInt8) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 1 + FUNCTION equals(id : 0, 5_UInt8 :: 1) -> equals(id, 5_UInt8) UInt8 : 2 + Positions: 2 0 + Row level filter + Row level filter column: greaterOrEquals(id, 5_UInt8) + Actions: INPUT : 0 -> id UInt64 : 0 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 1 + FUNCTION greaterOrEquals(id : 0, 5_UInt8 :: 1) -> greaterOrEquals(id, 5_UInt8) UInt8 : 2 + Positions: 2 0 diff --git a/tests/queries/0_stateless/02679_explain_merge_tree_prewhere_row_policy.sql b/tests/queries/0_stateless/02679_explain_merge_tree_prewhere_row_policy.sql new file mode 100644 index 00000000000..4bc7be13490 --- /dev/null +++ b/tests/queries/0_stateless/02679_explain_merge_tree_prewhere_row_policy.sql @@ -0,0 +1,17 @@ +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=MergeTree ORDER BY id; + +INSERT INTO test_table VALUES (0, 'Value'); + +DROP ROW POLICY IF EXISTS test_row_policy ON test_table; +CREATE ROW POLICY test_row_policy ON test_table USING id >= 5 TO ALL; + +EXPLAIN header = 1, actions = 1 SELECT id, value FROM test_table PREWHERE id = 5 settings allow_experimental_analyzer=0; +EXPLAIN 
header = 1, actions = 1 SELECT id, value FROM test_table PREWHERE id = 5 settings allow_experimental_analyzer=1; + +DROP ROW POLICY test_row_policy ON test_table; +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/01246_insert_into_watch_live_view.reference b/tests/queries/0_stateless/02679_query_parameters_dangling_pointer.reference similarity index 100% rename from tests/queries/0_stateless/01246_insert_into_watch_live_view.reference rename to tests/queries/0_stateless/02679_query_parameters_dangling_pointer.reference diff --git a/tests/queries/0_stateless/02679_query_parameters_dangling_pointer.sql b/tests/queries/0_stateless/02679_query_parameters_dangling_pointer.sql new file mode 100644 index 00000000000..7705b860e8e --- /dev/null +++ b/tests/queries/0_stateless/02679_query_parameters_dangling_pointer.sql @@ -0,0 +1,4 @@ +-- There is no use-after-free in the following query: + +SET param_o = 'a'; +CREATE TABLE test.xxx (a Int64) ENGINE=MergeTree ORDER BY ({o:String}); -- { serverError 44 } diff --git a/tests/queries/0_stateless/02680_datetime64_monotonic_check.reference b/tests/queries/0_stateless/02680_datetime64_monotonic_check.reference new file mode 100644 index 00000000000..7bff7e0c2e5 --- /dev/null +++ b/tests/queries/0_stateless/02680_datetime64_monotonic_check.reference @@ -0,0 +1,2 @@ +22 0 1 +1970-01-01 02:00:02 diff --git a/tests/queries/0_stateless/02680_datetime64_monotonic_check.sql b/tests/queries/0_stateless/02680_datetime64_monotonic_check.sql new file mode 100644 index 00000000000..6036831d05d --- /dev/null +++ b/tests/queries/0_stateless/02680_datetime64_monotonic_check.sql @@ -0,0 +1,28 @@ +DROP TABLE IF EXISTS 02680_datetime64_monotonic_check; +DROP TABLE IF EXISTS 02680_datetime_monotonic_check_lc; + +CREATE TABLE 02680_datetime64_monotonic_check (`t` DateTime64(3), `x` Nullable(Decimal(18, 14))) +ENGINE = MergeTree +PARTITION BY toYYYYMMDD(t) +ORDER BY x SETTINGS allow_nullable_key = 1; + +INSERT INTO 02680_datetime64_monotonic_check VALUES (toDateTime64('2023-03-13 00:00:00', 3, 'Asia/Jerusalem'), 123); + +SELECT toHour(toTimeZone(t, 'UTC')) AS toHour_UTC, toHour(toTimeZone(t, 'Asia/Jerusalem')) AS toHour_Israel, count() +FROM 02680_datetime64_monotonic_check +WHERE toHour_Israel = 0 +GROUP BY toHour_UTC, toHour_Israel; + +DROP TABLE 02680_datetime64_monotonic_check; + +SET allow_suspicious_low_cardinality_types = 1; +CREATE TABLE 02680_datetime_monotonic_check_lc (`timestamp` LowCardinality(UInt32)) +ENGINE = MergeTree +ORDER BY timestamp +SETTINGS index_granularity = 1; + +INSERT INTO 02680_datetime_monotonic_check_lc VALUES (2); + +SELECT toDateTime(timestamp, 'Asia/Jerusalem') FROM 02680_datetime_monotonic_check_lc WHERE toHour(toDateTime(timestamp, 'Asia/Jerusalem')) = 2; + +DROP TABLE 02680_datetime_monotonic_check_lc; diff --git a/tests/queries/0_stateless/02521_cannot-find-column-in-projection.reference b/tests/queries/0_stateless/02680_default_star.reference similarity index 100% rename from tests/queries/0_stateless/02521_cannot-find-column-in-projection.reference rename to tests/queries/0_stateless/02680_default_star.reference diff --git a/tests/queries/0_stateless/02680_default_star.sql b/tests/queries/0_stateless/02680_default_star.sql new file mode 100644 index 00000000000..d560bd01e41 --- /dev/null +++ b/tests/queries/0_stateless/02680_default_star.sql @@ -0,0 +1,6 @@ +-- These queries yield a syntax error, not a logical error. 
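+-- An asterisk is not a valid DEFAULT expression, so the parser itself must reject it (clientError SYNTAX_ERROR) instead of letting it reach query analysis and fail as a logical error.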
+ +CREATE TEMPORARY TABLE test (ad DEFAULT *); -- { clientError SYNTAX_ERROR } +CREATE TEMPORARY TABLE test (ad INT DEFAULT *); -- { clientError SYNTAX_ERROR } +CREATE TEMPORARY TABLE test (ad DEFAULT * NOT NULL); -- { clientError SYNTAX_ERROR } +CREATE TEMPORARY TABLE test (ad DEFAULT t.* NOT NULL); -- { clientError SYNTAX_ERROR } diff --git a/tests/queries/0_stateless/02680_illegal_type_of_filter_projection.reference b/tests/queries/0_stateless/02680_illegal_type_of_filter_projection.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02680_illegal_type_of_filter_projection.sql b/tests/queries/0_stateless/02680_illegal_type_of_filter_projection.sql new file mode 100644 index 00000000000..3ef3b8a4fe6 --- /dev/null +++ b/tests/queries/0_stateless/02680_illegal_type_of_filter_projection.sql @@ -0,0 +1,3 @@ +CREATE TABLE test_tuple (`p` DateTime, `i` int, `j` int) ENGINE = MergeTree PARTITION BY (toDate(p), i) ORDER BY j SETTINGS index_granularity = 1; +insert into test_tuple values (1, 1, 1); +SELECT count() FROM test_tuple PREWHERE sipHash64(sipHash64(p, toString(toDate(p))), toString(toDate(p))) % -0. WHERE i > NULL settings optimize_trivial_count_query=0; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } diff --git a/tests/queries/0_stateless/00977_live_view_watch_events.reference b/tests/queries/0_stateless/02680_instr_alias_for_position_case_insensitive.reference similarity index 66% rename from tests/queries/0_stateless/00977_live_view_watch_events.reference rename to tests/queries/0_stateless/02680_instr_alias_for_position_case_insensitive.reference index 01e79c32a8c..4792e70f333 100644 --- a/tests/queries/0_stateless/00977_live_view_watch_events.reference +++ b/tests/queries/0_stateless/02680_instr_alias_for_position_case_insensitive.reference @@ -1,3 +1,2 @@ -1 2 3 diff --git a/tests/queries/0_stateless/02680_instr_alias_for_position_case_insensitive.sql b/tests/queries/0_stateless/02680_instr_alias_for_position_case_insensitive.sql new file mode 100644 index 00000000000..c1c55c2c982 --- /dev/null +++ b/tests/queries/0_stateless/02680_instr_alias_for_position_case_insensitive.sql @@ -0,0 +1,2 @@ +select INSTR('hello', 'e'); +select INSTR('hELlo', 'L'); diff --git a/tests/queries/0_stateless/02680_lc_null_as_default.reference b/tests/queries/0_stateless/02680_lc_null_as_default.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02680_lc_null_as_default.sql b/tests/queries/0_stateless/02680_lc_null_as_default.sql new file mode 100644 index 00000000000..f6bfad37771 --- /dev/null +++ b/tests/queries/0_stateless/02680_lc_null_as_default.sql @@ -0,0 +1,6 @@ +drop table if exists test_null_as_default__fuzz_46; +SET allow_suspicious_low_cardinality_types = 1; +CREATE TABLE test_null_as_default__fuzz_46 (a Nullable(DateTime64(3)), b LowCardinality(Float32) DEFAULT a + 1000) ENGINE = Memory; +INSERT INTO test_null_as_default__fuzz_46 SELECT 1, NULL UNION ALL SELECT 2, NULL; +drop table test_null_as_default__fuzz_46; + diff --git a/tests/queries/0_stateless/02680_mysql_ast_logical_err.reference b/tests/queries/0_stateless/02680_mysql_ast_logical_err.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02680_mysql_ast_logical_err.sql b/tests/queries/0_stateless/02680_mysql_ast_logical_err.sql new file mode 100644 index 00000000000..bde91df83ca --- /dev/null +++ b/tests/queries/0_stateless/02680_mysql_ast_logical_err.sql @@ -0,0 +1,4 @@ +CREATE 
TABLE foo (key UInt32, a String, b Int64, c String) ENGINE = TinyLog; + +SELECT count() FROM mysql(mysql('127.0.0.1:9004', currentDatabase(), 'foo', 'default', ''), '127.0.0.1:9004', currentDatabase(), 'foo', '', ''); -- { serverError UNKNOWN_FUNCTION } +SELECT count() FROM mysql(mysql('127.0.0.1:9004', currentDatabase(), 'foo', 'default', '', SETTINGS connection_pool_size = 1), '127.0.0.1:9004', currentDatabase(), 'foo', '', ''); -- { serverError UNKNOWN_FUNCTION, UNSUPPORTED_METHOD } diff --git a/tests/queries/0_stateless/02681_aggregation_by_partitions_bug.reference b/tests/queries/0_stateless/02681_aggregation_by_partitions_bug.reference new file mode 100644 index 00000000000..749fce669df --- /dev/null +++ b/tests/queries/0_stateless/02681_aggregation_by_partitions_bug.reference @@ -0,0 +1 @@ +1000000 diff --git a/tests/queries/0_stateless/02681_aggregation_by_partitions_bug.sql b/tests/queries/0_stateless/02681_aggregation_by_partitions_bug.sql new file mode 100644 index 00000000000..32b4b55076b --- /dev/null +++ b/tests/queries/0_stateless/02681_aggregation_by_partitions_bug.sql @@ -0,0 +1,10 @@ +-- Tags: no-random-merge-tree-settings + +set max_threads = 16; + +create table t(a UInt32) engine=MergeTree order by tuple() partition by a % 16; + +insert into t select * from numbers_mt(1e6); + +set allow_aggregate_partitions_independently=1, force_aggregate_partitions_independently=1; +select count(distinct a) from t; diff --git a/tests/queries/0_stateless/02681_comparsion_tuple_elimination_ast.reference b/tests/queries/0_stateless/02681_comparsion_tuple_elimination_ast.reference new file mode 100644 index 00000000000..6d848fbd4b1 --- /dev/null +++ b/tests/queries/0_stateless/02681_comparsion_tuple_elimination_ast.reference @@ -0,0 +1,7 @@ +SELECT + a, + b, + c, + d +FROM t1 +WHERE (((a = 1) AND (b = 2)) AND ((c = 3) AND (d = 4) AND (a = 5))) OR ((a = c) AND (b = 10) AND (1000 = d)) OR (((a = c) AND (b = 10)) AND (1000 = d)) diff --git a/tests/queries/0_stateless/02681_comparsion_tuple_elimination_ast.sql b/tests/queries/0_stateless/02681_comparsion_tuple_elimination_ast.sql new file mode 100644 index 00000000000..7f36b0568c7 --- /dev/null +++ b/tests/queries/0_stateless/02681_comparsion_tuple_elimination_ast.sql @@ -0,0 +1,8 @@ +SET optimize_move_to_prewhere = 1; -- works only for PREWHERE + +CREATE TABLE t1 (a UInt64, b UInt64, c UInt64, d UInt64) ENGINE = Memory; +INSERT INTO t1 SELECT number, number * 10, number * 100, number * 1000 FROM numbers(1000000); + +EXPLAIN SYNTAX +SELECT * FROM t1 +WHERE (a, b) = (1, 2) AND (c, d, a) = (3, 4, 5) OR (a, b, 1000) = (c, 10, d) OR ((a, b), 1000) = ((c, 10), d); diff --git a/tests/queries/0_stateless/02153_native_bounds_check.reference b/tests/queries/0_stateless/02681_final_excessive_reading_bug.reference similarity index 100% rename from tests/queries/0_stateless/02153_native_bounds_check.reference rename to tests/queries/0_stateless/02681_final_excessive_reading_bug.reference diff --git a/tests/queries/0_stateless/02681_final_excessive_reading_bug.sh b/tests/queries/0_stateless/02681_final_excessive_reading_bug.sh new file mode 100755 index 00000000000..a795b9ec5a0 --- /dev/null +++ b/tests/queries/0_stateless/02681_final_excessive_reading_bug.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +# Tags: no-random-merge-tree-settings + +# shellcheck disable=SC2154 + +unset CLICKHOUSE_LOG_COMMENT + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_CLIENT -q "CREATE TABLE sample_final (CounterID UInt32, EventDate Date, EventTime DateTime, UserID UInt64, Sign Int8) ENGINE = CollapsingMergeTree(Sign) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime) SAMPLE BY intHash32(UserID)" + +$CLICKHOUSE_CLIENT -q "INSERT INTO sample_final SELECT number / (8192 * 4), toDate('2019-01-01'), toDateTime('2019-01-01 00:00:01') + number, number / (8192 * 2), if((number % 3) = 1, -1, 1) FROM numbers(1000000)" + +query_id="${CLICKHOUSE_DATABASE}_final_excessive_reading_bug_$RANDOM" +$CLICKHOUSE_CLIENT --query_id="$query_id" -q "select * from sample_final FINAL SAMPLE 1/2 OFFSET 1/2 format Null settings max_threads=16" + +$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" +$CLICKHOUSE_CLIENT --param_query_id="$query_id" -q " +SELECT ProfileEvents['SelectedRows'] < 1_000_000 + FROM system.query_log + WHERE event_date >= yesterday() AND type = 'QueryFinish' AND query_id = {query_id:String} AND current_database = currentDatabase()" diff --git a/tests/queries/0_stateless/02681_group_array_too_large_size.reference b/tests/queries/0_stateless/02681_group_array_too_large_size.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02681_group_array_too_large_size.sql b/tests/queries/0_stateless/02681_group_array_too_large_size.sql new file mode 100644 index 00000000000..7b09f9b468e --- /dev/null +++ b/tests/queries/0_stateless/02681_group_array_too_large_size.sql @@ -0,0 +1,8 @@ +-- This query throws a high-level exception instead of the low-level "too large size passed to allocator": + +SELECT * FROM format(CSV, 'entitypArray AggregateFunction(groupArray, String)', +'295TMiews.viewN""""""TabSeparated +d St"" + + +r'); -- { serverError TOO_LARGE_ARRAY_SIZE } diff --git a/tests/queries/0_stateless/02681_undrop_query.reference b/tests/queries/0_stateless/02681_undrop_query.reference new file mode 100644 index 00000000000..5df6c392eb9 --- /dev/null +++ b/tests/queries/0_stateless/02681_undrop_query.reference @@ -0,0 +1,32 @@ +test MergeTree undrop +02681_undrop_mergetree +1 +2 +3 +test detach +UPDATE num = 2 WHERE id = 1 +test MergeTree with cluster +02681_undrop_uuid_on_cluster +1 +2 +3 +test MergeTree without uuid on cluster +02681_undrop_no_uuid_on_cluster +1 +2 +3 +test ReplicatedMergeTree undrop +02681_undrop_replicatedmergetree +1 +2 +3 +test Log undrop +02681_undrop_log +1 +2 +3 +test Distributed undrop +02681_undrop_distributed +test MergeTree drop and undrop multiple times +02681_undrop_multiple +3 diff --git a/tests/queries/0_stateless/02681_undrop_query.sql b/tests/queries/0_stateless/02681_undrop_query.sql new file mode 100644 index 00000000000..ead1a8bb305 --- /dev/null +++ b/tests/queries/0_stateless/02681_undrop_query.sql @@ -0,0 +1,90 @@ +-- Tags: no-ordinary-database, no-replicated-database, distributed, zookeeper + +set database_atomic_wait_for_drop_and_detach_synchronously = 0; +set allow_experimental_undrop_table_query = 1; + +select 'test MergeTree undrop'; +drop table if exists 02681_undrop_mergetree sync; +create table 02681_undrop_mergetree (id Int32) Engine=MergeTree() order by id; +insert into 02681_undrop_mergetree values (1),(2),(3); +drop table 02681_undrop_mergetree; +select table from system.dropped_tables where table = '02681_undrop_mergetree' limit 1; +undrop table 02681_undrop_mergetree; +select * from 02681_undrop_mergetree order by id; +drop table 02681_undrop_mergetree sync; + +select 'test detach'; +drop table if exists 02681_undrop_detach 
sync; +create table 02681_undrop_detach (id Int32, num Int32) Engine=MergeTree() order by id; +insert into 02681_undrop_detach values (1, 1); +detach table 02681_undrop_detach; +undrop table 02681_undrop_detach; -- { serverError 57 } +attach table 02681_undrop_detach; +alter table 02681_undrop_detach update num = 2 where id = 1; +select command from system.mutations where table='02681_undrop_detach' limit 1; +drop table 02681_undrop_detach sync; + +select 'test MergeTree with cluster'; +drop table if exists 02681_undrop_uuid_on_cluster on cluster test_shard_localhost sync format Null; +create table 02681_undrop_uuid_on_cluster on cluster test_shard_localhost (id Int32) Engine=MergeTree() order by id format Null; +insert into 02681_undrop_uuid_on_cluster values (1),(2),(3); +drop table 02681_undrop_uuid_on_cluster on cluster test_shard_localhost format Null; +select table from system.dropped_tables where table = '02681_undrop_uuid_on_cluster' limit 1; +undrop table 02681_undrop_uuid_on_cluster on cluster test_shard_localhost format Null; +select * from 02681_undrop_uuid_on_cluster order by id; +drop table 02681_undrop_uuid_on_cluster sync; + +select 'test MergeTree without uuid on cluster'; +drop table if exists 02681_undrop_no_uuid_on_cluster on cluster test_shard_localhost sync format Null; +create table 02681_undrop_no_uuid_on_cluster on cluster test_shard_localhost (id Int32) Engine=MergeTree() order by id format Null; +insert into 02681_undrop_no_uuid_on_cluster values (1),(2),(3); +drop table 02681_undrop_no_uuid_on_cluster on cluster test_shard_localhost format Null; +select table from system.dropped_tables where table = '02681_undrop_no_uuid_on_cluster' limit 1; +undrop table 02681_undrop_no_uuid_on_cluster on cluster test_shard_localhost format Null; +select * from 02681_undrop_no_uuid_on_cluster order by id; +drop table 02681_undrop_no_uuid_on_cluster on cluster test_shard_localhost sync format Null; + +select 'test ReplicatedMergeTree undrop'; +drop table if exists 02681_undrop_replicatedmergetree sync; +create table 02681_undrop_replicatedmergetree (id Int32) Engine=ReplicatedMergeTree('/clickhouse/tables/{database}/02681_undrop_replicatedmergetree', 'test_undrop') order by id; +insert into 02681_undrop_replicatedmergetree values (1),(2),(3); +drop table 02681_undrop_replicatedmergetree; +select table from system.dropped_tables where table = '02681_undrop_replicatedmergetree' limit 1; +undrop table 02681_undrop_replicatedmergetree; +select * from 02681_undrop_replicatedmergetree order by id; +drop table 02681_undrop_replicatedmergetree sync; + +select 'test Log undrop'; +drop table if exists 02681_undrop_log sync; +create table 02681_undrop_log (id Int32) Engine=Log(); +insert into 02681_undrop_log values (1),(2),(3); +drop table 02681_undrop_log; +select table from system.dropped_tables where table = '02681_undrop_log' limit 1; +undrop table 02681_undrop_log; +select * from 02681_undrop_log order by id; +drop table 02681_undrop_log sync; + +select 'test Distributed undrop'; +drop table if exists 02681_undrop_distributed sync; +create table 02681_undrop_distributed (id Int32) Engine = Distributed(test_shard_localhost, currentDatabase(), 02681_undrop, rand()); +drop table 02681_undrop_distributed; +select table from system.dropped_tables where table = '02681_undrop_distributed' limit 1; +undrop table 02681_undrop_distributed; +drop table 02681_undrop_distributed sync; + +select 'test MergeTree drop and undrop multiple times'; +drop table if exists 02681_undrop_multiple sync; 
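+-- undrop restores the most recently dropped version of the table, so the select below returns 3; +-- a second undrop then fails with error 57 (TABLE_ALREADY_EXISTS) because the table exists again.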
+create table 02681_undrop_multiple (id Int32) Engine=MergeTree() order by id; +insert into 02681_undrop_multiple values (1); +drop table 02681_undrop_multiple; +create table 02681_undrop_multiple (id Int32) Engine=MergeTree() order by id; +insert into 02681_undrop_multiple values (2); +drop table 02681_undrop_multiple; +create table 02681_undrop_multiple (id Int32) Engine=MergeTree() order by id; +insert into 02681_undrop_multiple values (3); +drop table 02681_undrop_multiple; +select table from system.dropped_tables where table = '02681_undrop_multiple' limit 1; +undrop table 02681_undrop_multiple; +select * from 02681_undrop_multiple order by id; +undrop table 02681_undrop_multiple; -- { serverError 57 } +drop table 02681_undrop_multiple sync; diff --git a/tests/queries/0_stateless/02681_undrop_query_uuid.reference b/tests/queries/0_stateless/02681_undrop_query_uuid.reference new file mode 100644 index 00000000000..beae016401b --- /dev/null +++ b/tests/queries/0_stateless/02681_undrop_query_uuid.reference @@ -0,0 +1,6 @@ +test MergeTree with uuid +02681_undrop_uuid +OK +1 +2 +3 diff --git a/tests/queries/0_stateless/02681_undrop_query_uuid.sh b/tests/queries/0_stateless/02681_undrop_query_uuid.sh new file mode 100755 index 00000000000..a93f30ef459 --- /dev/null +++ b/tests/queries/0_stateless/02681_undrop_query_uuid.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +# Tags: no-ordinary-database, no-replicated-database + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +echo 'test MergeTree with uuid' +${CLICKHOUSE_CLIENT} -q "drop table if exists 02681_undrop_uuid sync;" +uuid=$(${CLICKHOUSE_CLIENT} --query "SELECT generateUUIDv4()") +uuid2=$(${CLICKHOUSE_CLIENT} --query "SELECT generateUUIDv4()") +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none -q "create table 02681_undrop_uuid UUID '$uuid' on cluster test_shard_localhost (id Int32) Engine=MergeTree() order by id;" +${CLICKHOUSE_CLIENT} -q "insert into 02681_undrop_uuid values (1),(2),(3);" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none -q "drop table 02681_undrop_uuid on cluster test_shard_localhost settings database_atomic_wait_for_drop_and_detach_synchronously = 0;" +${CLICKHOUSE_CLIENT} -q "select table from system.dropped_tables where table = '02681_undrop_uuid' limit 1;" +${CLICKHOUSE_CLIENT} -q "undrop table 02681_undrop_uuid UUID '$uuid2' settings allow_experimental_undrop_table_query = 1;" 2>&1| grep -Faq "UNKNOWN_TABLE" && echo OK +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none -q "undrop table 02681_undrop_uuid UUID '$uuid' on cluster test_shard_localhost settings allow_experimental_undrop_table_query = 1;" +${CLICKHOUSE_CLIENT} -q "select * from 02681_undrop_uuid order by id;" +${CLICKHOUSE_CLIENT} -q "drop table 02681_undrop_uuid sync;" diff --git a/tests/queries/0_stateless/02682_quantiles_too_large_size.reference b/tests/queries/0_stateless/02682_quantiles_too_large_size.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02682_quantiles_too_large_size.sql b/tests/queries/0_stateless/02682_quantiles_too_large_size.sql new file mode 100644 index 00000000000..fff98f667c7 Binary files /dev/null and b/tests/queries/0_stateless/02682_quantiles_too_large_size.sql differ diff --git a/tests/queries/0_stateless/02683_native_too_large_size.reference b/tests/queries/0_stateless/02683_native_too_large_size.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git 
a/tests/queries/0_stateless/02683_native_too_large_size.sql b/tests/queries/0_stateless/02683_native_too_large_size.sql new file mode 100644 index 00000000000..e8752477f2d Binary files /dev/null and b/tests/queries/0_stateless/02683_native_too_large_size.sql differ diff --git a/tests/queries/0_stateless/02684_bson.reference b/tests/queries/0_stateless/02684_bson.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02684_bson.sql b/tests/queries/0_stateless/02684_bson.sql new file mode 100644 index 00000000000..cab5600eff0 Binary files /dev/null and b/tests/queries/0_stateless/02684_bson.sql differ diff --git a/tests/queries/0_stateless/02685_bson2.reference b/tests/queries/0_stateless/02685_bson2.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02685_bson2.sql b/tests/queries/0_stateless/02685_bson2.sql new file mode 100644 index 00000000000..fc65d2952df Binary files /dev/null and b/tests/queries/0_stateless/02685_bson2.sql differ diff --git a/tests/queries/0_stateless/02685_decimal256_various.reference b/tests/queries/0_stateless/02685_decimal256_various.reference new file mode 100644 index 00000000000..848c5e0b163 --- /dev/null +++ b/tests/queries/0_stateless/02685_decimal256_various.reference @@ -0,0 +1,97 @@ +-- { echoOn } + +SELECT 1.1::Decimal(60, 30); +1.1 +SELECT round(1.1::Decimal(60, 30)); +1 +SELECT round(1.1::Decimal(60, 30), 1); +1.1 +SELECT round(1.234567890123456789012345678901::Decimal(60, 30), 1); +1.2 +SELECT round(1.234567890123456789012345678901::Decimal(60, 30), 30); +1.234567890123456789012345678901 +SELECT round(1.234567890123456789012345678901::Decimal(60, 30), 31); +1.234567890123456789012345678901 +SELECT round(1.234567890123456789012345678901::Decimal(60, 30), 20); +1.23456789012345678901 +SELECT hex(1.234567890123456789012345678901::Decimal(60, 30)); +356C760E4FC986A2A39F1A950F00000000000000000000000000000000000000 +SELECT bin(1.234567890123456789012345678901::Decimal(60, 30)); +0011010101101100011101100000111001001111110010011000011010100010101000111001111100011010100101010000111100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +SELECT reinterpret(unhex(hex(1.234567890123456789012345678901::Decimal(60, 30))), 'Decimal(60, 30)'); +1.234567890123456789012345678901 +SELECT arraySum([1.2::Decimal(60, 30), 3.45::Decimal(61, 29)]); +4.65 +SELECT arraySum([1.2::Decimal(60, 30), 3.45::Decimal(3, 2)]); +4.65 +SELECT arrayMin([1.2::Decimal(60, 30), 3.45::Decimal(61, 29)]); +1.2 +SELECT arrayMax([1.2::Decimal(60, 30), 3.45::Decimal(61, 29)]); +3.45 +SELECT arrayAvg([1.2::Decimal(60, 30), 3.45::Decimal(61, 29)]); +2.325 +SELECT round(arrayProduct([1.2::Decimal(60, 30), 3.45::Decimal(61, 29)]), 6); +4.14 +SELECT toTypeName(arrayProduct([1.2::Decimal(60, 30), 3.45::Decimal(61, 29)])); +Float64 +SELECT arrayCumSum([1.2::Decimal(60, 30), 3.45::Decimal(61, 29)]); +[1.2,4.65] +SELECT arrayCumSumNonNegative([1.2::Decimal(60, 30), 3.45::Decimal(61, 29)]); +[1.2,4.65] +SELECT arrayDifference([1.2::Decimal(60, 30), 3.45::Decimal(61, 29)]); +[0,2.25] +SELECT arrayCompact([1.2::Decimal(60, 30) AS x, x, x, x, 3.45::Decimal(3, 2) AS y, y, x, x]); +[1.2,3.45,1.2] +SELECT 1.2::Decimal(2, 1) IN (1.2::Decimal(60, 30), 3.4::Decimal(60, 30)); +1 +SELECT 1.23::Decimal(3, 2) IN (1.2::Decimal(60, 30), 3.4::Decimal(60, 30)); +0 +SELECT 1.2::Decimal(60, 30) IN (1.2::Decimal(2, 1)); +1 +SELECT 
toTypeName([1.2::Decimal(60, 30), 3.45::Decimal(3, 2)]); +Array(Decimal(76, 30)) +SELECT toTypeName(arraySum([1.2::Decimal(60, 30), 3.45::Decimal(3, 2)])); +Decimal(76, 30) +SELECT arrayJoin(sumMap(x)) FROM (SELECT [('Hello', 1.2::Decimal256(30)), ('World', 3.4::Decimal256(30))]::Map(String, Decimal256(30)) AS x UNION ALL SELECT [('World', 5.6::Decimal256(30)), ('GoodBye', -111.222::Decimal256(30))]::Map(String, Decimal256(30))) ORDER BY 1; +('GoodBye',-111.222) +('Hello',1.2) +('World',9) +SELECT mapAdd(map('Hello', 1.2::Decimal128(30), 'World', 3.4::Decimal128(30)), map('World', 5.6::Decimal128(30), 'GoodBye', -111.222::Decimal128(30))); +{'GoodBye':-111.222,'Hello':1.2,'World':9} +SELECT mapSubtract(map('Hello', 1.2::Decimal128(30), 'World', 3.4::Decimal128(30)), map('World', 5.6::Decimal128(30), 'GoodBye', -111.222::Decimal128(30))); +{'GoodBye':111.222,'Hello':1.2,'World':-2.2} +SELECT arraySort(arrayIntersect([1, 2, 3]::Array(UInt256), [2, 3, 4]::Array(UInt256))); +[2,3] +SELECT toTypeName(arraySort(arrayIntersect([1, 2, 3]::Array(UInt256), [2, 3, 4]::Array(UInt128)))); +Array(UInt128) +SELECT toTypeName(arraySort(arrayIntersect([1, 2, 3]::Array(UInt256), [2, 3, 4]::Array(Int128)))); +Array(Int128) +SELECT arraySort(arrayIntersect([1, 2, 3]::Array(UInt256), [2, 3, 4]::Array(Int128))); +[2,3] +SELECT arraySort(arrayIntersect([1, 2, 3]::Array(UInt256), [2, 3, 4]::Array(Int8))); +[2,3] +SELECT toTypeName(arraySort(arrayIntersect([1, 2, 3]::Array(UInt256), [2, 3, 4]::Array(Int8)))); +Array(Int8) +SELECT arraySort(arrayIntersect([1.1::Decimal256(70), 2.34::Decimal256(60), 3.456::Decimal256(50)], [2.34::Decimal256(65), 3.456::Decimal256(55), 4.5678::Decimal256(45)])); +[2.34,3.456] +SELECT arraySort(arrayIntersect([1.1::Decimal256(1)], [1.12::Decimal256(2)])); -- Note: this is correct but the semantics has to be clarified in the docs. +[1.1] +SELECT arraySort(arrayIntersect([1.1::Decimal256(2)], [1.12::Decimal256(2)])); +[] +SELECT arraySort(arrayIntersect([1.1::Decimal128(1)], [1.12::Decimal128(2)])); -- Note: this is correct but the semantics has to be clarified in the docs. 
+[1.1] +SELECT arraySort(arrayIntersect([1.1::Decimal128(2)], [1.12::Decimal128(2)])); +[] +select coalesce(cast('123', 'Nullable(Decimal(20, 10))'), 0); +123 +select coalesce(cast('123', 'Nullable(Decimal(40, 10))'), 0); +123 +select coalesce(cast('123', 'Decimal(40, 10)'), 0); +123 +DROP TABLE IF EXISTS decimal_insert_cast_issue; +create table decimal_insert_cast_issue (a Decimal(76, 0)) engine = TinyLog; +SET param_param = 1; +INSERT INTO decimal_insert_cast_issue VALUES ({param:Nullable(Decimal(41, 0))}); +SELECT * FROM decimal_insert_cast_issue; +1 +DROP TABLE decimal_insert_cast_issue; diff --git a/tests/queries/0_stateless/02685_decimal256_various.sql b/tests/queries/0_stateless/02685_decimal256_various.sql new file mode 100644 index 00000000000..545eaefe35e --- /dev/null +++ b/tests/queries/0_stateless/02685_decimal256_various.sql @@ -0,0 +1,65 @@ +-- { echoOn } + +SELECT 1.1::Decimal(60, 30); +SELECT round(1.1::Decimal(60, 30)); +SELECT round(1.1::Decimal(60, 30), 1); +SELECT round(1.234567890123456789012345678901::Decimal(60, 30), 1); +SELECT round(1.234567890123456789012345678901::Decimal(60, 30), 30); +SELECT round(1.234567890123456789012345678901::Decimal(60, 30), 31); +SELECT round(1.234567890123456789012345678901::Decimal(60, 30), 20); + +SELECT hex(1.234567890123456789012345678901::Decimal(60, 30)); +SELECT bin(1.234567890123456789012345678901::Decimal(60, 30)); +SELECT reinterpret(unhex(hex(1.234567890123456789012345678901::Decimal(60, 30))), 'Decimal(60, 30)'); + +SELECT arraySum([1.2::Decimal(60, 30), 3.45::Decimal(61, 29)]); +SELECT arraySum([1.2::Decimal(60, 30), 3.45::Decimal(3, 2)]); + +SELECT arrayMin([1.2::Decimal(60, 30), 3.45::Decimal(61, 29)]); +SELECT arrayMax([1.2::Decimal(60, 30), 3.45::Decimal(61, 29)]); +SELECT arrayAvg([1.2::Decimal(60, 30), 3.45::Decimal(61, 29)]); + +SELECT round(arrayProduct([1.2::Decimal(60, 30), 3.45::Decimal(61, 29)]), 6); +SELECT toTypeName(arrayProduct([1.2::Decimal(60, 30), 3.45::Decimal(61, 29)])); + +SELECT arrayCumSum([1.2::Decimal(60, 30), 3.45::Decimal(61, 29)]); +SELECT arrayCumSumNonNegative([1.2::Decimal(60, 30), 3.45::Decimal(61, 29)]); +SELECT arrayDifference([1.2::Decimal(60, 30), 3.45::Decimal(61, 29)]); + +SELECT arrayCompact([1.2::Decimal(60, 30) AS x, x, x, x, 3.45::Decimal(3, 2) AS y, y, x, x]); + +SELECT 1.2::Decimal(2, 1) IN (1.2::Decimal(60, 30), 3.4::Decimal(60, 30)); +SELECT 1.23::Decimal(3, 2) IN (1.2::Decimal(60, 30), 3.4::Decimal(60, 30)); +SELECT 1.2::Decimal(60, 30) IN (1.2::Decimal(2, 1)); + +SELECT toTypeName([1.2::Decimal(60, 30), 3.45::Decimal(3, 2)]); +SELECT toTypeName(arraySum([1.2::Decimal(60, 30), 3.45::Decimal(3, 2)])); + +SELECT arrayJoin(sumMap(x)) FROM (SELECT [('Hello', 1.2::Decimal256(30)), ('World', 3.4::Decimal256(30))]::Map(String, Decimal256(30)) AS x UNION ALL SELECT [('World', 5.6::Decimal256(30)), ('GoodBye', -111.222::Decimal256(30))]::Map(String, Decimal256(30))) ORDER BY 1; + +SELECT mapAdd(map('Hello', 1.2::Decimal128(30), 'World', 3.4::Decimal128(30)), map('World', 5.6::Decimal128(30), 'GoodBye', -111.222::Decimal128(30))); +SELECT mapSubtract(map('Hello', 1.2::Decimal128(30), 'World', 3.4::Decimal128(30)), map('World', 5.6::Decimal128(30), 'GoodBye', -111.222::Decimal128(30))); + +SELECT arraySort(arrayIntersect([1, 2, 3]::Array(UInt256), [2, 3, 4]::Array(UInt256))); +SELECT toTypeName(arraySort(arrayIntersect([1, 2, 3]::Array(UInt256), [2, 3, 4]::Array(UInt128)))); +SELECT toTypeName(arraySort(arrayIntersect([1, 2, 3]::Array(UInt256), [2, 3, 4]::Array(Int128)))); +SELECT 
arraySort(arrayIntersect([1, 2, 3]::Array(UInt256), [2, 3, 4]::Array(Int128))); +SELECT arraySort(arrayIntersect([1, 2, 3]::Array(UInt256), [2, 3, 4]::Array(Int8))); +SELECT toTypeName(arraySort(arrayIntersect([1, 2, 3]::Array(UInt256), [2, 3, 4]::Array(Int8)))); + +SELECT arraySort(arrayIntersect([1.1::Decimal256(70), 2.34::Decimal256(60), 3.456::Decimal256(50)], [2.34::Decimal256(65), 3.456::Decimal256(55), 4.5678::Decimal256(45)])); +SELECT arraySort(arrayIntersect([1.1::Decimal256(1)], [1.12::Decimal256(2)])); -- Note: this is correct but the semantics has to be clarified in the docs. +SELECT arraySort(arrayIntersect([1.1::Decimal256(2)], [1.12::Decimal256(2)])); +SELECT arraySort(arrayIntersect([1.1::Decimal128(1)], [1.12::Decimal128(2)])); -- Note: this is correct but the semantics has to be clarified in the docs. +SELECT arraySort(arrayIntersect([1.1::Decimal128(2)], [1.12::Decimal128(2)])); + +select coalesce(cast('123', 'Nullable(Decimal(20, 10))'), 0); +select coalesce(cast('123', 'Nullable(Decimal(40, 10))'), 0); +select coalesce(cast('123', 'Decimal(40, 10)'), 0); + +DROP TABLE IF EXISTS decimal_insert_cast_issue; +create table decimal_insert_cast_issue (a Decimal(76, 0)) engine = TinyLog; +SET param_param = 1; +INSERT INTO decimal_insert_cast_issue VALUES ({param:Nullable(Decimal(41, 0))}); +SELECT * FROM decimal_insert_cast_issue; +DROP TABLE decimal_insert_cast_issue; diff --git a/tests/queries/0_stateless/02686_bson3.reference b/tests/queries/0_stateless/02686_bson3.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02686_bson3.sql b/tests/queries/0_stateless/02686_bson3.sql new file mode 100644 index 00000000000..05a73e814dd Binary files /dev/null and b/tests/queries/0_stateless/02686_bson3.sql differ diff --git a/tests/queries/0_stateless/02686_postgres_protocol_decimal_256.reference b/tests/queries/0_stateless/02686_postgres_protocol_decimal_256.reference new file mode 100644 index 00000000000..5e61b14b9a1 --- /dev/null +++ b/tests/queries/0_stateless/02686_postgres_protocol_decimal_256.reference @@ -0,0 +1,5 @@ + test +------ + 1.23 +(1 row) + diff --git a/tests/queries/0_stateless/02686_postgres_protocol_decimal_256.sh b/tests/queries/0_stateless/02686_postgres_protocol_decimal_256.sh new file mode 100755 index 00000000000..2a94f940327 --- /dev/null +++ b/tests/queries/0_stateless/02686_postgres_protocol_decimal_256.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest +# Tag no-fasttest: needs psql + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +echo " +DROP USER IF EXISTS postgresql_user; +CREATE USER postgresql_user HOST IP '127.0.0.1' IDENTIFIED WITH no_password; +" | $CLICKHOUSE_CLIENT -n + +psql --host localhost --port ${CLICKHOUSE_PORT_POSTGRESQL} ${CLICKHOUSE_DATABASE} --user postgresql_user -c "SELECT 1.23::Decimal256(70) AS test;" diff --git a/tests/queries/0_stateless/02687_native_fuzz.reference b/tests/queries/0_stateless/02687_native_fuzz.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02687_native_fuzz.sql b/tests/queries/0_stateless/02687_native_fuzz.sql new file mode 100644 index 00000000000..0cd11390918 Binary files /dev/null and b/tests/queries/0_stateless/02687_native_fuzz.sql differ diff --git a/tests/queries/0_stateless/02688_aggregate_states.reference b/tests/queries/0_stateless/02688_aggregate_states.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02688_aggregate_states.sql b/tests/queries/0_stateless/02688_aggregate_states.sql new file mode 100644 index 00000000000..575f4b1bb3a --- /dev/null +++ b/tests/queries/0_stateless/02688_aggregate_states.sql @@ -0,0 +1,7 @@ +SELECT '\x01\x00'::AggregateFunction(groupBitmap, UInt32); -- { serverError INCORRECT_DATA } +SELECT '\x01\x01\x01'::AggregateFunction(groupBitmap, UInt64); -- { serverError STD_EXCEPTION } +SELECT '\x02\x00\x0d'::AggregateFunction(topK, UInt256); -- { serverError CANNOT_READ_ALL_DATA } +SELECT unhex('bebebebebebebebebebebebebebebebebebebebebebebebebebebebebebebe0c0c3131313131313131313131313173290aee00b300')::AggregateFunction(minDistinct, Int8); -- { serverError TOO_LARGE_ARRAY_SIZE } +SELECT unhex('01000b0b0b0d0d0d0d7175616e74696c6554696d696e672c20496e743332000300')::AggregateFunction(quantileTiming, Int32); -- { serverError INCORRECT_DATA } +SELECT unhex('010001')::AggregateFunction(quantileTiming, Int32); -- { serverError INCORRECT_DATA } +SELECT unhex('0a00797979797979797979790a0a6e')::AggregateFunction(minForEach, Ring); -- { serverError TOO_LARGE_ARRAY_SIZE } diff --git a/tests/queries/0_stateless/02688_long_aggregate_function_names.reference b/tests/queries/0_stateless/02688_long_aggregate_function_names.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02688_long_aggregate_function_names.sql b/tests/queries/0_stateless/02688_long_aggregate_function_names.sql new file mode 100644 index 00000000000..266bbd62957 --- /dev/null +++ b/tests/queries/0_stateless/02688_long_aggregate_function_names.sql @@ -0,0 +1 @@ +SELECT 
minOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNull
OrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNullOrNull(1); -- { serverError TOO_LARGE_STRING_SIZE } diff --git a/tests/queries/0_stateless/02689_meaningless_data_types.reference b/tests/queries/0_stateless/02689_meaningless_data_types.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02689_meaningless_data_types.sql b/tests/queries/0_stateless/02689_meaningless_data_types.sql new file mode 100644 index 00000000000..8ae702d66df --- /dev/null +++ b/tests/queries/0_stateless/02689_meaningless_data_types.sql @@ -0,0 +1,3 @@ +SELECT 0::Bool(Upyachka); -- { serverError DATA_TYPE_CANNOT_HAVE_ARGUMENTS } +SELECT [(1, 2), (3, 4)]::Ring(Upyachka); -- { serverError DATA_TYPE_CANNOT_HAVE_ARGUMENTS } +SELECT '1.1.1.1'::IPv4('Hello, world!'); -- { serverError DATA_TYPE_CANNOT_HAVE_ARGUMENTS } diff --git a/tests/queries/0_stateless/02690_subquery_identifiers.reference b/tests/queries/0_stateless/02690_subquery_identifiers.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/02690_subquery_identifiers.reference @@ -0,0 +1,2 @@ +1 +1 diff --git 
a/tests/queries/0_stateless/02690_subquery_identifiers.sql b/tests/queries/0_stateless/02690_subquery_identifiers.sql new file mode 100644 index 00000000000..07fcb0ffff9 --- /dev/null +++ b/tests/queries/0_stateless/02690_subquery_identifiers.sql @@ -0,0 +1,17 @@ +DROP TABLE IF EXISTS t_str; + +CREATE TABLE t_str +( + `creation_time` String +) +ENGINE = MergeTree +PARTITION BY creation_time +ORDER BY creation_time; + +insert into t_str values ('2020-02-02'); + +select 1 as x from t_str where cast('1970-01-01' as date) <= cast((select max('1970-01-01') from numbers(1)) as date); +select * from ( select 1 as x from t_str where cast('1970-01-01' as date) <= cast((select max('1970-01-01') from numbers(1)) as date)); +SELECT * FROM (SELECT * FROM t_str WHERE (SELECT any('1970-01-01'))::Date > today()); + +DROP TABLE t_str; diff --git a/tests/queries/0_stateless/02691_drop_column_with_projections_replicated.reference b/tests/queries/0_stateless/02691_drop_column_with_projections_replicated.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02691_drop_column_with_projections_replicated.sql b/tests/queries/0_stateless/02691_drop_column_with_projections_replicated.sql new file mode 100644 index 00000000000..c28c2f233dd --- /dev/null +++ b/tests/queries/0_stateless/02691_drop_column_with_projections_replicated.sql @@ -0,0 +1,11 @@ +DROP TABLE IF EXISTS 02691_drop_column_replicated; + +CREATE TABLE 02691_drop_column_replicated (col1 Int64, col2 Int64, PROJECTION 02691_drop_column_replicated (SELECT * ORDER BY col1 )) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/02691_drop_column', 'r1') +ORDER BY col1; + +INSERT INTO 02691_drop_column_replicated VALUES (1, 2); + +ALTER TABLE 02691_drop_column_replicated DROP COLUMN col2 SETTINGS alter_sync = 2; + +DROP TABLE 02691_drop_column_replicated; diff --git a/tests/queries/0_stateless/02691_multiple_joins_backtick_identifiers.reference b/tests/queries/0_stateless/02691_multiple_joins_backtick_identifiers.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02691_multiple_joins_backtick_identifiers.sql b/tests/queries/0_stateless/02691_multiple_joins_backtick_identifiers.sql new file mode 100644 index 00000000000..4a56c12866e --- /dev/null +++ b/tests/queries/0_stateless/02691_multiple_joins_backtick_identifiers.sql @@ -0,0 +1,49 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t3; + +CREATE TABLE t1 (`1a` Nullable(Int64), `2b` Nullable(String)) engine = Memory; +CREATE TABLE t2 (`3c` Nullable(Int64), `4d` Nullable(String)) engine = Memory; +CREATE TABLE t3 (`5e` Nullable(Int64), `6f` Nullable(String)) engine = Memory; + +SELECT + `1a`, + `2b` +FROM t1 AS tt1 +INNER JOIN +( + SELECT `3c` + FROM t2 +) AS tt2 ON tt1.`1a` = tt2.`3c` +INNER JOIN +( + SELECT `6f` + FROM t3 +) AS tt3 ON tt1.`2b` = tt3.`6f`; + +DROP TABLE t1; +DROP TABLE t2; +DROP TABLE t3; + +CREATE TABLE t1 (`a` Nullable(Int64), `b` Nullable(String)) engine = Memory; +CREATE TABLE t2 (`c` Nullable(Int64), `d` Nullable(String)) engine = Memory; +CREATE TABLE t3 (`e` Nullable(Int64), `f` Nullable(String)) engine = Memory; + +SELECT + a, + b +FROM t1 AS tt1 +INNER JOIN +( + SELECT c + FROM t2 +) AS tt2 ON tt1.a = tt2.c +INNER JOIN +( + SELECT f + FROM t3 +) AS tt3 ON tt1.b = tt3.f; + +DROP TABLE t1; +DROP TABLE t2; +DROP TABLE t3; diff --git a/tests/queries/0_stateless/02692_multiple_joins_unicode.reference 
b/tests/queries/0_stateless/02692_multiple_joins_unicode.reference new file mode 100644 index 00000000000..3aca4fe5c7b --- /dev/null +++ b/tests/queries/0_stateless/02692_multiple_joins_unicode.reference @@ -0,0 +1,2 @@ +2021-01-01 上海市 启用 +2021-01-02 北京市 停用 diff --git a/tests/queries/0_stateless/02692_multiple_joins_unicode.sql b/tests/queries/0_stateless/02692_multiple_joins_unicode.sql new file mode 100644 index 00000000000..d622c556e56 --- /dev/null +++ b/tests/queries/0_stateless/02692_multiple_joins_unicode.sql @@ -0,0 +1,24 @@ +DROP TABLE IF EXISTS store; +DROP TABLE IF EXISTS location; +DROP TABLE IF EXISTS sales; + +CREATE TABLE store (id UInt32, "名称" String, "状态" String) ENGINE=MergeTree() Order by id; +CREATE TABLE location (id UInt32, name String) ENGINE=MergeTree() Order by id; +CREATE TABLE sales ("日期" Date, "店铺" UInt32, "地址" UInt32, "销售额" Float32) ENGINE=MergeTree() Order by "日期"; + +INSERT INTO store VALUES (1,'店铺1','启用'),(2,'店铺2','停用'); +INSERT INTO location VALUES (1,'上海市'),(2,'北京市'); +INSERT INTO sales VALUES ('2021-01-01',1,1,10),('2021-01-02',2,2,20); + +SELECT + `日期`, + location.name, + store.`状态` +FROM sales +LEFT JOIN store ON store.id = `店铺` +LEFT JOIN location ON location.id = `地址` +ORDER BY 1, 2, 3; + +DROP TABLE store; +DROP TABLE location; +DROP TABLE sales; diff --git a/tests/queries/0_stateless/02693_multiple_joins_in.reference b/tests/queries/0_stateless/02693_multiple_joins_in.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02693_multiple_joins_in.sql b/tests/queries/0_stateless/02693_multiple_joins_in.sql new file mode 100644 index 00000000000..8be52948d26 --- /dev/null +++ b/tests/queries/0_stateless/02693_multiple_joins_in.sql @@ -0,0 +1,3 @@ +create temporary table temp_table3(val0 UInt64) ENGINE=Memory(); +select * from (select 1 as id) t1 inner join (select 1 as id) t2 on t1.id=t2.id inner join (select 1 as id) t3 on t1.id=t3.id where t1.id in temp_table3; +select * from (select 1 as id) t1 inner join (select 1 as id) t2 on t1.id=t2.id where t1.id in temp_table3; diff --git a/tests/queries/0_stateless/02694_wrong_identifier_shouldnt_be_accepted.reference b/tests/queries/0_stateless/02694_wrong_identifier_shouldnt_be_accepted.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02694_wrong_identifier_shouldnt_be_accepted.sql b/tests/queries/0_stateless/02694_wrong_identifier_shouldnt_be_accepted.sql new file mode 100644 index 00000000000..e929b1e620a --- /dev/null +++ b/tests/queries/0_stateless/02694_wrong_identifier_shouldnt_be_accepted.sql @@ -0,0 +1,17 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS s; + +CREATE TABLE t1 ( k Int64, x Int64) ENGINE = Memory; +CREATE TABLE t2 ( x Int64 ) ENGINE = Memory; + +create table s (k Int64, d DateTime) Engine=Memory; + +SELECT * FROM t1 +INNER JOIN s ON t1.k = s.k +INNER JOIN t2 ON t2.x = t1.x +WHERE (t1.d >= now()); -- { serverError UNKNOWN_IDENTIFIER } + +DROP TABLE t1; +DROP TABLE t2; +DROP TABLE s; diff --git a/tests/queries/0_stateless/02695_logical_optimizer_alias_bug.reference b/tests/queries/0_stateless/02695_logical_optimizer_alias_bug.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/02695_logical_optimizer_alias_bug.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/02695_logical_optimizer_alias_bug.sql b/tests/queries/0_stateless/02695_logical_optimizer_alias_bug.sql new file mode 100644 index 
00000000000..5b13eea5e4b --- /dev/null +++ b/tests/queries/0_stateless/02695_logical_optimizer_alias_bug.sql @@ -0,0 +1,2 @@ +create table test_local (id UInt32, path LowCardinality(String)) engine = MergeTree order by id; +WITH ((position(path, '/a') > 0) AND (NOT (position(path, 'a') > 0))) OR (path = '/b') OR (path = '/b/') as alias1 SELECT max(alias1) FROM remote('127.0.0.{1,2}', currentDatabase(), test_local) WHERE (id = 299386662); diff --git a/tests/queries/0_stateless/02695_storage_join_insert_select_deadlock.reference b/tests/queries/0_stateless/02695_storage_join_insert_select_deadlock.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02695_storage_join_insert_select_deadlock.sql b/tests/queries/0_stateless/02695_storage_join_insert_select_deadlock.sql new file mode 100644 index 00000000000..59528511357 --- /dev/null +++ b/tests/queries/0_stateless/02695_storage_join_insert_select_deadlock.sql @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS test_table_join; + +CREATE TABLE test_table_join +( + id UInt64, + value String +) ENGINE = Join(Any, Left, id); + +INSERT INTO test_table_join VALUES (1, 'q'); + +INSERT INTO test_table_join SELECT * from test_table_join; -- { serverError DEADLOCK_AVOIDED } + +INSERT INTO test_table_join SELECT * FROM (SELECT 1 as id) AS t1 ANY LEFT JOIN test_table_join USING (id); -- { serverError DEADLOCK_AVOIDED } +INSERT INTO test_table_join SELECT id, toString(id) FROM (SELECT 1 as id) AS t1 ANY LEFT JOIN (SELECT id FROM test_table_join) AS t2 USING (id); -- { serverError DEADLOCK_AVOIDED } + +DROP TABLE IF EXISTS test_table_join; diff --git a/tests/queries/0_stateless/02696_ignore_inacc_tables_mat_view_atttach.reference b/tests/queries/0_stateless/02696_ignore_inacc_tables_mat_view_atttach.reference new file mode 100644 index 00000000000..bb3ee860aec --- /dev/null +++ b/tests/queries/0_stateless/02696_ignore_inacc_tables_mat_view_atttach.reference @@ -0,0 +1,2 @@ +3 some_val +3 9 diff --git a/tests/queries/0_stateless/02696_ignore_inacc_tables_mat_view_atttach.sql b/tests/queries/0_stateless/02696_ignore_inacc_tables_mat_view_atttach.sql new file mode 100644 index 00000000000..25e0ddf2ef6 --- /dev/null +++ b/tests/queries/0_stateless/02696_ignore_inacc_tables_mat_view_atttach.sql @@ -0,0 +1,23 @@ +SET send_logs_level = 'fatal'; + +CREATE TABLE test_table (n Int32, s String) ENGINE MergeTree PARTITION BY n ORDER BY n; + +CREATE TABLE mview_backend (n Int32, n2 Int64) ENGINE MergeTree PARTITION BY n ORDER BY n; + +CREATE MATERIALIZED VIEW mview TO mview_backend AS SELECT n, n * n AS "n2" FROM test_table; + +DROP TABLE test_table; + +DETACH TABLE mview; + +/* Check that we don't get an exception with the option. 
*/ +ATTACH TABLE mview; + +/* Check if the data in the materialized view is updated after the restore.*/ +CREATE TABLE test_table (n Int32, s String) ENGINE MergeTree PARTITION BY n ORDER BY n; + +INSERT INTO test_table VALUES (3,'some_val'); + +SELECT n,s FROM test_table ORDER BY n; +SELECT n,n2 FROM mview ORDER by n; + diff --git a/tests/queries/0_stateless/02696_inverted_idx_checksums.reference b/tests/queries/0_stateless/02696_inverted_idx_checksums.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02696_inverted_idx_checksums.sql b/tests/queries/0_stateless/02696_inverted_idx_checksums.sql new file mode 100644 index 00000000000..92ffa7a6196 --- /dev/null +++ b/tests/queries/0_stateless/02696_inverted_idx_checksums.sql @@ -0,0 +1,16 @@ +SET allow_experimental_inverted_index = 1; + +CREATE TABLE t +( + `key` UInt64, + `str` String, + INDEX inv_idx str TYPE inverted(0) GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY key; + +INSERT INTO t VALUES (1, 'Hello World'); + +ALTER TABLE t DETACH PART 'all_1_1_0'; + +ALTER TABLE t ATTACH PART 'all_1_1_0'; \ No newline at end of file diff --git a/tests/queries/0_stateless/02697_alter_dependencies.reference b/tests/queries/0_stateless/02697_alter_dependencies.reference new file mode 100644 index 00000000000..d05b1f927f4 --- /dev/null +++ b/tests/queries/0_stateless/02697_alter_dependencies.reference @@ -0,0 +1 @@ +0 0 diff --git a/tests/queries/0_stateless/02697_alter_dependencies.sql b/tests/queries/0_stateless/02697_alter_dependencies.sql new file mode 100644 index 00000000000..cf9b7551b5f --- /dev/null +++ b/tests/queries/0_stateless/02697_alter_dependencies.sql @@ -0,0 +1,16 @@ +CREATE TABLE mv_source (a Int64, insert_time DateTime) ENGINE = MergeTree() ORDER BY insert_time; +CREATE TABLE mv_target (a Int64, insert_time DateTime) ENGINE = MergeTree() ORDER BY insert_time; +CREATE MATERIALIZED VIEW source_to_target to mv_target as Select * from mv_source where a not in (Select sleepEachRow(0.1) from numbers(50)); + +ALTER TABLE mv_source MODIFY TTL insert_time + toIntervalDay(1); +SYSTEM FLUSH LOGS; +-- This is a fancy way to check that the MV hasn't been called (no functions executed by ALTER) +SELECT + ProfileEvents['FunctionExecute'], + ProfileEvents['TableFunctionExecute'] +FROM system.query_log +WHERE + type = 'QueryFinish' AND + query like '%ALTER TABLE mv_source%' AND + current_database = currentDatabase() AND + event_time > now() - INTERVAL 10 minute; diff --git a/tests/queries/0_stateless/02697_stop_reading_on_first_cancel.reference b/tests/queries/0_stateless/02697_stop_reading_on_first_cancel.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/02697_stop_reading_on_first_cancel.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/02697_stop_reading_on_first_cancel.sh b/tests/queries/0_stateless/02697_stop_reading_on_first_cancel.sh new file mode 100755 index 00000000000..09837bff808 --- /dev/null +++ b/tests/queries/0_stateless/02697_stop_reading_on_first_cancel.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -n --query="SELECT sum(number * 0) FROM numbers(10000000000) SETTINGS partial_result_on_first_cancel=true;" & +pid=$! 
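+# Give the query a head start, then send a single SIGINT. With
+# partial_result_on_first_cancel=true the first cancel is expected to stop
+# reading and return the partial result instead of aborting the query;
+# sum(number * 0) is 0 no matter how many rows were read, so the output is stable.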
+sleep 2 +kill -SIGINT $pid +wait $pid diff --git a/tests/queries/0_stateless/02698_marked_dropped_tables.reference b/tests/queries/0_stateless/02698_marked_dropped_tables.reference new file mode 100644 index 00000000000..44906da9527 --- /dev/null +++ b/tests/queries/0_stateless/02698_marked_dropped_tables.reference @@ -0,0 +1,8 @@ +25400_dropped_tables MergeTree +index UInt32 +database String +table String +uuid UUID +engine String +metadata_dropped_path String +table_dropped_time DateTime diff --git a/tests/queries/0_stateless/02698_marked_dropped_tables.sql b/tests/queries/0_stateless/02698_marked_dropped_tables.sql new file mode 100644 index 00000000000..9bf6579b583 --- /dev/null +++ b/tests/queries/0_stateless/02698_marked_dropped_tables.sql @@ -0,0 +1,11 @@ +-- Tags: no-ordinary-database + +SET database_atomic_wait_for_drop_and_detach_synchronously = 0; +DROP TABLE IF EXISTS 25400_dropped_tables; + +CREATE TABLE 25400_dropped_tables (id Int32) Engine=MergeTree() ORDER BY id; +DROP TABLE 25400_dropped_tables; + +SELECT table, engine FROM system.dropped_tables WHERE database = currentDatabase() LIMIT 1; +DESCRIBE TABLE system.dropped_tables; + diff --git a/tests/queries/0_stateless/02699_polygons_sym_difference_rollup.reference b/tests/queries/0_stateless/02699_polygons_sym_difference_rollup.reference new file mode 100644 index 00000000000..35c94347ac9 --- /dev/null +++ b/tests/queries/0_stateless/02699_polygons_sym_difference_rollup.reference @@ -0,0 +1,11 @@ +[] +[] +[[(2147483647,0),(10.0001,65535),(1,255),(1023,2147483646)]] [[[(2147483647,0),(10.0001,65535),(1023,2147483646),(2147483647,0)]]] +[[(2147483647,0),(10.0001,65535),(1,255),(1023,2147483646)]] [] +[[(2147483647,0),(10.0001,65535),(1,255),(1023,2147483646)]] [[[(2147483647,0),(10.0001,65535),(1023,2147483646),(2147483647,0)]]] +[[(2147483647,0),(10.0001,65535),(1,255),(1023,2147483646)]] [[[(2147483647,0),(10.0001,65535),(1023,2147483646),(2147483647,0)]]] +[[[(100.0001,1000.0001),(1000.0001,1.1920928955078125e-7),(20,-20),(20,20),(10,10),(-20,20),(100.0001,1000.0001)]]] +[[[(100.0001,1000.0001),(1000.0001,1.1920928955078125e-7),(20,-20),(20,20),(10,10),(-20,20),(100.0001,1000.0001)]]] +[(9223372036854775807,1.1754943508222875e-38)] [[(1,1.0001)]] \N [] + +[(9223372036854775807,1.1754943508222875e-38)] [] \N [] diff --git a/tests/queries/0_stateless/02699_polygons_sym_difference_rollup.sql b/tests/queries/0_stateless/02699_polygons_sym_difference_rollup.sql new file mode 100644 index 00000000000..85307bec6e5 --- /dev/null +++ b/tests/queries/0_stateless/02699_polygons_sym_difference_rollup.sql @@ -0,0 +1,5 @@ +SELECT polygonsSymDifferenceCartesian([[[(1., 1.)]] AS x], [x]) GROUP BY x WITH ROLLUP; +SELECT [[(2147483647, 0.), (10.0001, 65535), (1, 255), (1023, 2147483646)]], polygonsSymDifferenceCartesian([[[(2147483647, 0.), (10.0001, 65535), (1023, 2147483646)]]], [[[(1000.0001, 10.0001)]]]) GROUP BY [[(2147483647, 0.), (10.0001, 65535), (1023, 2147483646)]] WITH ROLLUP SETTINGS allow_experimental_analyzer=0; +SELECT [[(2147483647, 0.), (10.0001, 65535), (1, 255), (1023, 2147483646)]], polygonsSymDifferenceCartesian([[[(2147483647, 0.), (10.0001, 65535), (1023, 2147483646)]]], [[[(1000.0001, 10.0001)]]]) GROUP BY [[(2147483647, 0.), (10.0001, 65535), (1023, 2147483646)]] WITH ROLLUP SETTINGS allow_experimental_analyzer=1; +SELECT polygonsSymDifferenceCartesian([[[(100.0001, 1000.0001), (-20., 20.), (10., 10.), (20., 20.), (20., -20.), (1000.0001, 1.1920928955078125e-7)]],[[(0.0001, 100000000000000000000.)]] AS x],[x]) 
GROUP BY x WITH ROLLUP; +SELECT [(9223372036854775807, 1.1754943508222875e-38)], x, NULL, polygonsSymDifferenceCartesian([[[(1.1754943508222875e-38, 1.1920928955078125e-7), (0.5, 0.5)]], [[(1.1754943508222875e-38, 1.1920928955078125e-7), (1.1754943508222875e-38, 1.1920928955078125e-7)], [(0., 1.0001)]], [[(1., 1.0001)]] AS x], [[[(3.4028234663852886e38, 0.9999)]]]) GROUP BY GROUPING SETS ((x)) WITH TOTALS diff --git a/tests/queries/0_stateless/02700_regexp_operator.reference b/tests/queries/0_stateless/02700_regexp_operator.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02700_regexp_operator.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02700_regexp_operator.sql b/tests/queries/0_stateless/02700_regexp_operator.sql new file mode 100644 index 00000000000..5a5275bf1ea --- /dev/null +++ b/tests/queries/0_stateless/02700_regexp_operator.sql @@ -0,0 +1 @@ +SELECT 'ab' REGEXP 'a.*b'; diff --git a/tests/queries/0_stateless/02700_s3_part_INT_MAX.reference b/tests/queries/0_stateless/02700_s3_part_INT_MAX.reference new file mode 100644 index 00000000000..8425fb4df60 --- /dev/null +++ b/tests/queries/0_stateless/02700_s3_part_INT_MAX.reference @@ -0,0 +1 @@ +2097152 diff --git a/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh b/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh new file mode 100755 index 00000000000..d831c7d9806 --- /dev/null +++ b/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +# Tags: no-parallel, long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Regression test for a crash when a part exceeds INT_MAX +# +# NOTE: .sh test is used over .sql because it needs $CLICKHOUSE_DATABASE to +# avoid truncation, since it seems that the version of MinIO that is used on CI +# is too slow with this.
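+#
+# The sizes below work out as follows: (pow(2, 30) * 2) / 1024 = 2^21 = 2097152
+# rows (the value in the reference file) of 1024 bytes each, i.e. 2^31 bytes in
+# total, which is one byte past INT_MAX and is what used to trigger the crash.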
+$CLICKHOUSE_CLIENT -nm -q " + INSERT INTO FUNCTION s3('http://localhost:11111/test/$CLICKHOUSE_DATABASE/test_INT_MAX.tsv', '', '', 'TSV') + SELECT repeat('a', 1024) FROM numbers((pow(2, 30) * 2) / 1024) + SETTINGS s3_max_single_part_upload_size = '10Gi'; + + SELECT count() FROM s3('http://localhost:11111/test/$CLICKHOUSE_DATABASE/test_INT_MAX.tsv'); +" diff --git a/tests/queries/0_stateless/02701_fail_on_invalid_having.reference b/tests/queries/0_stateless/02701_fail_on_invalid_having.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02701_fail_on_invalid_having.sql b/tests/queries/0_stateless/02701_fail_on_invalid_having.sql new file mode 100644 index 00000000000..092bda23164 --- /dev/null +++ b/tests/queries/0_stateless/02701_fail_on_invalid_having.sql @@ -0,0 +1 @@ +SELECT a, sum(b) FROM (SELECT 1 AS a, 1 AS b, 0 AS c) GROUP BY a HAVING c SETTINGS allow_experimental_analyzer=1 -- { serverError NOT_AN_AGGREGATE } diff --git a/tests/queries/0_stateless/02701_non_parametric_function.reference b/tests/queries/0_stateless/02701_non_parametric_function.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02701_non_parametric_function.sql b/tests/queries/0_stateless/02701_non_parametric_function.sql new file mode 100644 index 00000000000..5261fa7b082 --- /dev/null +++ b/tests/queries/0_stateless/02701_non_parametric_function.sql @@ -0,0 +1,9 @@ +-- Tags: no-parallel + +SELECT * FROM system.numbers WHERE number > toUInt64(10)(number) LIMIT 10; -- { serverError 309 } + +CREATE FUNCTION IF NOT EXISTS sum_udf as (x, y) -> (x + y); + +SELECT sum_udf(1)(1, 2); -- { serverError 309 } + +DROP FUNCTION IF EXISTS sum_udf; diff --git a/tests/queries/0_stateless/02702_allow_skip_errors_enum.reference b/tests/queries/0_stateless/02702_allow_skip_errors_enum.reference new file mode 100644 index 00000000000..f9264f7fbd3 --- /dev/null +++ b/tests/queries/0_stateless/02702_allow_skip_errors_enum.reference @@ -0,0 +1,2 @@ +Hello +World diff --git a/tests/queries/0_stateless/02702_allow_skip_errors_enum.sh b/tests/queries/0_stateless/02702_allow_skip_errors_enum.sh new file mode 100755 index 00000000000..e68f5517d52 --- /dev/null +++ b/tests/queries/0_stateless/02702_allow_skip_errors_enum.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --multiquery --query "DROP TABLE IF EXISTS t; CREATE TABLE t (x Enum('Hello' = 1, 'World' = 2)) ENGINE = Memory;" +$CLICKHOUSE_CLIENT --input_format_allow_errors_num 1 --query "INSERT INTO t FORMAT CSV" < 0) AND (x1 > 0) AND (x2 > 0) +ORDER BY group_key ASC +LIMIT 10 +SETTINGS max_bytes_before_external_group_by = 200000 +" && echo -n '.' 
+ +echo diff --git a/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.reference b/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.sh b/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.sh new file mode 100755 index 00000000000..3964427895c --- /dev/null +++ b/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# Tags: no-ordinary-database, zookeeper, no-fasttest, no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +function create_drop_loop() +{ + table_name="02703_keeper_map_concurrent_$1" + $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS $table_name" + for _ in `seq $1` + do + sleep 0.3 + done + + i=0 + while true; + do + $CLICKHOUSE_CLIENT --query="CREATE TABLE IF NOT EXISTS $table_name (key UInt64, value UInt64) ENGINE = KeeperMap('/02703_keeper_map/$CLICKHOUSE_DATABASE') PRIMARY KEY(key)" + $CLICKHOUSE_CLIENT --query="INSERT INTO $table_name VALUES ($1, $i)" + result=$($CLICKHOUSE_CLIENT --query="SELECT value FROM $table_name WHERE key = $1") + + if [ $result != $i ] + then + echo "Got invalid result $result" + exit 1 + fi + + $CLICKHOUSE_CLIENT --query="DROP TABLE $table_name" + + ((++i)) + done +} + +export -f create_drop_loop; + +THREADS=10 +TIMEOUT=30 + +for i in `seq $THREADS` +do + timeout $TIMEOUT bash -c "create_drop_loop $i" 2> /dev/null & +done + +wait + +for i in `seq $THREADS` +do + $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS 02703_keeper_map_concurrent_$i" +done + +$CLICKHOUSE_CLIENT --query="SELECT count() FROM system.zookeeper WHERE path = '/test_keeper_map/02703_keeper_map/$CLICKHOUSE_DATABASE'" diff --git a/tests/queries/0_stateless/02703_max_local_read_bandwidth.reference b/tests/queries/0_stateless/02703_max_local_read_bandwidth.reference new file mode 100644 index 00000000000..e2968e9fef5 --- /dev/null +++ b/tests/queries/0_stateless/02703_max_local_read_bandwidth.reference @@ -0,0 +1,3 @@ +read 1 1 1 1 +pread 1 1 1 1 +pread_threadpool 1 1 1 1 diff --git a/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh b/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh new file mode 100755 index 00000000000..d47e2f363bd --- /dev/null +++ b/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# Tags: no-s3-storage, no-random-settings, no-random-merge-tree-settings + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -nm -q " + drop table if exists data; + create table data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9; +" + +# reading 1e6*8 bytes with 1M bandwith it should take (8-1)/1=7 seconds +$CLICKHOUSE_CLIENT -q "insert into data select * from numbers(1e6)" + +read_methods=( + read + pread + pread_threadpool + # NOTE: io_uring doing all IO from one thread, that is not attached to the query + # io_uring + # NOTE: mmap cannot be throttled + # mmap +) +for read_method in "${read_methods[@]}"; do + query_id=$(random_str 10) + $CLICKHOUSE_CLIENT --query_id "$query_id" -q "select * from data format Null settings max_local_read_bandwidth='1M', local_filesystem_read_method='$read_method'" + $CLICKHOUSE_CLIENT -nm -q " + SYSTEM FLUSH LOGS; + SELECT + '$read_method', + query_duration_ms >= 7e3, + ProfileEvents['ReadBufferFromFileDescriptorReadBytes'] > 8e6, + ProfileEvents['LocalReadThrottlerBytes'] > 8e6, + ProfileEvents['LocalReadThrottlerSleepMicroseconds'] > 7e6*0.9 + FROM system.query_log + WHERE current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id' AND type != 'QueryStart' + " +done diff --git a/tests/queries/0_stateless/02703_max_local_write_bandwidth.reference b/tests/queries/0_stateless/02703_max_local_write_bandwidth.reference new file mode 100644 index 00000000000..ad05a699da0 --- /dev/null +++ b/tests/queries/0_stateless/02703_max_local_write_bandwidth.reference @@ -0,0 +1 @@ +1 1 1 1 diff --git a/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh b/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh new file mode 100755 index 00000000000..41165d35d37 --- /dev/null +++ b/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +# Tags: no-s3-storage + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -nm -q " + drop table if exists data; + create table data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9; +" + +query_id=$(random_str 10) +# writes 1e6*8 bytes with 1M bandwith it should take (8-1)/1=7 seconds +$CLICKHOUSE_CLIENT --query_id "$query_id" -q "insert into data select * from numbers(1e6) settings max_local_write_bandwidth='1M'" +$CLICKHOUSE_CLIENT -nm -q " + SYSTEM FLUSH LOGS; + SELECT + query_duration_ms >= 7e3, + ProfileEvents['WriteBufferFromFileDescriptorWriteBytes'] > 8e6, + ProfileEvents['LocalWriteThrottlerBytes'] > 8e6, + ProfileEvents['LocalWriteThrottlerSleepMicroseconds'] > 7e6*0.9 + FROM system.query_log + WHERE current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id' AND type != 'QueryStart' +" diff --git a/tests/queries/0_stateless/02703_row_policies_for_asterisk.reference b/tests/queries/0_stateless/02703_row_policies_for_asterisk.reference new file mode 100644 index 00000000000..528bd7ef91e --- /dev/null +++ b/tests/queries/0_stateless/02703_row_policies_for_asterisk.reference @@ -0,0 +1,2 @@ +Policy for table `*` does not affect other tables in the database +other 100 20 diff --git a/tests/queries/0_stateless/02703_row_policies_for_asterisk.sql b/tests/queries/0_stateless/02703_row_policies_for_asterisk.sql new file mode 100644 index 00000000000..96b1c01a6d6 --- /dev/null +++ b/tests/queries/0_stateless/02703_row_policies_for_asterisk.sql @@ -0,0 +1,11 @@ +-- Tags: no-parallel + +SELECT 'Policy for table `*` does not affect other tables in the database'; +CREATE DATABASE 02703_db_asterisk; +CREATE ROW POLICY 02703_asterisk ON 02703_db_asterisk.`*` USING x=1 AS permissive TO ALL; +CREATE TABLE 02703_db_asterisk.`*` (x UInt8, y UInt8) ENGINE = MergeTree ORDER BY x AS SELECT 100, 20; +CREATE TABLE 02703_db_asterisk.`other` (x UInt8, y UInt8) ENGINE = MergeTree ORDER BY x AS SELECT 100, 20; +SELECT 'star', * FROM 02703_db_asterisk.`*`; +SELECT 'other', * FROM 02703_db_asterisk.other; +DROP ROW POLICY 02703_asterisk ON 02703_db_asterisk.`*`; +DROP DATABASE 02703_db_asterisk; diff --git a/tests/queries/0_stateless/02703_row_policies_for_database_combination.reference b/tests/queries/0_stateless/02703_row_policies_for_database_combination.reference new file mode 100644 index 00000000000..68ed02d1dc0 --- /dev/null +++ b/tests/queries/0_stateless/02703_row_policies_for_database_combination.reference @@ -0,0 +1,42 @@ +None +1 10 +2 20 +3 30 +4 40 +R1: x == 1 +1 10 +R1, R2: (x == 1) OR (x == 2) +1 10 +2 20 +R1, R2: (x == 2) FROM ANOTHER +2 20 +R1, R2, R3: (x == 1) OR (x == 2) OR (x == 3) +1 10 +2 20 +3 30 +R1, R2, R3, R4: ((x == 1) OR (x == 2) OR (x == 3)) AND (x <= 2) +1 10 +2 20 +R1, R2, R3, R4, R5: ((x == 1) OR (x == 2) OR (x == 3)) AND (x <= 2) AND (y >= 20) +2 20 +2 20 +R1, R2, R3, R4, R5: (x == 2) AND (y >= 20) FROM AFTER_RP +2 20 +R1, R2, R3, R4, R5: (x == 2) AND (y >= 20) FROM ANOTHER +2 20 +R2, R3, R4, R5: ((x == 2) OR (x == 3)) AND (x <= 2) AND (y >= 20) +2 20 +R3, R4, R5: (x == 3) AND (x <= 2) AND (y >= 20) +R4, R5: (x <= 2) AND (y >= 20) +2 20 +R5: (x >= 2) +2 20 +3 30 +4 40 +Policy not applicable +None +1 10 +2 20 +3 30 +4 40 +No problematic policy, select works diff --git a/tests/queries/0_stateless/02703_row_policies_for_database_combination.sql b/tests/queries/0_stateless/02703_row_policies_for_database_combination.sql new file mode 100644 index 00000000000..f9b466f1ade --- /dev/null +++ 
b/tests/queries/0_stateless/02703_row_policies_for_database_combination.sql @@ -0,0 +1,88 @@ +-- Tags: no-parallel + +DROP DATABASE IF EXISTS 02703_db; +CREATE DATABASE 02703_db; +DROP TABLE IF EXISTS 02703_db.02703_rptable; +DROP TABLE IF EXISTS 02703_db.02703_rptable_another; +CREATE TABLE 02703_db.02703_rptable (x UInt8, y UInt8) ENGINE = MergeTree ORDER BY x; + +INSERT INTO 02703_db.02703_rptable VALUES (1, 10), (2, 20), (3, 30), (4, 40); + +CREATE TABLE 02703_db.02703_rptable_another ENGINE = MergeTree ORDER BY x AS SELECT * FROM 02703_db.02703_rptable; + + +DROP ROW POLICY IF EXISTS 02703_filter_1 ON 02703_db.02703_rptable; +DROP ROW POLICY IF EXISTS 02703_filter_2 ON 02703_db.*; +DROP ROW POLICY IF EXISTS 02703_filter_3 ON 02703_db.02703_rptable; +DROP ROW POLICY IF EXISTS 02703_filter_4 ON 02703_db.02703_rptable; +DROP ROW POLICY IF EXISTS 02703_filter_5 ON 02703_db.*; + +-- the test assumes users_without_row_policies_can_read_rows is true + +SELECT 'None'; +SELECT * FROM 02703_db.02703_rptable; + +CREATE ROW POLICY 02703_filter_1 ON 02703_db.02703_rptable USING x=1 AS permissive TO ALL; +SELECT 'R1: x == 1'; +SELECT * FROM 02703_db.02703_rptable; + +CREATE ROW POLICY 02703_filter_2 ON 02703_db.* USING x=2 AS permissive TO ALL; +SELECT 'R1, R2: (x == 1) OR (x == 2)'; +SELECT * FROM 02703_db.02703_rptable; + +SELECT 'R1, R2: (x == 2) FROM ANOTHER'; +SELECT * FROM 02703_db.02703_rptable_another; + +CREATE ROW POLICY 02703_filter_3 ON 02703_db.02703_rptable USING x=3 AS permissive TO ALL; +SELECT 'R1, R2, R3: (x == 1) OR (x == 2) OR (x == 3)'; +SELECT * FROM 02703_db.02703_rptable; + +CREATE ROW POLICY 02703_filter_4 ON 02703_db.02703_rptable USING x<=2 AS restrictive TO ALL; +SELECT 'R1, R2, R3, R4: ((x == 1) OR (x == 2) OR (x == 3)) AND (x <= 2)'; +SELECT * FROM 02703_db.02703_rptable; + +CREATE ROW POLICY 02703_filter_5 ON 02703_db.* USING y>=20 AS restrictive TO ALL; +SELECT 'R1, R2, R3, R4, R5: ((x == 1) OR (x == 2) OR (x == 3)) AND (x <= 2) AND (y >= 20)'; +SELECT * FROM 02703_db.02703_rptable; + +CREATE TABLE 02703_db.02703_after_rp ENGINE = MergeTree ORDER BY x AS SELECT * FROM 02703_db.02703_rptable; +SELECT * FROM 02703_db.02703_after_rp; + +-- it does not matter whether the policies or the table are created first +SELECT 'R1, R2, R3, R4, R5: (x == 2) AND (y >= 20) FROM AFTER_RP'; +SELECT * FROM 02703_db.02703_after_rp; + +SELECT 'R1, R2, R3, R4, R5: (x == 2) AND (y >= 20) FROM ANOTHER'; +SELECT * FROM 02703_db.02703_rptable_another; + +DROP ROW POLICY 02703_filter_1 ON 02703_db.02703_rptable; +SELECT 'R2, R3, R4, R5: ((x == 2) OR (x == 3)) AND (x <= 2) AND (y >= 20)'; +SELECT * FROM 02703_db.02703_rptable; + +DROP ROW POLICY 02703_filter_2 ON 02703_db.*; +SELECT 'R3, R4, R5: (x == 3) AND (x <= 2) AND (y >= 20)'; +SELECT * FROM 02703_db.02703_rptable; + +DROP ROW POLICY 02703_filter_3 ON 02703_db.02703_rptable; +SELECT 'R4, R5: (x <= 2) AND (y >= 20)'; +SELECT * FROM 02703_db.02703_rptable; + +DROP ROW POLICY 02703_filter_4 ON 02703_db.02703_rptable; +SELECT 'R5: (y >= 20)'; +SELECT * FROM 02703_db.02703_rptable; + +CREATE TABLE 02703_db.02703_unexpected_columns (xx UInt8, yy UInt8) ENGINE = MergeTree ORDER BY xx; +SELECT 'Policy not applicable'; +SELECT * FROM 02703_db.02703_unexpected_columns; -- { serverError 47 } -- Missing columns: 'x' while processing query + +DROP ROW POLICY 02703_filter_5 ON 02703_db.*; +SELECT 'None'; +SELECT * FROM 02703_db.02703_rptable; + +SELECT 'No problematic policy, select works'; +SELECT 'Ok' FROM 02703_db.02703_unexpected_columns; + +DROP TABLE 
02703_db.02703_rptable; +DROP TABLE 02703_db.02703_rptable_another; +DROP TABLE 02703_db.02703_unexpected_columns; +DROP DATABASE 02703_db; diff --git a/tests/queries/0_stateless/02703_row_policy_for_database.reference b/tests/queries/0_stateless/02703_row_policy_for_database.reference new file mode 100644 index 00000000000..ec03e538d95 --- /dev/null +++ b/tests/queries/0_stateless/02703_row_policy_for_database.reference @@ -0,0 +1,20 @@ +-- row policies for database + -- SHOW CREATE POLICY db1_02703 ON db1_02703.* +CREATE ROW POLICY db1_02703 ON db1_02703.* FOR SELECT USING 1 TO ALL + -- SHOW CREATE POLICY ON db1_02703.* +CREATE ROW POLICY db1_02703 ON db1_02703.* FOR SELECT USING 1 TO ALL +CREATE ROW POLICY tbl1_02703 ON db1_02703.table FOR SELECT USING 1 TO ALL + -- SHOW CREATE POLICY ON db1_02703.`*` +R1, R2: (x == 1) OR (x == 2) +1 +2 +Check system.query_log +SELECT \'-- row policies for database\'; [] +SELECT \' -- SHOW CREATE POLICY db1_02703 ON db1_02703.*\'; [] +SELECT \' -- SHOW CREATE POLICY ON db1_02703.*\'; [] +SELECT \' -- SHOW CREATE POLICY ON db1_02703.`*`\'; [] +SELECT \'R1, R2: (x == 1) OR (x == 2)\'; [] +SELECT * FROM 02703_rqtable_default; ['`02703_filter_11_db` ON default.*','`02703_filter_11` ON default.`02703_rqtable_default`'] +SELECT \'Check system.query_log\'; [] + -- CREATE DATABASE-LEVEL POLICY IN CURRENT DATABASE +CREATE ROW POLICY db2_02703 ON db1_02703.* TO u1_02703 diff --git a/tests/queries/0_stateless/02703_row_policy_for_database.sql b/tests/queries/0_stateless/02703_row_policy_for_database.sql new file mode 100644 index 00000000000..03183a96b98 --- /dev/null +++ b/tests/queries/0_stateless/02703_row_policy_for_database.sql @@ -0,0 +1,53 @@ +-- Tags: no-parallel + +DROP DATABASE IF EXISTS db1_02703; +DROP USER IF EXISTS u1_02703; +CREATE USER u1_02703; + +CREATE DATABASE db1_02703; + +CREATE TABLE db1_02703.02703_rqtable (x UInt8) ENGINE = MergeTree ORDER BY x; +INSERT INTO db1_02703.02703_rqtable VALUES (1), (2), (3), (4); + + +SELECT '-- row policies for database'; +CREATE ROW POLICY db1_02703 ON db1_02703.* USING 1 AS PERMISSIVE TO ALL; +CREATE ROW POLICY tbl1_02703 ON db1_02703.table USING 1 AS PERMISSIVE TO ALL; +SELECT ' -- SHOW CREATE POLICY db1_02703 ON db1_02703.*'; +SHOW CREATE POLICY db1_02703 ON db1_02703.*; +SELECT ' -- SHOW CREATE POLICY ON db1_02703.*'; +SHOW CREATE POLICY ON db1_02703.*; +SELECT ' -- SHOW CREATE POLICY ON db1_02703.`*`'; +SHOW CREATE POLICY ON db1_02703.`*`; +DROP POLICY db1_02703 ON db1_02703.*; +DROP POLICY tbl1_02703 ON db1_02703.table; + +CREATE ROW POLICY any_02703 ON *.some_table USING 1 AS PERMISSIVE TO ALL; -- { clientError 62 } + +CREATE TABLE 02703_rqtable_default (x UInt8) ENGINE = MergeTree ORDER BY x; + +CREATE ROW POLICY 02703_filter_11_db ON * USING x=1 AS permissive TO ALL; +CREATE ROW POLICY 02703_filter_11 ON 02703_rqtable_default USING x=2 AS permissive TO ALL; + +INSERT INTO 02703_rqtable_default VALUES (1), (2), (3), (4); + +SELECT 'R1, R2: (x == 1) OR (x == 2)'; +SELECT * FROM 02703_rqtable_default; + +DROP TABLE 02703_rqtable_default; + +SELECT 'Check system.query_log'; +SYSTEM FLUSH LOGS; +SELECT query, used_row_policies FROM system.query_log WHERE current_database == currentDatabase() AND type == 'QueryStart' AND query_kind == 'Select' ORDER BY event_time_microseconds; + +DROP ROW POLICY 02703_filter_11_db ON *; +DROP ROW POLICY 02703_filter_11 ON 02703_rqtable_default; + +USE db1_02703; +SELECT ' -- CREATE DATABASE-LEVEL POLICY IN CURRENT DATABASE'; +CREATE ROW POLICY db2_02703 ON * TO u1_02703; 
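+-- The current database is db1_02703 at this point (see USE above), so the
+-- unqualified `ON *` resolves to `ON db1_02703.*`; that is why SHOW CREATE
+-- prints the fully qualified form in the reference output.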
+SHOW CREATE POLICY db2_02703 ON *; + +DROP ROW POLICY db2_02703 ON *; + +DROP USER u1_02703; diff --git a/tests/queries/0_stateless/02703_storage_s3_race.reference b/tests/queries/0_stateless/02703_storage_s3_race.reference new file mode 100644 index 00000000000..d86bac9de59 --- /dev/null +++ b/tests/queries/0_stateless/02703_storage_s3_race.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/02703_storage_s3_race.sh b/tests/queries/0_stateless/02703_storage_s3_race.sh new file mode 100755 index 00000000000..65a38e600f7 --- /dev/null +++ b/tests/queries/0_stateless/02703_storage_s3_race.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +filename="test_${CLICKHOUSE_DATABASE}_${RANDOM}" + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_s3_race" +$CLICKHOUSE_CLIENT --query "CREATE TABLE test_s3_race (u UInt64) ENGINE = S3(s3_conn, filename='$filename', format='CSV')" +$CLICKHOUSE_CLIENT --s3_truncate_on_insert 1 --query "INSERT INTO test_s3_race VALUES (1)" + +$CLICKHOUSE_BENCHMARK -i 100 -c 4 <<< "SELECT * FROM test_s3_race" >/dev/null 2>&1 +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_s3_race" +echo "OK" diff --git a/tests/queries/0_stateless/02704_keeper_map_zk_nodes.reference b/tests/queries/0_stateless/02704_keeper_map_zk_nodes.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02704_keeper_map_zk_nodes.sh b/tests/queries/0_stateless/02704_keeper_map_zk_nodes.sh new file mode 100755 index 00000000000..9689d4f5a50 --- /dev/null +++ b/tests/queries/0_stateless/02704_keeper_map_zk_nodes.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash +# Tags: no-ordinary-database, zookeeper, no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +table_name="02704_keeper_map_zk_nodes" +table_name_another="02704_keeper_map_zk_nodes_new_table" + +$CLICKHOUSE_CLIENT --multiquery --query=" +DROP TABLE IF EXISTS $table_name; +DROP TABLE IF EXISTS $table_name_another; +CREATE TABLE $table_name (key UInt64, value UInt64) +ENGINE = KeeperMap('/$table_name/$CLICKHOUSE_DATABASE') +PRIMARY KEY(key)" + +function assert_children_size() +{ + for _ in `seq 10` + do + children_size=$($CLICKHOUSE_CLIENT --query="SELECT count() FROM system.zookeeper WHERE path = '$1'") + if [ $children_size == $2 ] + then + return + fi + + sleep 0.4 + done + + echo "Invalid number of children for path '$1': actual $children_size, expected $2" + exit 1 +} + +function assert_root_children_size() +{ + assert_children_size "/test_keeper_map/02704_keeper_map_zk_nodes/$CLICKHOUSE_DATABASE" $1 +} + +function assert_data_children_size() +{ + assert_children_size "/test_keeper_map/02704_keeper_map_zk_nodes/$CLICKHOUSE_DATABASE/data" $1 +} + +assert_root_children_size 2 +assert_data_children_size 0 + +$CLICKHOUSE_CLIENT --query="INSERT INTO $table_name VALUES (1, 11)" + +assert_data_children_size 1 + +$CLICKHOUSE_CLIENT --query=" +CREATE TABLE $table_name_another (key UInt64, value UInt64) +ENGINE = KeeperMap('/$table_name/$CLICKHOUSE_DATABASE') +PRIMARY KEY(key)" + +assert_root_children_size 2 +assert_data_children_size 1 + +$CLICKHOUSE_CLIENT --query="INSERT INTO $table_name_another VALUES (1, 11)" + +assert_root_children_size 2 +assert_data_children_size 1 + +$CLICKHOUSE_CLIENT --query="INSERT INTO $table_name_another VALUES (2, 22)" + +assert_root_children_size 2 +assert_data_children_size 2 + +$CLICKHOUSE_CLIENT --query="DROP TABLE $table_name" + +assert_root_children_size 2 +assert_data_children_size 2 + +$CLICKHOUSE_CLIENT --query="DROP TABLE $table_name_another" + +assert_root_children_size 0 diff --git a/tests/queries/0_stateless/02704_max_backup_bandwidth.reference b/tests/queries/0_stateless/02704_max_backup_bandwidth.reference new file mode 100644 index 00000000000..9972842f982 --- /dev/null +++ b/tests/queries/0_stateless/02704_max_backup_bandwidth.reference @@ -0,0 +1 @@ +1 1 diff --git a/tests/queries/0_stateless/02704_max_backup_bandwidth.sh b/tests/queries/0_stateless/02704_max_backup_bandwidth.sh new file mode 100755 index 00000000000..b5d32d2059d --- /dev/null +++ b/tests/queries/0_stateless/02704_max_backup_bandwidth.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +# Tags: no-s3-storage, no-random-settings, no-random-merge-tree-settings + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -nm -q " + drop table if exists data; + create table data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9; +" + +# reading 1e6*8 bytes with 1M bandwith it should take (8-1)/1=7 seconds +$CLICKHOUSE_CLIENT -q "insert into data select * from numbers(1e6)" + +query_id=$(random_str 10) +$CLICKHOUSE_CLIENT --query_id "$query_id" -q "backup table data to Disk('default', 'backups/$CLICKHOUSE_DATABASE/data/backup1')" --max_backup_bandwidth=1M > /dev/null +$CLICKHOUSE_CLIENT -nm -q " + SYSTEM FLUSH LOGS; + SELECT + query_duration_ms >= 7e3, + ProfileEvents['ReadBufferFromFileDescriptorReadBytes'] > 8e6 + FROM system.query_log + WHERE current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id' AND type != 'QueryStart' +" diff --git a/tests/queries/0_stateless/02704_storage_merge_explain_graph_crash.reference b/tests/queries/0_stateless/02704_storage_merge_explain_graph_crash.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02704_storage_merge_explain_graph_crash.sql b/tests/queries/0_stateless/02704_storage_merge_explain_graph_crash.sql new file mode 100644 index 00000000000..44a8fe4f049 --- /dev/null +++ b/tests/queries/0_stateless/02704_storage_merge_explain_graph_crash.sql @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS foo; +DROP TABLE IF EXISTS foo2; +DROP TABLE IF EXISTS foo2_dist; +DROP TABLE IF EXISTS merge1; + +CREATE TABLE foo (`Id` Int32, `Val` Int32) ENGINE = MergeTree ORDER BY Id; +INSERT INTO foo SELECT number, number FROM numbers(100); + +CREATE TABLE foo2 (`Id` Int32, `Val` Int32) ENGINE = MergeTree ORDER BY Id; +INSERT INTO foo2 SELECT number, number FROM numbers(100); +CREATE TABLE foo2_dist (`Id` UInt32, `Val` String) ENGINE = Distributed(test_shard_localhost, currentDatabase(), foo2); + +CREATE TABLE merge1 AS foo ENGINE = Merge(currentDatabase(), '^(foo|foo2_dist)$'); + +EXPLAIN PIPELINE graph = 1, compact = 1 SELECT * FROM merge1 FORMAT Null; +EXPLAIN PIPELINE graph = 1, compact = 1 SELECT * FROM merge1 FORMAT Null SETTINGS allow_experimental_analyzer=1; diff --git a/tests/queries/0_stateless/02705_capnp_more_types.reference b/tests/queries/0_stateless/02705_capnp_more_types.reference new file mode 100644 index 00000000000..9cacd1fe527 --- /dev/null +++ b/tests/queries/0_stateless/02705_capnp_more_types.reference @@ -0,0 +1,3 @@ +42 42 42 42 42.42 42.42 +{'Hello':42,'World':24} +42 42 42 42 42 42 42 42 diff --git a/tests/queries/0_stateless/02705_capnp_more_types.sh b/tests/queries/0_stateless/02705_capnp_more_types.sh new file mode 100755 index 00000000000..1f6b1f7c86b --- /dev/null +++ b/tests/queries/0_stateless/02705_capnp_more_types.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel, no-replicated-database + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +SCHEMADIR=$CURDIR/format_schemas +$CLICKHOUSE_LOCAL -q "select 42::Int128 as int128, 42::UInt128 as uint128, 42::Int256 as int256, 42::UInt256 as uint256, 42.42::Decimal128(2) as decimal128, 42.42::Decimal256(2) as decimal256 format CapnProto settings format_schema='$SCHEMADIR/02705_big_numbers:Message'" | $CLICKHOUSE_LOCAL --input-format CapnProto --structure "int128 Int128, uint128 UInt128, int256 Int256, uint256 UInt256, decimal128 Decimal128(2), decimal256 Decimal256(2)" -q "select * from table" --format_schema="$SCHEMADIR/02705_big_numbers:Message" + +$CLICKHOUSE_LOCAL -q "select map('Hello', 42, 'World', 24) as map format CapnProto settings format_schema='$SCHEMADIR/02705_map:Message'" | $CLICKHOUSE_LOCAL --input-format CapnProto --structure "map Map(String, UInt32)" --format_schema="$SCHEMADIR/02705_map:Message" -q "select * from table" + + +$CLICKHOUSE_LOCAL -q "select 42 as int8, 42 as uint8, 42 as int16, 42 as uint16, 42 as int32, 42 as uint32, 42 as int64, 42 as uint64 format CapnProto settings format_schema='$SCHEMADIR/02030_capnp_simple_types:Message'" | $CLICKHOUSE_LOCAL --input-format CapnProto --structure "int8 UInt32, uint8 Int32, int16 Int8, uint16 UInt8, int32 UInt64, uint32 Int64, int64 UInt16, uint64 Int16" --format_schema="$SCHEMADIR/02030_capnp_simple_types:Message" -q "select * from table" + + + diff --git a/tests/queries/0_stateless/02705_grouping_keys_equal_keys.reference b/tests/queries/0_stateless/02705_grouping_keys_equal_keys.reference new file mode 100644 index 00000000000..a9e2f17562a --- /dev/null +++ b/tests/queries/0_stateless/02705_grouping_keys_equal_keys.reference @@ -0,0 +1,6 @@ +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02705_grouping_keys_equal_keys.sql b/tests/queries/0_stateless/02705_grouping_keys_equal_keys.sql new file mode 100644 index 00000000000..fcf5b4d2ce5 --- /dev/null +++ b/tests/queries/0_stateless/02705_grouping_keys_equal_keys.sql @@ -0,0 +1,7 @@ +SELECT count() +FROM numbers(2) +GROUP BY +GROUPING SETS ( + (number, number + 0, number + 1), + (number % 1048576, number % -9223372036854775808), + (number / 2, number / 2)); diff --git a/tests/queries/0_stateless/02705_projection_and_ast_optimizations_bug.reference b/tests/queries/0_stateless/02705_projection_and_ast_optimizations_bug.reference new file mode 100644 index 00000000000..9049324c392 --- /dev/null +++ b/tests/queries/0_stateless/02705_projection_and_ast_optimizations_bug.reference @@ -0,0 +1 @@ +-2.5574077246549023 0.6663667453928805 1 diff --git a/tests/queries/0_stateless/02705_projection_and_ast_optimizations_bug.sql b/tests/queries/0_stateless/02705_projection_and_ast_optimizations_bug.sql new file mode 100644 index 00000000000..5589fbeeb9e --- /dev/null +++ b/tests/queries/0_stateless/02705_projection_and_ast_optimizations_bug.sql @@ -0,0 +1,6 @@ +drop table if exists t1; +CREATE TABLE t1 (c0 Int32) ENGINE = MergeTree() ORDER BY c0 PARTITION BY (- (c0)); +insert into t1 values(1); +SELECT (- ((((tan (t1.c0)))+(t1.c0)))), (cos ((sin (pow(t1.c0,t1.c0))))), ((gcd((- (t1.c0)),((t1.c0)+(t1.c0))))*((- ((- (t1.c0)))))) FROM t1 GROUP BY (sqrt ((- (t1.c0)))), t1.c0, pow((erf ((- (t1.c0)))),t1.c0); +drop table t1; + diff --git a/tests/queries/0_stateless/02705_protobuf_debug_abort.reference b/tests/queries/0_stateless/02705_protobuf_debug_abort.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02705_protobuf_debug_abort.reference @@ -0,0 +1 @@ +1 diff --git 
a/tests/queries/0_stateless/02705_protobuf_debug_abort.sh b/tests/queries/0_stateless/02705_protobuf_debug_abort.sh new file mode 100755 index 00000000000..ec564d4c6fc --- /dev/null +++ b/tests/queries/0_stateless/02705_protobuf_debug_abort.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +echo 'syntax = "proto3"; + +message Message { + NotExisted x = 1; +}' > 02705_schema_$CLICKHOUSE_TEST_UNIQUE_NAME.proto + + +$CLICKHOUSE_LOCAL -q "select * from file(data.bin, Protobuf) settings format_schema='02705_schema_$CLICKHOUSE_TEST_UNIQUE_NAME:Message'" 2>&1 | grep -c "CANNOT_PARSE_PROTOBUF_SCHEMA" + +rm 02705_schema_$CLICKHOUSE_TEST_UNIQUE_NAME.proto + diff --git a/tests/queries/0_stateless/02705_settings_check_changed_flag.reference b/tests/queries/0_stateless/02705_settings_check_changed_flag.reference new file mode 100644 index 00000000000..58a93072157 --- /dev/null +++ b/tests/queries/0_stateless/02705_settings_check_changed_flag.reference @@ -0,0 +1,32 @@ +0 +1 +1 +1 +0 +1 +1 +1 +0 +1 +1 +1 +0 +1 +1 +1 +0 +1 +1 +1 +0 +1 +1 +1 +0 +1 +1 +1 +0 +1 +1 +1 diff --git a/tests/queries/0_stateless/02705_settings_check_changed_flag.sql b/tests/queries/0_stateless/02705_settings_check_changed_flag.sql new file mode 100644 index 00000000000..151e7a66b71 --- /dev/null +++ b/tests/queries/0_stateless/02705_settings_check_changed_flag.sql @@ -0,0 +1,82 @@ +---SettingFieldNumber +SELECT changed from system.settings where name = 'mysql_max_rows_to_insert'; +SET mysql_max_rows_to_insert = 123123; + +select changed from system.settings where name = 'mysql_max_rows_to_insert'; +set mysql_max_rows_to_insert = 123123; +select changed from system.settings where name = 'mysql_max_rows_to_insert'; +set mysql_max_rows_to_insert = 65536; +select changed from system.settings where name = 'mysql_max_rows_to_insert'; + +---SettingAutoWrapper + +select changed from system.settings where name = 'insert_quorum'; +set insert_quorum = 123123; +select changed from system.settings where name = 'insert_quorum'; +set insert_quorum = 123123; +select changed from system.settings where name = 'insert_quorum'; +set insert_quorum = 0; +select changed from system.settings where name = 'insert_quorum'; + +---SettingFieldMaxThreads + +select changed from system.settings where name = 'max_alter_threads'; +set max_alter_threads = 123123; +select changed from system.settings where name = 'max_alter_threads'; +set max_alter_threads = 123123; +select changed from system.settings where name = 'max_alter_threads'; +set max_alter_threads = 0; +select changed from system.settings where name = 'max_alter_threads'; + +---SettingFieldTimespanUnit + +select changed from system.settings where name = 'drain_timeout'; +set drain_timeout = 123123; +select changed from system.settings where name = 'drain_timeout'; +set drain_timeout = 123123; +select changed from system.settings where name = 'drain_timeout'; +set drain_timeout = 3; +select changed from system.settings where name = 'drain_timeout'; + + +---SettingFieldChar + +select changed from system.settings where name = 'format_csv_delimiter'; +set format_csv_delimiter = ','; +select changed from system.settings where name = 'format_csv_delimiter'; +set format_csv_delimiter = ','; +select changed from system.settings where name = 'format_csv_delimiter'; +set format_csv_delimiter = ','; +select changed from system.settings where name = 'format_csv_delimiter'; + + 
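+-- NOTE: ',' is also the default of format_csv_delimiter, so this block
+-- additionally shows that an explicit SET marks a setting as changed even when
+-- the assigned value equals the default (hence the same 0 1 1 1 pattern).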
+---SettingFieldURI + +select changed from system.settings where name = 'format_avro_schema_registry_url'; +set format_avro_schema_registry_url = 'https://github.com/ClickHouse/ClickHouse/tree/master/src/Core'; +select changed from system.settings where name = 'format_avro_schema_registry_url'; +set format_avro_schema_registry_url = 'https://github.com/ClickHouse/ClickHouse/tree/master/src/Core'; +select changed from system.settings where name = 'format_avro_schema_registry_url'; +set format_avro_schema_registry_url = ''; +select changed from system.settings where name = 'format_avro_schema_registry_url'; + + +--- SettingFieldEnum + +select changed from system.settings where name = 'output_format_orc_compression_method'; +set output_format_orc_compression_method = 'none'; +select changed from system.settings where name = 'output_format_orc_compression_method'; +set output_format_orc_compression_method = 'none'; +select changed from system.settings where name = 'output_format_orc_compression_method'; +set output_format_orc_compression_method = 'lz4'; +select changed from system.settings where name = 'output_format_orc_compression_method'; + +--- SettingFieldMultiEnum + +select changed from system.settings where name = 'join_algorithm'; +set join_algorithm = 'auto,direct'; +select changed from system.settings where name = 'join_algorithm'; +set join_algorithm = 'auto,direct'; +select changed from system.settings where name = 'join_algorithm'; +set join_algorithm = 'default'; +select changed from system.settings where name = 'join_algorithm'; diff --git a/tests/queries/0_stateless/02706_array_map_tuples.reference b/tests/queries/0_stateless/02706_array_map_tuples.reference new file mode 100644 index 00000000000..a44843f1a35 --- /dev/null +++ b/tests/queries/0_stateless/02706_array_map_tuples.reference @@ -0,0 +1,3 @@ +[(2,1)] +[1] +[(3,2)] diff --git a/tests/queries/0_stateless/02706_array_map_tuples.sql b/tests/queries/0_stateless/02706_array_map_tuples.sql new file mode 100644 index 00000000000..205e15c7de2 --- /dev/null +++ b/tests/queries/0_stateless/02706_array_map_tuples.sql @@ -0,0 +1,6 @@ +WITH [(1, 2)] AS arr1 SELECT arrayMap((x, y) -> (y, x), arr1); +WITH [(1, 2)] AS arr1 SELECT arrayMap(x -> x.1, arr1); +WITH [(1, 2)] AS arr1, [(3, 4)] AS arr2 SELECT arrayMap((x, y) -> (y.1, x.2), arr1, arr2); + +WITH [(1, 2)] AS arr1 SELECT arrayMap((x, y, z) -> (y, x, z), arr1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +WITH [1, 2] AS arr1 SELECT arrayMap((x, y) -> (y, x), arr1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } diff --git a/tests/queries/0_stateless/02706_arrow_different_dictionaries.reference b/tests/queries/0_stateless/02706_arrow_different_dictionaries.reference new file mode 100644 index 00000000000..eda4c04d605 --- /dev/null +++ b/tests/queries/0_stateless/02706_arrow_different_dictionaries.reference @@ -0,0 +1,20 @@ +0 +0 +1 +1 +2 +2 +3 +3 +4 +4 +5 +5 +6 +6 +7 +7 +8 +8 +9 +9 diff --git a/tests/queries/0_stateless/02706_arrow_different_dictionaries.sh b/tests/queries/0_stateless/02706_arrow_different_dictionaries.sh new file mode 100755 index 00000000000..d3958aa768d --- /dev/null +++ b/tests/queries/0_stateless/02706_arrow_different_dictionaries.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select toLowCardinality(toString(number % 10)) as x from numbers(20) format Arrow settings max_block_size=7, output_format_arrow_low_cardinality_as_dictionary=1" | $CLICKHOUSE_LOCAL -q "select * from table order by x" --input-format='Arrow' + diff --git a/tests/queries/0_stateless/02706_keeper_map_insert_strict.reference b/tests/queries/0_stateless/02706_keeper_map_insert_strict.reference new file mode 100644 index 00000000000..a6bdbb192e4 --- /dev/null +++ b/tests/queries/0_stateless/02706_keeper_map_insert_strict.reference @@ -0,0 +1,3 @@ +1 1.1 +1 2.1 +1 2.1 diff --git a/tests/queries/0_stateless/02706_keeper_map_insert_strict.sql b/tests/queries/0_stateless/02706_keeper_map_insert_strict.sql new file mode 100644 index 00000000000..97c801ec46e --- /dev/null +++ b/tests/queries/0_stateless/02706_keeper_map_insert_strict.sql @@ -0,0 +1,20 @@ +-- Tags: no-ordinary-database, no-fasttest + +DROP TABLE IF EXISTS 02706_keeper_map_insert_strict SYNC; + +CREATE TABLE 02706_keeper_map_insert_strict (key UInt64, value Float64) Engine=KeeperMap('/' || currentDatabase() || '/test_02706_keeper_map_insert_strict') PRIMARY KEY(key); + +INSERT INTO 02706_keeper_map_insert_strict VALUES (1, 1.1), (2, 2.2); +SELECT * FROM 02706_keeper_map_insert_strict WHERE key = 1; + +SET keeper_map_strict_mode = false; + +INSERT INTO 02706_keeper_map_insert_strict VALUES (1, 2.1); +SELECT * FROM 02706_keeper_map_insert_strict WHERE key = 1; + +SET keeper_map_strict_mode = true; + +INSERT INTO 02706_keeper_map_insert_strict VALUES (1, 2.1); -- { serverError KEEPER_EXCEPTION } +SELECT * FROM 02706_keeper_map_insert_strict WHERE key = 1; + +DROP TABLE 02706_keeper_map_insert_strict; diff --git a/tests/queries/0_stateless/02706_kolmogorov_smirnov_test.reference b/tests/queries/0_stateless/02706_kolmogorov_smirnov_test.reference new file mode 100644 index 00000000000..d2e9e7de5d4 --- /dev/null +++ b/tests/queries/0_stateless/02706_kolmogorov_smirnov_test.reference @@ -0,0 +1,3 @@ +0.1 0.1 1 0.05 0.1 1 0.05 0.1 1 0.05 0.099562 1 0.018316 1 1 -0 1 1 -0 1 1 -0 1 1 -0 1 +0.000007 0.000007 0.000004 0.000023 0.000007 0.000004 0.000023 0.000007 0.000004 0.000023 0.000008 0.000003 0.00002 0.158 0.158 0.158 0.146 0.158 0.158 0.146 0.158 0.158 0.146 0.158 0.158 0.146 +0 0 0 0.523357 0 0 0.523357 0 0 0.523357 0 0 0.504595 0.486 0.486 0.486 0.036 0.486 0.486 0.036 0.486 0.486 0.036 0.486 0.486 0.036 diff --git a/tests/queries/0_stateless/02706_kolmogorov_smirnov_test.sql b/tests/queries/0_stateless/02706_kolmogorov_smirnov_test.sql new file mode 100644 index 00000000000..3199b6968c0 --- /dev/null +++ b/tests/queries/0_stateless/02706_kolmogorov_smirnov_test.sql @@ -0,0 +1,107 @@ +DROP TABLE IF EXISTS kstest; + +CREATE TABLE kstest (left Float64, right Float64) ENGINE = Memory; + +INSERT INTO kstest VALUES (0.010268, 0), (0.000167, 0), (0.000167, 0), (0.159258, 1), (0.136278, 1), (0.122389, 1); + +SELECT +roundBankers(kolmogorovSmirnovTest(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('two-sided')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('less')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('greater')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('two-sided','auto')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('less','auto')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('greater','auto')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('two-sided','exact')(left, right).2, 6), 
+roundBankers(kolmogorovSmirnovTest('less','exact')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('greater','exact')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('two-sided','asymp')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('less','asymp')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('greater','asymp')(left, right).2, 6) , +roundBankers(kolmogorovSmirnovTest(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('two-sided')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('less')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('greater')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('two-sided','auto')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('less','auto')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('greater','auto')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('two-sided','exact')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('less','exact')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('greater','exact')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('two-sided','asymp')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('less','asymp')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('greater','asymp')(left, right).1, 6) +from kstest; + +DROP TABLE IF EXISTS kstest; + +CREATE TABLE kstest (left Float64, right Float64) ENGINE = Memory; + +INSERT INTO kstest VALUES (14.72789, 0), (9.61661, 0), (13.57615, 0), (3.98392, 0), (11.98889, 0), (10.99422, 0), (5.44792, 0), (20.29346, 0), (7.05926, 0), (9.22732, 0), (12.06847, 0), (13.52612, 0), (8.24597, 0), (9.35245, 0), (10.12297, 0), (15.80624, 0), (13.68613, 0), (10.72729, 0), (5.62078, 0), (6.12229, 0), (6.03801, 0), (8.95585, 0), (24.04613, 0), (9.04757, 0), (2.68263, 0), (15.43935, 0), (2.89423, 0), (4.01423, 0), (4.30568, 0), (11.99948, 0), (8.40574, 0), (10.86642, 0), (9.4266, 0), (-8.12752, 0), (7.91634, 0), (7.3967, 0), (2.26431, 0), (14.20118, 0), (6.68233, 0), (15.46221, 0), (7.88467, 0), (11.20011, 0), (8.92027, 0), (10.27926, 0), (5.14395, 0), (5.62178, 0), (12.84383, 0), (9.98009, 0), (-0.69789, 0), (11.41386, 0), (7.76863, 0), (7.21743, 0), (1.81176, 0), (9.43762, 0), (19.22117, 0), (2.97128, 0), (14.32851, 0), (7.54959, 0), (3.81545, 0), (10.1281, 0), (2.48596, 0), (10.0461, 0), (3.59714, 0), (9.73522, 0), (18.8077, 0), (3.15148, 0), (12.26062, 0), (5.66707, 0), (6.58623, 0), (17.30902, 0), (9.91391, 0), (5.36946, 0), (15.73637, 0), (16.96281, 0), (11.54063, 0), (18.37358, 0), (11.38255, 0), (10.53256, 0), (8.08833, 0), (16.27556, 0), (2.42969, 0), (9.56127, 0), (7.32998, 0), (9.19511, 0), (9.66903, 0), (4.15029, 0), (8.83511, 0), (14.60617, 0), (14.06143, 0), (5.39556, 0), (10.11871, 0), (10.56619, 0), (14.4462, 0), (10.42106, 0), (7.75551, 0), (11.00418, 0), (4.47226, 0), (16.35461, 0), (18.55174, 0), (11.82044, 0), (7.39454, 0), (11.27767, 0), (6.83827, 0), (7.76858, 0), (15.97614, 0), (14.53781, 0), (12.99546, 0), (16.91151, 0), (9.65012, 0), (14.25487, 0), (14.03618, 0), (2.57382, 0), (2.50779, 0), (14.24787, 0), (13.34666, 0), (7.31102, 0), (10.22981, 0), (17.4435, 0), (21.2074, 0), (6.64191, 0), (18.7086, 0), (14.78686, 0), (9.85287, 0), (4.48263, 0), (14.17469, 0), (14.4342, 0), (19.2481, 0), (3.47165, 0), (8.28712, 0), (8.81657, 0), (0.92319, 0), (20.41106, 0), (6.76127, 0), (22.00242, 0), (8.66129, 0), (10.9929, 0), (17.95494, 0), (17.20996, 0), (12.18888, 0), (12.14257, 0), (15.81243, 0), (4.43362, 0), (1.17567, 0), (15.60881, 0), (9.34833, 0), (6.33513, 0), 
(-0.83095, 0), (12.43268, 0), (6.63207, 0), (11.96877, 0), (14.81029, 0), (21.84876, 0), (3.75896, 0), (6.91307, 0), (13.73015, 0), (8.63753, 0), (15.71679, 0), (1.74565, 0), (9.16895, 0), (5.70685, 0), (5.00117, 0), (13.06888, 0), (7.51204, 0), (15.34885, 0), (5.20264, 0), (8.59043, 0), (6.45619, 0), (14.61979, 0), (11.7075, 0), (14.04901, 0), (4.20525, 0), (15.1733, 0), (3.12934, 0), (8.08049, 0), (15.41273, 0), (16.90751, 0), (5.86893, 0), (7.1086, 0), (4.418, 0), (12.0614, 0), (7.07887, 0), (3.61585, 0), (11.73001, 0), (10.80449, 0), (8.40311, 0), (9.91276, 0), (16.4164, 0), (5.25034, 0), (15.20283, 0), (10.42909, 0), (9.53888, 0), (14.68939, 0), (6.60007, 0), (18.31058, 0), (7.01885, 0), (18.71631, 0), (10.50002, 0), (10.7517, 0), (4.23224, 0), (2.28924, 0), (8.56059, 0), (8.25095, 0), (9.15673, 0), (13.28409, 0), (8.4513, 0), (2.83911, 0), (2.79676, 0), (9.11055, 0), (7.18529, 0), (-4.1258, 0), (5.28306, 0), (6.82757, 0), (10.89035, 0), (5.24822, 0), (11.935, 0), (6.45675, 0), (10.18088, 0), (4.9932, 0), (18.09939, 0), (8.11738, 0), (5.37883, 0), (10.50339, 0), (16.64093, 0), (14.77263, 0), (13.71385, 0), (6.98746, 0), (10.74635, 0), (5.49432, 0), (13.46078, 0), (10.67565, 0), (9.0291, 0), (11.51417, 0), (13.07118, 0), (9.5049, 0), (8.50611, 0), (6.47606, 0), (13.06526, 0), (19.08658, 0), (9.49741, 0), (10.60865, 0), (2.28996, 0), (8.12846, 0), (5.62241, 0), (4.07712, 0), (17.98526, 0), (9.466, 0), (11.38904, 0), (5.91826, 0), (1.52059, 0), (18.79161, 0), (18.20669, 0), (-1.67829, 0), (18.01586, 0), (16.31577, 0), (7.88281, 0), (8.46179, 0), (10.31113, 0), (14.88377, 0), (1.31835, 0), (2.53176, 0), (9.48625, 0), (3.97936, 0), (11.52319, 0), (13.24178, 0), (7.58739, 0), (10.00959, 0), (9.73361, 0), (8.35716, 0), (1.65491, 0), (11.11521, 0), (6.08355, 0), (10.04582, 0), (11.58237, 0), (16.40249, 0), (1.9691, 0), (13.22776, 0), (2.67059, 0), (9.83651, 0), (2.12539, 0), (9.27114, 0), (9.0699, 0), (2.78179, 0), (12.49311, 0), (12.97662, 0), (15.06359, 0), (16.91565, 0), (5.92011, 0), (5.81304, 0), (8.46425, 0), (9.48705, 0), (4.68191, 0), (5.70028, 0), (-0.78798, 0), (10.03442, 0), (15.45433, 0), (9.43845, 0), (3.05825, 0), (6.92126, 0), (14.05905, 0), (19.71579, 0), (15.0131, 0), (4.50386, 0), (1.31061, 0), (10.81197, 0), (14.32942, 0), (9.26469, 0), (7.27679, 0), (22.69295, 0), (12.03763, 0), (7.34876, 0), (16.60689, 0), (7.48786, 0), (15.78602, 0), (17.21048, 0), (13.93482, 0), (9.69911, 0), (12.24315, 0), (10.58131, 0), (19.57006, 0), (9.8856, 0), (11.70302, 0), (7.89864, 0), (12.24831, 0), (16.93707, 0), (9.65467, 0), (4.221, 0), (15.45229, 0), (12.83088, 0), (7.58313, 0), (12.895, 0), (10.02471, 0), (13.36059, 0), (5.07864, 0), (9.72017, 0), (11.05809, 0), (15.28528, 0), (13.99834, 0), (19.26989, 0), (9.41846, 0), (11.65425, 0), (8.49638, 0), (6.38592, 0), (-4.69837, 0), (12.22061, 0), (9.41331, 0), (13.2075, 0), (12.97005, 0), (11.44352, 0), (9.79805, 0), (6.93116, 0), (10.07691, 0), (22.05892, 0), (7.80353, 0), (-2.17276, 0), (0.61509, 0), (8.35842, 0), (17.77108, 0), (14.70841, 0), (1.27992, 0), (15.62699, 0), (9.32914, 0), (15.41866, 0), (10.82009, 0), (3.29902, 0), (9.21998, 0), (7.93845, 0), (10.33344, 0), (12.06399, 0), (5.5308, 0), (8.38727, 0), (18.11104, 0), (8.86565, 0), (19.41825, 0), (9.52376, 0), (3.94552, 0), (9.37587, 0), (15.44954, 0), (15.90527, 0), (13.18927, 0), (7.01646, 0), (9.06005, 0), (9.06431, 0), (5.76006, 0), (9.18705, 0), (-3.48446, 0), (15.89817, 0), (12.94719, 0), (23.69426, 0), (17.47755, 0), (15.61528, 0), (0.54832, 0), (14.32916, 0), (9.55305, 0), 
(13.79891, 0), (0.82544, 0), (13.34875, 0), (9.07614, 0), (5.19621, 0), (2.1451, 0), (9.87726, 0), (8.45439, 0), (-1.41842, 0), (7.93598, 0), (11.23151, 0), (17.84458, 0), (7.02237, 0), (10.7842, 0), (4.42832, 0), (4.45044, 0), (1.50938, 0), (21.21651, 0), (6.2097, 0), (6.84354, 0), (18.53804, 0), (12.01072, 0), (4.8345, 0), (20.41587, 0), (14.48353, 0), (8.71116, 0), (12.42818, 0), (14.89244, 0), (8.03033, 0), (5.25917, 0), (2.30092, 0), (10.22504, 0), (15.37573, 0), (7.13666, 0), (4.45018, 0), (10.18405, 0), (3.91025, 0), (14.52304, 0), (13.14771, 0), (11.99219, 0), (9.21345, 0), (8.85106, 0), (12.91887, 0), (15.62308, 0), (11.88034, 0), (15.12097, 0), (11.58168, 0), (16.83051, 0), (5.25405, 0), (2.19976, 0), (4.56716, 0), (16.46053, 0), (5.61995, 0), (8.67704, 0), (5.62789, 0), (9.84815, 0), (13.05834, 0), (11.74205, 0), (3.88393, 0), (16.15321, 0), (4.83925, 0), (13.00334, 0), (4.4028, 0), (4.35794, 0), (4.47478, 0), (2.38713, 0), (4.25235, 0), (10.87509, 0), (9.82411, 0), (13.61518, 0), (10.25507, 0), (4.0335, 0), (10.69881, 0), (5.70321, 0), (6.96244, 0), (9.35874, 0), (6.28076, 0), (8.29015, 0), (6.88653, 0), (7.70687, 0), (8.2001, 0), (6.73415, 0), (3.82052, 0), (3.94469, 0), (15.82384, 0), (2.54004, 0), (10.74876, 0), (12.60517, 0), (17.7024, 0), (4.6722, 0), (13.67341, 0), (6.4565, 0), (12.95699, 0), (4.56912, 0), (5.58464, 0), (4.0638, 0), (13.05559, 0), (5.38269, 0), (0.16354, 0), (7.23962, 0), (7.38577, 0), (8.50951, 0), (13.72574, 0), (17.80421, 0), (3.01135, 0), (8.02608, 0), (14.23847, 0), (-8.65656, 1), (22.98234, 1), (23.80821, 1), (13.33939, 1), (-4.05537, 1), (23.5155, 1), (-6.45272, 1), (17.7903, 1), (11.463, 1), (5.28021, 1), (8.39157, 1), (6.02464, 1), (14.43732, 1), (15.76584, 1), (1.54391, 1), (1.24897, 1), (27.1507, 1), (7.71091, 1), (15.71846, 1), (32.97808, 1), (-1.79334, 1), (-9.23439, 1), (11.27838, 1), (0.72703, 1), (18.51557, 1), (9.16619, 1), (17.29624, 1), (-1.30208, 1), (-3.48018, 1), (10.12082, 1), (-8.01318, 1), (-14.22264, 1), (16.58174, 1), (-0.55975, 1), (5.61449, 1), (1.44626, 1), (7.89158, 1), (1.13369, 1), (-0.82609, 1), (12.23365, 1), (12.45443, 1), (14.46915, 1), (13.72627, 1), (18.41459, 1), (29.66702, 1), (1.51619, 1), (10.40078, 1), (3.33266, 1), (6.12036, 1), (11.86553, 1), (6.59422, 1), (22.0948, 1), (1.79623, 1), (14.29513, 1), (19.69162, 1), (-7.98033, 1), (5.48433, 1), (-2.28474, 1), (9.91876, 1), (10.64097, 1), (0.22523, 1), (17.01773, 1), (22.37388, 1), (14.04215, 1), (23.1244, 1), (18.96958, 1), (8.42663, 1), (3.7165, 1), (14.29366, 1), (23.50886, 1), (26.33722, 1), (26.72396, 1), (13.26287, 1), (12.97607, 1), (17.41838, 1), (8.63875, 1), (17.08943, 1), (23.15356, 1), (-4.4965, 1), (7.58895, 1), (26.04074, 1), (6.84245, 1), (20.56287, 1), (3.84735, 1), (-2.76304, 1), (13.1615, 1), (8.21954, 1), (-3.49943, 1), (22.12419, 1), (7.08323, 1), (16.12937, 1), (-0.32672, 1), (16.5942, 1), (7.68977, 1), (11.39484, 1), (-5.11987, 1), (20.87404, 1), (8.01007, 1), (3.26497, 1), (5.61253, 1), (20.69182, 1), (0.0296, 1), (21.904, 1), (22.46572, 1), (3.63685, 1), (-5.10846, 1), (14.86389, 1), (5.47188, 1), (18.44095, 1), (16.71368, 1), (6.36704, 1), (8.82663, 1), (14.6727, 1), (7.98383, 1), (2.65568, 1), (21.45827, 1), (11.77948, 1), (4.71979, 1), (3.17951, 1), (13.90226, 1), (15.50578, 1), (10.8026, 1), (16.91369, 1), (9.90552, 1), (13.87322, 1), (4.12366, 1), (-3.78985, 1), (1.7599, 1), (3.43715, 1), (-3.45246, 1), (23.64571, 1), (-4.96877, 1), (3.93514, 1), (1.49914, 1), (12.71519, 1), (5.11521, 1), (4.79872, 1), (20.89391, 1), (5.363, 1), 
(8.02765, 1), (14.30804, 1), (11.49002, 1), (14.25281, 1), (7.6573, 1), (15.49686, 1), (3.29327, 1), (2.27236, 1), (12.58104, 1), (19.19128, 1), (15.25901, 1), (6.5221, 1), (10.10965, 1), (12.75249, 1), (16.50977, 1), (-8.6697, 1), (8.28553, 1), (1.44315, 1), (4.65869, 1), (0.98149, 1), (0.16623, 1), (17.66332, 1), (4.35346, 1), (6.52742, 1), (-1.06631, 1), (-5.28454, 1), (14.25583, 1), (8.74058, 1), (1.89553, 1), (-0.92959, 1), (10.30289, 1), (-6.3744, 1), (-8.1706, 1), (10.95369, 1), (4.94384, 1), (28.40568, 1), (3.7004, 1), (2.52363, 1), (4.07997, 1), (7.8849, 1), (17.95409, 1), (16.67021, 1), (11.34377, 1), (-0.07446, 1), (22.00223, 1), (3.31778, 1), (18.50719, 1), (-3.58655, 1), (6.5394, 1), (12.40459, 1), (16.59866, 1), (7.54176, 1), (-1.51044, 1), (12.69758, 1), (2.9842, 1), (2.49187, 1), (2.04113, 1), (-2.46544, 1), (15.18368, 1), (-0.04058, 1), (-0.4127, 1), (10.5526, 1), (12.03982, 1), (12.10923, 1), (11.54954, 1), (-1.18613, 1), (11.30984, 1), (23.54105, 1), (10.67321, 1), (24.09196, 1), (7.5008, 1), (12.52233, 1), (4.30673, 1), (9.35793, 1), (4.44472, 1), (-7.00679, 1), (8.56241, 1), (23.73891, 1), (15.62708, 1), (16.09205, 1), (12.52074, 1), (14.58927, 1), (-4.80187, 1), (8.47964, 1), (7.75477, 1), (12.6893, 1), (7.14147, 1), (12.12654, 1), (12.32334, 1), (7.98909, 1), (3.26652, 1), (20.53684, 1), (32.3369, 1), (19.74911, 1), (-4.62897, 1), (8.26483, 1), (20.88451, 1), (-2.12982, 1), (25.61459, 1), (5.32091, 1), (-4.1196, 1), (7.57937, 1), (21.15847, 1), (6.46355, 1), (7.74846, 1), (19.62636, 1), (28.34629, 1), (26.73919, 1), (20.40427, 1), (3.03378, 1), (10.2537, 1), (7.47745, 1), (10.79184, 1), (3.91962, 1), (19.97973, 1), (18.87711, 1), (12.56157, 1), (11.46033, 1), (3.78661, 1), (-9.45748, 1), (12.06033, 1), (-0.74615, 1), (13.2815, 1), (24.78052, 1), (5.83337, 1), (17.4111, 1), (19.70331, 1), (11.78446, 1), (-1.366, 1), (1.37458, 1), (16.31483, 1), (32.63464, 1), (-3.79736, 1), (19.17984, 1), (-0.27705, 1), (-3.69456, 1), (28.38058, 1), (-1.36876, 1), (-25.63301, 1), (3.58644, 1), (-6.85667, 1), (13.42225, 1), (12.04671, 1), (28.99468, 1), (7.87662, 1), (2.61119, 1), (-3.56022, 1), (1.50022, 1), (14.55836, 1), (9.35831, 1), (16.9366, 1), (29.23126, 1), (15.31386, 1), (13.46112, 1), (7.39667, 1), (11.15599, 1), (9.80499, 1), (22.64923, 1), (8.67693, 1), (18.67335, 1), (-3.19127, 1), (22.94716, 1), (17.86834, 1), (16.98267, 1), (15.91653, 1), (11.79718, 1), (18.50208, 1), (8.90755, 1), (10.44843, 1), (4.67433, 1), (6.82287, 1), (10.82228, 1), (-4.18631, 1), (20.3872, 1), (11.84735, 1), (21.25376, 1), (10.55032, 1), (12.19023, 1), (0.63369, 1), (7.92381, 1), (17.90933, 1), (15.30781, 1), (10.01877, 1), (0.88744, 1), (22.20967, 1), (-4.23117, 1), (21.50819, 1), (11.27421, 1), (-16.23179, 1), (33.43085, 1), (5.15093, 1), (1.34505, 1), (6.027, 1), (-10.43035, 1), (27.45998, 1), (19.24886, 1), (-4.44761, 1), (5.453, 1), (12.73758, 1), (11.2897, 1), (31.032, 1), (7.39168, 1), (11.95245, 1), (26.279, 1), (-1.0255, 1), (10.36675, 1), (11.58439, 1), (27.8405, 1), (13.1707, 1), (31.39133, 1), (27.08301, 1), (-2.14368, 1), (4.08476, 1), (21.5573, 1), (16.69822, 1), (7.69955, 1), (8.32793, 1), (6.49235, 1), (-7.3284, 1), (10.58264, 1), (-6.17006, 1), (34.55782, 1), (10.93221, 1), (44.24299, 1), (14.6224, 1), (-7.42798, 1), (15.52351, 1), (11.33982, 1), (10.46716, 1), (13.0986, 1), (-4.25988, 1), (9.55316, 1), (0.75489, 1), (25.99212, 1), (-0.81401, 1), (3.49551, 1), (22.99402, 1), (10.99628, 1), (23.70223, 1), (2.71482, 1), (22.82309, 1), (31.25686, 1), (4.86318, 1), (-1.06476, 1), 
(15.10298, 1), (-0.61015, 1), (17.81246, 1), (-1.55788, 1), (18.09709, 1), (9.11271, 1), (9.94682, 1), (-7.33194, 1), (-4.67293, 1), (21.81717, 1), (7.16318, 1), (13.25649, 1), (13.88776, 1), (4.95793, 1), (17.65303, 1), (14.47382, 1), (13.19373, 1), (31.86093, 1), (5.73161, 1), (10.96492, 1), (6.97951, 1), (1.75136, 1), (10.96144, 1), (15.08137, 1), (9.95311, 1), (7.07729, 1), (3.08148, 1), (22.37954, 1), (8.51951, 1), (2.88746, 1), (26.73509, 1), (-2.88939, 1), (-2.82367, 1), (-0.35783, 1), (14.22076, 1), (11.50295, 1), (7.10171, 1), (8.28488, 1), (0.54178, 1), (13.8022, 1), (15.62157, 1), (10.79173, 1), (28.18946, 1), (30.43524, 1), (2.54914, 1), (9.89421, 1), (13.08631, 1), (4.68761, 1), (5.61516, 1), (22.88072, 1), (7.4735, 1), (11.27382, 1), (2.39559, 1), (-3.31889, 1), (9.61957, 1), (23.01381, 1), (-1.23467, 1), (9.07691, 1), (15.78056, 1), (12.28421, 1), (9.44888, 1), (13.16928, 1), (4.33357, 1), (2.21737, 1), (33.17833, 1), (13.25407, 1), (-2.47961, 1), (6.41401, 1), (18.8439, 1), (-4.63375, 1), (-8.2909, 1), (12.18221, 1), (-2.95356, 1), (19.61659, 1), (12.45056, 1), (-4.17198, 1), (21.9641, 1), (11.96416, 1), (12.74573, 1), (10.47873, 1), (12.73295, 1), (11.31373, 1), (9.9827, 1), (5.87138, 1), (4.24372, 1), (-23.72256, 1), (28.41337, 1), (4.88103, 1), (3.61902, 1), (8.93586, 1), (16.40759, 1), (27.84494, 1), (5.6001, 1), (14.51379, 1), (13.5576, 1), (12.92213, 1), (3.90686, 1), (17.07104, 1), (15.84268, 1), (17.38777, 1), (16.54766, 1), (5.94487, 1), (17.02804, 1), (7.66386, 1), (10.43088, 1), (6.16059, 1), (20.46178, 1), (20.02888, 1), (20.95949, 1), (6.50808, 1), (7.22366, 1), (8.06659, 1), (16.08241, 1), (13.83514, 1), (-0.33454, 1), (12.98848, 1), (12.99024, 1); + +SELECT +roundBankers(kolmogorovSmirnovTest(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('two-sided')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('less')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('greater')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('two-sided','auto')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('less','auto')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('greater','auto')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('two-sided','exact')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('less','exact')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('greater','exact')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('two-sided','asymp')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('less','asymp')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('greater','asymp')(left, right).2, 6) , +roundBankers(kolmogorovSmirnovTest(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('two-sided')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('less')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('greater')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('two-sided','auto')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('less','auto')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('greater','auto')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('two-sided','exact')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('less','exact')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('greater','exact')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('two-sided','asymp')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('less','asymp')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('greater','asymp')(left, 
right).1, 6) +from kstest; + +DROP TABLE IF EXISTS kstest; + + +CREATE TABLE kstest (left Float64, right Float64) ENGINE = Memory; + +INSERT INTO kstest VALUES (4.82025, 0), (6.13896, 0), (15.20277, 0), (14.15351, 0), (7.21338, 0), (8.55506, 0), (13.80816, 0), (11.28411, 0), (7.4612, 0), (7.43759, 0), (12.9832, 0), (-5.74783, 0), (12.47114, 0), (15.14223, 0), (3.40603, 0), (9.27323, 0), (7.88547, 0), (8.56456, 0), (4.59731, 0), (7.91213, 0), (7.33894, 0), (21.74811, 0), (11.92111, 0), (0.18828, 0), (10.47314, 0), (20.37396, 0), (11.04991, 0), (13.30083, 0), (14.28065, 0), (2.86942, 0), (24.96072, 0), (14.20164, 0), (18.28769, 0), (10.50949, 0), (9.22273, 0), (11.77608, 0), (8.56872, 0), (13.74535, 0), (11.65209, 0), (12.51894, 0), (17.76256, 0), (13.52122, 0), (8.70796, 0), (6.04749, 0), (16.33064, 0), (8.35636, 0), (14.03496, 0), (11.05834, 0), (14.49261, 0), (2.59383, 0), (8.01022, 0), (4.05458, 0), (13.26384, 0), (14.62058, 0), (10.52489, 0), (8.46357, 0), (6.4147, 0), (9.70071, 0), (12.47581, 0), (4.38333, 0), (17.54172, 0), (10.12109, 0), (7.73186, 0), (14.0279, 0), (11.6621, 0), (17.47045, 0), (15.50223, 0), (15.46034, 0), (13.39964, 0), (14.98025, 0), (15.87912, 0), (17.67374, 0), (9.64073, 0), (12.84904, 0), (7.70278, 0), (13.03156, 0), (9.04512, 0), (15.97014, 0), (8.96389, 0), (11.48009, 0), (9.71153, 0), (13.00084, 0), (12.39803, 0), (13.08188, 0), (5.82244, 0), (10.81871, 0), (8.2539, 0), (7.52114, 0), (9.11488, 0), (8.37482, 0), (14.48652, 0), (11.42152, 0), (16.03111, 0), (13.14057, 0), (-2.26351, 0), (15.50394, 0), (14.88603, 0), (13.37257, 0), (11.84026, 0), (7.66558, 0), (6.24584, 0), (3.6312, 0), (2.7018, 0), (5.63656, 0), (5.82643, 0), (10.06745, 0), (-0.5831, 0), (14.84202, 0), (9.5524, 0), (19.71713, 0), (14.23109, 0), (8.69105, 0), (5.33742, 0), (7.30372, 0), (7.93342, 0), (15.20884, 0), (7.53839, 0), (13.45311, 0), (11.04473, 0), (10.76673, 0), (15.44145, 0), (14.06596, 0), (9.14873, 0), (12.88372, 0), (8.74994, 0), (10.53263, 0), (16.16694, 0), (8.37197, 0), (3.43739, 0), (4.72799, 0), (9.08802, 0), (11.2531, 0), (5.16115, 0), (10.20895, 0), (18.70884, 0), (15.88924, 0), (3.38758, 0), (6.46449, 0), (10.21088, 0), (14.08458, 0), (15.74508, 0), (19.31896, 0), (13.19641, 0), (11.95409, 0), (10.70718, 0), (1.05245, 0), (10.04772, 0), (17.01369, 0), (10.2286, 0), (19.58323, 0), (7.02892, 0), (4.16866, 0), (8.94326, 0), (4.99854, 0), (8.88352, 0), (18.65422, 0), (17.32328, 0), (9.33492, 0), (14.94788, 0), (8.05863, 0), (14.6737, 0), (10.93801, 0), (0.54036, 0), (-0.34242, 0), (5.89076, 0), (3.15189, 0), (1.94421, 0), (6.38698, 0), (10.50654, 0), (8.95362, 0), (6.23711, 0), (11.75359, 0), (12.42155, 0), (-1.55472, 0), (4.6688, 0), (10.48087, 0), (11.74615, 0), (9.26822, 0), (7.55517, 0), (12.76005, 0), (16.47102, 0), (11.31297, 0), (14.37437, 0), (2.38799, 0), (6.44577, 0), (5.07471, 0), (11.55123, 0), (7.76795, 0), (10.60116, 0), (14.40885, 0), (11.58158, 0), (8.81648, 0), (12.92299, 0), (11.26939, 0), (17.95014, 0), (2.95002, 0), (17.41959, 0), (11.12455, 0), (8.78541, 0), (14.36413, 0), (12.98554, 0), (12.58505, 0), (15.49789, 0), (11.70999, 0), (0.65596, 0), (11.08202, 0), (14.75752, 0), (6.84385, 0), (9.27245, 0), (13.78243, 0), (17.4863, 0), (4.01777, 0), (11.82861, 0), (13.86551, 0), (6.16591, 0), (8.71589, 0), (16.77195, 0), (17.23243, 0), (-2.12941, 0), (5.66629, 0), (12.45153, 0), (1.63971, 0), (13.84031, 0), (4.6144, 0), (5.26169, 0), (9.27769, 0), (9.14288, 0), (9.71953, 0), (9.38446, 0), (1.64788, 0), (11.72922, 0), (13.68926, 0), (9.42952, 0), (12.05574, 0), 
(9.09148, 0), (5.32273, 0), (20.25258, 0), (10.14599, 0), (10.82156, 0), (5.75736, 0), (7.13567, 0), (9.29746, 0), (5.1618, 0), (10.076, 0), (21.65669, 0), (13.35486, 0), (6.79957, 0), (8.76243, 0), (14.59294, 0), (16.90609, 0), (10.50337, 0), (-0.07923, 0), (13.51648, 0), (12.0676, 0), (0.86482, 0), (9.03563, 0), (5.38751, 0), (17.16866, 0), (2.78702, 0), (11.15548, 0), (12.30843, 0), (8.04897, 0), (9.95814, 0), (11.29308, 0), (14.13032, 0), (21.05877, 0), (3.57386, 0), (7.96631, 0), (3.30484, 0), (18.61856, 0), (16.35184, 0), (7.65236, 0), (18.02895, 0), (9.79458, 0), (16.7274, 0), (8.84453, 0), (13.05709, 0), (10.91447, 0), (8.40171, 0), (16.95211, 0), (11.82194, 0), (19.87978, 0), (12.88455, 0), (-0.00947, 0), (12.28109, 0), (6.96462, 0), (13.75282, 0), (14.39141, 0), (11.07193, 0), (12.88039, 0), (11.38253, 0), (21.02707, 0), (7.51955, 0), (6.31984, 0), (15.6543, 0), (14.80315, 0), (8.38024, 0), (21.7516, 0), (14.31336, 0), (15.04703, 0), (5.73787, 0), (13.16911, 0), (12.40695, 0), (9.88968, 0), (8.46703, 0), (8.70637, 0), (8.03551, 0), (5.9757, 0), (12.22951, 0), (3.14736, 0), (10.51266, 0), (18.593, 0), (10.82213, 0), (7.14216, 0), (6.81154, 0), (-0.6486, 0), (20.56136, 0), (11.35367, 0), (11.38205, 0), (17.14, 0), (14.91215, 0), (15.50207, 0), (5.93162, 0), (3.74869, 0), (14.11532, 0), (7.38954, 0), (5.45764, 0), (18.33733, 0), (9.91923, 0), (2.38991, 0), (14.16756, 0), (2.39791, 0), (6.92586, 0), (5.32474, 0), (2.28812, 0), (5.71718, 0), (5.84197, 0), (2.76206, 0), (19.05928, 0), (11.51788, 0), (6.56648, 0), (3.35735, 0), (7.55948, 0), (19.99908, 0), (13.00634, 0), (18.36886, 0), (11.14675, 0), (16.72931, 0), (12.50106, 0), (6.00605, 0), (23.06653, 0), (5.39694, 0), (9.53167, 0), (12.76944, 0), (7.20604, 0), (13.25391, 0), (13.7341, 0), (10.85292, 0), (-7.75835, 0), (10.29728, 0), (13.70099, 0), (10.17959, 0), (9.98399, 0), (12.69389, 0), (-0.28848, 0), (-2.18319, 0), (13.36378, 0), (10.09232, 0), (5.49489, 0), (5.46156, 0), (0.94225, 0), (12.79205, 0), (10.09593, 0), (6.06218, 0), (0.89463, 0), (11.88986, 0), (10.79733, 0), (1.51371, 0), (2.20967, 0), (15.45732, 0), (16.5262, 0), (5.99724, 0), (8.3613, 0), (15.68183, 0), (15.32117, 0), (14.15674, 0), (6.64553, 0), (4.20777, 0), (-0.10521, 0), (-0.88169, 0), (1.85913, 0), (9.73673, 0), (0.30926, 0), (6.17559, 0), (11.76602, 0), (5.68385, 0), (14.57088, 0), (12.81509, 0), (9.85682, 0), (12.06376, 0), (6.08874, 0), (11.63921, 0), (14.86722, 0), (10.41035, 0), (2.93794, 0), (12.21841, 0), (0.23804, 0), (3.14845, 0), (7.29748, 0), (3.06134, 0), (13.77684, 0), (16.21992, 0), (5.33511, 0), (9.68959, 0), (9.44169, 0), (18.08012, 0), (4.04224, 0), (8.77918, 0), (10.18324, 0), (9.38914, 0), (11.76995, 0), (14.19963, 0), (6.88817, 0), (16.56123, 0), (15.39885, 0), (5.21241, 0), (4.44408, 0), (17.87587, 0), (12.53337, 0), (13.60916, 0), (6.60104, 0), (7.35453, 0), (18.61572, 0), (6.10437, 0), (13.08682, 0), (12.15404, 0), (4.90789, 0), (2.13353, 0), (12.49593, 0), (11.93056, 0), (13.29408, 0), (5.70038, 0), (8.40271, 0), (5.19456, 0), (-5.51028, 0), (14.0329, 0), (10.38365, 0), (6.56812, 0), (4.21129, 0), (9.7157, 0), (9.88553, 0), (13.45346, 0), (4.97752, 0), (12.77595, 0), (8.56465, 0), (4.27703, 0), (18.12502, 0), (12.45735, 0), (12.42912, 0), (12.08125, 0), (10.85779, 0), (4.36013, 0), (11.85062, 0), (8.47776, 0), (9.60822, 0), (11.3069, 0), (14.25525, 0), (1.55168, 0), (14.57782, 0), (7.84786, 0), (9.87774, 0), (14.75575, 0), (3.68774, 0), (9.37667, 0), (20.28676, 0), (12.10027, 0), (8.01819, 0), (18.78158, 0), (20.85402, 0), (18.98069, 
0), (16.1429, 0), (9.24047, 0), (14.12487, 0), (10.18841, 0), (-3.04478, 0), (5.7552, 0), (9.30376, 0), (11.42837, 0), (6.02364, 0), (8.86984, 0), (10.91177, 0), (10.04418, 0), (18.10774, 0), (7.49384, 0), (9.11556, 0), (9.7051, 0), (5.23268, 0), (9.04647, 0), (8.81547, 0), (2.65098, 0), (-2.69857, 1), (15.80943, 1), (7.31555, 1), (3.96517, 1), (4.77809, 1), (9.6472, 1), (-26.41717, 1), (-10.85635, 1), (-1.4376, 1), (-0.96308, 1), (2.84315, 1), (5.79467, 1), (-3.06091, 1), (-14.62902, 1), (22.08022, 1), (-2.11982, 1), (-4.84824, 1), (-10.50447, 1), (2.4891, 1), (9.90324, 1), (-22.66866, 1), (-0.97103, 1), (-16.57608, 1), (-3.78749, 1), (25.84511, 1), (5.30797, 1), (-18.19466, 1), (11.72708, 1), (0.2891, 1), (-9.83474, 1), (6.69942, 1), (18.09604, 1), (18.52651, 1), (1.38201, 1), (7.64615, 1), (17.66598, 1), (-2.44141, 1), (-9.01598, 1), (27.69142, 1), (4.06946, 1), (-15.0077, 1), (-10.49648, 1), (-4.88322, 1), (-25.09805, 1), (-4.64024, 1), (20.94434, 1), (24.12126, 1), (-14.10962, 1), (10.6512, 1), (14.50687, 1), (-19.88081, 1), (-11.55271, 1), (13.16921, 1), (16.63864, 1), (-24.08114, 1), (-9.09949, 1), (-10.54702, 1), (0.20813, 1), (8.19066, 1), (-2.70523, 1), (-0.23954, 1), (7.19398, 1), (-7.1618, 1), (-7.44322, 1), (-17.92031, 1), (-1.58146, 1), (9.18338, 1), (3.25838, 1), (-14.30234, 1), (1.84695, 1), (31.13794, 1), (-0.85067, 1), (19.02787, 1), (-3.09594, 1), (13.45584, 1), (-5.48104, 1), (-22.74928, 1), (-8.03697, 1), (17.31143, 1), (-16.65231, 1), (-18.58713, 1), (-16.52641, 1), (14.95261, 1), (12.56762, 1), (15.00188, 1), (1.85858, 1), (2.1926, 1), (-2.4095, 1), (21.56873, 1), (3.35509, 1), (-4.98672, 1), (35.08603, 1), (-10.01602, 1), (-3.85153, 1), (-6.81974, 1), (19.56525, 1), (-9.35488, 1), (0.24268, 1), (-3.51488, 1), (-0.37066, 1), (24.20888, 1), (-11.73537, 1), (0.01282, 1), (0.03963, 1), (-9.65589, 1), (-0.37429, 1), (5.61255, 1), (0.49984, 1), (-10.15066, 1), (-14.54314, 1), (16.56889, 1), (-7.73873, 1), (-3.76422, 1), (1.40722, 1), (2.28818, 1), (-13.12643, 1), (5.17082, 1), (4.79089, 1), (-17.42643, 1), (8.72548, 1), (-3.70285, 1), (16.77893, 1), (13.382, 1), (19.98418, 1), (0.00483, 1), (-4.75951, 1), (2.35391, 1), (21.65809, 1), (-9.2714, 1), (-18.38253, 1), (7.23097, 1), (14.97927, 1), (-4.02197, 1), (-29.8189, 1), (-12.8554, 1), (-7.60124, 1), (-14.90158, 1), (-3.31486, 1), (31.38144, 1), (-8.61288, 1), (15.31895, 1), (-10.19488, 1), (13.796, 1), (-0.32912, 1), (-0.0684, 1), (-30.06834, 1), (24.93912, 1), (-3.26506, 1), (-8.29751, 1), (-5.39189, 1), (-25.08603, 1), (-1.45318, 1), (16.72724, 1), (-3.38467, 1), (-26.00478, 1), (7.28369, 1), (16.96226, 1), (16.5858, 1), (10.46583, 1), (3.84345, 1), (-2.99382, 1), (1.42078, 1), (-11.0123, 1), (2.09909, 1), (1.21064, 1), (15.36079, 1), (-21.61349, 1), (22.7726, 1), (10.50512, 1), (-6.95825, 1), (9.20036, 1), (15.66902, 1), (3.28098, 1), (-9.05692, 1), (0.32882, 1), (-1.64934, 1), (-4.81406, 1), (-5.06006, 1), (19.97493, 1), (2.88646, 1), (-0.34552, 1), (7.55186, 1), (-22.96115, 1), (31.29166, 1), (6.18798, 1), (-2.52715, 1), (-11.58799, 1), (14.13596, 1), (13.45069, 1), (12.15179, 1), (3.44491, 1), (-8.78006, 1), (18.32087, 1), (11.91757, 1), (-2.00179, 1), (10.88411, 1), (9.09327, 1), (6.62484, 1), (8.87178, 1), (11.52254, 1), (-14.15988, 1), (-17.19515, 1), (14.03089, 1), (-2.4095, 1), (-16.83575, 1), (2.71469, 1), (4.84351, 1), (-1.17651, 1), (-3.37529, 1), (-19.92137, 1), (4.48952, 1), (-12.4906, 1), (-5.65277, 1), (8.50819, 1), (-19.61261, 1), (12.54156, 1), (11.06784, 1), (-12.59285, 1), (3.43683, 1), (-3.00325, 
1), (12.49082, 1), (7.20955, 1), (17.6547, 1), (15.8619, 1), (24.3048, 1), (-8.05434, 1), (-6.06901, 1), (-15.69515, 1), (-11.13917, 1), (-3.90757, 1), (-2.57038, 1), (5.14065, 1), (17.8497, 1), (-8.64665, 1), (-18.68331, 1), (5.8567, 1), (-20.93884, 1), (4.40583, 1), (14.35985, 1), (4.18134, 1), (4.3635, 1), (9.35428, 1), (2.8908, 1), (16.01017, 1), (-1.48499, 1), (-9.97949, 1), (1.03055, 1), (-2.79697, 1), (6.85977, 1), (4.73213, 1), (2.7815, 1), (-2.46866, 1), (18.39425, 1), (-0.80378, 1), (-0.22982, 1), (-16.11608, 1), (3.0862, 1), (3.20779, 1), (10.50146, 1), (-0.21305, 1), (11.21012, 1), (-0.99825, 1), (18.39633, 1), (-3.39003, 1), (-0.64411, 1), (-1.39932, 1), (15.45319, 1), (-0.66044, 1), (-15.2223, 1), (-34.39907, 1), (-3.57836, 1), (16.82828, 1), (1.66624, 1), (15.43475, 1), (8.17776, 1), (5.50486, 1), (10.43082, 1), (-6.63332, 1), (2.28008, 1), (16.37203, 1), (5.16313, 1), (-8.85281, 1), (13.26692, 1), (-7.46842, 1), (8.43091, 1), (-13.18172, 1), (-0.72401, 1), (22.3881, 1), (10.65448, 1), (2.81289, 1), (10.92405, 1), (-8.95358, 1), (19.80653, 1), (-12.86527, 1), (5.38826, 1), (-6.83501, 1), (-15.7647, 1), (-27.67412, 1), (8.6499, 1), (-4.89542, 1), (16.76167, 1), (12.84284, 1), (-17.27324, 1), (-4.18726, 1), (-14.62366, 1), (-5.49863, 1), (-16.22846, 1), (10.60329, 1), (6.46781, 1), (1.70458, 1), (10.77448, 1), (0.8463, 1), (13.0482, 1), (-4.36264, 1), (3.22647, 1), (2.38828, 1), (6.7946, 1), (-0.25254, 1), (1.2497, 1), (1.6544, 1), (4.1019, 1), (11.27839, 1), (-5.04127, 1), (18.11674, 1), (0.51231, 1), (-0.51029, 1), (13.52556, 1), (16.10171, 1), (5.68197, 1), (-2.85904, 1), (-8.89167, 1), (6.24489, 1), (10.85319, 1), (-0.39816, 1), (3.87079, 1), (-3.1867, 1), (1.55322, 1), (16.86779, 1), (-14.60321, 1), (-1.81952, 1), (-3.11624, 1), (1.24193, 1), (10.18179, 1), (4.69796, 1), (0.69032, 1), (11.7723, 1), (7.62896, 1), (9.89741, 1), (9.11484, 1), (-3.84676, 1), (-0.4777, 1), (0.95958, 1), (-7.95056, 1), (-10.97474, 1), (-6.54861, 1), (34.74933, 1), (27.39463, 1), (4.18299, 1), (6.02476, 1), (-1.99397, 1), (1.26478, 1), (23.37106, 1), (10.49682, 1), (-11.04354, 1), (-12.22284, 1), (-9.87635, 1), (28.90511, 1), (6.77613, 1), (0.55352, 1), (0.37031, 1), (7.1418, 1), (3.24897, 1), (-1.60918, 1), (3.1675, 1), (-17.97072, 1), (-5.61743, 1), (14.1422, 1), (14.87695, 1), (-4.65961, 1), (-0.99174, 1), (-2.96623, 1), (-9.02263, 1), (-17.2088, 1), (2.78608, 1), (6.74239, 1), (4.8524, 1), (7.46731, 1), (1.04894, 1), (-12.8023, 1), (-17.18188, 1), (-5.08801, 1), (22.13942, 1), (-0.36384, 1), (17.80564, 1), (7.67504, 1), (1.59779, 1), (4.10942, 1), (0.61074, 1), (-14.40767, 1), (10.59906, 1), (16.57017, 1), (-15.17526, 1), (-6.98549, 1), (-0.64548, 1), (3.23756, 1), (14.65504, 1), (4.583, 1), (12.72378, 1), (5.26547, 1), (0.81781, 1), (9.38273, 1), (10.37636, 1), (10.70325, 1), (-0.83043, 1), (-7.53149, 1), (-9.09147, 1), (-19.51381, 1), (-28.44508, 1), (6.44392, 1), (11.10201, 1), (-2.86184, 1), (8.30673, 1), (8.8797, 1), (10.68053, 1), (15.62919, 1), (8.00579, 1), (6.4651, 1), (-4.50029, 1), (18.04514, 1), (11.12996, 1), (-5.14007, 1), (9.43857, 1), (3.13476, 1), (4.9772, 1), (-17.45782, 1), (0.05552, 1), (-1.90283, 1), (2.67908, 1), (-2.62243, 1), (-3.22767, 1), (-8.70222, 1), (-23.11605, 1), (21.6757, 1), (12.70076, 1), (4.4322, 1), (11.69344, 1), (9.18052, 1), (-2.2549, 1), (-2.15615, 1), (20.29765, 1), (-0.29536, 1), (15.50109, 1), (8.79187, 1), (5.11533, 1), (-20.44436, 1), (-3.00909, 1), (-4.48291, 1), (21.84462, 1), (1.94225, 1), (-2.81908, 1), (17.19418, 1), (-9.33528, 1), 
(-0.17346, 1), (0.03958, 1), (-35.17786, 1), (8.36887, 1), (-9.02292, 1), (-10.98804, 1), (0.29335, 1), (4.29634, 1), (3.87718, 1), (-9.08532, 1), (7.13922, 1), (-7.62463, 1), (-10.5666, 1), (4.68165, 1), (-3.30172, 1), (13.04852, 1), (13.45616, 1), (2.41043, 1), (-0.36501, 1), (-15.67383, 1), (17.92217, 1), (8.42106, 1), (3.22063, 1), (-7.31753, 1), (21.99596, 1), (-36.8273, 1), (-20.46391, 1), (5.74179, 1), (-15.83178, 1), (14.90454, 1), (-8.84645, 1), (3.72036, 1), (4.6877, 1), (16.35418, 1), (3.15441, 1), (2.39907, 1), (-17.58664, 1), (-13.18269, 1); + +SELECT +roundBankers(kolmogorovSmirnovTest(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('two-sided')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('less')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('greater')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('two-sided','auto')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('less','auto')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('greater','auto')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('two-sided','exact')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('less','exact')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('greater','exact')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('two-sided','asymp')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('less','asymp')(left, right).2, 6), +roundBankers(kolmogorovSmirnovTest('greater','asymp')(left, right).2, 6) , +roundBankers(kolmogorovSmirnovTest(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('two-sided')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('less')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('greater')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('two-sided','auto')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('less','auto')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('greater','auto')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('two-sided','exact')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('less','exact')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('greater','exact')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('two-sided','asymp')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('less','asymp')(left, right).1, 6), +roundBankers(kolmogorovSmirnovTest('greater','asymp')(left, right).1, 6) +from kstest; + +DROP TABLE IF EXISTS kstest; diff --git a/tests/queries/0_stateless/02706_kolmogorov_smirnov_test_scipy.python b/tests/queries/0_stateless/02706_kolmogorov_smirnov_test_scipy.python new file mode 100644 index 00000000000..01f245e0cf0 --- /dev/null +++ b/tests/queries/0_stateless/02706_kolmogorov_smirnov_test_scipy.python @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 +import os +import sys +from scipy import stats +import pandas as pd +import numpy as np + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) + +from pure_http_client import ClickHouseClient + + +def test_and_check(name, a, b, t_stat, p_value, precision=1e-2): + client = ClickHouseClient() + client.query("DROP TABLE IF EXISTS ks_test;") + client.query("CREATE TABLE ks_test (left Float64, right UInt8) ENGINE = Memory;") + client.query( + "INSERT INTO ks_test VALUES {};".format( + ", ".join(["({},{})".format(i, 0) for i in a]) + ) + ) + client.query( + "INSERT INTO ks_test VALUES {};".format( + ", ".join(["({},{})".format(j, 1) for j in b]) + ) + ) + real = client.query_return_df( + 
"SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name) + + "roundBankers({}(left, right).2, 16) as p_value ".format(name) + + "FROM ks_test FORMAT TabSeparatedWithNames;" + ) + real_t_stat = real["t_stat"][0] + real_p_value = real["p_value"][0] + assert ( + abs(real_t_stat - np.float64(t_stat)) < precision + ), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat) + assert ( + abs(real_p_value - np.float64(p_value)) < precision + ), "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value) + client.query("DROP TABLE IF EXISTS ks_test;") + + +def test_ks_all_alternatives(rvs1, rvs2): + s, p = stats.ks_2samp(rvs1, rvs2) + test_and_check("kolmogorovSmirnovTest", rvs1, rvs2, s, p) + + s, p = stats.ks_2samp(rvs1, rvs2, alternative="two-sided") + test_and_check("kolmogorovSmirnovTest('two-sided')", rvs1, rvs2, s, p) + + s, p = stats.ks_2samp(rvs1, rvs2, alternative="greater", method="auto") + test_and_check("kolmogorovSmirnovTest('greater', 'auto')", rvs1, rvs2, s, p) + + s, p = stats.ks_2samp(rvs1, rvs2, alternative="less", method="exact") + test_and_check("kolmogorovSmirnovTest('less', 'exact')", rvs1, rvs2, s, p) + + if max(len(rvs1), len(rvs2)) > 10000: + s, p = stats.ks_2samp(rvs1, rvs2, alternative="two-sided", method="asymp") + test_and_check("kolmogorovSmirnovTest('two-sided', 'asymp')", rvs1, rvs2, s, p) + s, p = stats.ks_2samp(rvs1, rvs2, alternative="greater", method="asymp") + test_and_check("kolmogorovSmirnovTest('greater', 'asymp')", rvs1, rvs2, s, p) + + +def test_kolmogorov_smirnov(): + rvs1 = np.round(stats.norm.rvs(loc=1, scale=5, size=100), 2) + rvs2 = np.round(stats.norm.rvs(loc=1.5, scale=5, size=200), 2) + test_ks_all_alternatives(rvs1, rvs2) + + rvs1 = np.round(stats.norm.rvs(loc=13, scale=1, size=100), 2) + rvs2 = np.round(stats.norm.rvs(loc=1.52, scale=9, size=100), 2) + test_ks_all_alternatives(rvs1, rvs2) + + rvs1 = np.round(stats.norm.rvs(loc=1, scale=5, size=100), 2) + rvs2 = np.round(stats.norm.rvs(loc=11.5, scale=50, size=1000), 2) + test_ks_all_alternatives(rvs1, rvs2) + + rvs1 = np.round(stats.norm.rvs(loc=1, scale=5, size=11000), 2) + rvs2 = np.round(stats.norm.rvs(loc=3.5, scale=5.5, size=11000), 2) + test_ks_all_alternatives(rvs1, rvs2) + + +if __name__ == "__main__": + test_kolmogorov_smirnov() + print("Ok.") diff --git a/tests/queries/0_stateless/02706_kolmogorov_smirnov_test_scipy.reference b/tests/queries/0_stateless/02706_kolmogorov_smirnov_test_scipy.reference new file mode 100644 index 00000000000..587579af915 --- /dev/null +++ b/tests/queries/0_stateless/02706_kolmogorov_smirnov_test_scipy.reference @@ -0,0 +1 @@ +Ok. diff --git a/tests/queries/0_stateless/02706_kolmogorov_smirnov_test_scipy.sh b/tests/queries/0_stateless/02706_kolmogorov_smirnov_test_scipy.sh new file mode 100755 index 00000000000..674495bbefa --- /dev/null +++ b/tests/queries/0_stateless/02706_kolmogorov_smirnov_test_scipy.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +# We should have correct env vars from shell_config.sh to run this test + +python3 "$CURDIR"/02706_kolmogorov_smirnov_test_scipy.python diff --git a/tests/queries/0_stateless/02706_show_columns.reference b/tests/queries/0_stateless/02706_show_columns.reference new file mode 100644 index 00000000000..9d7e8485e19 --- /dev/null +++ b/tests/queries/0_stateless/02706_show_columns.reference @@ -0,0 +1,47 @@ +--- Aliases of SHOW COLUMNS +int32 Nullable(Int32) 1 \N +str String 0 SOR \N +uint64 UInt64 0 PRI SOR \N +int32 Nullable(Int32) 1 \N +str String 0 SOR \N +uint64 UInt64 0 PRI SOR \N +--- EXTENDED +int32 Nullable(Int32) 1 \N +str String 0 SOR \N +uint64 UInt64 0 PRI SOR \N +--- FULL +int32 Nullable(Int32) 1 \N \N example comment +str String 0 SOR \N \N +uint64 UInt64 0 PRI SOR \N \N +--- LIKE +int32 Nullable(Int32) 1 \N +uint64 UInt64 0 PRI SOR \N +--- NOT LIKE +str String 0 SOR \N +--- ILIKE +int32 Nullable(Int32) 1 \N +uint64 UInt64 0 PRI SOR \N +--- NOT ILIKE +str String 0 SOR \N +--- WHERE +int32 Nullable(Int32) 1 \N +uint64 UInt64 0 PRI SOR \N +--- LIMIT +int32 Nullable(Int32) 1 \N +--- Check with weird table names +c String 0 PRI SOR \N +c String 0 PRI SOR \N +c String 0 PRI SOR \N +c String 0 PRI SOR \N +--- Original table +int32 Nullable(Int32) 1 \N +str String 0 SOR \N +uint64 UInt64 0 PRI SOR \N +--- Equally named table in other database +int32 Int32 0 \N +str String 0 \N +uint64 UInt64 0 PRI SOR \N +--- Short form +int32 Int32 0 \N +str String 0 \N +uint64 UInt64 0 PRI SOR \N diff --git a/tests/queries/0_stateless/02706_show_columns.sql b/tests/queries/0_stateless/02706_show_columns.sql new file mode 100644 index 00000000000..b1a907c5c71 --- /dev/null +++ b/tests/queries/0_stateless/02706_show_columns.sql @@ -0,0 +1,92 @@ +-- Tags: no-parallel +-- no-parallel: creates a custom database schema and expects to use it exclusively + +-- Create a test table and verify that the output of SHOW COLUMNS is sane. +-- The matching of actual/expected results relies on the fact that the output of SHOW COLUMNS is sorted. +DROP TABLE IF EXISTS tab; +CREATE TABLE tab +( + `uint64` UInt64, + `int32` Nullable(Int32) COMMENT 'example comment', + `str` String, + INDEX idx str TYPE set(1000) +) +ENGINE = MergeTree +PRIMARY KEY (uint64) +ORDER BY (uint64, str); + +SELECT '--- Aliases of SHOW COLUMNS'; +SHOW COLUMNS FROM tab; +SHOW FIELDS FROM tab; + +SELECT '--- EXTENDED'; +SHOW EXTENDED COLUMNS FROM tab; + +SELECT '--- FULL'; +SHOW FULL COLUMNS FROM tab; + +SELECT '--- LIKE'; +SHOW COLUMNS FROM tab LIKE '%int%'; + +SELECT '--- NOT LIKE'; +SHOW COLUMNS FROM tab NOT LIKE '%int%'; + +SELECT '--- ILIKE'; +SHOW COLUMNS FROM tab ILIKE '%INT%'; + +SELECT '--- NOT ILIKE'; +SHOW COLUMNS FROM tab NOT ILIKE '%INT%'; + +SELECT '--- WHERE'; +SHOW COLUMNS FROM tab WHERE field LIKE '%int%'; + +SELECT '--- LIMIT'; +SHOW COLUMNS FROM tab LIMIT 1; + +SELECT '--- Check with weird table names'; + +DROP TABLE IF EXISTS `$4@^7`; +CREATE TABLE `$4@^7` (c String) ENGINE = MergeTree ORDER BY c; +SHOW COLUMNS FROM `$4@^7`; +DROP TABLE `$4@^7`; + +DROP TABLE IF EXISTS NULL; +CREATE TABLE NULL (c String) ENGINE = MergeTree ORDER BY c; +SHOW COLUMNS FROM NULL; +DROP TABLE NULL; + +DROP DATABASE IF EXISTS `'`; +CREATE DATABASE `'`; +CREATE TABLE `'`.`'` (c String) ENGINE = MergeTree ORDER BY c; +SHOW COLUMNS FROM `'` FROM `'`; +SHOW COLUMNS FROM `'`.`'`; -- abbreviated form +DROP TABLE `'`.`'`; +DROP DATABASE `'`; + +-- Create a table in a different database. 
Intentionally using the same table/column names as above so +-- we notice if something is buggy in the implementation of SHOW COLUMNS. +DROP DATABASE IF EXISTS database_123456789abcde; +CREATE DATABASE database_123456789abcde; -- pseudo-random database name + +DROP TABLE IF EXISTS database_123456789abcde.tab; +CREATE TABLE database_123456789abcde.tab +( + `uint64` UInt64, + `int32` Int32, + `str` String +) +ENGINE = MergeTree +ORDER BY uint64; + +SELECT '--- Original table'; +SHOW COLUMNS FROM tab; + +SELECT '--- Equally named table in other database'; +SHOW COLUMNS FROM tab FROM database_123456789abcde; + +SELECT '--- Short form'; +SHOW COLUMNS FROM database_123456789abcde.tab; + +DROP DATABASE database_123456789abcde; + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02707_analyzer_nested_lambdas_types.reference b/tests/queries/0_stateless/02707_analyzer_nested_lambdas_types.reference new file mode 100644 index 00000000000..9502c314200 --- /dev/null +++ b/tests/queries/0_stateless/02707_analyzer_nested_lambdas_types.reference @@ -0,0 +1,5 @@ +[0] [[0]] +[0] [[0]] +[0] [[1]] +[0] [[1]] +[0] [[1]] diff --git a/tests/queries/0_stateless/02707_analyzer_nested_lambdas_types.sql b/tests/queries/0_stateless/02707_analyzer_nested_lambdas_types.sql new file mode 100644 index 00000000000..f9258d61900 --- /dev/null +++ b/tests/queries/0_stateless/02707_analyzer_nested_lambdas_types.sql @@ -0,0 +1,24 @@ +SELECT + range(1), + arrayMap(x -> arrayMap(x -> x, range(x)), [1]) +SETTINGS allow_experimental_analyzer = 0; + +SELECT + range(1), + arrayMap(x -> arrayMap(x -> x, range(x)), [1]) +SETTINGS allow_experimental_analyzer = 1; + +SELECT + range(1), + arrayMap(x -> arrayMap(x -> 1, range(x)), [1]) +SETTINGS allow_experimental_analyzer = 0; + +SELECT + range(1), + arrayMap(x -> arrayMap(x -> 1, range(x)), [1]) +SETTINGS allow_experimental_analyzer = 1; + +SELECT + range(1), + arrayMap(x -> arrayMap(y -> 1, range(x)), [1]) +SETTINGS allow_experimental_analyzer = 1; diff --git a/tests/queries/0_stateless/02707_keeper_map_delete_update_strict.reference b/tests/queries/0_stateless/02707_keeper_map_delete_update_strict.reference new file mode 100644 index 00000000000..7ae6daf4b8d --- /dev/null +++ b/tests/queries/0_stateless/02707_keeper_map_delete_update_strict.reference @@ -0,0 +1,32 @@ +1 Some string 0 0 0 +2 Some other string 0 0 0 +3 random 0 0 0 +4 random2 0 0 0 +----------- +3 random 0 0 +4 random2 0 0 +----------- +3 random 0 0 +----------- +0 +----------- +1 String 10 0 +2 String 20 0 +3 String 30 0 +4 String 40 0 +----------- +1 String 10 0 +2 String 20 0 +3 Another 30 1 +4 Another 40 1 +----------- +1 String 10 0 +2 String 20 0 +3 Another 30 1 +4 Another 40 1 +----------- +1 String 102 1 +2 String 202 1 +3 Another 302 2 +4 Another 402 2 +----------- diff --git a/tests/queries/0_stateless/02707_keeper_map_delete_update_strict.sql b/tests/queries/0_stateless/02707_keeper_map_delete_update_strict.sql new file mode 100644 index 00000000000..cf59af2f388 --- /dev/null +++ b/tests/queries/0_stateless/02707_keeper_map_delete_update_strict.sql @@ -0,0 +1,44 @@ +-- Tags: no-ordinary-database, no-fasttest + +DROP TABLE IF EXISTS 02707_keepermap_delete_update; + +SET keeper_map_strict_mode = 1; + +CREATE TABLE 02707_keepermap_delete_update (key UInt64, value String, value2 UInt64) ENGINE=KeeperMap('/' || currentDatabase() || '/test02707_keepermap_delete_update') PRIMARY KEY(key); + +INSERT INTO 02707_keepermap_delete_update VALUES (1, 'Some string', 0), (2, 'Some other string', 0), (3, 'random', 0), 
(4, 'random2', 0); + +SELECT *, _version, _version FROM 02707_keepermap_delete_update ORDER BY key; +SELECT '-----------'; + +DELETE FROM 02707_keepermap_delete_update WHERE value LIKE 'Some%string'; + +SELECT *, _version FROM 02707_keepermap_delete_update ORDER BY key; +SELECT '-----------'; + +ALTER TABLE 02707_keepermap_delete_update DELETE WHERE key >= 4; + +SELECT *, _version FROM 02707_keepermap_delete_update ORDER BY key; +SELECT '-----------'; + +DELETE FROM 02707_keepermap_delete_update WHERE 1 = 1; +SELECT count() FROM 02707_keepermap_delete_update; +SELECT '-----------'; + +INSERT INTO 02707_keepermap_delete_update VALUES (1, 'String', 10), (2, 'String', 20), (3, 'String', 30), (4, 'String', 40); +SELECT *, _version FROM 02707_keepermap_delete_update ORDER BY key; +SELECT '-----------'; + +ALTER TABLE 02707_keepermap_delete_update UPDATE value = 'Another' WHERE key > 2; +SELECT *, _version FROM 02707_keepermap_delete_update ORDER BY key; +SELECT '-----------'; + +ALTER TABLE 02707_keepermap_delete_update UPDATE key = key * 10 WHERE 1 = 1; -- { serverError 36 } +SELECT *, _version FROM 02707_keepermap_delete_update ORDER BY key; +SELECT '-----------'; + +ALTER TABLE 02707_keepermap_delete_update UPDATE value2 = value2 * 10 + 2 WHERE value2 < 100; +SELECT *, _version FROM 02707_keepermap_delete_update ORDER BY key; +SELECT '-----------'; + +DROP TABLE IF EXISTS 02707_keepermap_delete_update; diff --git a/tests/queries/0_stateless/02707_protobuf_unnamed_tuple_as_nested_message.reference b/tests/queries/0_stateless/02707_protobuf_unnamed_tuple_as_nested_message.reference new file mode 100644 index 00000000000..a01aba9895b --- /dev/null +++ b/tests/queries/0_stateless/02707_protobuf_unnamed_tuple_as_nested_message.reference @@ -0,0 +1 @@ +(42,'Hello',[1,2,3]) diff --git a/tests/queries/0_stateless/02707_protobuf_unnamed_tuple_as_nested_message.sh b/tests/queries/0_stateless/02707_protobuf_unnamed_tuple_as_nested_message.sh new file mode 100755 index 00000000000..735117c6603 --- /dev/null +++ b/tests/queries/0_stateless/02707_protobuf_unnamed_tuple_as_nested_message.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh +SCHEMADIR=$CURDIR/format_schemas + +$CLICKHOUSE_LOCAL -q "select tuple(42, 'Hello', [1,2,3]) as x format Protobuf settings format_schema='$SCHEMADIR/02707_schema:Message'" | $CLICKHOUSE_LOCAL --input-format Protobuf --structure='x Tuple(UInt32, String, Array(UInt32))' -q "select * from table" --format_schema="$SCHEMADIR/02707_schema:Message" + diff --git a/tests/queries/0_stateless/02707_skip_index_with_in.reference b/tests/queries/0_stateless/02707_skip_index_with_in.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02707_skip_index_with_in.sql b/tests/queries/0_stateless/02707_skip_index_with_in.sql new file mode 100644 index 00000000000..4767619cee1 --- /dev/null +++ b/tests/queries/0_stateless/02707_skip_index_with_in.sql @@ -0,0 +1,20 @@ +DROP TABLE IF EXISTS t_skip_index_in; + +CREATE TABLE t_skip_index_in +( + a String, + b String, + c String, + INDEX idx_c c TYPE bloom_filter GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY (a, b); + +INSERT INTO t_skip_index_in VALUES ('a', 'b', 'c'); + +-- This query checks that the set is not being built if indexes are not used, +-- because with EXPLAIN the set will be built only for analysis of indexes. 
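+-- With use_skip_indexes = 0 the set from the subquery is never needed, so throwIf(1) is not evaluated and the first EXPLAIN succeeds; +-- with use_skip_indexes = 1 the index analysis has to build the set, which evaluates throwIf(1) and fails with FUNCTION_THROW_IF_VALUE_IS_NON_ZERO. 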
+EXPLAIN SELECT count() FROM t_skip_index_in WHERE c IN (SELECT throwIf(1)) SETTINGS use_skip_indexes = 0 FORMAT Null; +EXPLAIN SELECT count() FROM t_skip_index_in WHERE c IN (SELECT throwIf(1)) SETTINGS use_skip_indexes = 1; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + +DROP TABLE t_skip_index_in; diff --git a/tests/queries/0_stateless/02708_dot_product.reference b/tests/queries/0_stateless/02708_dot_product.reference new file mode 100644 index 00000000000..45e53871aa2 --- /dev/null +++ b/tests/queries/0_stateless/02708_dot_product.reference @@ -0,0 +1,14 @@ +3881.304 +3881.304 +3881.304 +376.5 +230 +0 +0 +Float64 +Float32 +Float64 +Float64 +UInt16 +UInt64 +Int64 diff --git a/tests/queries/0_stateless/02708_dot_product.sql b/tests/queries/0_stateless/02708_dot_product.sql new file mode 100644 index 00000000000..e94cb577bf4 --- /dev/null +++ b/tests/queries/0_stateless/02708_dot_product.sql @@ -0,0 +1,55 @@ +SELECT dotProduct([12, 2.22, 302], [1.32, 231.2, 11.1]); + +SELECT scalarProduct([12, 2.22, 302], [1.32, 231.2, 11.1]); + +SELECT arrayDotProduct([12, 2.22, 302], [1.32, 231.2, 11.1]); + +SELECT dotProduct([1.3, 2, 3, 4, 5], [222, 12, 5.3, 2, 8]); + +SELECT dotProduct([1, 1, 1, 1, 1], [222, 12, 0, -12, 8]); + +SELECT round(dotProduct([12345678901234567], [1]) - dotProduct(tuple(12345678901234567), tuple(1)), 2); + +SELECT round(dotProduct([-1, 2, 3.002], [2, 3.4, 4]) - dotProduct((-1, 2, 3.002), (2, 3.4, 4)), 2); + +DROP TABLE IF EXISTS product_fp64_fp64; +CREATE TABLE product_fp64_fp64 (x Array(Float64), y Array(Float64)) engine = MergeTree() order by x; +INSERT INTO TABLE product_fp64_fp64 (x, y) values ([1, 2], [3, 4]); +SELECT toTypeName(dotProduct(x, y)) from product_fp64_fp64; +DROP TABLE product_fp64_fp64; + +DROP TABLE IF EXISTS product_fp32_fp32; +CREATE TABLE product_fp32_fp32 (x Array(Float32), y Array(Float32)) engine = MergeTree() order by x; +INSERT INTO TABLE product_fp32_fp32 (x, y) values ([1, 2], [3, 4]); +SELECT toTypeName(dotProduct(x, y)) from product_fp32_fp32; +DROP TABLE product_fp32_fp32; + +DROP TABLE IF EXISTS product_fp32_fp64; +CREATE TABLE product_fp32_fp64 (x Array(Float32), y Array(Float64)) engine = MergeTree() order by x; +INSERT INTO TABLE product_fp32_fp64 (x, y) values ([1, 2], [3, 4]); +SELECT toTypeName(dotProduct(x, y)) from product_fp32_fp64; +DROP TABLE product_fp32_fp64; + +DROP TABLE IF EXISTS product_uint8_fp64; +CREATE TABLE product_uint8_fp64 (x Array(UInt8), y Array(Float64)) engine = MergeTree() order by x; +INSERT INTO TABLE product_uint8_fp64 (x, y) values ([1, 2], [3, 4]); +SELECT toTypeName(dotProduct(x, y)) from product_uint8_fp64; +DROP TABLE product_uint8_fp64; + +DROP TABLE IF EXISTS product_uint8_uint8; +CREATE TABLE product_uint8_uint8 (x Array(UInt8), y Array(UInt8)) engine = MergeTree() order by x; +INSERT INTO TABLE product_uint8_uint8 (x, y) values ([1, 2], [3, 4]); +SELECT toTypeName(dotProduct(x, y)) from product_uint8_uint8; +DROP TABLE product_uint8_uint8; + +DROP TABLE IF EXISTS product_uint64_uint64; +CREATE TABLE product_uint64_uint64 (x Array(UInt64), y Array(UInt64)) engine = MergeTree() order by x; +INSERT INTO TABLE product_uint64_uint64 (x, y) values ([1, 2], [3, 4]); +SELECT toTypeName(dotProduct(x, y)) from product_uint64_uint64; +DROP TABLE product_uint64_uint64; + +DROP TABLE IF EXISTS product_int32_uint64; +CREATE TABLE product_int32_uint64 (x Array(Int32), y Array(UInt64)) engine = MergeTree() order by x; +INSERT INTO TABLE product_int32_uint64 (x, y) values ([1, 2], [3, 4]); +SELECT 
toTypeName(dotProduct(x, y)) from product_int32_uint64; +DROP TABLE product_int32_uint64; diff --git a/tests/queries/0_stateless/02708_parallel_replicas_not_found_column.reference b/tests/queries/0_stateless/02708_parallel_replicas_not_found_column.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/02708_parallel_replicas_not_found_column.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/02708_parallel_replicas_not_found_column.sql b/tests/queries/0_stateless/02708_parallel_replicas_not_found_column.sql new file mode 100644 index 00000000000..ff7b53ce01f --- /dev/null +++ b/tests/queries/0_stateless/02708_parallel_replicas_not_found_column.sql @@ -0,0 +1,4 @@ +CREATE TABLE IF NOT EXISTS t_02708(x DateTime) ENGINE = MergeTree ORDER BY tuple(); +SET send_logs_level='error'; +SELECT count() FROM t_02708 SETTINGS allow_experimental_parallel_reading_from_replicas=1; +DROP TABLE t_02708; diff --git a/tests/queries/0_stateless/02709_generate_random_valid_decimals_and_bools.reference b/tests/queries/0_stateless/02709_generate_random_valid_decimals_and_bools.reference new file mode 100644 index 00000000000..49f0c2c1360 --- /dev/null +++ b/tests/queries/0_stateless/02709_generate_random_valid_decimals_and_bools.reference @@ -0,0 +1,25 @@ +32.077 +-421.374 +401.741 +19.925 +-53.055 +-741245.011359027 +-108826.704302334 +-885710.601317107 +200615.252943765 +571119.753066497 +-1810705720.77468465228544079155 +4423020457.03124833705321108749 +-7357115755.03893179428185573375 +-3596476105.34116783307269095642 +-882544888.46147190610682821046 +2998274172057708048.9272057168211482653001963259921827760845 +-59397657133291422934.9333984240607897191609175175045361436671 +39869165044835399916.6747138660882932389363995495451540473418 +-57185968744047146404.1855920695644202095475491426441451681562 +-45866526235163110880.4305861667709353032099072504822212330478 +1 +0 +1 +1 +1 diff --git a/tests/queries/0_stateless/02709_generate_random_valid_decimals_and_bools.sql b/tests/queries/0_stateless/02709_generate_random_valid_decimals_and_bools.sql new file mode 100644 index 00000000000..c290ce4833a --- /dev/null +++ b/tests/queries/0_stateless/02709_generate_random_valid_decimals_and_bools.sql @@ -0,0 +1,5 @@ +select toString(x)::Decimal(6, 3) from generateRandom('x Decimal(6, 3)', 42) limit 5; +select toString(x)::Decimal(15, 9) from generateRandom('x Decimal(15, 9)', 42) limit 5; +select toString(x)::Decimal(30, 20) from generateRandom('x Decimal(30, 20)', 42) limit 5; +select toString(x)::Decimal(60, 40) from generateRandom('x Decimal(60, 40)', 42) limit 5; +select reinterpret(x, 'UInt8') from generateRandom('x Bool', 42) limit 5; diff --git a/tests/queries/0_stateless/02709_storage_memory_compressed.reference b/tests/queries/0_stateless/02709_storage_memory_compressed.reference new file mode 100644 index 00000000000..9a665fab809 --- /dev/null +++ b/tests/queries/0_stateless/02709_storage_memory_compressed.reference @@ -0,0 +1,2 @@ +1 foo ['0','1','2','3','4'] {'k1':'v1'} +2 bar ['0','1','2','3','4'] {'k2':'v2'} diff --git a/tests/queries/0_stateless/02709_storage_memory_compressed.sql b/tests/queries/0_stateless/02709_storage_memory_compressed.sql new file mode 100644 index 00000000000..0e12b47998c --- /dev/null +++ b/tests/queries/0_stateless/02709_storage_memory_compressed.sql @@ -0,0 +1,11 @@ +DROP TABLE IF EXISTS t_memory_compressed; + +CREATE TABLE t_memory_compressed (id UInt64, s String, arr Array(LowCardinality(String)), m Map(String, 
String)) +ENGINE = Memory SETTINGS compress = 1; + +INSERT INTO t_memory_compressed VALUES (1, 'foo', range(5), map('k1', 'v1')); +INSERT INTO t_memory_compressed VALUES (2, 'bar', range(5), map('k2', 'v2')); + +SELECT * FROM t_memory_compressed ORDER BY id; + +DROP TABLE t_memory_compressed; diff --git a/tests/queries/0_stateless/02710_aggregation_nested_map_ip_uuid.reference b/tests/queries/0_stateless/02710_aggregation_nested_map_ip_uuid.reference new file mode 100644 index 00000000000..c5b787f3dd6 --- /dev/null +++ b/tests/queries/0_stateless/02710_aggregation_nested_map_ip_uuid.reference @@ -0,0 +1,3 @@ +1 ['1.2.3.4'] [5] ['::1'] [7] ['00130949-0cd4-4c3d-84c4-cc421eff480f'] [9] +1 ['1.2.3.4','2.3.4.5'] [37,13] ['::1','::2'] [41,14] ['00000000-0cd4-4c3d-84c4-cc421eff480f','00130949-0cd4-4c3d-84c4-cc421eff480f'] [39,21] +2 ['1.2.3.4','2.3.4.5'] [26,16] ['::1','::2'] [25,20] ['00130949-0cd4-4c3d-84c4-cc421eff480f'] [48] diff --git a/tests/queries/0_stateless/02710_aggregation_nested_map_ip_uuid.sql b/tests/queries/0_stateless/02710_aggregation_nested_map_ip_uuid.sql new file mode 100644 index 00000000000..456e8723d08 --- /dev/null +++ b/tests/queries/0_stateless/02710_aggregation_nested_map_ip_uuid.sql @@ -0,0 +1,32 @@ +DROP TABLE IF EXISTS summing_table; +CREATE TABLE summing_table +( + id UInt32, + `ip4Map.value` Array(IPv4), `ip4Map.total` Array(UInt32), + `ip6Map.value` Array(IPv6), `ip6Map.total` Array(UInt32), + `uuidMap.value` Array(UUID), `uuidMap.total` Array(UInt32) +) ENGINE = SummingMergeTree ORDER BY id; + +INSERT INTO summing_table(id, ip4Map.value, ip4Map.total, ip6Map.value, ip6Map.total, uuidMap.value, uuidMap.total) + values (1, ['1.2.3.4'], [1], ['::1'], [2], ['00130949-0cd4-4c3d-84c4-cc421eff480f'], [3]); +INSERT INTO summing_table(id, ip4Map.value, ip4Map.total, ip6Map.value, ip6Map.total, uuidMap.value, uuidMap.total) + values(1, ['1.2.3.4'], [4], ['::1'], [5], ['00130949-0cd4-4c3d-84c4-cc421eff480f'], [6]); +OPTIMIZE TABLE summing_table FINAL; +SELECT * FROM summing_table ORDER BY id, ip4Map.value, ip4Map.total, ip6Map.value, ip6Map.total, uuidMap.value, uuidMap.total; + +INSERT INTO summing_table(id, ip4Map.value, ip4Map.total, ip6Map.value, ip6Map.total, uuidMap.value, uuidMap.total) + values(2, ['1.2.3.4'], [7], ['::1'], [8], ['00130949-0cd4-4c3d-84c4-cc421eff480f'], [9]); +INSERT INTO summing_table(id, ip4Map.value, ip4Map.total, ip6Map.value, ip6Map.total, uuidMap.value, uuidMap.total) + values(1, ['1.2.3.4'], [10], ['::1'], [11], ['00130949-0cd4-4c3d-84c4-cc421eff480f'], [12]); +INSERT INTO summing_table(id, ip4Map.value, ip4Map.total, ip6Map.value, ip6Map.total, uuidMap.value, uuidMap.total) + values(1, ['2.3.4.5'], [13], ['::2'], [14], ['00000000-0cd4-4c3d-84c4-cc421eff480f'], [15]); +INSERT INTO summing_table(id, ip4Map.value, ip4Map.total, ip6Map.value, ip6Map.total, uuidMap.value, uuidMap.total) + values(2, ['2.3.4.5'], [16], ['::1'], [17], ['00130949-0cd4-4c3d-84c4-cc421eff480f'], [18]); +INSERT INTO summing_table(id, ip4Map.value, ip4Map.total, ip6Map.value, ip6Map.total, uuidMap.value, uuidMap.total) + values(2, ['1.2.3.4'], [19], ['::2'], [20], ['00130949-0cd4-4c3d-84c4-cc421eff480f'], [21]); +INSERT INTO summing_table(id, ip4Map.value, ip4Map.total, ip6Map.value, ip6Map.total, uuidMap.value, uuidMap.total) + values(1, ['1.2.3.4'], [22], ['::1'], [23], ['00000000-0cd4-4c3d-84c4-cc421eff480f'], [24]); +OPTIMIZE TABLE summing_table FINAL; +SELECT * FROM summing_table ORDER BY id, ip4Map.value, ip4Map.total, ip6Map.value, ip6Map.total, uuidMap.value, 
uuidMap.total; + +DROP TABLE summing_table; diff --git a/tests/queries/0_stateless/02710_allow_suspicious_indices.reference b/tests/queries/0_stateless/02710_allow_suspicious_indices.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02710_allow_suspicious_indices.sql b/tests/queries/0_stateless/02710_allow_suspicious_indices.sql new file mode 100644 index 00000000000..78d52f7bc72 --- /dev/null +++ b/tests/queries/0_stateless/02710_allow_suspicious_indices.sql @@ -0,0 +1,22 @@ +-- Check CREATE TABLE + +DROP TABLE IF EXISTS tbl; +CREATE TABLE tbl (id UInt32) ENGINE = MergeTree() ORDER BY (id + 1, id + 1); -- { serverError BAD_ARGUMENTS } +CREATE TABLE tbl (id UInt32) ENGINE = MergeTree() ORDER BY (id + 1, id + 1) SETTINGS allow_suspicious_indices = 1; + +DROP TABLE IF EXISTS tbl; +CREATE TABLE tbl (id UInt32, INDEX idx (id + 1, id + 1) TYPE minmax) ENGINE = MergeTree() ORDER BY id; -- { serverError BAD_ARGUMENTS } +CREATE TABLE tbl (id UInt32, INDEX idx (id + 1, id + 1) TYPE minmax) ENGINE = MergeTree() ORDER BY id SETTINGS allow_suspicious_indices = 1; + +-- Check ALTER TABLE + +DROP TABLE IF EXISTS tbl; +CREATE TABLE tbl (id1 UInt32) ENGINE = MergeTree() ORDER BY id1; +ALTER TABLE tbl ADD COLUMN `id2` UInt32, MODIFY ORDER BY (id1, id2, id2); -- { serverError BAD_ARGUMENTS } +ALTER TABLE tbl ADD COLUMN `id2` UInt32, MODIFY ORDER BY (id1, id2, id1); -- { serverError BAD_ARGUMENTS } +ALTER TABLE tbl ADD COLUMN `id2` UInt32, MODIFY ORDER BY (id1, id2, id2) SETTINGS allow_suspicious_indices = 1; + +DROP TABLE IF EXISTS tbl; +CREATE TABLE tbl (id UInt32) ENGINE = MergeTree() ORDER BY id; +ALTER TABLE tbl ADD INDEX idx (id+1, id, id+1) TYPE minmax; -- { serverError BAD_ARGUMENTS } +ALTER TABLE tbl ADD INDEX idx (id+1, id, id+1) TYPE minmax SETTINGS allow_suspicious_indices = 1; diff --git a/tests/queries/0_stateless/02710_date_diff_aliases.reference b/tests/queries/0_stateless/02710_date_diff_aliases.reference new file mode 100644 index 00000000000..1eeb5a3a2fa --- /dev/null +++ b/tests/queries/0_stateless/02710_date_diff_aliases.reference @@ -0,0 +1,5 @@ +DATE_DIFF +TIMESTAMP_DIFF +date_diff +timestampDiff +timestamp_diff diff --git a/tests/queries/0_stateless/02710_date_diff_aliases.sql b/tests/queries/0_stateless/02710_date_diff_aliases.sql new file mode 100644 index 00000000000..c6b31c44f95 --- /dev/null +++ b/tests/queries/0_stateless/02710_date_diff_aliases.sql @@ -0,0 +1,7 @@ +SELECT name FROM system.functions +WHERE name = 'date_diff' + OR name = 'DATE_DIFF' + OR name = 'timestampDiff' + OR name = 'timestamp_diff' + OR name = 'TIMESTAMP_DIFF' +ORDER BY name; diff --git a/tests/queries/0_stateless/02710_default_replicated_parameters.reference b/tests/queries/0_stateless/02710_default_replicated_parameters.reference new file mode 100644 index 00000000000..84c643316b1 --- /dev/null +++ b/tests/queries/0_stateless/02710_default_replicated_parameters.reference @@ -0,0 +1,2 @@ +CREATE DATABASE replicated_database_params\nENGINE = Replicated(\'some/path/default/replicated_database_params\', \'{shard}\', \'{replica}\') +CREATE DATABASE replicated_database_params\nENGINE = Replicated(\'some/path/default/replicated_database_params\', \'shard_1\', \'{replica}\') diff --git a/tests/queries/0_stateless/02710_default_replicated_parameters.sql b/tests/queries/0_stateless/02710_default_replicated_parameters.sql new file mode 100644 index 00000000000..279b7e81bdd --- /dev/null +++ b/tests/queries/0_stateless/02710_default_replicated_parameters.sql @@ -0,0 
+1,13 @@ +-- Tags: no-parallel + +SET allow_experimental_database_replicated=1; + +DROP DATABASE IF EXISTS replicated_database_params; + +CREATE DATABASE replicated_database_params ENGINE = Replicated('some/path/' || currentDatabase() || '/replicated_database_params'); +SHOW CREATE DATABASE replicated_database_params; +DROP DATABASE replicated_database_params; + +CREATE DATABASE replicated_database_params ENGINE = Replicated('some/path/' || currentDatabase() || '/replicated_database_params', 'shard_1'); +SHOW CREATE DATABASE replicated_database_params; +DROP DATABASE replicated_database_params; diff --git a/tests/queries/0_stateless/02710_protobuf_ipv4_date32.reference b/tests/queries/0_stateless/02710_protobuf_ipv4_date32.reference new file mode 100644 index 00000000000..11383548d90 --- /dev/null +++ b/tests/queries/0_stateless/02710_protobuf_ipv4_date32.reference @@ -0,0 +1,3 @@ +0.0.0.0 0.0.0.0 0.0.0.0 2020-01-01 2020-01-01 2020-01-01 +1.2.3.4 1.2.3.4 1.2.3.4 +255.255.255.255 255.255.255.255 255.255.255.255 diff --git a/tests/queries/0_stateless/02710_protobuf_ipv4_date32.sh b/tests/queries/0_stateless/02710_protobuf_ipv4_date32.sh new file mode 100755 index 00000000000..83657e590bb --- /dev/null +++ b/tests/queries/0_stateless/02710_protobuf_ipv4_date32.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +SCHEMADIR=$CURDIR/format_schemas + +$CLICKHOUSE_LOCAL -q "select '0.0.0.0'::IPv4 as ipv4, ipv4 as ipv4_bytes, ipv4 as ipv4_int64, '2020-01-01'::Date32 as date32, date32 as date32_bytes, date32 as date32_int64 format Protobuf settings format_schema = '$SCHEMADIR/02710_schema:Message'" | $CLICKHOUSE_LOCAL --input-format Protobuf --format_schema="$SCHEMADIR/02710_schema:Message" --structure="ipv4 IPv4, ipv4_bytes IPv4, ipv4_int64 IPv4, date32 Date32, date32_bytes Date32, date32_int64 Date32" -q "select * from table" + +$CLICKHOUSE_LOCAL -q "select '1.2.3.4'::IPv4 as ipv4, ipv4 as ipv4_bytes, ipv4 as ipv4_int64 format Protobuf settings format_schema = '$SCHEMADIR/02710_schema:Message'" | $CLICKHOUSE_LOCAL --input-format Protobuf --format_schema="$SCHEMADIR/02710_schema:Message" --structure="ipv4 IPv4, ipv4_bytes IPv4, ipv4_int64 IPv4" -q "select * from table" + +$CLICKHOUSE_LOCAL -q "select '255.255.255.255'::IPv4 as ipv4, ipv4 as ipv4_bytes, ipv4 as ipv4_int64 format Protobuf settings format_schema = '$SCHEMADIR/02710_schema:Message'" | $CLICKHOUSE_LOCAL --input-format Protobuf --format_schema="$SCHEMADIR/02710_schema:Message" --structure="ipv4 IPv4, ipv4_bytes IPv4, ipv4_int64 IPv4" -q "select * from table" + + + diff --git a/tests/queries/0_stateless/02710_show_table.reference b/tests/queries/0_stateless/02710_show_table.reference new file mode 100644 index 00000000000..7b12c079a9b --- /dev/null +++ b/tests/queries/0_stateless/02710_show_table.reference @@ -0,0 +1,5 @@ +CREATE TABLE default.t_2710_show_table\n(\n `n1` UInt32,\n `s` String\n)\nENGINE = Log +CREATE TABLE default.t_2710_show_table\n(\n `n1` UInt32,\n `s` String\n)\nENGINE = Log +CREATE TABLE default.t_2710_show_table\n(\n `n1` UInt32,\n `s` String\n)\nENGINE = Log +CREATE DATABASE t_2710_db\nENGINE = Atomic +CREATE DATABASE t_2710_db\nENGINE = Atomic diff --git a/tests/queries/0_stateless/02710_show_table.sql b/tests/queries/0_stateless/02710_show_table.sql new file mode 100644 index 00000000000..52682ce83da --- /dev/null +++ b/tests/queries/0_stateless/02710_show_table.sql @@ -0,0 
+1,16 @@ +-- Tags: no-parallel +DROP TABLE IF EXISTS t_2710_show_table; + +CREATE TABLE t_2710_show_table(n1 UInt32, s String) engine=Log; +SHOW TABLE t_2710_show_table; +SHOW CREATE TABLE t_2710_show_table; +SHOW CREATE t_2710_show_table; + +DROP TABLE t_2710_show_table; + +DROP DATABASE IF EXISTS t_2710_db; +CREATE DATABASE t_2710_db engine=Atomic; +SHOW DATABASE t_2710_db; +SHOW CREATE DATABASE t_2710_db; + +DROP DATABASE t_2710_db; diff --git a/tests/queries/0_stateless/02710_topk_with_empty_array.reference b/tests/queries/0_stateless/02710_topk_with_empty_array.reference new file mode 100644 index 00000000000..17212447ad8 --- /dev/null +++ b/tests/queries/0_stateless/02710_topk_with_empty_array.reference @@ -0,0 +1 @@ +[[]] diff --git a/tests/queries/0_stateless/02710_topk_with_empty_array.sql b/tests/queries/0_stateless/02710_topk_with_empty_array.sql new file mode 100644 index 00000000000..7de066e9ae4 --- /dev/null +++ b/tests/queries/0_stateless/02710_topk_with_empty_array.sql @@ -0,0 +1 @@ +SELECT topK(emptyArrayInt16()); diff --git a/tests/queries/0_stateless/02711_server_uuid_macro.reference b/tests/queries/0_stateless/02711_server_uuid_macro.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02711_server_uuid_macro.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02711_server_uuid_macro.sql b/tests/queries/0_stateless/02711_server_uuid_macro.sql new file mode 100644 index 00000000000..4f562ad36bf --- /dev/null +++ b/tests/queries/0_stateless/02711_server_uuid_macro.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS test; + +-- You can create a table with the {server_uuid} substituted. +CREATE TABLE test (x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test', 'replica-{server_uuid}') ORDER BY x; + +-- The server UUID is correctly substituted. +SELECT engine_full LIKE ('%replica-' || serverUUID()::String || '%') FROM system.tables WHERE database = currentDatabase() AND name = 'test'; + +-- An attempt to create a second table with the same UUID results in error. +CREATE TABLE test2 (x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test', 'replica-{server_uuid}') ORDER BY x; -- { serverError REPLICA_ALREADY_EXISTS } + +-- The macro {server_uuid} is special, not a configuration-type macro. It's normal that it is inaccessible with the getMacro function. 
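+-- (Reading the value via serverUUID(), as done in the check above, is the supported approach; the getMacro call below is expected to fail.)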
+SELECT getMacro('server_uuid'); -- { serverError NO_ELEMENTS_IN_CONFIG } + +DROP TABLE test SYNC; diff --git a/tests/queries/0_stateless/02711_soundex_function.reference b/tests/queries/0_stateless/02711_soundex_function.reference new file mode 100644 index 00000000000..1b843e030f7 --- /dev/null +++ b/tests/queries/0_stateless/02711_soundex_function.reference @@ -0,0 +1,27 @@ +0000 +0000 +J523 +A000 +F634 +F634 +J525 +J525 +J523 +M235 +M235 +S530 +S530 +--- +0000 +0000 +J523 +A000 +F634 +F634 +J525 +J525 +J523 +M235 +M235 +S530 +S530 diff --git a/tests/queries/0_stateless/02711_soundex_function.sql b/tests/queries/0_stateless/02711_soundex_function.sql new file mode 100644 index 00000000000..d2fe374b341 --- /dev/null +++ b/tests/queries/0_stateless/02711_soundex_function.sql @@ -0,0 +1,28 @@ +SELECT soundex(''); +SELECT soundex('12345'); +SELECT soundex('341Jons54326ton'); +SELECT soundex('A2222222'); +SELECT soundex('Fairdale'); +SELECT soundex('Faredale'); +SELECT soundex('Jon1s2o3n'); +SELECT soundex('Jonson'); +SELECT soundex('Jonston'); +SELECT soundex('M\acDonald22321'); +SELECT soundex('MacDonald'); +SELECT soundex('S3344mith0000'); +SELECT soundex('Smith'); + +SELECT '---'; + +-- same input strings but in a table +DROP TABLE IF EXISTS tab; +CREATE TABLE tab (col String) Engine=MergeTree ORDER BY col; +INSERT INTO tab VALUES ('') ('12345') ('341Jons54326ton') ('A2222222') ('Fairdale') ('Faredale') ('Jon1s2o3n') ('Jonson') ('Jonston') ('M\acDonald22321') ('MacDonald') ('S3344mith0000') ('Smith'); + +SELECT soundex(col) FROM tab; + +DROP TABLE tab; + +-- negative tests +SELECT soundex(toFixedString('Smith', 5)); -- { serverError ILLEGAL_COLUMN } +SELECT soundex(5); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } diff --git a/tests/queries/0_stateless/02711_trim_aliases.reference b/tests/queries/0_stateless/02711_trim_aliases.reference new file mode 100644 index 00000000000..fa0920cd079 --- /dev/null +++ b/tests/queries/0_stateless/02711_trim_aliases.reference @@ -0,0 +1,3 @@ +ltrim +rtrim +trim diff --git a/tests/queries/0_stateless/02711_trim_aliases.sql b/tests/queries/0_stateless/02711_trim_aliases.sql new file mode 100644 index 00000000000..d0d739805fd --- /dev/null +++ b/tests/queries/0_stateless/02711_trim_aliases.sql @@ -0,0 +1,5 @@ +SELECT name FROM system.functions +WHERE name = 'ltrim' + OR name = 'rtrim' + OR name = 'trim' +ORDER BY name; diff --git a/tests/queries/0_stateless/02712_bool_better_exception_message.reference b/tests/queries/0_stateless/02712_bool_better_exception_message.reference new file mode 100644 index 00000000000..aba60fe1061 --- /dev/null +++ b/tests/queries/0_stateless/02712_bool_better_exception_message.reference @@ -0,0 +1,5 @@ +true +false +1 +1 +1 diff --git a/tests/queries/0_stateless/02712_bool_better_exception_message.sh b/tests/queries/0_stateless/02712_bool_better_exception_message.sh new file mode 100755 index 00000000000..1da70d7025b --- /dev/null +++ b/tests/queries/0_stateless/02712_bool_better_exception_message.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_LOCAL <&1 | rg -Fc "'w' character" + SELECT * FROM format(JSONEachRow, 'x Bool', '{"x": wtf}'); +END + +$CLICKHOUSE_LOCAL <&1 | rg -Fc "expected 'false'" + SELECT * FROM format(JSONEachRow, 'x Bool', '{"x": ftw}'); +END + +$CLICKHOUSE_LOCAL <&1 | rg -Fc "'{' character" + SELECT * FROM format(JSONEachRow, 'x Bool', '{"x": {}}'); +END diff --git a/tests/queries/0_stateless/02713_array_low_cardinality_string.reference b/tests/queries/0_stateless/02713_array_low_cardinality_string.reference new file mode 100644 index 00000000000..aea0fd62732 --- /dev/null +++ b/tests/queries/0_stateless/02713_array_low_cardinality_string.reference @@ -0,0 +1,12 @@ +--- +tab idx bloom_filter +--- +Expression ((Projection + Before ORDER BY)) + Filter (WHERE) + ReadFromMergeTree (default.tab) + Indexes: + Skip + Name: idx + Description: bloom_filter GRANULARITY 1 + Parts: 1/1 + Granules: 1/1 diff --git a/tests/queries/0_stateless/02713_array_low_cardinality_string.sql b/tests/queries/0_stateless/02713_array_low_cardinality_string.sql new file mode 100644 index 00000000000..4ecd3bf17c1 --- /dev/null +++ b/tests/queries/0_stateless/02713_array_low_cardinality_string.sql @@ -0,0 +1,23 @@ +DROP TABLE IF EXISTS tab; + +CREATE TABLE tab +( + foo Array(LowCardinality(String)), + INDEX idx foo TYPE bloom_filter +) +ENGINE = MergeTree +PRIMARY KEY tuple(); + +INSERT INTO tab VALUES (['a', 'b']); + +SELECT '---'; + +SELECT table, name, type +FROM system.data_skipping_indices +WHERE database = currentDatabase() AND table = 'tab'; + +SELECT '---'; + +EXPLAIN indexes = 1 SELECT * FROM tab WHERE has(foo, 'b'); + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02713_create_user_substitutions.reference b/tests/queries/0_stateless/02713_create_user_substitutions.reference new file mode 100644 index 00000000000..f9b5cc495b5 --- /dev/null +++ b/tests/queries/0_stateless/02713_create_user_substitutions.reference @@ -0,0 +1,11 @@ +1 +2 +3 +4 +5 +6 +7 +8 +CREATE USER user9_02713 IDENTIFIED WITH ldap SERVER \'qwerty9\' +CREATE USER user10_02713 IDENTIFIED WITH kerberos REALM \'qwerty10\' +CREATE USER user11_02713 IDENTIFIED WITH ssl_certificate CN \'qwerty11\', \'qwerty12\' diff --git a/tests/queries/0_stateless/02713_create_user_substitutions.sh b/tests/queries/0_stateless/02713_create_user_substitutions.sh new file mode 100755 index 00000000000..42926335acb --- /dev/null +++ b/tests/queries/0_stateless/02713_create_user_substitutions.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP USER IF EXISTS user1_02713, user2_02713, user3_02713, user4_02713, user5_02713, user6_02713, user7_02713"; + +$CLICKHOUSE_CLIENT --param_password=qwerty1 -q "CREATE USER user1_02713 IDENTIFIED BY {password:String}"; +$CLICKHOUSE_CLIENT --param_password=qwerty2 -q "CREATE USER user2_02713 IDENTIFIED WITH PLAINTEXT_PASSWORD BY {password:String}"; +$CLICKHOUSE_CLIENT --param_password=qwerty3 -q "CREATE USER user3_02713 IDENTIFIED WITH SHA256_PASSWORD BY {password:String}"; +$CLICKHOUSE_CLIENT --param_password=qwerty4 -q "CREATE USER user4_02713 IDENTIFIED WITH DOUBLE_SHA1_PASSWORD BY {password:String}"; +$CLICKHOUSE_CLIENT --param_password=qwerty5 -q "CREATE USER user5_02713 IDENTIFIED WITH BCRYPT_PASSWORD BY {password:String}"; + +# Generated online +$CLICKHOUSE_CLIENT --param_hash=310cef2caff72c0224f38ca8e2141ca6012cd4da550c692573c25a917d9a75e6 \ + -q "CREATE USER user6_02713 IDENTIFIED WITH SHA256_HASH BY {hash:String}"; +# Generated with ClickHouse +$CLICKHOUSE_CLIENT --param_hash=5886A74C452575627522F3A80D8B9E239FD8955F \ + -q "CREATE USER user7_02713 IDENTIFIED WITH DOUBLE_SHA1_HASH BY {hash:String}"; +# Generated online +$CLICKHOUSE_CLIENT --param_hash=\$2a\$12\$wuohz0HFSBBNE8huN0Yx6.kmWrefiYVKeMp4gsuNoO1rOWwF2FXXC \ + -q "CREATE USER user8_02713 IDENTIFIED WITH BCRYPT_HASH BY {hash:String}"; + +$CLICKHOUSE_CLIENT --param_server=qwerty9 -q "CREATE USER user9_02713 IDENTIFIED WITH LDAP SERVER {server:String}"; +$CLICKHOUSE_CLIENT --param_realm=qwerty10 -q "CREATE USER user10_02713 IDENTIFIED WITH KERBEROS REALM {realm:String}"; +$CLICKHOUSE_CLIENT --param_cert1=qwerty11 --param_cert2=qwerty12 -q "CREATE USER user11_02713 IDENTIFIED WITH SSL_CERTIFICATE CN {cert1:String}, {cert2:String}"; + +$CLICKHOUSE_CLIENT --user=user1_02713 --password=qwerty1 -q "SELECT 1"; +$CLICKHOUSE_CLIENT --user=user2_02713 --password=qwerty2 -q "SELECT 2"; +$CLICKHOUSE_CLIENT --user=user3_02713 --password=qwerty3 -q "SELECT 3"; +$CLICKHOUSE_CLIENT --user=user4_02713 --password=qwerty4 -q "SELECT 4"; +$CLICKHOUSE_CLIENT --user=user5_02713 --password=qwerty5 -q "SELECT 5"; +$CLICKHOUSE_CLIENT --user=user6_02713 --password=qwerty6 -q "SELECT 6"; +$CLICKHOUSE_CLIENT --user=user7_02713 --password=qwerty7 -q "SELECT 7"; +$CLICKHOUSE_CLIENT --user=user8_02713 --password=qwerty8 -q "SELECT 8"; + +$CLICKHOUSE_CLIENT -q "SHOW CREATE USER user9_02713"; +$CLICKHOUSE_CLIENT -q "SHOW CREATE USER user10_02713"; +$CLICKHOUSE_CLIENT -q "SHOW CREATE USER user11_02713"; + +$CLICKHOUSE_CLIENT -q "DROP USER user1_02713, user2_02713, user3_02713, user4_02713, user5_02713, user6_02713, user7_02713, user8_02713, user9_02713, user10_02713, user11_02713"; diff --git a/tests/queries/0_stateless/02713_ip4_uint_compare.reference b/tests/queries/0_stateless/02713_ip4_uint_compare.reference new file mode 100644 index 00000000000..fdc2de3fbcb --- /dev/null +++ b/tests/queries/0_stateless/02713_ip4_uint_compare.reference @@ -0,0 +1 @@ +1 0 1 1 1 1 0 diff --git a/tests/queries/0_stateless/02713_ip4_uint_compare.sql b/tests/queries/0_stateless/02713_ip4_uint_compare.sql new file mode 100644 index 00000000000..ec8d6584329 --- /dev/null +++ b/tests/queries/0_stateless/02713_ip4_uint_compare.sql @@ -0,0 +1,9 @@ +WITH toIPv4('127.0.0.10') AS ip +SELECT + ip = 2130706442::UInt32, + ip = 0::UInt32, + ip < 2130706443::UInt32, + ip > 2130706441::UInt32, + ip <= 2130706442::UInt32, + ip >= 2130706442::UInt32, + ip != 2130706442::UInt32; diff --git 
a/tests/queries/0_stateless/02713_sequence_match_serialization_fix.reference b/tests/queries/0_stateless/02713_sequence_match_serialization_fix.reference new file mode 100644 index 00000000000..2a1c127e635 --- /dev/null +++ b/tests/queries/0_stateless/02713_sequence_match_serialization_fix.reference @@ -0,0 +1,3 @@ +serialized state is not used 1 +serialized state is used 1 +via Distributed 1 diff --git a/tests/queries/0_stateless/02713_sequence_match_serialization_fix.sql b/tests/queries/0_stateless/02713_sequence_match_serialization_fix.sql new file mode 100644 index 00000000000..3521cb8470f --- /dev/null +++ b/tests/queries/0_stateless/02713_sequence_match_serialization_fix.sql @@ -0,0 +1,36 @@ +DROP TABLE IF EXISTS 02713_seqt; +DROP TABLE IF EXISTS 02713_seqt_distr; + +SELECT + 'serialized state is not used', sequenceMatch('(?1)(?2)')(time, number_ = 1, number_ = 0) AS seq +FROM +( + SELECT + number AS time, + number % 2 AS number_ + FROM numbers_mt(100) +); + + +CREATE TABLE 02713_seqt +ENGINE = MergeTree +ORDER BY n AS +SELECT + sequenceMatchState('(?1)(?2)')(time, number_ = 1, number_ = 0) AS seq, + 1 AS n +FROM +( + SELECT + number AS time, + number % 2 AS number_ + FROM numbers_mt(100) +); + + +SELECT 'serialized state is used', sequenceMatchMerge('(?1)(?2)')(seq) AS seq +FROM 02713_seqt; + + +CREATE TABLE 02713_seqt_distr ( seq AggregateFunction(sequenceMatch('(?1)(?2)'), UInt64, UInt8, UInt8) , n UInt8) ENGINE = Distributed(test_shard_localhost, currentDatabase(), '02713_seqt'); + +SELECT 'via Distributed', sequenceMatchMerge('(?1)(?2)')(seq) AS seq FROM 02713_seqt_distr; diff --git a/tests/queries/0_stateless/02714_async_inserts_empty_data.reference b/tests/queries/0_stateless/02714_async_inserts_empty_data.reference new file mode 100644 index 00000000000..47ee41ddc0f --- /dev/null +++ b/tests/queries/0_stateless/02714_async_inserts_empty_data.reference @@ -0,0 +1,2 @@ +0 +Ok 0 diff --git a/tests/queries/0_stateless/02714_async_inserts_empty_data.sh b/tests/queries/0_stateless/02714_async_inserts_empty_data.sh new file mode 100755 index 00000000000..b2dc3298733 --- /dev/null +++ b/tests/queries/0_stateless/02714_async_inserts_empty_data.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1" + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_async_insert_empty_data" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_async_insert_empty_data (id UInt32) ENGINE = Memory" + +echo -n '' | ${CLICKHOUSE_CURL} -sS "$url&query=INSERT%20INTO%20t_async_insert_empty_data%20FORMAT%20JSONEachRow" --data-binary @- + +${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM t_async_insert_empty_data" +${CLICKHOUSE_CLIENT} -q "SELECT status, bytes FROM system.asynchronous_insert_log WHERE database = '$CLICKHOUSE_DATABASE' AND table = 't_async_insert_empty_data'" + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_async_insert_empty_data" diff --git a/tests/queries/0_stateless/02714_date_date32_in.reference b/tests/queries/0_stateless/02714_date_date32_in.reference new file mode 100644 index 00000000000..d9ff83f1949 --- /dev/null +++ b/tests/queries/0_stateless/02714_date_date32_in.reference @@ -0,0 +1,4 @@ +1 +1 +0 +0 diff --git a/tests/queries/0_stateless/02714_date_date32_in.sql b/tests/queries/0_stateless/02714_date_date32_in.sql new file mode 100644 index 00000000000..69a087eff6f --- /dev/null +++ b/tests/queries/0_stateless/02714_date_date32_in.sql @@ -0,0 +1,4 @@ +select toDate32('2020-01-01') in (toDate('2020-01-01')); +select toDate('2020-01-01') in (toDate32('2020-01-01')); +select toDate('2020-01-01') in 1::Int64; +select toDate32('2020-01-01') in 1::UInt64; diff --git a/tests/queries/0_stateless/02714_local_object_storage.reference b/tests/queries/0_stateless/02714_local_object_storage.reference new file mode 100644 index 00000000000..b3f28057554 --- /dev/null +++ b/tests/queries/0_stateless/02714_local_object_storage.reference @@ -0,0 +1,2 @@ +1 test +1 test diff --git a/tests/queries/0_stateless/02714_local_object_storage.sql b/tests/queries/0_stateless/02714_local_object_storage.sql new file mode 100644 index 00000000000..fa9025b8b6e --- /dev/null +++ b/tests/queries/0_stateless/02714_local_object_storage.sql @@ -0,0 +1,28 @@ +SET min_bytes_to_use_direct_io='1Gi'; -- It does not work (fixme) +SET local_filesystem_read_method='pread'; -- ui_uring local_fs_method does not work here (fixme) + +DROP TABLE IF EXISTS test; + +CREATE TABLE test (a Int32, b String) +ENGINE = MergeTree() ORDER BY tuple() +SETTINGS disk = disk( + type = 'local_blob_storage', + path = '/var/lib/clickhouse/disks/${CLICKHOUSE_TEST_UNIQUE_NAME}/'); + +INSERT INTO test SELECT 1, 'test'; +SELECT * FROM test; + +DROP TABLE test SYNC; + +CREATE TABLE test (a Int32, b String) +ENGINE = MergeTree() ORDER BY tuple() +SETTINGS disk = disk( + type = 'cache', + max_size = '10Mi', + path = '/var/lib/clickhouse/caches/${CLICKHOUSE_TEST_UNIQUE_NAME}/', + disk = disk(type='local_blob_storage', path='/var/lib/clickhouse/disks/${CLICKHOUSE_TEST_UNIQUE_NAME}/')); + +INSERT INTO test SELECT 1, 'test'; +SELECT * FROM test; + +DROP TABLE test SYNC; diff --git a/tests/queries/0_stateless/02714_read_bytes_aggregateFunction.reference b/tests/queries/0_stateless/02714_read_bytes_aggregateFunction.reference new file mode 100644 index 00000000000..d315d85a11e --- /dev/null +++ b/tests/queries/0_stateless/02714_read_bytes_aggregateFunction.reference @@ -0,0 +1,6 @@ +UInt64 1 8 +UInt64 10 80 +UInt64 1000 8000 +AggregateFunction(argMax, String, DateTime) 1 80 +AggregateFunction(argMax, String, DateTime) 10 800 +AggregateFunction(argMax, String, DateTime) 1000 80000 diff --git 
a/tests/queries/0_stateless/02714_read_bytes_aggregateFunction.sql b/tests/queries/0_stateless/02714_read_bytes_aggregateFunction.sql new file mode 100644 index 00000000000..26bc9ebe62b --- /dev/null +++ b/tests/queries/0_stateless/02714_read_bytes_aggregateFunction.sql @@ -0,0 +1,59 @@ +CREATE TABLE test (id UInt64, `amax` AggregateFunction(argMax, String, DateTime)) +ENGINE=MergeTree() +ORDER BY id +SETTINGS ratio_of_defaults_for_sparse_serialization=1 -- Sparse columns will take more bytes for a single row +AS + SELECT number, argMaxState(number::String, '2023-04-12 16:23:01'::DateTime) + FROM numbers(1) + GROUP BY number; + +SELECT sum(id) FROM test FORMAT Null; +SELECT argMaxMerge(amax) FROM test FORMAT Null; + +INSERT INTO test + SELECT number, argMaxState(number::String, '2023-04-12 16:23:01'::DateTime) + FROM numbers(9) + GROUP BY number; + +SELECT sum(id) FROM test FORMAT Null; +SELECT argMaxMerge(amax) FROM test FORMAT Null; + +INSERT INTO test +SELECT number, argMaxState(number::String, '2023-04-12 16:23:01'::DateTime) +FROM numbers(990) +GROUP BY number; + +SELECT sum(id) FROM test FORMAT Null; +SELECT argMaxMerge(amax) FROM test FORMAT Null; + +SYSTEM FLUSH LOGS; + +SELECT 'UInt64', + read_rows, + read_bytes +FROM system.query_log +WHERE + current_database = currentDatabase() AND + query = 'SELECT sum(id) FROM test FORMAT Null;' AND + type = 2 AND event_date >= yesterday() +ORDER BY event_time_microseconds; + +-- Size of ColumnAggregateFunction: Number of pointers * pointer size + arena size +-- 1 * 8 + AggregateFunction(argMax, String, DateTime) +-- +-- Size of AggregateFunction(argMax, String, DateTime): +-- SingleValueDataString() + SingleValueDataFixed(DateTime) +-- SingleValueDataString = 64B for small strings, 64B + string size + 1 for larger +-- SingleValueDataFixed(DateTime) = 1 + 4. 
With padding = 8 +-- AggregateFunction(argMax, String, DateTime) total: 64 + 8 = 72B + -- + -- ColumnAggregateFunction total: 8 + 72 = 80 +SELECT 'AggregateFunction(argMax, String, DateTime)', + read_rows, + read_bytes +FROM system.query_log +WHERE + current_database = currentDatabase() AND + query = 'SELECT argMaxMerge(amax) FROM test FORMAT Null;' AND + type = 2 AND event_date >= yesterday() +ORDER BY event_time_microseconds; diff --git a/tests/queries/0_stateless/02715_bit_operations_float.reference b/tests/queries/0_stateless/02715_bit_operations_float.reference new file mode 100644 index 00000000000..a208d00f211 --- /dev/null +++ b/tests/queries/0_stateless/02715_bit_operations_float.reference @@ -0,0 +1,2 @@ +1 1 1 1 +12 11 41 12 diff --git a/tests/queries/0_stateless/02715_bit_operations_float.sql b/tests/queries/0_stateless/02715_bit_operations_float.sql new file mode 100644 index 00000000000..6dc0a14e9b4 --- /dev/null +++ b/tests/queries/0_stateless/02715_bit_operations_float.sql @@ -0,0 +1,8 @@ +SELECT bitNot(-inf) != 0, bitNot(inf) != 0, bitNot(3.40282e+38) != 0, bitNot(nan) != 0; +SELECT bitCount(-inf), bitCount(inf), bitCount(3.40282e+38), bitCount(nan); + +SELECT bitAnd(1.0, 1.0); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT bitOr(1.0, 1.0); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT bitRotateLeft(1.0, 1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT bitShiftLeft(1.0, 1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT bitTest(1.0, 1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } diff --git a/tests/queries/0_stateless/02715_or_null.reference b/tests/queries/0_stateless/02715_or_null.reference new file mode 100644 index 00000000000..9ec7b825e5c --- /dev/null +++ b/tests/queries/0_stateless/02715_or_null.reference @@ -0,0 +1,3 @@ +\N +1 1 +1 diff --git a/tests/queries/0_stateless/02715_or_null.sql b/tests/queries/0_stateless/02715_or_null.sql new file mode 100644 index 00000000000..f020dd2c74c --- /dev/null +++ b/tests/queries/0_stateless/02715_or_null.sql @@ -0,0 +1,33 @@ +SELECT argMaxOrNull(id, timestamp) +FROM +( + SELECT + CAST(NULL, 'Nullable(UInt32)') AS id, + 2 AS timestamp +); + +SELECT + argMax(id, timestamp), + argMaxOrNull(id, timestamp) +FROM +( + SELECT + CAST(NULL, 'Nullable(UInt32)') AS id, + 2 AS timestamp + UNION ALL + SELECT + 1 AS id, + 1 AS timestamp +); + +SELECT argMaxIfOrNull(id, timestamp, id IS NOT NULL) +FROM +( + SELECT + CAST(NULL, 'Nullable(UInt32)') AS id, + 2 AS timestamp + UNION ALL + SELECT + 1 AS id, + 1 AS timestamp +); diff --git a/tests/queries/0_stateless/02716_int256_arrayfunc.reference b/tests/queries/0_stateless/02716_int256_arrayfunc.reference new file mode 100644 index 00000000000..d95216fd264 --- /dev/null +++ b/tests/queries/0_stateless/02716_int256_arrayfunc.reference @@ -0,0 +1,16 @@ +[0,2] Array(Int128) +[0,2] Array(Int128) +[0,2] Array(Int256) +[0,2] Array(Int256) +--- +[1,3] Array(UInt128) +[1,3] Array(Int128) +[1,3] Array(UInt256) +[1,3] Array(Int256) +[3,4,5] Array(UInt256) +[1,2] Array(Int256) +--- +[1,3] Array(UInt128) +[1,0] Array(Int128) +[1,3] Array(UInt256) +[1,0] Array(Int256) diff --git a/tests/queries/0_stateless/02716_int256_arrayfunc.sql b/tests/queries/0_stateless/02716_int256_arrayfunc.sql new file mode 100644 index 00000000000..779a3168ea6 --- /dev/null +++ b/tests/queries/0_stateless/02716_int256_arrayfunc.sql @@ -0,0 +1,22 @@ +SELECT arrayDifference([toUInt128(1), 3]), toTypeName(arrayDifference([toUInt128(1), 3])); +SELECT arrayDifference([toInt128(1), 3]), toTypeName(arrayDifference([toInt128(1), 3])); +SELECT
arrayDifference([toUInt256(1), 3]), toTypeName(arrayDifference([toUInt256(1), 3])); +SELECT arrayDifference([toInt256(1), 3]), toTypeName(arrayDifference([toInt256(1), 3])); + +SELECT '---'; + +SELECT arrayCumSum([toUInt128(1), 2]), toTypeName(arrayCumSum([toUInt128(1), 2])); +SELECT arrayCumSum([toInt128(1), 2]), toTypeName(arrayCumSum([toInt128(1), 2])); +SELECT arrayCumSum([toUInt256(1), 2]), toTypeName(arrayCumSum([toUInt256(1), 2])); +SELECT arrayCumSum([toInt256(1), 2]), toTypeName(arrayCumSum([toInt256(1), 2])); + +SELECT arrayCumSum([3, toInt128(1), toInt256(1)]), toTypeName(arrayCumSum([toUInt256(1), toUInt128(1)])); +SELECT arrayCumSum([toInt256(1), toInt128(1)]), toTypeName(arrayCumSum([toInt256(1), toInt128(1)])); + +SELECT '---'; + +SELECT arrayCumSumNonNegative([toUInt128(1), 2]), toTypeName(arrayCumSumNonNegative([toUInt128(1), 2])); +SELECT arrayCumSumNonNegative([toInt128(1), -2]), toTypeName(arrayCumSumNonNegative([toInt128(1), -2])); +SELECT arrayCumSumNonNegative([toUInt256(1), 2]), toTypeName(arrayCumSumNonNegative([toUInt256(1), 2])); +SELECT arrayCumSumNonNegative([toInt256(1), -2]), toTypeName(arrayCumSumNonNegative([toInt256(1), -2])); + diff --git a/tests/queries/0_stateless/02716_parquet_invalid_date32.reference b/tests/queries/0_stateless/02716_parquet_invalid_date32.reference new file mode 100644 index 00000000000..1052b48e58c --- /dev/null +++ b/tests/queries/0_stateless/02716_parquet_invalid_date32.reference @@ -0,0 +1,2 @@ +1 +200000 diff --git a/tests/queries/0_stateless/02716_parquet_invalid_date32.sh b/tests/queries/0_stateless/02716_parquet_invalid_date32.sh new file mode 100755 index 00000000000..5b909eecc34 --- /dev/null +++ b/tests/queries/0_stateless/02716_parquet_invalid_date32.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_parquet/02716_data.parquet', auto, 'date Date32')" 2>&1 | grep -c "VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE" + +$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_parquet/02716_data.parquet', auto, 'date Int32')" + diff --git a/tests/queries/0_stateless/02717_pretty_json.reference b/tests/queries/0_stateless/02717_pretty_json.reference new file mode 100644 index 00000000000..a8bd1b450e0 --- /dev/null +++ b/tests/queries/0_stateless/02717_pretty_json.reference @@ -0,0 +1,74 @@ +{ + "num": 42, + "arr": [ + 42, + 42 + ], + "nested_arr": [ + [ + [ + 42, + 42 + ], + [ + 42, + 42 + ] + ], + [ + [ + 42, + 42 + ] + ] + ], + "tuple": { + "a": 42, + "b": 42 + }, + "nested_tuple": { + "a": { + "b": { + "c": 42, + "d": 42 + }, + "e": 42 + }, + "f": 42 + }, + "map": { + "42": 42, + "24": 24 + }, + "nested_map": { + "42": { + "42": { + "42": 42 + } + } + }, + "nested_types": [ + [ + { + "42": 42 + }, + [ + 42, + 42 + ] + ], + [ + { + "42": 42 + }, + [ + 42, + 42 + ] + ] + ], + "json_object": { + "a.b": 1, + "a.c": 2 + } +} diff --git a/tests/queries/0_stateless/02717_pretty_json.sql b/tests/queries/0_stateless/02717_pretty_json.sql new file mode 100644 index 00000000000..8a49eb50adf --- /dev/null +++ b/tests/queries/0_stateless/02717_pretty_json.sql @@ -0,0 +1,3 @@ +set allow_experimental_object_type=1; +select 42 as num, [42, 42] as arr, [[[42, 42], [42, 42]], [[42, 42]]] as nested_arr, tuple(42, 42)::Tuple(a UInt32, b UInt32) as tuple, tuple(tuple(tuple(42, 42), 42), 42)::Tuple(a Tuple(b Tuple(c UInt32, d UInt32), e UInt32), f UInt32) as nested_tuple, map(42, 42, 24, 24) as map, map(42, map(42, map(42, 42))) as nested_map, [tuple(map(42, 42), [42, 42]), tuple(map(42, 42), [42, 42])]::Array(Tuple(Map(UInt32, UInt32), Array(UInt32))) as nested_types, '{"a" : {"b" : 1, "c" : 2}}'::JSON as json_object format PrettyNDJSON; + diff --git a/tests/queries/0_stateless/02718_cli_dashed_options_parsing.reference b/tests/queries/0_stateless/02718_cli_dashed_options_parsing.reference new file mode 100644 index 00000000000..6479f538bd8 --- /dev/null +++ b/tests/queries/0_stateless/02718_cli_dashed_options_parsing.reference @@ -0,0 +1,17 @@ +Test 1: Check that you can specify options with a dashes, not an underscores +Test 1.1: Check option from config - server_logs_file +1 +OK +1 +OK +1 +OK +Test 1.2: Check some option from Settings.h - allow_deprecated_syntax_for_merge_tree +0 +Test 2: check that unicode dashes are handled correctly +Test 2.1: check em-dash support +1 +Test 2.2: check en-dash support +1 +Test 2.3 check mathematical minus support +1 diff --git a/tests/queries/0_stateless/02718_cli_dashed_options_parsing.sh b/tests/queries/0_stateless/02718_cli_dashed_options_parsing.sh new file mode 100755 index 00000000000..ba455a56521 --- /dev/null +++ b/tests/queries/0_stateless/02718_cli_dashed_options_parsing.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +file_name=${CLICKHOUSE_TEST_UNIQUE_NAME} +file_name_1=${file_name}_1 +file_name_2=${file_name}_2 +file_name_3=${file_name}_3 + +################# +echo "Test 1: Check that you can specify options with a dashes, not an underscores" + +[[ -e $file_name_1 ]] && rm $file_name_1 +[[ -e $file_name_2 ]] && rm $file_name_2 +[[ -e $file_name_3 ]] && rm $file_name_3 + +echo "Test 1.1: Check option from config - server_logs_file" + +$CLICKHOUSE_LOCAL --log-level=debug --server-logs-file=$file_name_1 -q "SELECT 1;" 2> /dev/null +[[ -e $file_name_1 ]] && echo OK +$CLICKHOUSE_LOCAL --log-level=debug --server-logs-file $file_name_2 -q "SELECT 1;" 2> /dev/null +[[ -e $file_name_2 ]] && echo OK +$CLICKHOUSE_LOCAL --log-level=debug --server_logs_file $file_name_3 -q "SELECT 1;" 2> /dev/null +[[ -e $file_name_3 ]] && echo OK + +echo "Test 1.2: Check some option from Settings.h - allow_deprecated_syntax_for_merge_tree" + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test"; +$CLICKHOUSE_CLIENT --allow-deprecated-syntax-for-merge-tree=1 --query="CREATE TABLE test (d Date, s String) ENGINE = MergeTree(d, s, 8192)"; +$CLICKHOUSE_CLIENT --query="DROP TABLE test"; +echo $? + +################# +echo "Test 2: check that unicode dashes are handled correctly" + +echo "Test 2.1: check em-dash support" +# Unicode code: U+2014 +$CLICKHOUSE_LOCAL —query "SELECT 1"; + +echo "Test 2.2: check en-dash support" +# Unicode code: U+2013 +$CLICKHOUSE_LOCAL –query "SELECT 1"; + +echo "Test 2.3 check mathematical minus support" +# Unicode code: U+2212 +$CLICKHOUSE_LOCAL −query "SELECT 1"; + +rm $file_name_1 +rm $file_name_2 +rm $file_name_3 diff --git a/tests/queries/0_stateless/02718_insert_meet_hardware_error.reference b/tests/queries/0_stateless/02718_insert_meet_hardware_error.reference new file mode 100644 index 00000000000..f599e28b8ab --- /dev/null +++ b/tests/queries/0_stateless/02718_insert_meet_hardware_error.reference @@ -0,0 +1 @@ +10 diff --git a/tests/queries/0_stateless/02718_insert_meet_hardware_error.sql b/tests/queries/0_stateless/02718_insert_meet_hardware_error.sql new file mode 100644 index 00000000000..ab90bbecc04 --- /dev/null +++ b/tests/queries/0_stateless/02718_insert_meet_hardware_error.sql @@ -0,0 +1,20 @@ +-- Tags: zookeeper, no-parallel + +DROP TABLE IF EXISTS t_hardware_error NO DELAY; + +CREATE TABLE t_hardware_error ( + KeyID UInt32 +) Engine = ReplicatedMergeTree('/clickhouse/tables/{shard}/{database}/t_async_insert_dedup', '{replica}') +ORDER BY (KeyID); + +insert into t_hardware_error values (1), (2), (3), (4), (5); + +system enable failpoint replicated_merge_tree_commit_zk_fail_after_op; + +insert into t_hardware_error values (6), (7), (8), (9), (10); + +select count() from t_hardware_error; + +system disable failpoint replicated_commit_zk_fail_after_op; + +DROP TABLE t_hardware_error NO DELAY; diff --git a/tests/queries/0_stateless/02718_parquet_metadata_format.reference b/tests/queries/0_stateless/02718_parquet_metadata_format.reference new file mode 100644 index 00000000000..1f55c29da56 --- /dev/null +++ b/tests/queries/0_stateless/02718_parquet_metadata_format.reference @@ -0,0 +1,225 @@ +{ + "num_columns": "3", + "num_rows": "100000", + "num_row_groups": "2", + "format_version": "2.6", + "metadata_size": "617", + "total_uncompressed_size": "314147", + "total_compressed_size": "27081", + "columns": [ + { + "name": "number", + "path": "number", + "max_definition_level": "0", + "max_repetition_level": "0", + "physical_type": "INT32", + "logical_type": 
"Int(bitWidth=16, isSigned=false)", + "compression": "LZ4", + "total_uncompressed_size": "133321", + "total_compressed_size": "13293", + "space_saved": "90.03%", + "encodings": [ + "RLE_DICTIONARY", + "PLAIN", + "RLE" + ] + }, + { + "name": "str", + "path": "str", + "max_definition_level": "0", + "max_repetition_level": "0", + "physical_type": "BYTE_ARRAY", + "logical_type": "None", + "compression": "LZ4", + "total_uncompressed_size": "149115", + "total_compressed_size": "13340", + "space_saved": "91.05%", + "encodings": [ + "RLE_DICTIONARY", + "PLAIN", + "RLE" + ] + }, + { + "name": "mod", + "path": "mod", + "max_definition_level": "1", + "max_repetition_level": "0", + "physical_type": "INT32", + "logical_type": "Int(bitWidth=8, isSigned=false)", + "compression": "LZ4", + "total_uncompressed_size": "31711", + "total_compressed_size": "448", + "space_saved": "98.59%", + "encodings": [ + "RLE_DICTIONARY", + "PLAIN", + "RLE" + ] + } + ], + "row_groups": [ + { + "num_columns": "3", + "num_rows": "65409", + "total_uncompressed_size": "200527", + "total_compressed_size": "14406", + "columns": [ + { + "name": "number", + "path": "number", + "total_compressed_size": "7070", + "total_uncompressed_size": "85956", + "have_statistics": true, + "statistics": { + "num_values": "65409", + "null_count": "0", + "distinct_count": null, + "min": "0", + "max": "999" + } + }, + { + "name": "str", + "path": "str", + "total_compressed_size": "7093", + "total_uncompressed_size": "93853", + "have_statistics": true, + "statistics": { + "num_values": "65409", + "null_count": "0", + "distinct_count": null, + "min": "Hello0", + "max": "Hello999" + } + }, + { + "name": "mod", + "path": "mod", + "total_compressed_size": "243", + "total_uncompressed_size": "20718", + "have_statistics": true, + "statistics": { + "num_values": "32705", + "null_count": "32704", + "distinct_count": null, + "min": "0", + "max": "8" + } + } + ] + }, + { + "num_columns": "3", + "num_rows": "34591", + "total_uncompressed_size": "113620", + "total_compressed_size": "12675", + "columns": [ + { + "name": "number", + "path": "number", + "total_compressed_size": "6223", + "total_uncompressed_size": "47365", + "have_statistics": true, + "statistics": { + "num_values": "34591", + "null_count": "0", + "distinct_count": null, + "min": "0", + "max": "999" + } + }, + { + "name": "str", + "path": "str", + "total_compressed_size": "6247", + "total_uncompressed_size": "55262", + "have_statistics": true, + "statistics": { + "num_values": "34591", + "null_count": "0", + "distinct_count": null, + "min": "Hello0", + "max": "Hello999" + } + }, + { + "name": "mod", + "path": "mod", + "total_compressed_size": "205", + "total_uncompressed_size": "10993", + "have_statistics": true, + "statistics": { + "num_values": "17295", + "null_count": "17296", + "distinct_count": null, + "min": "0", + "max": "8" + } + } + ] + } + ] +} +{ + "num_columns": "3", + "format_version": "2.6" +} +{ + "columns": [ + { + "name": "number", + "path": "number", + "max_definition_level": "0", + "max_repetition_level": "0", + "physical_type": "INT32", + "logical_type": "Int(bitWidth=16, isSigned=false)", + "compression": "LZ4", + "total_uncompressed_size": "133321", + "total_compressed_size": "13293", + "space_saved": "90.03%", + "encodings": [ + "RLE_DICTIONARY", + "PLAIN", + "RLE" + ] + }, + { + "name": "str", + "path": "str", + "max_definition_level": "0", + "max_repetition_level": "0", + "physical_type": "BYTE_ARRAY", + "logical_type": "None", + "compression": "LZ4", + 
"total_uncompressed_size": "149115", + "total_compressed_size": "13340", + "space_saved": "91.05%", + "encodings": [ + "RLE_DICTIONARY", + "PLAIN", + "RLE" + ] + }, + { + "name": "mod", + "path": "mod", + "max_definition_level": "1", + "max_repetition_level": "0", + "physical_type": "INT32", + "logical_type": "Int(bitWidth=8, isSigned=false)", + "compression": "LZ4", + "total_uncompressed_size": "31711", + "total_compressed_size": "448", + "space_saved": "98.59%", + "encodings": [ + "RLE_DICTIONARY", + "PLAIN", + "RLE" + ] + } + ] +} +1 +1 diff --git a/tests/queries/0_stateless/02718_parquet_metadata_format.sh b/tests/queries/0_stateless/02718_parquet_metadata_format.sh new file mode 100755 index 00000000000..94d7f453850 --- /dev/null +++ b/tests/queries/0_stateless/02718_parquet_metadata_format.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_parquet/02718_data.parquet', ParquetMetadata) format JSONEachRow" | python3 -m json.tool + +$CLICKHOUSE_LOCAL -q "select num_columns, format_version from file('$CURDIR/data_parquet/02718_data.parquet', ParquetMetadata, 'num_columns UInt64, format_version String') format JSONEachRow" | python3 -m json.tool + +$CLICKHOUSE_LOCAL -q "select columns from file('$CURDIR/data_parquet/02718_data.parquet', ParquetMetadata) format JSONEachRow" | python3 -m json.tool + + +$CLICKHOUSE_LOCAL -q "select some_column from file('$CURDIR/data_parquet/02718_data.parquet', ParquetMetadata, 'some_column Array(UInt32)')" 2>&1 | grep -c "BAD_ARGUMENTS" + +$CLICKHOUSE_LOCAL -q "select num_columns from file('$CURDIR/data_parquet/02718_data.parquet', ParquetMetadata, 'num_columns Array(UInt32)')" 2>&1 | grep -c "BAD_ARGUMENTS" + + diff --git a/tests/queries/0_stateless/02719_aggregate_with_empty_string_key.reference b/tests/queries/0_stateless/02719_aggregate_with_empty_string_key.reference new file mode 100644 index 00000000000..e9b4456e5c7 --- /dev/null +++ b/tests/queries/0_stateless/02719_aggregate_with_empty_string_key.reference @@ -0,0 +1,4 @@ +-----------String------------ +\N 1 + 2 +s 1 diff --git a/tests/queries/0_stateless/02719_aggregate_with_empty_string_key.sql b/tests/queries/0_stateless/02719_aggregate_with_empty_string_key.sql new file mode 100644 index 00000000000..7930b2ca0cc --- /dev/null +++ b/tests/queries/0_stateless/02719_aggregate_with_empty_string_key.sql @@ -0,0 +1,7 @@ +drop table if exists test ; +create table test(str Nullable(String), i Int64) engine=Memory(); +insert into test values(null, 1),('', 2),('s', 1); +select '-----------String------------'; +select str ,max(i) from test group by str; + +drop table test; diff --git a/tests/queries/0_stateless/02720_row_policy_column_with_dots.reference b/tests/queries/0_stateless/02720_row_policy_column_with_dots.reference new file mode 100644 index 00000000000..dd2c30cc9f8 --- /dev/null +++ b/tests/queries/0_stateless/02720_row_policy_column_with_dots.reference @@ -0,0 +1 @@ +2024-01-01 Hello World diff --git a/tests/queries/0_stateless/02720_row_policy_column_with_dots.sql b/tests/queries/0_stateless/02720_row_policy_column_with_dots.sql new file mode 100644 index 00000000000..361bd0e0ec7 --- /dev/null +++ b/tests/queries/0_stateless/02720_row_policy_column_with_dots.sql @@ -0,0 +1,6 @@ +CREATE table if not exists table_with_dot_column (date Date, regular_column String, `other_column.2` String) ENGINE = 
MergeTree() ORDER BY date; +INSERT INTO table_with_dot_column select '2020-01-01', 'Hello', 'World'; +INSERT INTO table_with_dot_column select '2024-01-01', 'Hello', 'World'; +CREATE ROW POLICY IF NOT EXISTS row_policy ON table_with_dot_column USING toDate(date) >= today() - 30 TO ALL; +SELECT * FROM table_with_dot_column; +DROP TABLE table_with_dot_column; diff --git a/tests/queries/0_stateless/02720_s3_strict_upload_part_size.reference b/tests/queries/0_stateless/02720_s3_strict_upload_part_size.reference new file mode 100644 index 00000000000..f7c4ece5f1f --- /dev/null +++ b/tests/queries/0_stateless/02720_s3_strict_upload_part_size.reference @@ -0,0 +1,4 @@ +part size: 6000001, part number: 1 +part size: 6000001, part number: 2 +part size: 6000001, part number: 3 +part size: 2971517, part number: 4 diff --git a/tests/queries/0_stateless/02720_s3_strict_upload_part_size.sh b/tests/queries/0_stateless/02720_s3_strict_upload_part_size.sh new file mode 100755 index 00000000000..9799ef0478a --- /dev/null +++ b/tests/queries/0_stateless/02720_s3_strict_upload_part_size.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, long +# Tag no-fasttest: requires S3 + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +in="$CUR_DIR/$CLICKHOUSE_TEST_UNIQUE_NAME.in" +out="$CUR_DIR/$CLICKHOUSE_TEST_UNIQUE_NAME.out" +log="$CUR_DIR/$CLICKHOUSE_TEST_UNIQUE_NAME.log" + +set -e +trap 'rm -f "${out:?}" "${in:?}" "${log:?}"' EXIT + +# Generate a file of 20MiB in size, with our part size it will have 4 parts +# NOTE: 1 byte is for new line, so 1023 not 1024 +$CLICKHOUSE_LOCAL -q "SELECT randomPrintableASCII(1023) FROM numbers(20*1024) FORMAT LineAsString" > "$in" + +$CLICKHOUSE_CLIENT --send_logs_level=trace --server_logs_file="$log" -q "INSERT INTO FUNCTION s3(s3_conn, filename='$CLICKHOUSE_TEST_UNIQUE_NAME', format='LineAsString', structure='line String') FORMAT LineAsString" --s3_strict_upload_part_size=6000001 < "$in" +grep -F '<Fatal>' "$log" || : +grep -o 'WriteBufferFromS3: writePart.*, part size: .*' "$log" | grep -o 'part size: .*' +$CLICKHOUSE_CLIENT -q "SELECT * FROM s3(s3_conn, filename='$CLICKHOUSE_TEST_UNIQUE_NAME', format='LineAsString', structure='line String') FORMAT LineAsString" > "$out" + +diff -q "$in" "$out" diff --git a/tests/queries/0_stateless/02721_parquet_field_not_found.reference b/tests/queries/0_stateless/02721_parquet_field_not_found.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02721_parquet_field_not_found.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02721_parquet_field_not_found.sh b/tests/queries/0_stateless/02721_parquet_field_not_found.sh new file mode 100755 index 00000000000..72925afec6e --- /dev/null +++ b/tests/queries/0_stateless/02721_parquet_field_not_found.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select 42 as x format Parquet" | $CLICKHOUSE_LOCAL --input-format Parquet --structure "x UInt32, y UInt32" -q "select * from table" 2>&1 | grep -c "THERE_IS_NO_COLUMN" + diff --git a/tests/queries/0_stateless/02721_url_cluster.reference b/tests/queries/0_stateless/02721_url_cluster.reference new file mode 100644 index 00000000000..36ef7609920 --- /dev/null +++ b/tests/queries/0_stateless/02721_url_cluster.reference @@ -0,0 +1,136 @@ +0 0 0 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +20 21 22 +23 24 25 +26 27 28 +0 0 0 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +20 21 22 +23 24 25 +26 27 28 +0 0 0 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +20 21 22 +23 24 25 +26 27 28 +0 0 0 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +20 21 22 +23 24 25 +26 27 28 +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 UInt64 +c2 UInt64 +c3 UInt64 +c1 UInt64 +c2 UInt64 +c3 UInt64 +12 +12 +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +1 2 3 +4 5 6 +7 8 9 +0 0 0 diff --git a/tests/queries/0_stateless/02721_url_cluster.sql b/tests/queries/0_stateless/02721_url_cluster.sql new file mode 100644 index 00000000000..c30b03495cd --- /dev/null +++ b/tests/queries/0_stateless/02721_url_cluster.sql @@ -0,0 +1,40 @@ +-- Tags: no-fasttest +-- Tag no-fasttest: Depends on AWS + +select * from urlCluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv') ORDER BY c1, c2, c3; +select * from urlCluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'TSV') ORDER BY c1, c2, c3; +select * from urlCluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64') ORDER BY c1, c2, c3; +select * from urlCluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64', 'auto') ORDER BY c1, c2, c3; + +desc urlCluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv'); +desc urlCluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'TSV'); +desc urlCluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64'); +desc urlCluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64', 'auto'); + +select COUNT() from urlCluster('test_cluster_two_shards_localhost', 
'http://localhost:11111/test/{a,b,c}.tsv'); +select COUNT(*) from urlCluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv'); + +desc urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv'); +desc urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV'); +desc urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'auto'); +desc urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV', 'auto'); +desc urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV', 'auto', 'auto'); + +desc urlCluster('test_cluster_one_shard_three_replicas_localhost', headers('X-ClickHouse-Database'='default'), 'http://localhost:11111/test/{a,b}.tsv'); +desc urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', headers('X-ClickHouse-Database'='default'), 'TSV'); +desc urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'auto', headers('X-ClickHouse-Database'='default')); +desc urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV', 'auto', headers('X-ClickHouse-Database'='default')); +desc urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV', headers('X-ClickHouse-Database'='default'), 'auto', 'auto'); + +select * from urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv') order by c1, c2, c3; +select * from urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV') order by c1, c2, c3; +select * from urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'auto') order by c1, c2, c3; +select * from urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV', 'auto') order by c1, c2, c3; +select * from urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV', 'auto', 'auto') order by c1, c2, c3; + +drop table if exists test; +create table test (x UInt32, y UInt32, z UInt32) engine=Memory(); +insert into test select * from urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/a.tsv', 'TSV'); +select * from test; +drop table test; + diff --git a/tests/queries/0_stateless/02722_line_as_string_consistency.reference b/tests/queries/0_stateless/02722_line_as_string_consistency.reference new file mode 100644 index 00000000000..f71e78457a8 --- /dev/null +++ b/tests/queries/0_stateless/02722_line_as_string_consistency.reference @@ -0,0 +1,7 @@ +0D48656C6C6F 9999 +0D 1 +48656C6C6F 1 +--- +0D48656C6C6F 9999 +0D 1 +48656C6C6F 1 diff --git a/tests/queries/0_stateless/02722_line_as_string_consistency.sh b/tests/queries/0_stateless/02722_line_as_string_consistency.sh new file mode 100755 index 00000000000..be1942a9164 --- /dev/null +++ b/tests/queries/0_stateless/02722_line_as_string_consistency.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# The LineAsString format reads every line until the newline character (\n). 
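+# For example, for the repeated 'Hello\n\r' input used below, the first row is 'Hello' (hex 48656C6C6F), the following rows are '\rHello' (hex 0D48656C6C6F), and the last row is a lone '\r' (hex 0D), as the reference output shows.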
+# DOS (\r\n) or MacOS Classic (\n\r) line breaks have no special support. + +# While the behavior described above may change in the future to add support for DOS or MacOS Classic line breaks, +# the most important property is that the behavior is exactly the same (consistent) +# regardless of whether parallel parsing is enabled or not, +# and this test checks that. + +for _ in {1..10000}; do echo -ne 'Hello\n\r'; done | $CLICKHOUSE_LOCAL --min_chunk_bytes_for_parallel_parsing 1 --input_format_parallel_parsing 0 --query "SELECT hex(*), count() FROM table GROUP BY ALL ORDER BY 2 DESC, 1" --input-format LineAsString +echo '---' +for _ in {1..10000}; do echo -ne 'Hello\n\r'; done | $CLICKHOUSE_LOCAL --min_chunk_bytes_for_parallel_parsing 1 --input_format_parallel_parsing 1 --query "SELECT hex(*), count() FROM table GROUP BY ALL ORDER BY 2 DESC, 1" --input-format LineAsString diff --git a/tests/queries/0_stateless/02722_log_profile_events.reference b/tests/queries/0_stateless/02722_log_profile_events.reference new file mode 100644 index 00000000000..0d66ea1aee9 --- /dev/null +++ b/tests/queries/0_stateless/02722_log_profile_events.reference @@ -0,0 +1,2 @@ +0 +1 diff --git a/tests/queries/0_stateless/02722_log_profile_events.sql b/tests/queries/0_stateless/02722_log_profile_events.sql new file mode 100644 index 00000000000..07e2d524d53 --- /dev/null +++ b/tests/queries/0_stateless/02722_log_profile_events.sql @@ -0,0 +1,5 @@ +-- There are no fatal errors: +SELECT count() FROM system.events WHERE event = 'LogFatal'; + +-- It counts the trace log messages: +SELECT count() > 0 FROM system.events WHERE event = 'LogTrace'; diff --git a/tests/queries/0_stateless/02723_jit_aggregation_bug_48120.reference b/tests/queries/0_stateless/02723_jit_aggregation_bug_48120.reference new file mode 100644 index 00000000000..6f9b4b4fc6a --- /dev/null +++ b/tests/queries/0_stateless/02723_jit_aggregation_bug_48120.reference @@ -0,0 +1,7 @@ +-- { echoOn } +SYSTEM DROP COMPILED EXPRESSION CACHE; +SELECT minIf(num1, num1 < 5) FROM dummy GROUP BY num2; +0 +SYSTEM DROP COMPILED EXPRESSION CACHE; +SELECT minIf(num1, num1 >= 5) FROM dummy GROUP BY num2; +5 diff --git a/tests/queries/0_stateless/02723_jit_aggregation_bug_48120.sql b/tests/queries/0_stateless/02723_jit_aggregation_bug_48120.sql new file mode 100644 index 00000000000..04e0fc5e0ba --- /dev/null +++ b/tests/queries/0_stateless/02723_jit_aggregation_bug_48120.sql @@ -0,0 +1,17 @@ +-- Tags: no-fasttest, no-ubsan, no-cpu-aarch64 + +drop table if exists dummy; +CREATE TABLE dummy ( num1 Int32, num2 Enum8('foo' = 0, 'bar' = 1, 'tar' = 2) ) +ENGINE = MergeTree ORDER BY num1 as select 5, 'bar'; + +set compile_aggregate_expressions=1; +set min_count_to_compile_aggregate_expression=0; + +-- { echoOn } +SYSTEM DROP COMPILED EXPRESSION CACHE; +SELECT minIf(num1, num1 < 5) FROM dummy GROUP BY num2; +SYSTEM DROP COMPILED EXPRESSION CACHE; +SELECT minIf(num1, num1 >= 5) FROM dummy GROUP BY num2; +-- { echoOff } + +drop table dummy; diff --git a/tests/queries/0_stateless/02723_parallelize_output_setting.reference b/tests/queries/0_stateless/02723_parallelize_output_setting.reference new file mode 100644 index 00000000000..0f2a396f471 --- /dev/null +++ b/tests/queries/0_stateless/02723_parallelize_output_setting.reference @@ -0,0 +1,7 @@ +-- { echoOn } +set parallelize_output_from_storages=1; +select startsWith(trimLeft(explain),'Resize') as resize from (explain pipeline select * from file(data_02723.csv)) where resize; +1 +-- no Resize in pipeline +set parallelize_output_from_storages=0; 
+select startsWith(trimLeft(explain),'Resize') as resize from (explain pipeline select * from file(data_02723.csv)) where resize; diff --git a/tests/queries/0_stateless/02723_parallelize_output_setting.sql b/tests/queries/0_stateless/02723_parallelize_output_setting.sql new file mode 100644 index 00000000000..7db28ca4dec --- /dev/null +++ b/tests/queries/0_stateless/02723_parallelize_output_setting.sql @@ -0,0 +1,12 @@ +-- Tags: no-parallel + +insert into function file(data_02723.csv) select number from numbers(5) settings engine_file_truncate_on_insert=1; + +set max_threads=2; +-- { echoOn } +set parallelize_output_from_storages=1; +select startsWith(trimLeft(explain),'Resize') as resize from (explain pipeline select * from file(data_02723.csv)) where resize; +-- no Resize in pipeline +set parallelize_output_from_storages=0; +select startsWith(trimLeft(explain),'Resize') as resize from (explain pipeline select * from file(data_02723.csv)) where resize; + diff --git a/tests/queries/0_stateless/02723_param_exception_message_context.reference b/tests/queries/0_stateless/02723_param_exception_message_context.reference new file mode 100644 index 00000000000..286f53421b5 --- /dev/null +++ b/tests/queries/0_stateless/02723_param_exception_message_context.reference @@ -0,0 +1 @@ +for query parameter 'x' diff --git a/tests/queries/0_stateless/02723_param_exception_message_context.sh b/tests/queries/0_stateless/02723_param_exception_message_context.sh new file mode 100755 index 00000000000..d37badd444f --- /dev/null +++ b/tests/queries/0_stateless/02723_param_exception_message_context.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# The exception message about an unparsed query parameter also mentions the name of the parameter.
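+# For example, the value 'Hello' below cannot be parsed as Array(String), and the resulting error text is expected to +# contain the fragment "for query parameter 'x'" (the same fragment the rg pattern extracts and the reference file expects).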
+$CLICKHOUSE_CLIENT --param_x Hello --query "SELECT {x:Array(String)}" 2>&1 | rg -oF "for query parameter 'x'" | uniq diff --git a/tests/queries/0_stateless/02723_zookeeper_name.reference b/tests/queries/0_stateless/02723_zookeeper_name.reference new file mode 100644 index 00000000000..074712bd8fe --- /dev/null +++ b/tests/queries/0_stateless/02723_zookeeper_name.reference @@ -0,0 +1,4 @@ +Create Tables +Insert Data +"t1","default",1 +"t2","default",1 diff --git a/tests/queries/0_stateless/02723_zookeeper_name.sql b/tests/queries/0_stateless/02723_zookeeper_name.sql new file mode 100644 index 00000000000..e6374767951 --- /dev/null +++ b/tests/queries/0_stateless/02723_zookeeper_name.sql @@ -0,0 +1,23 @@ +-- Tags: zookeeper, replica + +SELECT 'Create Tables'; +CREATE TABLE t1(k UInt32, v UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_02723/zookeeper_name/t1', '1') ORDER BY k; + +CREATE TABLE t2(k UInt32, v UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_02723/zookeeper_name/t2', '1') ORDER BY k; + +SELECT 'Insert Data'; + +INSERT INTO t1 SELECT * FROM generateRandom('k UInt32, v UInt32') LIMIT 1; +INSERT INTO t2 SELECT * FROM generateRandom('k UInt32, v UInt32') LIMIT 1; + +SELECT + table, zookeeper_name, count() +FROM system.replicas +INNER JOIN system.parts USING (database, table) +WHERE database = currentDatabase() +GROUP BY table, zookeeper_name +ORDER BY table, zookeeper_name +FORMAT CSV; + +DROP TABLE t1; +DROP TABLE t2; diff --git a/tests/queries/0_stateless/02724_decompress_filename_exception.reference b/tests/queries/0_stateless/02724_decompress_filename_exception.reference new file mode 100644 index 00000000000..f9c5aacff7b --- /dev/null +++ b/tests/queries/0_stateless/02724_decompress_filename_exception.reference @@ -0,0 +1,8 @@ +Ok +Ok +Ok +Ok +Ok +Ok +Ok +Ok diff --git a/tests/queries/0_stateless/02724_decompress_filename_exception.sh b/tests/queries/0_stateless/02724_decompress_filename_exception.sh new file mode 100755 index 00000000000..bbc2b8d066b --- /dev/null +++ b/tests/queries/0_stateless/02724_decompress_filename_exception.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +FILENAME="${USER_FILES_PATH}/corrupted_file.tsv.xx" + +echo 'corrupted file' > $FILENAME; + +$CLICKHOUSE_CLIENT --query "SELECT * FROM file('${FILENAME}', 'TSV', 'c UInt32', 'gzip')" 2>&1 | grep -q "While reading from: $FILENAME" && echo 'Ok' || echo 'Fail'; +$CLICKHOUSE_CLIENT --query "SELECT * FROM file('${FILENAME}', 'TSV', 'c UInt32', 'deflate')" 2>&1 | grep -q "While reading from: $FILENAME" && echo 'Ok' || echo 'Fail'; +$CLICKHOUSE_CLIENT --query "SELECT * FROM file('${FILENAME}', 'TSV', 'c UInt32', 'br')" 2>&1 | grep -q "While reading from: $FILENAME" && echo 'Ok' || echo 'Fail'; +$CLICKHOUSE_CLIENT --query "SELECT * FROM file('${FILENAME}', 'TSV', 'c UInt32', 'xz')" 2>&1 | grep -q "While reading from: $FILENAME" && echo 'Ok' || echo 'Fail'; +$CLICKHOUSE_CLIENT --query "SELECT * FROM file('${FILENAME}', 'TSV', 'c UInt32', 'zstd')" 2>&1 | grep -q "While reading from: $FILENAME" && echo 'Ok' || echo 'Fail'; +$CLICKHOUSE_CLIENT --query "SELECT * FROM file('${FILENAME}', 'TSV', 'c UInt32', 'lz4')" 2>&1 | grep -q "While reading from: $FILENAME" && echo 'Ok' || echo 'Fail'; +$CLICKHOUSE_CLIENT --query "SELECT * FROM file('${FILENAME}', 'TSV', 'c UInt32', 'bz2')" 2>&1 | grep -q "While reading from: $FILENAME" && echo 'Ok' || echo 'Fail'; +$CLICKHOUSE_CLIENT --query "SELECT * FROM file('${FILENAME}', 'TSV', 'c UInt32', 'snappy')" 2>&1 | grep -q "While reading from: $FILENAME" && echo 'Ok' || echo 'Fail'; + +rm $FILENAME; diff --git a/tests/queries/0_stateless/02724_delay_mutations.reference b/tests/queries/0_stateless/02724_delay_mutations.reference new file mode 100644 index 00000000000..16bd972a06d --- /dev/null +++ b/tests/queries/0_stateless/02724_delay_mutations.reference @@ -0,0 +1,8 @@ +1 2 +4 +1 6 +0 +ALTER TABLE t_delay_mutations UPDATE v = 3 WHERE 1; 0 0 +ALTER TABLE t_delay_mutations UPDATE v = 4 WHERE 1; 0 0 +ALTER TABLE t_delay_mutations UPDATE v = 5 WHERE 1; 1 1 +ALTER TABLE t_delay_mutations UPDATE v = 6 WHERE 1; 1 1 diff --git a/tests/queries/0_stateless/02724_delay_mutations.sh b/tests/queries/0_stateless/02724_delay_mutations.sh new file mode 100755 index 00000000000..f349e29253a --- /dev/null +++ b/tests/queries/0_stateless/02724_delay_mutations.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# shellcheck source=./mergetree_mutations.lib +. 
"$CURDIR"/mergetree_mutations.lib + +${CLICKHOUSE_CLIENT} -n --query " +DROP TABLE IF EXISTS t_delay_mutations SYNC; + +CREATE TABLE t_delay_mutations (id UInt64, v UInt64) +ENGINE = MergeTree ORDER BY id +SETTINGS + number_of_mutations_to_delay = 2, + number_of_mutations_to_throw = 10, + min_delay_to_mutate_ms = 10, + min_delay_to_mutate_ms = 1000; + +SET mutations_sync = 0; +SYSTEM STOP MERGES t_delay_mutations; + +INSERT INTO t_delay_mutations VALUES (1, 2); + +ALTER TABLE t_delay_mutations UPDATE v = 3 WHERE 1; +ALTER TABLE t_delay_mutations UPDATE v = 4 WHERE 1; + +ALTER TABLE t_delay_mutations UPDATE v = 5 WHERE 1; +ALTER TABLE t_delay_mutations UPDATE v = 6 WHERE 1; + +SELECT * FROM t_delay_mutations ORDER BY id; +SELECT count() FROM system.mutations WHERE database = currentDatabase() AND table = 't_delay_mutations' AND NOT is_done; +" + +${CLICKHOUSE_CLIENT} --query "SYSTEM START MERGES t_delay_mutations" +wait_for_mutation "t_delay_mutations" "mutation_5.txt" + +${CLICKHOUSE_CLIENT} -n --query " +SELECT * FROM t_delay_mutations ORDER BY id; +SELECT count() FROM system.mutations WHERE database = currentDatabase() AND table = 't_delay_mutations' AND NOT is_done; + +DROP TABLE IF EXISTS t_delay_mutations SYNC; +" + +${CLICKHOUSE_CLIENT} -n --query " +SYSTEM FLUSH LOGS; + +SELECT + query, + ProfileEvents['DelayedMutations'], + ProfileEvents['DelayedMutationsMilliseconds'] BETWEEN 10 AND 1000 +FROM system.query_log +WHERE + type = 'QueryFinish' AND + current_database = '$CLICKHOUSE_DATABASE' AND + query ILIKE 'ALTER TABLE t_delay_mutations UPDATE%' +ORDER BY query; +" diff --git a/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.reference b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.sql b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.sql new file mode 100644 index 00000000000..13dfb5debe7 --- /dev/null +++ b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.sql @@ -0,0 +1,8 @@ +select count(*) +from ( + select 1 as id, [1, 2, 3] as arr +) as sessions +ASOF LEFT JOIN ( + select 1 as session_id, 4 as id +) as visitors +ON visitors.session_id <= sessions.id AND arrayFirst(a -> a, arrayMap((a) -> a, sessions.arr)) = visitors.id diff --git a/tests/queries/0_stateless/02724_jit_logical_functions.reference b/tests/queries/0_stateless/02724_jit_logical_functions.reference new file mode 100644 index 00000000000..673ffe02613 --- /dev/null +++ b/tests/queries/0_stateless/02724_jit_logical_functions.reference @@ -0,0 +1,18 @@ +Logical functions not null +0 0 0 0 0 +0 1 0 1 1 +1 0 0 1 1 +1 1 1 1 0 +Logical functions nullable +0 0 0 0 0 +0 1 0 1 1 +1 0 0 1 1 +1 1 1 1 0 +0 \N 0 \N \N +1 \N \N 1 \N +0 0 0 +1 1 0 +0 0 0 +1 1 0 +\N \N \N +\N \N \N diff --git a/tests/queries/0_stateless/02724_jit_logical_functions.sql b/tests/queries/0_stateless/02724_jit_logical_functions.sql new file mode 100644 index 00000000000..fe6646337d0 --- /dev/null +++ b/tests/queries/0_stateless/02724_jit_logical_functions.sql @@ -0,0 +1,21 @@ +SET compile_expressions = 1; +SET min_count_to_compile_expression = 0; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table (a UInt8, b UInt8) ENGINE = TinyLog; +INSERT INTO test_table VALUES (0, 0), (0, 1), (1, 0), (1, 
1); + +SELECT 'Logical functions not null'; +SELECT a, b, and(a, b), or(a, b), xor(a, b) FROM test_table; + +DROP TABLE test_table; + +DROP TABLE IF EXISTS test_table_nullable; +CREATE TABLE test_table_nullable (a UInt8, b Nullable(UInt8)) ENGINE = TinyLog; +INSERT INTO test_table_nullable VALUES (0, 0), (0, 1), (1, 0), (1, 1), (0, NULL), (1, NULL); + +SELECT 'Logical functions nullable'; +SELECT a, b, and(a, b), or(a, b), xor(a, b) FROM test_table_nullable; +SELECT and(b, b), or(b, b), xor(b, b) FROM test_table_nullable; + +DROP TABLE test_table_nullable; diff --git a/tests/queries/0_stateless/02724_limit_num_mutations.reference b/tests/queries/0_stateless/02724_limit_num_mutations.reference new file mode 100644 index 00000000000..5742648c79d --- /dev/null +++ b/tests/queries/0_stateless/02724_limit_num_mutations.reference @@ -0,0 +1,9 @@ +1 2 +2 +CREATE TABLE default.t_limit_mutations\n(\n `id` UInt64,\n `v` UInt64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/t_limit_mutations\', \'1\')\nORDER BY id\nSETTINGS number_of_mutations_to_throw = 2, index_granularity = 8192 +1 2 +4 +CREATE TABLE default.t_limit_mutations\n(\n `id` UInt64,\n `v` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/t_limit_mutations\', \'1\')\nORDER BY id\nSETTINGS number_of_mutations_to_throw = 2, index_granularity = 8192 +1 6 +0 +CREATE TABLE default.t_limit_mutations\n(\n `id` UInt64,\n `v` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/t_limit_mutations\', \'1\')\nORDER BY id\nSETTINGS number_of_mutations_to_throw = 2, index_granularity = 8192 diff --git a/tests/queries/0_stateless/02724_limit_num_mutations.sh b/tests/queries/0_stateless/02724_limit_num_mutations.sh new file mode 100755 index 00000000000..a9d69b2ed48 --- /dev/null +++ b/tests/queries/0_stateless/02724_limit_num_mutations.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# shellcheck source=./mergetree_mutations.lib +. 
"$CURDIR"/mergetree_mutations.lib + +set -e + +function wait_for_alter() +{ + type=$1 + for i in {1..100}; do + sleep 0.1 + ${CLICKHOUSE_CLIENT} --query "SHOW CREATE TABLE t_limit_mutations" | grep -q "\`v\` $type" && break; + + if [[ $i -eq 100 ]]; then + echo "Timed out while waiting for alter to execute" + fi + done +} + +${CLICKHOUSE_CLIENT} -n --query " +DROP TABLE IF EXISTS t_limit_mutations SYNC; + +CREATE TABLE t_limit_mutations (id UInt64, v UInt64) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/t_limit_mutations', '1') ORDER BY id +SETTINGS number_of_mutations_to_throw = 2; + +SET mutations_sync = 0; +SYSTEM STOP MERGES t_limit_mutations; + +INSERT INTO t_limit_mutations VALUES (1, 2); + +ALTER TABLE t_limit_mutations UPDATE v = 3 WHERE 1; +ALTER TABLE t_limit_mutations UPDATE v = 4 WHERE 1; + +SYSTEM SYNC REPLICA t_limit_mutations PULL; + +ALTER TABLE t_limit_mutations UPDATE v = 5 WHERE 1; -- { serverError TOO_MANY_MUTATIONS } +ALTER TABLE t_limit_mutations MODIFY COLUMN v String; -- { serverError TOO_MANY_MUTATIONS } + +SELECT * FROM t_limit_mutations ORDER BY id; +SELECT count() FROM system.mutations WHERE database = currentDatabase() AND table = 't_limit_mutations' AND NOT is_done; +SHOW CREATE TABLE t_limit_mutations; +" + +${CLICKHOUSE_CLIENT} -n --query " +ALTER TABLE t_limit_mutations UPDATE v = 6 WHERE 1 SETTINGS number_of_mutations_to_throw = 100; +ALTER TABLE t_limit_mutations MODIFY COLUMN v String SETTINGS number_of_mutations_to_throw = 100, alter_sync = 0; +" + +wait_for_alter "String" + +${CLICKHOUSE_CLIENT} -n --query " +SELECT * FROM t_limit_mutations ORDER BY id; +SELECT count() FROM system.mutations WHERE database = currentDatabase() AND table = 't_limit_mutations' AND NOT is_done; +SHOW CREATE TABLE t_limit_mutations; +" + +${CLICKHOUSE_CLIENT} --query "SYSTEM START MERGES t_limit_mutations" + +wait_for_mutation "t_limit_mutations" "0000000003" + +${CLICKHOUSE_CLIENT} -n --query " +SELECT * FROM t_limit_mutations ORDER BY id; +SELECT count() FROM system.mutations WHERE database = currentDatabase() AND table = 't_limit_mutations' AND NOT is_done; +SHOW CREATE TABLE t_limit_mutations; + +DROP TABLE IF EXISTS t_limit_mutations SYNC; +" diff --git a/tests/queries/0_stateless/02724_mutliple_storage_join.reference b/tests/queries/0_stateless/02724_mutliple_storage_join.reference new file mode 100644 index 00000000000..f7eb44d66e0 --- /dev/null +++ b/tests/queries/0_stateless/02724_mutliple_storage_join.reference @@ -0,0 +1,6 @@ +0 +0 +0 +0 +0 +0 diff --git a/tests/queries/0_stateless/02724_mutliple_storage_join.sql b/tests/queries/0_stateless/02724_mutliple_storage_join.sql new file mode 100644 index 00000000000..286e867704d --- /dev/null +++ b/tests/queries/0_stateless/02724_mutliple_storage_join.sql @@ -0,0 +1,21 @@ +CREATE TABLE user(id UInt32, name String) ENGINE = Join(ANY, LEFT, id); +INSERT INTO user VALUES (1,'U1')(2,'U2')(3,'U3'); + +CREATE TABLE product(id UInt32, name String, cate String) ENGINE = Join(ANY, LEFT, id); +INSERT INTO product VALUES (1,'P1','C1')(2,'P2','C1')(3,'P3','C2'); + +CREATE TABLE order(id UInt32, pId UInt32, uId UInt32) ENGINE = TinyLog; +INSERT INTO order VALUES (1,1,1)(2,1,2)(3,2,3); + +SELECT ignore(*) FROM ( + SELECT + uId, + user.id as `uuu` + FROM order + LEFT ANY JOIN user + ON uId = `uuu` +); + +SELECT ignore(*) FROM order +LEFT ANY JOIN user ON uId = user.id +LEFT ANY JOIN product ON pId = product.id; diff --git a/tests/queries/0_stateless/02724_persist_interval_type.reference 
b/tests/queries/0_stateless/02724_persist_interval_type.reference new file mode 100644 index 00000000000..964604605d9 --- /dev/null +++ b/tests/queries/0_stateless/02724_persist_interval_type.reference @@ -0,0 +1,6 @@ +2023-01-01 00:00:01.000000001 2023-01-01 02:00:00.000000001 2023-01-01 00:00:00.000000004 1 2 0 +2023-01-01 00:00:02.000000001 2023-01-01 03:00:00.000000001 2023-01-01 00:00:00.000000005 2 3 0 +2023-01-01 00:00:01.000000001 2023-01-01 02:00:00.000000001 2023-01-01 00:00:00.000000004 1 2 0 +2023-01-01 00:00:02.000000001 2023-01-01 03:00:00.000000001 2023-01-01 00:00:00.000000005 2 3 0 +0 +1 diff --git a/tests/queries/0_stateless/02724_persist_interval_type.sql b/tests/queries/0_stateless/02724_persist_interval_type.sql new file mode 100644 index 00000000000..3acce003c9a --- /dev/null +++ b/tests/queries/0_stateless/02724_persist_interval_type.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS saved_intervals_tmp; +create table saved_intervals_tmp Engine=Memory as SELECT number as EventID, toIntervalSecond(number+1) as v1, toIntervalHour(number+2) as v2, toIntervalNanosecond(number+3) as v3 from numbers(2); +with toDateTime64('2023-01-01 00:00:00.000000001', 9, 'US/Eastern') as c select c+v1 as c_v1, c+v2 as c_v2, c+v3 as c_v3, date_diff(second, c, c_v1), date_diff(hour, c, c_v2), date_diff(second, c, c_v3) from saved_intervals_tmp; +DROP TABLE IF EXISTS saved_intervals_tmp; + +DROP TABLE IF EXISTS saved_intervals_mgt; +create table saved_intervals_mgt Engine=MergeTree() ORDER BY EventID as SELECT number as EventID, toIntervalSecond(number+1) as v1, toIntervalHour(number+2) as v2, toIntervalNanosecond(number+3) as v3 from numbers(2); +with toDateTime64('2023-01-01 00:00:00.000000001', 9, 'US/Eastern') as c select c+v1 as c_v1, c+v2 as c_v2, c+v3 as c_v3, date_diff(second, c, c_v1), date_diff(hour, c, c_v2), date_diff(second, c, c_v3) from saved_intervals_mgt; +DROP TABLE IF EXISTS saved_intervals_mgt; + +DROP TABLE IF EXISTS t1; +CREATE table t1 (v1 IntervalMinute) ENGINE = Memory; +INSERT INTO t1 with toDateTime64('2023-01-01 00:00:00.000000001', 9, 'US/Eastern') as c SELECT EXTRACT(MINUTE FROM c+toIntervalSecond(number * 60)) from numbers(2); +select * from t1; +DROP TABLE IF EXISTS t1; \ No newline at end of file diff --git a/tests/queries/0_stateless/02724_show_indexes.reference b/tests/queries/0_stateless/02724_show_indexes.reference new file mode 100644 index 00000000000..20af3954fa5 --- /dev/null +++ b/tests/queries/0_stateless/02724_show_indexes.reference @@ -0,0 +1,42 @@ +--- Aliases of SHOW INDEX +tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter \N \N YES d, b +tbl 0 mm1_idx \N \N \N \N \N \N \N minmax \N \N YES a, c, d +tbl 0 mm2_idx \N \N \N \N \N \N \N minmax \N \N YES c, d, e +tbl 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c, a +tbl 0 set_idx \N \N \N \N \N \N \N set \N \N YES e +tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter \N \N YES d, b +tbl 0 mm1_idx \N \N \N \N \N \N \N minmax \N \N YES a, c, d +tbl 0 mm2_idx \N \N \N \N \N \N \N minmax \N \N YES c, d, e +tbl 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c, a +tbl 0 set_idx \N \N \N \N \N \N \N set \N \N YES e +tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter \N \N YES d, b +tbl 0 mm1_idx \N \N \N \N \N \N \N minmax \N \N YES a, c, d +tbl 0 mm2_idx \N \N \N \N \N \N \N minmax \N \N YES c, d, e +tbl 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c, a +tbl 0 set_idx \N \N \N \N \N \N \N set \N \N YES e +--- EXTENDED +tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter \N \N YES d, b +tbl 0 mm1_idx \N \N \N \N \N 
\N \N minmax \N \N YES a, c, d +tbl 0 mm2_idx \N \N \N \N \N \N \N minmax \N \N YES c, d, e +tbl 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c, a +tbl 0 set_idx \N \N \N \N \N \N \N set \N \N YES e +--- WHERE +tbl 0 mm1_idx \N \N \N \N \N \N \N minmax \N \N YES a, c, d +tbl 0 mm2_idx \N \N \N \N \N \N \N minmax \N \N YES c, d, e +--- Check with weird table names +$4@^7 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c +NULL 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c +\' 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c +\' 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c +--- Original table +tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter \N \N YES d, b +tbl 0 mm1_idx \N \N \N \N \N \N \N minmax \N \N YES a, c, d +tbl 0 mm2_idx \N \N \N \N \N \N \N minmax \N \N YES c, d, e +tbl 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c, a +tbl 0 set_idx \N \N \N \N \N \N \N set \N \N YES e +--- Equally named table in other database +tbl 0 mmi_idx \N \N \N \N \N \N \N minmax \N \N YES b +tbl 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES a +--- Short form +tbl 0 mmi_idx \N \N \N \N \N \N \N minmax \N \N YES b +tbl 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES a diff --git a/tests/queries/0_stateless/02724_show_indexes.sql b/tests/queries/0_stateless/02724_show_indexes.sql new file mode 100644 index 00000000000..ce8ed67c524 --- /dev/null +++ b/tests/queries/0_stateless/02724_show_indexes.sql @@ -0,0 +1,79 @@ +-- Tags: no-parallel +-- no-parallel: creates a custom database schema and expects to use it exclusively + +-- Create a test table and verify that the output of SHOW INDEXES is sane. +-- The matching of actual/expected results relies on the fact that the output of SHOW INDEX is sorted. +DROP TABLE IF EXISTS tbl; +CREATE TABLE tbl +( + a UInt64, + b UInt64, + c UInt64, + d UInt64, + e UInt64, + INDEX mm1_idx (a, c, d) TYPE minmax, + INDEX mm2_idx (c, d, e) TYPE minmax, + INDEX set_idx (e) TYPE set(100), + INDEX blf_idx (d, b) TYPE bloom_filter(0.8) +) +ENGINE = MergeTree +PRIMARY KEY (c, a); + +SELECT '--- Aliases of SHOW INDEX'; +SHOW INDEX FROM tbl; +SHOW INDEXES FROM tbl; +SHOW KEYS FROM tbl; + +SELECT '--- EXTENDED'; +SHOW EXTENDED INDEX FROM tbl; +-- +SELECT '--- WHERE'; +SHOW INDEX FROM tbl WHERE index_type LIKE '%minmax%'; + +SELECT '--- Check with weird table names'; + +DROP TABLE IF EXISTS `$4@^7`; +CREATE TABLE `$4@^7` (c String) ENGINE = MergeTree ORDER BY c; +SHOW INDEX FROM `$4@^7`; +DROP TABLE `$4@^7`; + +DROP TABLE IF EXISTS NULL; +CREATE TABLE NULL (c String) ENGINE = MergeTree ORDER BY c; +SHOW INDEX FROM NULL; +DROP TABLE NULL; + +DROP DATABASE IF EXISTS `'`; +CREATE DATABASE `'`; +CREATE TABLE `'`.`'` (c String) ENGINE = MergeTree ORDER BY c; +SHOW INDEX FROM `'` FROM `'`; +SHOW INDEX FROM `'`.`'`; -- abbreviated form +DROP TABLE `'`.`'`; +DROP DATABASE `'`; + +-- Create a table in a different database. Intentionally using the same table/column names as above so +-- we notice if something is buggy in the implementation of SHOW INDEX. 
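+-- As a cross-check against the reference file: with a correct implementation, SHOW INDEX on the second table must list +-- only its own mmi_idx and PRIMARY entries, not the mm1_idx/mm2_idx/set_idx/blf_idx skip indexes of the first table.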
+DROP DATABASE IF EXISTS database_123456789abcde; +CREATE DATABASE database_123456789abcde; -- pseudo-random database name + +DROP TABLE IF EXISTS database_123456789abcde.tbl; +CREATE TABLE database_123456789abcde.tbl +( + a UInt64, + b UInt64, + INDEX mmi_idx b TYPE minmax +) +ENGINE = MergeTree +PRIMARY KEY a; + +SELECT '--- Original table'; +SHOW INDEX FROM tbl; + +SELECT '--- Equally named table in other database'; +SHOW INDEX FROM tbl FROM database_123456789abcde; + +SELECT '--- Short form'; +SHOW INDEX FROM database_123456789abcde.tbl; + +DROP DATABASE database_123456789abcde; + +DROP TABLE tbl; diff --git a/tests/queries/0_stateless/02725_agg_projection_resprect_PK.reference b/tests/queries/0_stateless/02725_agg_projection_resprect_PK.reference new file mode 100644 index 00000000000..e6b95502e1e --- /dev/null +++ b/tests/queries/0_stateless/02725_agg_projection_resprect_PK.reference @@ -0,0 +1,2 @@ + ReadFromMergeTree (p1) + Granules: 1/12 diff --git a/tests/queries/0_stateless/02725_agg_projection_resprect_PK.sql b/tests/queries/0_stateless/02725_agg_projection_resprect_PK.sql new file mode 100644 index 00000000000..a2355f78f4c --- /dev/null +++ b/tests/queries/0_stateless/02725_agg_projection_resprect_PK.sql @@ -0,0 +1,32 @@ +-- Tags: no-random-merge-tree-settings + +DROP TABLE IF EXISTS t0; + +CREATE TABLE t0 +( + c1 Int64, + c2 Int64, + c3 Int64, + PROJECTION p1 + ( + SELECT + c1, + c2, + sum(c3) + GROUP BY + c2, + c1 + ) +) +ENGINE = MergeTree ORDER BY (c1, c2) settings min_bytes_for_wide_part = 10485760, min_rows_for_wide_part = 0; + +INSERT INTO t0 SELECT + number, + -number, + number +FROM numbers_mt(1e5); + +select * from (EXPLAIN indexes = 1 SELECT c1, sum(c3) FROM t0 GROUP BY c1) where explain like '%ReadFromMergeTree%'; +select * from (EXPLAIN indexes = 1 SELECT c1, sum(c3) FROM t0 WHERE c1 = 100 GROUP BY c1) where explain like '%Granules%'; + +DROP TABLE t0; diff --git a/tests/queries/0_stateless/02725_alias_columns_should_not_allow_compression_codec.reference b/tests/queries/0_stateless/02725_alias_columns_should_not_allow_compression_codec.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02725_alias_columns_should_not_allow_compression_codec.sql b/tests/queries/0_stateless/02725_alias_columns_should_not_allow_compression_codec.sql new file mode 100644 index 00000000000..083a3aefdaf --- /dev/null +++ b/tests/queries/0_stateless/02725_alias_columns_should_not_allow_compression_codec.sql @@ -0,0 +1,7 @@ +drop table if exists alias_column_should_not_allow_compression; +create table if not exists alias_column_should_not_allow_compression ( user_id UUID, user_id_hashed ALIAS (cityHash64(user_id))) engine=MergeTree() order by tuple(); +create table if not exists alias_column_should_not_allow_compression_fail ( user_id UUID, user_id_hashed ALIAS (cityHash64(user_id)) codec(LZ4HC(1))) engine=MergeTree() order by tuple(); -- { serverError BAD_ARGUMENTS } +alter table alias_column_should_not_allow_compression modify column user_id codec(LZ4HC(1)); +alter table alias_column_should_not_allow_compression modify column user_id_hashed codec(LZ4HC(1)); -- { serverError BAD_ARGUMENTS } +alter table alias_column_should_not_allow_compression add column user_id_hashed_1 UInt64 ALIAS (cityHash64(user_id)) codec(LZ4HC(1)); -- { serverError BAD_ARGUMENTS } +drop table if exists alias_column_should_not_allow_compression; diff --git a/tests/queries/0_stateless/02725_alias_with_restricted_keywords.reference 
b/tests/queries/0_stateless/02725_alias_with_restricted_keywords.reference new file mode 100644 index 00000000000..9874d6464ab --- /dev/null +++ b/tests/queries/0_stateless/02725_alias_with_restricted_keywords.reference @@ -0,0 +1 @@ +1 2 diff --git a/tests/queries/0_stateless/02725_alias_with_restricted_keywords.sql b/tests/queries/0_stateless/02725_alias_with_restricted_keywords.sql new file mode 100644 index 00000000000..6df0e856061 --- /dev/null +++ b/tests/queries/0_stateless/02725_alias_with_restricted_keywords.sql @@ -0,0 +1 @@ +SELECT 1 `array`, 2 "union"; diff --git a/tests/queries/0_stateless/02725_async_insert_table_setting.reference b/tests/queries/0_stateless/02725_async_insert_table_setting.reference new file mode 100644 index 00000000000..5f5235c569f --- /dev/null +++ b/tests/queries/0_stateless/02725_async_insert_table_setting.reference @@ -0,0 +1,4 @@ +2 +2 +default.t_mt_async_insert 1 +default.t_mt_sync_insert 0 diff --git a/tests/queries/0_stateless/02725_async_insert_table_setting.sh b/tests/queries/0_stateless/02725_async_insert_table_setting.sh new file mode 100755 index 00000000000..13911e8d677 --- /dev/null +++ b/tests/queries/0_stateless/02725_async_insert_table_setting.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -n --query " +DROP TABLE IF EXISTS t_mt_async_insert; +DROP TABLE IF EXISTS t_mt_sync_insert; + +CREATE TABLE t_mt_async_insert (id UInt64, s String) +ENGINE = MergeTree ORDER BY id SETTINGS async_insert = 1; + +CREATE TABLE t_mt_sync_insert (id UInt64, s String) +ENGINE = MergeTree ORDER BY id SETTINGS async_insert = 0;" + +url="${CLICKHOUSE_URL}&async_insert=0&wait_for_async_insert=1" + +${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO t_mt_async_insert VALUES (1, 'aa'), (2, 'bb')" +${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO t_mt_sync_insert VALUES (1, 'aa'), (2, 'bb')" + +${CLICKHOUSE_CLIENT} -n --query " +SELECT count() FROM t_mt_async_insert; +SELECT count() FROM t_mt_sync_insert; + +SYSTEM FLUSH LOGS; +SELECT tables[1], ProfileEvents['AsyncInsertQuery'] FROM system.query_log +WHERE + type = 'QueryFinish' AND + current_database = currentDatabase() AND + query ILIKE 'INSERT INTO t_mt_%sync_insert%' +ORDER BY tables[1]; + +DROP TABLE IF EXISTS t_mt_async_insert; +DROP TABLE IF EXISTS t_mt_sync_insert;" diff --git a/tests/queries/0_stateless/02725_cnf_large_check.reference b/tests/queries/0_stateless/02725_cnf_large_check.reference new file mode 100644 index 00000000000..1c915801174 --- /dev/null +++ b/tests/queries/0_stateless/02725_cnf_large_check.reference @@ -0,0 +1,4 @@ +8 +8 +2 +2 diff --git a/tests/queries/0_stateless/02725_cnf_large_check.sql b/tests/queries/0_stateless/02725_cnf_large_check.sql new file mode 100644 index 00000000000..0780e6bcdd3 --- /dev/null +++ b/tests/queries/0_stateless/02725_cnf_large_check.sql @@ -0,0 +1,27 @@ +DROP TABLE IF EXISTS 02725_cnf; + +CREATE TABLE 02725_cnf (c0 UInt8, c1 UInt8, c2 UInt8, c3 UInt8, c4 UInt8, c5 UInt8, c6 UInt8, c7 UInt8, c8 UInt8, c9 UInt8) ENGINE = Memory; + +INSERT INTO 02725_cnf VALUES (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0, 0, 0, 0, 1), (0, 0, 0, 0, 0, 0, 0, 0, 1, 0), (0, 0, 0, 0, 0, 0, 0, 0, 1, 1), (0, 0, 0, 0, 0, 0, 0, 1, 0, 0), (0, 0, 0, 0, 0, 0, 0, 1, 0, 1), (0, 0, 0, 0, 0, 0, 0, 1, 1, 0), (0, 0, 0, 0, 0, 0, 0, 1, 1, 1); + +SELECT count() +FROM 02725_cnf +WHERE (c5 AND (NOT c0)) OR ((NOT c3) AND (NOT c6) AND (NOT c1) AND (NOT c6)) 
OR (c7 AND (NOT c3) AND (NOT c5) AND (NOT c7)) OR ((NOT c8) AND c5) OR ((NOT c0)) OR ((NOT c8) AND (NOT c5) AND c1 AND c6 AND c3) OR (c7 AND (NOT c0) AND c6 AND c1 AND (NOT c2)) OR (c3 AND (NOT c9) AND c1) +SETTINGS convert_query_to_cnf = 1, allow_experimental_analyzer = 1; + +SELECT count() +FROM 02725_cnf +WHERE (c5 AND (NOT c0)) OR ((NOT c3) AND (NOT c6) AND (NOT c1) AND (NOT c6)) OR (c7 AND (NOT c3) AND (NOT c5) AND (NOT c7)) OR ((NOT c8) AND c5) OR ((NOT c0)) OR ((NOT c8) AND (NOT c5) AND c1 AND c6 AND c3) OR (c7 AND (NOT c0) AND c6 AND c1 AND (NOT c2)) OR (c3 AND (NOT c9) AND c1) +SETTINGS convert_query_to_cnf = 1, allow_experimental_analyzer = 0; + +SELECT count() +FROM 02725_cnf +WHERE ((NOT c2) AND c2 AND (NOT c1)) OR ((NOT c2) AND c3 AND (NOT c5)) OR ((NOT c7) AND (NOT c8)) OR (c9 AND c6 AND c8 AND (NOT c8) AND (NOT c7)) +SETTINGS convert_query_to_cnf = 1, allow_experimental_analyzer = 1; + +SELECT count() +FROM 02725_cnf +WHERE ((NOT c2) AND c2 AND (NOT c1)) OR ((NOT c2) AND c3 AND (NOT c5)) OR ((NOT c7) AND (NOT c8)) OR (c9 AND c6 AND c8 AND (NOT c8) AND (NOT c7)) +SETTINGS convert_query_to_cnf = 1, allow_experimental_analyzer = 0; + +DROP TABLE 02725_cnf; diff --git a/tests/queries/0_stateless/02725_keeper_fault_inject_sequential_cleanup.reference b/tests/queries/0_stateless/02725_keeper_fault_inject_sequential_cleanup.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02725_keeper_fault_inject_sequential_cleanup.sql b/tests/queries/0_stateless/02725_keeper_fault_inject_sequential_cleanup.sql new file mode 100644 index 00000000000..e1db4ba2fa6 --- /dev/null +++ b/tests/queries/0_stateless/02725_keeper_fault_inject_sequential_cleanup.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS keeper_fault_inject_sequential_cleanup; + +CREATE TABLE keeper_fault_inject_sequential_cleanup (d Int8) ENGINE = ReplicatedMergeTree('/clickhouse/{database}/test_02725/tables/keeper_fault_inject_sequential_cleanup', '1') ORDER BY d; + +INSERT INTO keeper_fault_inject_sequential_cleanup VALUES (1); +INSERT INTO keeper_fault_inject_sequential_cleanup SETTINGS insert_deduplicate = 0 VALUES (1); +INSERT INTO keeper_fault_inject_sequential_cleanup SETTINGS insert_deduplicate = 0, insert_keeper_fault_injection_probability = 0.4, insert_keeper_fault_injection_seed = 5619964844601345291 VALUES (1); + +-- with database ordinary it produced a warning +DROP TABLE keeper_fault_inject_sequential_cleanup; diff --git a/tests/queries/0_stateless/02725_local_query_parameters.reference b/tests/queries/0_stateless/02725_local_query_parameters.reference new file mode 100644 index 00000000000..9972842f982 --- /dev/null +++ b/tests/queries/0_stateless/02725_local_query_parameters.reference @@ -0,0 +1 @@ +1 1 diff --git a/tests/queries/0_stateless/02725_local_query_parameters.sh b/tests/queries/0_stateless/02725_local_query_parameters.sh new file mode 100755 index 00000000000..92d7f645454 --- /dev/null +++ b/tests/queries/0_stateless/02725_local_query_parameters.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel, no-replicated-database + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL --multiquery -q " + SET param_x=1; + SELECT {x:UInt64}, {x:String};" diff --git a/tests/queries/0_stateless/02725_memory-for-merges.reference b/tests/queries/0_stateless/02725_memory-for-merges.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02725_memory-for-merges.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02725_memory-for-merges.sql b/tests/queries/0_stateless/02725_memory-for-merges.sql new file mode 100644 index 00000000000..b6ae7af7f1a --- /dev/null +++ b/tests/queries/0_stateless/02725_memory-for-merges.sql @@ -0,0 +1,27 @@ +-- Tags: no-s3-storage +-- We allocate a lot of memory for buffers when reading or writing to S3 + +DROP TABLE IF EXISTS 02725_memory_for_merges SYNC; + +CREATE TABLE 02725_memory_for_merges +( n UInt64, + s String +) +ENGINE = MergeTree +ORDER BY n +SETTINGS merge_max_block_size_bytes=1024, index_granularity_bytes=1024; + +INSERT INTO 02725_memory_for_merges SELECT number, randomPrintableASCII(1000000) FROM numbers(100); +INSERT INTO 02725_memory_for_merges SELECT number, randomPrintableASCII(1000000) FROM numbers(100); +INSERT INTO 02725_memory_for_merges SELECT number, randomPrintableASCII(1000000) FROM numbers(100); +INSERT INTO 02725_memory_for_merges SELECT number, randomPrintableASCII(1000000) FROM numbers(100); +INSERT INTO 02725_memory_for_merges SELECT number, randomPrintableASCII(1000000) FROM numbers(100); + +OPTIMIZE TABLE 02725_memory_for_merges FINAL; + +SYSTEM FLUSH LOGS; + +WITH (SELECT uuid FROM system.tables WHERE table='02725_memory_for_merges' and database=currentDatabase()) as uuid +SELECT sum(peak_memory_usage) < 1024 * 1024 * 200 from system.part_log where table_uuid=uuid and event_type='MergeParts'; + +DROP TABLE IF EXISTS 02725_memory_for_merges SYNC; diff --git a/tests/queries/0_stateless/02725_null_group_key_with_rollup.reference b/tests/queries/0_stateless/02725_null_group_key_with_rollup.reference new file mode 100644 index 00000000000..e296f838e48 --- /dev/null +++ b/tests/queries/0_stateless/02725_null_group_key_with_rollup.reference @@ -0,0 +1,10 @@ +\N 2 + +\N 2 +\N 2 + +\N 2 +\N 2 +\N 2 +\N 2 +\N 2 diff --git a/tests/queries/0_stateless/02725_null_group_key_with_rollup.sql b/tests/queries/0_stateless/02725_null_group_key_with_rollup.sql new file mode 100644 index 00000000000..98f354e2911 --- /dev/null +++ b/tests/queries/0_stateless/02725_null_group_key_with_rollup.sql @@ -0,0 +1,13 @@ +set allow_suspicious_low_cardinality_types=1; +DROP TABLE IF EXISTS group_by_null_key; +CREATE TABLE group_by_null_key (c1 Nullable(Int32), c2 LowCardinality(Nullable(Int32))) ENGINE = Memory(); +INSERT INTO group_by_null_key VALUES (null, null), (null, null); + +select c1, count(*) from group_by_null_key group by c1 WITH TOTALS; +select c2, count(*) from group_by_null_key group by c2 WITH TOTALS; + +select c1, count(*) from group_by_null_key group by ROLLUP(c1); +select c2, count(*) from group_by_null_key group by ROLLUP(c2); + + +DROP TABLE group_by_null_key; diff --git a/tests/queries/0_stateless/02725_object_column_alter.reference b/tests/queries/0_stateless/02725_object_column_alter.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02725_object_column_alter.sql b/tests/queries/0_stateless/02725_object_column_alter.sql new file mode 100644 index 00000000000..0e41070742a --- /dev/null +++ b/tests/queries/0_stateless/02725_object_column_alter.sql @@ -0,0 +1,15 @@ 
+-- Eventually this ALTER should be fixed + +SET allow_experimental_object_type=1; + +DROP TABLE IF EXISTS t_to; +DROP TABLE IF EXISTS t_from; + +CREATE TABLE t_to (id UInt64, value Nullable(String)) ENGINE MergeTree() ORDER BY id; +CREATE TABLE t_from (id UInt64, value Object('json')) ENGINE MergeTree() ORDER BY id; + +ALTER TABLE t_to MODIFY COLUMN value Object('json'); -- { serverError BAD_ARGUMENTS } +ALTER TABLE t_from MODIFY COLUMN value Nullable(String); -- { serverError BAD_ARGUMENTS } + +DROP TABLE t_to; +DROP TABLE t_from; diff --git a/tests/queries/0_stateless/02725_parquet_preserve_order.reference b/tests/queries/0_stateless/02725_parquet_preserve_order.reference new file mode 100644 index 00000000000..e9c8f99bb33 --- /dev/null +++ b/tests/queries/0_stateless/02725_parquet_preserve_order.reference @@ -0,0 +1,12 @@ +0 +1 +2 +(Expression) +ExpressionTransform + (ReadFromStorage) + File 0 → 1 +(Expression) +ExpressionTransform × 2 + (ReadFromStorage) + Resize 1 → 2 + File 0 → 1 diff --git a/tests/queries/0_stateless/02725_parquet_preserve_order.sh b/tests/queries/0_stateless/02725_parquet_preserve_order.sh new file mode 100755 index 00000000000..ea3e4219e35 --- /dev/null +++ b/tests/queries/0_stateless/02725_parquet_preserve_order.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +# This file has a row group with 2 rows, then a row group with 1 row. +# It'll be read into two blocks. The first block will sleep 2x longer than the second. +# So reordering is very likely if the order-preservation doesn't work. + +$CLICKHOUSE_LOCAL -q "select number+sleepEachRow(3) from file('$CURDIR/data_parquet/02725_data.parquet') settings input_format_parquet_preserve_order=1" + +$CLICKHOUSE_LOCAL -q "explain pipeline select number+sleepEachRow(3) from file('$CURDIR/data_parquet/02725_data.parquet') settings input_format_parquet_preserve_order=1, max_threads=2" +$CLICKHOUSE_LOCAL -q "explain pipeline select number+sleepEachRow(3) from file('$CURDIR/data_parquet/02725_data.parquet') settings input_format_parquet_preserve_order=0, parallelize_output_from_storages=1, max_threads=2" diff --git a/tests/queries/0_stateless/02725_start_stop_fetches.reference b/tests/queries/0_stateless/02725_start_stop_fetches.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02725_start_stop_fetches.sh b/tests/queries/0_stateless/02725_start_stop_fetches.sh new file mode 100755 index 00000000000..0ca687ae951 --- /dev/null +++ b/tests/queries/0_stateless/02725_start_stop_fetches.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash +# Tags: race, zookeeper, no-parallel, no-upgrade-check, no-replicated-database + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +set -e + +NUM_REPLICAS=5 + +for i in $(seq 1 $NUM_REPLICAS); do + $CLICKHOUSE_CLIENT -n -q " + DROP TABLE IF EXISTS r$i SYNC; + CREATE TABLE r$i (x UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/r', 'r$i') ORDER BY x SETTINGS replicated_deduplication_window = 1, allow_remote_fs_zero_copy_replication = 1; + " +done + +function thread { + while true; do + REPLICA=$(($RANDOM % 5 + 1)) + $CLICKHOUSE_CLIENT --query "INSERT INTO r$REPLICA SELECT rand()" + done +} + +function nemesis_thread1 { + while true; do + REPLICA=$(($RANDOM % 5 + 1)) + $CLICKHOUSE_CLIENT --query "SYSTEM STOP REPLICATED SENDS r$REPLICA" + sleep 0.5 + $CLICKHOUSE_CLIENT --query "SYSTEM START REPLICATED SENDS r$REPLICA" + done +} + +function nemesis_thread2 { + while true; do + REPLICA=$(($RANDOM % 5 + 1)) + $CLICKHOUSE_CLIENT --query "SYSTEM STOP FETCHES r$REPLICA" + sleep 0.5 + $CLICKHOUSE_CLIENT --query "SYSTEM START FETCHES r$REPLICA" + done +} + + + +export -f thread +export -f nemesis_thread1 +export -f nemesis_thread2 + +TIMEOUT=20 + +timeout $TIMEOUT bash -c thread 2>/dev/null & +timeout $TIMEOUT bash -c thread 2>/dev/null & +timeout $TIMEOUT bash -c thread 2>/dev/null & +timeout $TIMEOUT bash -c nemesis_thread1 2>/dev/null & +timeout $TIMEOUT bash -c nemesis_thread1 2>/dev/null & +timeout $TIMEOUT bash -c nemesis_thread1 2>/dev/null & +timeout $TIMEOUT bash -c nemesis_thread2 2>/dev/null & +timeout $TIMEOUT bash -c nemesis_thread2 2>/dev/null & +timeout $TIMEOUT bash -c nemesis_thread2 2>/dev/null & + +wait + + +for i in $(seq 1 $NUM_REPLICAS); do + $CLICKHOUSE_CLIENT -q "SYSTEM START FETCHES r$REPLICA" + $CLICKHOUSE_CLIENT -q "SYSTEM START REPLICATED SENDS r$REPLICA" +done + +for i in $(seq 1 $NUM_REPLICAS); do + $CLICKHOUSE_CLIENT --max_execution_time 60 -q "SYSTEM SYNC REPLICA r$i PULL" +done + +for i in $(seq 1 $NUM_REPLICAS); do + $CLICKHOUSE_CLIENT -q "DROP TABLE r$i" 2>/dev/null & +done + +wait diff --git a/tests/queries/0_stateless/02725_url_support_virtual_column.reference b/tests/queries/0_stateless/02725_url_support_virtual_column.reference new file mode 100644 index 00000000000..d20c91344e5 --- /dev/null +++ b/tests/queries/0_stateless/02725_url_support_virtual_column.reference @@ -0,0 +1,4 @@ +/ + + 1 +/ 1 diff --git a/tests/queries/0_stateless/02725_url_support_virtual_column.sql b/tests/queries/0_stateless/02725_url_support_virtual_column.sql new file mode 100644 index 00000000000..02a4dfc449b --- /dev/null +++ b/tests/queries/0_stateless/02725_url_support_virtual_column.sql @@ -0,0 +1,8 @@ +-- Tags: no-parallel + +select _path from url('http://127.0.0.1:8123/?query=select+1&user=default', LineAsString, 's String'); +select _file from url('http://127.0.0.1:8123/?query=select+1&user=default', LineAsString, 's String'); +select _file, count() from url('http://127.0.0.1:8123/?query=select+1&user=default', LineAsString, 's String') group by _file; +select _path, _file, s from url('http://127.0.0.1:8123/?query=select+1&user=default', LineAsString, 's String'); +select _path, _file, s from url('http://127.0.0.1:8123/?query=select+1&user=default&password=wrong', LineAsString, 's String'); -- { serverError RECEIVED_ERROR_FROM_REMOTE_IO_SERVER } + diff --git a/tests/queries/0_stateless/02726_set_allow_experimental_query_cache_as_obsolete.reference b/tests/queries/0_stateless/02726_set_allow_experimental_query_cache_as_obsolete.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git 
a/tests/queries/0_stateless/02726_set_allow_experimental_query_cache_as_obsolete.sql b/tests/queries/0_stateless/02726_set_allow_experimental_query_cache_as_obsolete.sql new file mode 100644 index 00000000000..244ba4e959a --- /dev/null +++ b/tests/queries/0_stateless/02726_set_allow_experimental_query_cache_as_obsolete.sql @@ -0,0 +1,2 @@ +SET allow_experimental_query_cache = 0; +SET allow_experimental_query_cache = 1; diff --git a/tests/queries/0_stateless/02730_dictionary_hashed_load_factor_element_count.reference b/tests/queries/0_stateless/02730_dictionary_hashed_load_factor_element_count.reference new file mode 100644 index 00000000000..09d337562b5 --- /dev/null +++ b/tests/queries/0_stateless/02730_dictionary_hashed_load_factor_element_count.reference @@ -0,0 +1,2 @@ +dict_sharded 1 1000000 0.4768 +dict_sharded_multi 5 1000000 0.4768 diff --git a/tests/queries/0_stateless/02730_dictionary_hashed_load_factor_element_count.sql b/tests/queries/0_stateless/02730_dictionary_hashed_load_factor_element_count.sql new file mode 100644 index 00000000000..1e42f56889d --- /dev/null +++ b/tests/queries/0_stateless/02730_dictionary_hashed_load_factor_element_count.sql @@ -0,0 +1,17 @@ +DROP DICTIONARY IF EXISTS dict_sharded; +DROP DICTIONARY IF EXISTS dict_sharded_multi; +DROP TABLE IF EXISTS dict_data; + +CREATE TABLE dict_data (key UInt64, v0 UInt16, v1 UInt16, v2 UInt16, v3 UInt16, v4 UInt16) engine=Memory() AS SELECT number, number%65535, number%65535, number%6553, number%655355, number%65535 FROM numbers(1e6); + +CREATE DICTIONARY dict_sharded (key UInt64, v0 UInt16) PRIMARY KEY key SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(MIN 0 MAX 0) LAYOUT(HASHED(SHARDS 32)); +SYSTEM RELOAD DICTIONARY dict_sharded; +SELECT name, length(attribute.names), element_count, round(load_factor, 4) FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_sharded'; +DROP DICTIONARY dict_sharded; + +CREATE DICTIONARY dict_sharded_multi (key UInt64, v0 UInt16, v1 UInt16, v2 UInt16, v3 UInt16, v4 UInt16) PRIMARY KEY key SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(MIN 0 MAX 0) LAYOUT(HASHED(SHARDS 32)); +SYSTEM RELOAD DICTIONARY dict_sharded_multi; +SELECT name, length(attribute.names), element_count, round(load_factor, 4) FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_sharded_multi'; +DROP DICTIONARY dict_sharded_multi; + +DROP TABLE dict_data; diff --git a/tests/queries/0_stateless/02730_with_fill_by_sorting_prefix.reference b/tests/queries/0_stateless/02730_with_fill_by_sorting_prefix.reference new file mode 100644 index 00000000000..260f08027f1 --- /dev/null +++ b/tests/queries/0_stateless/02730_with_fill_by_sorting_prefix.reference @@ -0,0 +1,227 @@ +-- { echoOn } +set use_with_fill_by_sorting_prefix=1; +-- corner case with constant sort prefix +SELECT number +FROM numbers(1) +ORDER BY 10 ASC, number DESC WITH FILL FROM 1 +SETTINGS enable_positional_arguments=0; +1 +0 +-- sensor table +drop table if exists ts; +create table ts (sensor_id UInt64, timestamp UInt64, value Float64) ENGINE=MergeTree() ORDER BY (sensor_id, timestamp); +insert into ts VALUES (1, 10, 1), (1, 12, 2), (3, 5, 1), (3, 7, 3), (5, 1, 1), (5, 3, 1); +-- FillingTransform: 6 rows will be processed in 1 chunks +select * from ts order by sensor_id, timestamp with fill step 1; +1 10 1 +1 11 0 +1 12 2 +3 5 1 +3 6 0 +3 7 3 +5 1 1 +5 2 0 +5 3 1 +drop table if exists ts; +create table ts (sensor_id UInt64, timestamp UInt64, value Float64) ENGINE=MergeTree() ORDER BY (sensor_id, timestamp); +system 
stop merges ts; +-- FillingTransform: 6 rows will be processed in 3 chunks with 2 rows each +insert into ts VALUES (1, 10, 1), (1, 12, 1); +insert into ts VALUES (3, 5, 1), (3, 7, 1); +insert into ts VALUES (5, 1, 1), (5, 3, 1); +select * from ts order by sensor_id, timestamp with fill step 1 settings max_block_size=2; +1 10 1 +1 11 0 +1 12 1 +3 5 1 +3 6 0 +3 7 1 +5 1 1 +5 2 0 +5 3 1 +drop table if exists ts; +create table ts (sensor_id UInt64, timestamp UInt64, value Float64) ENGINE=MergeTree() ORDER BY (sensor_id, timestamp); +system stop merges ts; +-- FillingTransform: 6 rows will be processed in 2 chunks with 3 rows each +insert into ts VALUES (1, 10, 1), (1, 12, 1), (3, 5, 1); +insert into ts VALUES (3, 7, 1), (5, 1, 1), (5, 3, 1); +select * from ts order by sensor_id, timestamp with fill step 1 settings max_block_size=3; +1 10 1 +1 11 0 +1 12 1 +3 5 1 +3 6 0 +3 7 1 +5 1 1 +5 2 0 +5 3 1 +-- FROM and TO +-- ASC order in sorting prefix +select * from ts order by sensor_id, timestamp with fill from 6 to 10 step 1 interpolate (value as 9999); +1 6 9999 +1 7 9999 +1 8 9999 +1 9 9999 +1 10 1 +1 12 1 +3 5 1 +3 6 9999 +3 7 1 +3 8 9999 +3 9 9999 +5 1 1 +5 3 1 +5 6 9999 +5 7 9999 +5 8 9999 +5 9 9999 +select * from ts order by sensor_id, timestamp with fill from 6 to 10 step 1 interpolate (value as 9999) settings use_with_fill_by_sorting_prefix=0; +0 6 9999 +0 7 9999 +0 8 9999 +0 9 9999 +1 10 1 +1 12 1 +3 5 1 +3 7 1 +5 1 1 +5 3 1 +-- DESC order in sorting prefix +select * from ts order by sensor_id DESC, timestamp with fill from 6 to 10 step 1 interpolate (value as 9999); +5 1 1 +5 3 1 +5 6 9999 +5 7 9999 +5 8 9999 +5 9 9999 +3 5 1 +3 6 9999 +3 7 1 +3 8 9999 +3 9 9999 +1 6 9999 +1 7 9999 +1 8 9999 +1 9 9999 +1 10 1 +1 12 1 +select * from ts order by sensor_id DESC, timestamp with fill from 6 to 10 step 1 interpolate (value as 9999) settings use_with_fill_by_sorting_prefix=0; +5 1 1 +5 3 1 +3 5 1 +0 6 9999 +3 7 1 +0 8 9999 +0 9 9999 +1 10 1 +1 12 1 +-- without TO +-- ASC order in sorting prefix +select * from ts order by sensor_id, timestamp with fill from 6 step 1 interpolate (value as 9999); +1 6 9999 +1 7 9999 +1 8 9999 +1 9 9999 +1 10 1 +1 11 9999 +1 12 1 +3 5 1 +3 6 9999 +3 7 1 +5 1 1 +5 3 1 +select * from ts order by sensor_id, timestamp with fill from 6 step 1 interpolate (value as 9999) settings use_with_fill_by_sorting_prefix=0; +0 6 9999 +0 7 9999 +0 8 9999 +0 9 9999 +1 10 1 +0 11 9999 +1 12 1 +3 5 1 +3 7 1 +5 1 1 +5 3 1 +-- DESC order in sorting prefix +select * from ts order by sensor_id DESC, timestamp with fill from 6 step 1 interpolate (value as 9999); +5 1 1 +5 3 1 +3 5 1 +3 6 9999 +3 7 1 +1 6 9999 +1 7 9999 +1 8 9999 +1 9 9999 +1 10 1 +1 11 9999 +1 12 1 +select * from ts order by sensor_id DESC, timestamp with fill from 6 step 1 interpolate (value as 9999) settings use_with_fill_by_sorting_prefix=0; +5 1 1 +5 3 1 +3 5 1 +0 6 9999 +3 7 1 +0 8 9999 +0 9 9999 +1 10 1 +0 11 9999 +1 12 1 +-- without FROM +-- ASC order in sorting prefix +select * from ts order by sensor_id, timestamp with fill to 10 step 1 interpolate (value as 9999); +1 10 1 +1 12 1 +3 5 1 +3 6 9999 +3 7 1 +3 8 9999 +3 9 9999 +5 1 1 +5 2 9999 +5 3 1 +5 4 9999 +5 5 9999 +5 6 9999 +5 7 9999 +5 8 9999 +5 9 9999 +select * from ts order by sensor_id, timestamp with fill to 10 step 1 interpolate (value as 9999) settings use_with_fill_by_sorting_prefix=0; +1 10 1 +1 12 1 +3 5 1 +3 7 1 +5 1 1 +5 3 1 +-- DESC order in sorting prefix +select * from ts order by sensor_id DESC, timestamp with fill to 10 step 1 interpolate (value 
as 9999); +5 1 1 +5 2 9999 +5 3 1 +5 4 9999 +5 5 9999 +5 6 9999 +5 7 9999 +5 8 9999 +5 9 9999 +3 5 1 +3 6 9999 +3 7 1 +3 8 9999 +3 9 9999 +1 10 1 +1 12 1 +select * from ts order by sensor_id DESC, timestamp with fill to 10 step 1 interpolate (value as 9999) settings use_with_fill_by_sorting_prefix=0; +5 1 1 +0 2 9999 +5 3 1 +0 4 9999 +3 5 1 +0 6 9999 +3 7 1 +0 8 9999 +0 9 9999 +1 10 1 +1 12 1 +-- checking that sorting prefix columns can't be used in INTERPOLATE +SELECT * FROM ts ORDER BY sensor_id, value, timestamp WITH FILL FROM 6 TO 10 INTERPOLATE ( value AS 1 ); -- { serverError INVALID_WITH_FILL_EXPRESSION } diff --git a/tests/queries/0_stateless/02730_with_fill_by_sorting_prefix.sql b/tests/queries/0_stateless/02730_with_fill_by_sorting_prefix.sql new file mode 100644 index 00000000000..e2f1ce29d5c --- /dev/null +++ b/tests/queries/0_stateless/02730_with_fill_by_sorting_prefix.sql @@ -0,0 +1,60 @@ +-- { echoOn } +set use_with_fill_by_sorting_prefix=1; + +-- corner case with constant sort prefix +SELECT number +FROM numbers(1) +ORDER BY 10 ASC, number DESC WITH FILL FROM 1 +SETTINGS enable_positional_arguments=0; + +-- sensor table +drop table if exists ts; +create table ts (sensor_id UInt64, timestamp UInt64, value Float64) ENGINE=MergeTree() ORDER BY (sensor_id, timestamp); +insert into ts VALUES (1, 10, 1), (1, 12, 2), (3, 5, 1), (3, 7, 3), (5, 1, 1), (5, 3, 1); +-- FillingTransform: 6 rows will be processed in 1 chunks +select * from ts order by sensor_id, timestamp with fill step 1; + +drop table if exists ts; +create table ts (sensor_id UInt64, timestamp UInt64, value Float64) ENGINE=MergeTree() ORDER BY (sensor_id, timestamp); +system stop merges ts; +-- FillingTransform: 6 rows will be processed in 3 chunks with 2 rows each +insert into ts VALUES (1, 10, 1), (1, 12, 1); +insert into ts VALUES (3, 5, 1), (3, 7, 1); +insert into ts VALUES (5, 1, 1), (5, 3, 1); +select * from ts order by sensor_id, timestamp with fill step 1 settings max_block_size=2; + +drop table if exists ts; +create table ts (sensor_id UInt64, timestamp UInt64, value Float64) ENGINE=MergeTree() ORDER BY (sensor_id, timestamp); +system stop merges ts; +-- FillingTransform: 6 rows will be processed in 2 chunks with 3 rows each +insert into ts VALUES (1, 10, 1), (1, 12, 1), (3, 5, 1); +insert into ts VALUES (3, 7, 1), (5, 1, 1), (5, 3, 1); +select * from ts order by sensor_id, timestamp with fill step 1 settings max_block_size=3; + +-- FROM and TO +-- ASC order in sorting prefix +select * from ts order by sensor_id, timestamp with fill from 6 to 10 step 1 interpolate (value as 9999); +select * from ts order by sensor_id, timestamp with fill from 6 to 10 step 1 interpolate (value as 9999) settings use_with_fill_by_sorting_prefix=0; + +-- DESC order in sorting prefix +select * from ts order by sensor_id DESC, timestamp with fill from 6 to 10 step 1 interpolate (value as 9999); +select * from ts order by sensor_id DESC, timestamp with fill from 6 to 10 step 1 interpolate (value as 9999) settings use_with_fill_by_sorting_prefix=0; + +-- without TO +-- ASC order in sorting prefix +select * from ts order by sensor_id, timestamp with fill from 6 step 1 interpolate (value as 9999); +select * from ts order by sensor_id, timestamp with fill from 6 step 1 interpolate (value as 9999) settings use_with_fill_by_sorting_prefix=0; +-- DESC order in sorting prefix +select * from ts order by sensor_id DESC, timestamp with fill from 6 step 1 interpolate (value as 9999); +select * from ts order by sensor_id DESC, timestamp with fill 
from 6 step 1 interpolate (value as 9999) settings use_with_fill_by_sorting_prefix=0; + +-- without FROM +-- ASC order in sorting prefix +select * from ts order by sensor_id, timestamp with fill to 10 step 1 interpolate (value as 9999); +select * from ts order by sensor_id, timestamp with fill to 10 step 1 interpolate (value as 9999) settings use_with_fill_by_sorting_prefix=0; +-- DESC order in sorting prefix +select * from ts order by sensor_id DESC, timestamp with fill to 10 step 1 interpolate (value as 9999); +select * from ts order by sensor_id DESC, timestamp with fill to 10 step 1 interpolate (value as 9999) settings use_with_fill_by_sorting_prefix=0; + +-- checking that sorting prefix columns can't be used in INTERPOLATE +SELECT * FROM ts ORDER BY sensor_id, value, timestamp WITH FILL FROM 6 TO 10 INTERPOLATE ( value AS 1 ); -- { serverError INVALID_WITH_FILL_EXPRESSION } diff --git a/tests/queries/0_stateless/02731_in_operator_with_one_size_tuple.reference b/tests/queries/0_stateless/02731_in_operator_with_one_size_tuple.reference new file mode 100644 index 00000000000..d7ede115bc2 --- /dev/null +++ b/tests/queries/0_stateless/02731_in_operator_with_one_size_tuple.reference @@ -0,0 +1,2 @@ +2023-04-17 1 +2023-04-17 1 diff --git a/tests/queries/0_stateless/02731_in_operator_with_one_size_tuple.sql b/tests/queries/0_stateless/02731_in_operator_with_one_size_tuple.sql new file mode 100644 index 00000000000..eab7d24a92e --- /dev/null +++ b/tests/queries/0_stateless/02731_in_operator_with_one_size_tuple.sql @@ -0,0 +1,10 @@ +CREATE TABLE test(`report_date` Date, `sspid` UInt64) ENGINE MergeTree PARTITION BY report_date ORDER BY report_date; + +INSERT INTO test SELECT toDate('2023-04-20'), 0; +INSERT INTO test SELECT toDate('2023-04-19'), 0; +INSERT INTO test SELECT toDate('2023-04-17'), 1; +INSERT INTO test SELECT toDate('2023-04-17'), 1; + + +SELECT * FROM test WHERE tuple(report_date) IN tuple(toDate('2023-04-17')); +DROP TABLE test; \ No newline at end of file diff --git a/tests/queries/0_stateless/02731_nothing_deserialization.reference b/tests/queries/0_stateless/02731_nothing_deserialization.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02731_nothing_deserialization.sql b/tests/queries/0_stateless/02731_nothing_deserialization.sql new file mode 100644 index 00000000000..7526bce3578 --- /dev/null +++ b/tests/queries/0_stateless/02731_nothing_deserialization.sql @@ -0,0 +1 @@ +SELECT CAST('\x01\x00' AS AggregateFunction(nothingArrayIf, Array(Nullable(Nothing)), Nullable(Nothing))); -- { serverError INCORRECT_DATA } diff --git a/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.reference b/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.reference new file mode 100644 index 00000000000..df606679523 --- /dev/null +++ b/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.reference @@ -0,0 +1,44 @@ +=============== INNER QUERY (NO PARALLEL) =============== +0 PJFiUe#J2O _s\' 14427935816175499794 +1 >T%O ,z< 17537932797009027240 +12 D[6,P #}Lmb[ ZzU 6394957109822140795 +18 $_N- 24422838680427462 +2 bX?}ix [ Ny]2 G 16242612901291874718 +20 VE] Y 15120036904703536841 +22 Ti~3)N)< A!( 3 18361093572663329113 +23 Sx>b:^UG XpedE)Q: 7433019734386307503 +29 2j&S)ba?XG QuQj 17163829389637435056 +3 UlI+1 14144472852965836438 +=============== INNER QUERY (PARALLEL) =============== +0 PJFiUe#J2O _s\' 14427935816175499794 +1 >T%O ,z< 17537932797009027240 +12 D[6,P #}Lmb[ ZzU 6394957109822140795 +18 $_N- 
24422838680427462 +2 bX?}ix [ Ny]2 G 16242612901291874718 +20 VE] Y 15120036904703536841 +22 Ti~3)N)< A!( 3 18361093572663329113 +23 Sx>b:^UG XpedE)Q: 7433019734386307503 +29 2j&S)ba?XG QuQj 17163829389637435056 +3 UlI+1 14144472852965836438 +=============== QUERIES EXECUTED BY PARALLEL INNER QUERY ALONE =============== +0 3 SELECT `key`, `value1`, `value2`, toUInt64(min(`time`)) AS `start_ts` FROM `default`.`join_inner_table` PREWHERE (`id` = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (`number` > toUInt64(\'1610517366120\')) GROUP BY `key`, `value1`, `value2` ORDER BY `key` ASC, `value1` ASC, `value2` ASC LIMIT 10 +1 1 -- Parallel inner query alone\nSELECT\n key,\n value1,\n value2,\n toUInt64(min(time)) AS start_ts\nFROM join_inner_table\nPREWHERE (id = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (number > toUInt64(\'1610517366120\'))\nGROUP BY key, value1, value2\nORDER BY key, value1, value2\nLIMIT 10\nSETTINGS allow_experimental_parallel_reading_from_replicas = 1; +=============== OUTER QUERY (NO PARALLEL) =============== +>T%O ,z< 10 +NQTpY# W\\Xx4 10 +PJFiUe#J2O _s\' 10 +U c 10 +UlI+1 10 +bX?}ix [ Ny]2 G 10 +tT%O ,z< 10 +NQTpY# W\\Xx4 10 +PJFiUe#J2O _s\' 10 +U c 10 +UlI+1 10 +bX?}ix [ Ny]2 G 10 +t toUInt64(\'1610517366120\')) GROUP BY `key`, `value1`, `value2` +0 3 SELECT `value1`, `value2`, count() AS `count` FROM `default`.`join_outer_table` ALL INNER JOIN `_data_11888098645495698704_17868075224240210014` USING (`key`) GROUP BY `key`, `value1`, `value2` +1 1 -- Parallel full query\nSELECT\n value1,\n value2,\n avg(count) AS avg\nFROM\n (\n SELECT\n key,\n value1,\n value2,\n count() AS count\n FROM join_outer_table\n INNER JOIN\n (\n SELECT\n key,\n value1,\n value2,\n toUInt64(min(time)) AS start_ts\n FROM join_inner_table\n PREWHERE (id = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (number > toUInt64(\'1610517366120\'))\n GROUP BY key, value1, value2\n ) USING (key)\n GROUP BY key, value1, value2\n )\nGROUP BY value1, value2\nORDER BY value1, value2\nSETTINGS allow_experimental_parallel_reading_from_replicas = 1; diff --git a/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql b/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql new file mode 100644 index 00000000000..29c20980c14 --- /dev/null +++ b/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql @@ -0,0 +1,182 @@ +-- Tags: zookeeper + +CREATE TABLE join_inner_table +( + id UUID, + key String, + number Int64, + value1 String, + value2 String, + time Int64 +) +ENGINE=ReplicatedMergeTree('/clickhouse/tables/{database}/join_inner_table', 'r1') +ORDER BY (id, number, key); + +INSERT INTO join_inner_table +SELECT + '833c9e22-c245-4eb5-8745-117a9a1f26b1'::UUID as id, + rowNumberInAllBlocks()::String as key, + * FROM generateRandom('number Int64, value1 String, value2 String, time Int64', 1, 10, 2) +LIMIT 100; + +SET allow_experimental_analyzer = 0; +SET max_parallel_replicas = 3; +SET prefer_localhost_replica = 1; +SET cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost'; +SET use_hedged_requests = 0; +SET joined_subquery_requires_alias = 0; + +SELECT '=============== INNER QUERY (NO PARALLEL) ==============='; + +SELECT + key, + value1, + value2, + toUInt64(min(time)) AS start_ts +FROM join_inner_table + PREWHERE (id = '833c9e22-c245-4eb5-8745-117a9a1f26b1') AND (number > toUInt64('1610517366120')) +GROUP BY key, value1, value2 +ORDER BY key, value1, value2 +LIMIT 10; + +SELECT '=============== INNER QUERY (PARALLEL) ==============='; + +-- 
Parallel inner query alone +SELECT + key, + value1, + value2, + toUInt64(min(time)) AS start_ts +FROM join_inner_table +PREWHERE (id = '833c9e22-c245-4eb5-8745-117a9a1f26b1') AND (number > toUInt64('1610517366120')) +GROUP BY key, value1, value2 +ORDER BY key, value1, value2 +LIMIT 10 +SETTINGS allow_experimental_parallel_reading_from_replicas = 1; + +SELECT '=============== QUERIES EXECUTED BY PARALLEL INNER QUERY ALONE ==============='; + +SYSTEM FLUSH LOGS; +-- There should be 4 queries. The main query as received by the initiator and the 3 equal queries sent to each replica +SELECT is_initial_query, count() as c, query, +FROM system.query_log +WHERE + event_date >= yesterday() + AND type = 'QueryFinish' + AND initial_query_id = + ( + SELECT query_id + FROM system.query_log + WHERE + current_database = currentDatabase() + AND event_date >= yesterday() + AND type = 'QueryFinish' + AND query LIKE '-- Parallel inner query alone%' + ) +GROUP BY is_initial_query, query +ORDER BY is_initial_query, c, query; + +---- Query with JOIN + +CREATE TABLE join_outer_table +( + id UUID, + key String, + otherValue1 String, + otherValue2 String, + time Int64 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/join_outer_table', 'r1') +ORDER BY (id, time, key); + +INSERT INTO join_outer_table +SELECT + '833c9e22-c245-4eb5-8745-117a9a1f26b1'::UUID as id, + (rowNumberInAllBlocks() % 10)::String as key, + * FROM generateRandom('otherValue1 String, otherValue2 String, time Int64', 1, 10, 2) +LIMIT 100; + + +SELECT '=============== OUTER QUERY (NO PARALLEL) ==============='; + +SELECT + value1, + value2, + avg(count) AS avg +FROM +( + SELECT + key, + value1, + value2, + count() AS count + FROM join_outer_table + INNER JOIN + ( + SELECT + key, + value1, + value2, + toUInt64(min(time)) AS start_ts + FROM join_inner_table + PREWHERE (id = '833c9e22-c245-4eb5-8745-117a9a1f26b1') AND (number > toUInt64('1610517366120')) + GROUP BY key, value1, value2 + ) USING (key) + GROUP BY key, value1, value2 +) +GROUP BY value1, value2 +ORDER BY value1, value2; + +SELECT '=============== OUTER QUERY (PARALLEL) ==============='; + +-- Parallel full query +SELECT + value1, + value2, + avg(count) AS avg +FROM + ( + SELECT + key, + value1, + value2, + count() AS count + FROM join_outer_table + INNER JOIN + ( + SELECT + key, + value1, + value2, + toUInt64(min(time)) AS start_ts + FROM join_inner_table + PREWHERE (id = '833c9e22-c245-4eb5-8745-117a9a1f26b1') AND (number > toUInt64('1610517366120')) + GROUP BY key, value1, value2 + ) USING (key) + GROUP BY key, value1, value2 + ) +GROUP BY value1, value2 +ORDER BY value1, value2 +SETTINGS allow_experimental_parallel_reading_from_replicas = 1; + +SYSTEM FLUSH LOGS; + +-- There should be 7 queries. 
The main query as received by the initiator, the 3 equal queries to execute the subquery +-- in the inner join and the 3 queries executing the whole query (but replacing the subquery with a temp table) +SELECT is_initial_query, count() as c, query, +FROM system.query_log +WHERE + event_date >= yesterday() + AND type = 'QueryFinish' + AND initial_query_id = + ( + SELECT query_id + FROM system.query_log + WHERE + current_database = currentDatabase() + AND event_date >= yesterday() + AND type = 'QueryFinish' + AND query LIKE '-- Parallel full query%' + ) +GROUP BY is_initial_query, query +ORDER BY is_initial_query, c, query; diff --git a/tests/queries/0_stateless/02731_parquet_s3.reference b/tests/queries/0_stateless/02731_parquet_s3.reference new file mode 100644 index 00000000000..5a5aaeb0068 --- /dev/null +++ b/tests/queries/0_stateless/02731_parquet_s3.reference @@ -0,0 +1 @@ +12639441726720293784 diff --git a/tests/queries/0_stateless/02731_parquet_s3.sql b/tests/queries/0_stateless/02731_parquet_s3.sql new file mode 100644 index 00000000000..3c3f11f535b --- /dev/null +++ b/tests/queries/0_stateless/02731_parquet_s3.sql @@ -0,0 +1,7 @@ +-- Tags: no-fasttest +-- Tag no-fasttest: Depends on AWS + +-- Reading from s3 a parquet file of size between ~1 MB and ~2 MB was broken at some point. +insert into function s3(s3_conn, filename='test_02731_parquet_s3.parquet') select cityHash64(number) from numbers(170000) settings s3_truncate_on_insert=1; + +select sum(*) from s3(s3_conn, filename='test_02731_parquet_s3.parquet') settings remote_filesystem_read_method='threadpool', remote_filesystem_read_prefetch=1; diff --git a/tests/queries/0_stateless/02731_replace_partition_from_temporary_table.reference b/tests/queries/0_stateless/02731_replace_partition_from_temporary_table.reference new file mode 100644 index 00000000000..ced682dd94f --- /dev/null +++ b/tests/queries/0_stateless/02731_replace_partition_from_temporary_table.reference @@ -0,0 +1,9 @@ +Initial +6 12 +6 12 +REPLACE simple +6 10 +6 10 +ATTACH FROM +6 10 +6 10 diff --git a/tests/queries/0_stateless/02731_replace_partition_from_temporary_table.sql b/tests/queries/0_stateless/02731_replace_partition_from_temporary_table.sql new file mode 100644 index 00000000000..b2f31230dfb --- /dev/null +++ b/tests/queries/0_stateless/02731_replace_partition_from_temporary_table.sql @@ -0,0 +1,48 @@ +-- Tags: no-replicated-database + +DROP TEMPORARY TABLE IF EXISTS src; +DROP TABLE IF EXISTS dst; +DROP TABLE IF EXISTS rdst; + +CREATE TEMPORARY TABLE src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k; +CREATE TABLE dst (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k; +CREATE TABLE rdst (p UInt64, k String, d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_alter_attach_00626_rdst', 'r1') PARTITION BY p ORDER BY k; + +SELECT 'Initial'; +INSERT INTO src VALUES (0, '0', 1); +INSERT INTO src VALUES (1, '0', 1); +INSERT INTO src VALUES (1, '1', 1); +INSERT INTO src VALUES (2, '0', 1); +INSERT INTO src VALUES (3, '0', 1); +INSERT INTO src VALUES (3, '1', 1); + +INSERT INTO dst VALUES (0, '1', 2); +INSERT INTO dst VALUES (1, '1', 2), (1, '2', 2); +INSERT INTO dst VALUES (2, '1', 2); +INSERT INTO dst VALUES (3, '1', 2), (3, '2', 2); + +INSERT INTO rdst VALUES (0, '1', 2); +INSERT INTO rdst VALUES (1, '1', 2), (1, '2', 2); +INSERT INTO rdst VALUES (2, '1', 2); +INSERT INTO rdst VALUES (3, '1', 2), (3, '2', 2); + +SELECT count(), sum(d) FROM dst; +SELECT count(), sum(d) FROM rdst; + 
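-- A note on the semantics exercised below (as documented for MergeTree):
-- REPLACE PARTITION ... FROM copies the partition from the source table and
-- replaces the existing partition in the destination, while ATTACH PARTITION
-- ... FROM only adds a copy of the source partition to the destination.
-- That is why the ATTACH case first drops the destination partition explicitly.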
+SELECT 'REPLACE simple'; +ALTER TABLE dst REPLACE PARTITION 1 FROM src; +SELECT count(), sum(d) FROM dst; +ALTER TABLE rdst REPLACE PARTITION 3 FROM src; +SELECT count(), sum(d) FROM rdst; + +SELECT 'ATTACH FROM'; +ALTER TABLE dst DROP PARTITION 1; +ALTER TABLE dst ATTACH PARTITION 1 FROM src; +SELECT count(), sum(d) FROM dst; +ALTER TABLE rdst DROP PARTITION 3; +ALTER TABLE rdst ATTACH PARTITION 1 FROM src; +SELECT count(), sum(d) FROM rdst; + +DROP TEMPORARY TABLE IF EXISTS src; +DROP TABLE IF EXISTS dst; +DROP TABLE IF EXISTS rdst; diff --git a/tests/queries/0_stateless/02731_zero_objects_in_metadata.reference b/tests/queries/0_stateless/02731_zero_objects_in_metadata.reference new file mode 100644 index 00000000000..125915f4f65 --- /dev/null +++ b/tests/queries/0_stateless/02731_zero_objects_in_metadata.reference @@ -0,0 +1,3 @@ +1 [] +1 [] +[] diff --git a/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh b/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh new file mode 100755 index 00000000000..690cf977d08 --- /dev/null +++ b/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-s3-storage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -n --query " +DROP TABLE IF EXISTS test; +CREATE TABLE test (id Int32, empty Array(Int32)) + ENGINE=MergeTree ORDER BY id + SETTINGS min_rows_for_wide_part=0, min_bytes_for_wide_part=0, disk='s3_disk'; + +INSERT INTO test (id) VALUES (1); +SELECT * FROM test; +" + +${CLICKHOUSE_CLIENT} -n --query " +BACKUP TABLE test TO Disk('backups', 'test_s3_backup'); +DROP TABLE test; +RESTORE TABLE test FROM Disk('backups', 'test_s3_backup'); +" &>/dev/null + +${CLICKHOUSE_CLIENT} -n --query " +SELECT * FROM test; +SELECT empty FROM test; +" diff --git a/tests/queries/0_stateless/02732_transform_fuzz.reference b/tests/queries/0_stateless/02732_transform_fuzz.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02732_transform_fuzz.sql b/tests/queries/0_stateless/02732_transform_fuzz.sql new file mode 100644 index 00000000000..c2918d4da81 --- /dev/null +++ b/tests/queries/0_stateless/02732_transform_fuzz.sql @@ -0,0 +1 @@ +SELECT caseWithExpr(arrayReduce(NULL, []), []); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02733_distinct.reference b/tests/queries/0_stateless/02733_distinct.reference new file mode 100644 index 00000000000..caaa76087c9 --- /dev/null +++ b/tests/queries/0_stateless/02733_distinct.reference @@ -0,0 +1,10 @@ + 1 + 2 +v1 v2 3 +v1 v2 4 +v1 v2 5 + 1 + 2 +v1 v2 3 +v1 v2 4 +v1 v2 5 diff --git a/tests/queries/0_stateless/02733_distinct.sql b/tests/queries/0_stateless/02733_distinct.sql new file mode 100644 index 00000000000..bbb26b17d8c --- /dev/null +++ b/tests/queries/0_stateless/02733_distinct.sql @@ -0,0 +1,19 @@ +-- Tags: no-random-settings +-- there is a bug if `optimize_distinct_in_order` is true + +DROP TABLE IF EXISTS test; +CREATE TABLE test +( + c1 String, + c2 String, + c3 String +) +ENGINE = ReplacingMergeTree +ORDER BY (c1, c3); + +INSERT INTO test(c1, c2, c3) VALUES ('', '', '1'), ('', '', '2'),('v1', 'v2', '3'),('v1', 'v2', '4'),('v1', 'v2', '5'); + +SELECT c1, c2, c3 FROM test GROUP BY c1, c2, c3 ORDER BY c1, c2, c3; +SELECT DISTINCT c1, c2, c3 FROM test; + +DROP TABLE test; diff --git a/tests/queries/0_stateless/02733_fix_distinct_in_order_bug_49622.reference 
b/tests/queries/0_stateless/02733_fix_distinct_in_order_bug_49622.reference new file mode 100644 index 00000000000..19f58f6ca91 --- /dev/null +++ b/tests/queries/0_stateless/02733_fix_distinct_in_order_bug_49622.reference @@ -0,0 +1,2 @@ + 1 + 2 diff --git a/tests/queries/0_stateless/02733_fix_distinct_in_order_bug_49622.sql b/tests/queries/0_stateless/02733_fix_distinct_in_order_bug_49622.sql new file mode 100644 index 00000000000..9501a2c0761 --- /dev/null +++ b/tests/queries/0_stateless/02733_fix_distinct_in_order_bug_49622.sql @@ -0,0 +1,15 @@ +set optimize_distinct_in_order=1; + +DROP TABLE IF EXISTS test_string; + +CREATE TABLE test_string +( + `c1` String, + `c2` String +) +ENGINE = MergeTree +ORDER BY c1; + +INSERT INTO test_string(c1, c2) VALUES ('1', ''), ('2', ''); + +SELECT DISTINCT c2, c1 FROM test_string; diff --git a/tests/queries/0_stateless/02733_sparse_columns_reload.reference b/tests/queries/0_stateless/02733_sparse_columns_reload.reference new file mode 100644 index 00000000000..7ab314964ee --- /dev/null +++ b/tests/queries/0_stateless/02733_sparse_columns_reload.reference @@ -0,0 +1,2 @@ +100000 +100000 diff --git a/tests/queries/0_stateless/02733_sparse_columns_reload.sql b/tests/queries/0_stateless/02733_sparse_columns_reload.sql new file mode 100644 index 00000000000..d4b48274107 --- /dev/null +++ b/tests/queries/0_stateless/02733_sparse_columns_reload.sql @@ -0,0 +1,18 @@ +DROP TABLE IF EXISTS t_sparse_reload; + +CREATE TABLE t_sparse_reload (id UInt64, v UInt64) +ENGINE = MergeTree ORDER BY id +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.95; + +INSERT INTO t_sparse_reload SELECT number, 0 FROM numbers(100000); + +SELECT count() FROM t_sparse_reload WHERE NOT ignore(*); + +ALTER TABLE t_sparse_reload MODIFY SETTING ratio_of_defaults_for_sparse_serialization = 1.0; + +DETACH TABLE t_sparse_reload; +ATTACH TABLE t_sparse_reload; + +SELECT count() FROM t_sparse_reload WHERE NOT ignore(*); + +DROP TABLE t_sparse_reload; diff --git a/tests/queries/0_stateless/02734_big_int_from_float_ubsan.reference b/tests/queries/0_stateless/02734_big_int_from_float_ubsan.reference new file mode 100644 index 00000000000..9972842f982 --- /dev/null +++ b/tests/queries/0_stateless/02734_big_int_from_float_ubsan.reference @@ -0,0 +1 @@ +1 1 diff --git a/tests/queries/0_stateless/02734_big_int_from_float_ubsan.sql b/tests/queries/0_stateless/02734_big_int_from_float_ubsan.sql new file mode 100644 index 00000000000..9fbf54c1a4d --- /dev/null +++ b/tests/queries/0_stateless/02734_big_int_from_float_ubsan.sql @@ -0,0 +1,9 @@ +WITH + 18 AS precision, + toUInt256(-1) AS int, + toUInt256(toFloat64(int)) AS converted, + toString(int) AS int_str, + toString(converted) AS converted_str +SELECT + length(int_str) = length(converted_str) AS have_same_length, + substring(int_str, 1, precision) = substring(converted_str, 1, precision) AS have_same_prefix diff --git a/tests/queries/0_stateless/02734_optimize_group_by.reference b/tests/queries/0_stateless/02734_optimize_group_by.reference new file mode 100644 index 00000000000..3f5ef03cb61 --- /dev/null +++ b/tests/queries/0_stateless/02734_optimize_group_by.reference @@ -0,0 +1,8 @@ +a b +a b +a b + b +a b +a b +a b +a b diff --git a/tests/queries/0_stateless/02734_optimize_group_by.sql b/tests/queries/0_stateless/02734_optimize_group_by.sql new file mode 100644 index 00000000000..28e86c04b0f --- /dev/null +++ b/tests/queries/0_stateless/02734_optimize_group_by.sql @@ -0,0 +1,7 @@ +SELECT 'a' AS key, 'b' as value GROUP BY key WITH CUBE 
SETTINGS allow_experimental_analyzer = 0; +SELECT 'a' AS key, 'b' as value GROUP BY key WITH CUBE SETTINGS allow_experimental_analyzer = 1; + +SELECT 'a' AS key, 'b' as value GROUP BY ignore(1) WITH CUBE; + +SELECT 'a' AS key, 'b' as value GROUP BY ignore(1); +SELECT 'a' AS key, 'b' as value GROUP BY key; diff --git a/tests/queries/0_stateless/02734_sparse_columns_mutation.reference b/tests/queries/0_stateless/02734_sparse_columns_mutation.reference new file mode 100644 index 00000000000..bdce3124792 --- /dev/null +++ b/tests/queries/0_stateless/02734_sparse_columns_mutation.reference @@ -0,0 +1,5 @@ +5000 1189524 +1 +3333 0 +0 +3333 0 diff --git a/tests/queries/0_stateless/02734_sparse_columns_mutation.sql b/tests/queries/0_stateless/02734_sparse_columns_mutation.sql new file mode 100644 index 00000000000..6fdb5b5f4a7 --- /dev/null +++ b/tests/queries/0_stateless/02734_sparse_columns_mutation.sql @@ -0,0 +1,30 @@ +DROP TABLE IF EXISTS t_sparse_mutation; + +CREATE TABLE t_sparse_mutation (id UInt64, v UInt64) +ENGINE = MergeTree ORDER BY id +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9; + +INSERT INTO t_sparse_mutation select number, if (number % 21 = 0, number, 0) FROM numbers(10000); + +SET mutations_sync = 2; + +DELETE FROM t_sparse_mutation WHERE id % 2 = 0; + +SELECT count(), sum(v) FROM t_sparse_mutation; + +SELECT sum(has_lightweight_delete) FROM system.parts +WHERE database = currentDatabase() AND table = 't_sparse_mutation' AND active; + +ALTER TABLE t_sparse_mutation UPDATE v = v * 2 WHERE id % 5 = 0; +ALTER TABLE t_sparse_mutation DELETE WHERE id % 3 = 0; + +SELECT count(), sum(v) FROM t_sparse_mutation; + +OPTIMIZE TABLE t_sparse_mutation FINAL; + +SELECT sum(has_lightweight_delete) FROM system.parts +WHERE database = currentDatabase() AND table = 't_sparse_mutation' AND active; + +SELECT count(), sum(v) FROM t_sparse_mutation; + +DROP TABLE t_sparse_mutation; diff --git a/tests/queries/0_stateless/02734_sparse_columns_short_circuit.reference b/tests/queries/0_stateless/02734_sparse_columns_short_circuit.reference new file mode 100644 index 00000000000..bf2c10d23ec --- /dev/null +++ b/tests/queries/0_stateless/02734_sparse_columns_short_circuit.reference @@ -0,0 +1 @@ +477 diff --git a/tests/queries/0_stateless/02734_sparse_columns_short_circuit.sql b/tests/queries/0_stateless/02734_sparse_columns_short_circuit.sql new file mode 100644 index 00000000000..da8de22a80e --- /dev/null +++ b/tests/queries/0_stateless/02734_sparse_columns_short_circuit.sql @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS t_sparse_short_circuit; + +SET short_circuit_function_evaluation = 'force_enable'; + +CREATE TABLE t_sparse_short_circuit (a UInt64, b UInt64) +ENGINE = MergeTree ORDER BY tuple() +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9; + +INSERT INTO t_sparse_short_circuit select number, if (number % 21 = 0, number % 10 + 1, 0) FROM numbers(100000); + +SELECT sum(if(a % 10 = 0, CAST(b, 'UInt8'), 0)) FROM t_sparse_short_circuit; + +DROP TABLE t_sparse_short_circuit; diff --git a/tests/queries/0_stateless/02735_array_map_array_of_tuples.reference b/tests/queries/0_stateless/02735_array_map_array_of_tuples.reference new file mode 100644 index 00000000000..80481fee564 --- /dev/null +++ b/tests/queries/0_stateless/02735_array_map_array_of_tuples.reference @@ -0,0 +1,4 @@ +[(1)] +[1] +[3] +[3] diff --git a/tests/queries/0_stateless/02735_array_map_array_of_tuples.sql b/tests/queries/0_stateless/02735_array_map_array_of_tuples.sql new file mode 100644 index 00000000000..51d60aa0cd0 
--- /dev/null +++ b/tests/queries/0_stateless/02735_array_map_array_of_tuples.sql @@ -0,0 +1,4 @@ +SELECT arrayMap((x) -> x, [tuple(1)]); +SELECT arrayMap((x) -> x.1, [tuple(1)]); +SELECT arrayMap((x) -> x.1 + x.2, [tuple(1, 2)]); +SELECT arrayMap((x, y) -> x + y, [tuple(1, 2)]); diff --git a/tests/queries/0_stateless/02735_asof_join_right_null.reference b/tests/queries/0_stateless/02735_asof_join_right_null.reference new file mode 100644 index 00000000000..d4332556cb5 --- /dev/null +++ b/tests/queries/0_stateless/02735_asof_join_right_null.reference @@ -0,0 +1,95 @@ +-- { echoOn } +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b < t2.b ORDER BY t1.b; +1 -1 1 1 +1 0 1 1 +1 1 1 2 +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b <= t2.b ORDER BY t1.b; +1 -1 1 1 +1 0 1 1 +1 1 1 1 +1 2 1 2 +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b > t2.b ORDER BY t1.b; +1 2 1 1 +1 3 1 2 +1 4 1 2 +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b >= t2.b ORDER BY t1.b; +1 1 1 1 +1 2 1 2 +1 3 1 2 +1 4 1 2 +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b < t2.b ORDER BY t1.b; +1 -1 1 1 +1 0 1 1 +1 1 1 2 +1 2 0 \N +1 3 0 \N +1 4 0 \N +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b <= t2.b ORDER BY t1.b; +1 -1 1 1 +1 0 1 1 +1 1 1 1 +1 2 1 2 +1 3 0 \N +1 4 0 \N +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b > t2.b ORDER BY t1.b; +1 -1 0 \N +1 0 0 \N +1 1 0 \N +1 2 1 1 +1 3 1 2 +1 4 1 2 +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b >= t2.b ORDER BY t1.b; +1 -1 0 \N +1 0 0 \N +1 1 1 1 +1 2 1 2 +1 3 1 2 +1 4 1 2 +SET join_use_nulls = 1; +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b < t2.b ORDER BY t1.b; +1 -1 1 1 +1 0 1 1 +1 1 1 2 +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b <= t2.b ORDER BY t1.b; +1 -1 1 1 +1 0 1 1 +1 1 1 1 +1 2 1 2 +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b > t2.b ORDER BY t1.b; +1 2 1 1 +1 3 1 2 +1 4 1 2 +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b >= t2.b ORDER BY t1.b; +1 1 1 1 +1 2 1 2 +1 3 1 2 +1 4 1 2 +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b < t2.b ORDER BY t1.b; +1 -1 1 1 +1 0 1 1 +1 1 1 2 +1 2 \N \N +1 3 \N \N +1 4 \N \N +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b <= t2.b ORDER BY t1.b; +1 -1 1 1 +1 0 1 1 +1 1 1 1 +1 2 1 2 +1 3 \N \N +1 4 \N \N +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b > t2.b ORDER BY t1.b; +1 -1 \N \N +1 0 \N \N +1 1 \N \N +1 2 1 1 +1 3 1 2 +1 4 1 2 +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b >= t2.b ORDER BY t1.b; +1 -1 \N \N +1 0 \N \N +1 1 1 1 +1 2 1 2 +1 3 1 2 +1 4 1 2 +DROP TABLE t1; diff --git a/tests/queries/0_stateless/02735_asof_join_right_null.sql b/tests/queries/0_stateless/02735_asof_join_right_null.sql new file mode 100644 index 00000000000..997d33a0570 --- /dev/null +++ b/tests/queries/0_stateless/02735_asof_join_right_null.sql @@ -0,0 +1,32 @@ + +CREATE TABLE t1 (a Int, b Int) ENGINE = Memory; +INSERT INTO t1 VALUES (1, -1), (1, 0), (1, 1), (1, 2), (1, 3), (1, 4); + +CREATE TABLE t2 (a Int, b Nullable(Int)) ENGINE = Memory; +INSERT INTO t2 VALUES (1, 1), (1, NULL), (1, 2); + +-- { echoOn } +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b < t2.b ORDER BY t1.b; +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b <= t2.b ORDER BY t1.b; +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b > t2.b ORDER BY t1.b; +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b >= t2.b ORDER BY t1.b; + +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b < t2.b ORDER BY 
t1.b; +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b <= t2.b ORDER BY t1.b; +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b > t2.b ORDER BY t1.b; +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b >= t2.b ORDER BY t1.b; + +SET join_use_nulls = 1; + +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b < t2.b ORDER BY t1.b; +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b <= t2.b ORDER BY t1.b; +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b > t2.b ORDER BY t1.b; +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b >= t2.b ORDER BY t1.b; + +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b < t2.b ORDER BY t1.b; +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b <= t2.b ORDER BY t1.b; +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b > t2.b ORDER BY t1.b; +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b >= t2.b ORDER BY t1.b; + +DROP TABLE t1; + diff --git a/tests/queries/0_stateless/02735_system_zookeeper_connection.reference b/tests/queries/0_stateless/02735_system_zookeeper_connection.reference new file mode 100644 index 00000000000..1deabd88b88 --- /dev/null +++ b/tests/queries/0_stateless/02735_system_zookeeper_connection.reference @@ -0,0 +1,2 @@ +default ::1 9181 0 0 3 +zookeeper2 ::1 9181 0 0 0 diff --git a/tests/queries/0_stateless/02735_system_zookeeper_connection.sql b/tests/queries/0_stateless/02735_system_zookeeper_connection.sql new file mode 100644 index 00000000000..8b37c428413 --- /dev/null +++ b/tests/queries/0_stateless/02735_system_zookeeper_connection.sql @@ -0,0 +1,15 @@ +-- Tags: no-fasttest, no-replicated-database + +DROP TABLE IF EXISTS test_zk_connection_table; + +CREATE TABLE test_zk_connection_table ( + key UInt64 +) +ENGINE ReplicatedMergeTree('zookeeper2:/clickhouse/{database}/02731_zk_connection/{shard}', '{replica}') +ORDER BY tuple(); + +-- keeper_api_version will be 0 for auxiliary_zookeeper2, because we fail to get /api_version due to chroot +-- I'm not sure if it's a bug or a useful trick to fall back to the basic API +select name, host, port, index, is_expired, keeper_api_version from system.zookeeper_connection order by name; + +DROP TABLE IF EXISTS test_zk_connection_table; diff --git a/tests/queries/0_stateless/02736_bit_count_big_int.reference b/tests/queries/0_stateless/02736_bit_count_big_int.reference new file mode 100644 index 00000000000..a3a725ace69 --- /dev/null +++ b/tests/queries/0_stateless/02736_bit_count_big_int.reference @@ -0,0 +1,13 @@ +128 +256 +128 +256 +127 +255 +126 +255 +64 +UInt8 +UInt16 +UInt8 +UInt16 diff --git a/tests/queries/0_stateless/02736_bit_count_big_int.sql b/tests/queries/0_stateless/02736_bit_count_big_int.sql new file mode 100644 index 00000000000..35a4a641606 --- /dev/null +++ b/tests/queries/0_stateless/02736_bit_count_big_int.sql @@ -0,0 +1,19 @@ +SELECT bitCount(CAST(-1 AS UInt128)); +SELECT bitCount(CAST(-1 AS UInt256)); + +SELECT bitCount(CAST(-1 AS Int128)); +SELECT bitCount(CAST(-1 AS Int256)); + +SELECT bitCount(CAST(-1 AS UInt128) - 1); +SELECT bitCount(CAST(-1 AS UInt256) - 2); + +SELECT bitCount(CAST(-1 AS Int128) - 3); +SELECT bitCount(CAST(-1 AS Int256) - 4); + +SELECT bitCount(CAST(0xFFFFFFFFFFFFFFFF AS Int256)); + +SELECT toTypeName(bitCount(1::UInt128)); +SELECT toTypeName(bitCount(1::UInt256)); + +SELECT toTypeName(bitCount(1::Int128)); +SELECT toTypeName(bitCount(1::Int256)); diff --git a/tests/queries/0_stateless/02737_sql_auto_is_null.reference b/tests/queries/0_stateless/02737_sql_auto_is_null.reference new file
mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/02737_sql_auto_is_null.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/02737_sql_auto_is_null.sql b/tests/queries/0_stateless/02737_sql_auto_is_null.sql new file mode 100644 index 00000000000..22f1a9524ca --- /dev/null +++ b/tests/queries/0_stateless/02737_sql_auto_is_null.sql @@ -0,0 +1,2 @@ +SET SQL_AUTO_IS_NULL = 0; +SELECT getSetting('SQL_AUTO_IS_NULL'); diff --git a/tests/queries/0_stateless/02740_hashed_dictionary_load_factor_smoke.reference b/tests/queries/0_stateless/02740_hashed_dictionary_load_factor_smoke.reference new file mode 100644 index 00000000000..d43fa8e734c --- /dev/null +++ b/tests/queries/0_stateless/02740_hashed_dictionary_load_factor_smoke.reference @@ -0,0 +1,12 @@ +CREATE DICTIONARY default.test_sparse_dictionary_load_factor\n(\n `key` UInt64,\n `value` UInt16\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(TABLE test_table))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(SPARSE_HASHED(MAX_LOAD_FACTOR 0.9)) +100000 +0 +CREATE DICTIONARY default.test_dictionary_load_factor\n(\n `key` UInt64,\n `value` UInt16\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(TABLE test_table))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(HASHED(MAX_LOAD_FACTOR 0.9)) +100000 +0 +CREATE DICTIONARY default.test_dictionary_load_factor_nullable\n(\n `key` UInt64,\n `value` Nullable(UInt16)\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(TABLE test_table_nullable))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(HASHED(MAX_LOAD_FACTOR 0.9)) +100000 +0 +CREATE DICTIONARY default.test_complex_dictionary_load_factor\n(\n `key_1` UInt64,\n `key_2` UInt64,\n `value` UInt16\n)\nPRIMARY KEY key_1, key_2\nSOURCE(CLICKHOUSE(TABLE test_table_complex))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(COMPLEX_KEY_HASHED(MAX_LOAD_FACTOR 0.9)) +100000 +0 diff --git a/tests/queries/0_stateless/02740_hashed_dictionary_load_factor_smoke.sql b/tests/queries/0_stateless/02740_hashed_dictionary_load_factor_smoke.sql new file mode 100644 index 00000000000..d4bb9a1b14a --- /dev/null +++ b/tests/queries/0_stateless/02740_hashed_dictionary_load_factor_smoke.sql @@ -0,0 +1,107 @@ +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + key UInt64, + value UInt16 +) ENGINE=Memory() AS SELECT number, number FROM numbers(1e5); + +DROP TABLE IF EXISTS test_table_nullable; +CREATE TABLE test_table_nullable +( + key UInt64, + value Nullable(UInt16) +) ENGINE=Memory() AS SELECT number, number % 2 == 0 ? 
NULL : number FROM numbers(1e5); + +DROP TABLE IF EXISTS test_table_string; +CREATE TABLE test_table_string +( + key String, + value UInt16 +) ENGINE=Memory() AS SELECT 'foo' || number::String, number FROM numbers(1e5); + +DROP TABLE IF EXISTS test_table_complex; +CREATE TABLE test_table_complex +( + key_1 UInt64, + key_2 UInt64, + value UInt16 +) ENGINE=Memory() AS SELECT number, number, number FROM numbers(1e5); + +DROP DICTIONARY IF EXISTS test_sparse_dictionary_load_factor; +CREATE DICTIONARY test_sparse_dictionary_load_factor +( + key UInt64, + value UInt16 +) PRIMARY KEY key +SOURCE(CLICKHOUSE(TABLE test_table)) +LAYOUT(SPARSE_HASHED(MAX_LOAD_FACTOR 0.90)) +LIFETIME(0); +SHOW CREATE test_sparse_dictionary_load_factor; +SYSTEM RELOAD DICTIONARY test_sparse_dictionary_load_factor; +SELECT element_count FROM system.dictionaries WHERE database = currentDatabase() AND name = 'test_sparse_dictionary_load_factor'; +SELECT count() FROM test_table WHERE dictGet('test_sparse_dictionary_load_factor', 'value', key) != value; +DROP DICTIONARY test_sparse_dictionary_load_factor; + +DROP DICTIONARY IF EXISTS test_dictionary_load_factor; +CREATE DICTIONARY test_dictionary_load_factor +( + key UInt64, + value UInt16 +) PRIMARY KEY key +SOURCE(CLICKHOUSE(TABLE test_table)) +LAYOUT(HASHED(MAX_LOAD_FACTOR 0.90)) +LIFETIME(0); +SHOW CREATE test_dictionary_load_factor; +SYSTEM RELOAD DICTIONARY test_dictionary_load_factor; +SELECT element_count FROM system.dictionaries WHERE database = currentDatabase() AND name = 'test_dictionary_load_factor'; +SELECT count() FROM test_table WHERE dictGet('test_dictionary_load_factor', 'value', key) != value; +DROP DICTIONARY test_dictionary_load_factor; + +DROP DICTIONARY IF EXISTS test_dictionary_load_factor_nullable; +CREATE DICTIONARY test_dictionary_load_factor_nullable +( + key UInt64, + value Nullable(UInt16) +) PRIMARY KEY key +SOURCE(CLICKHOUSE(TABLE test_table_nullable)) +LAYOUT(HASHED(MAX_LOAD_FACTOR 0.90)) +LIFETIME(0); +SHOW CREATE test_dictionary_load_factor_nullable; +SYSTEM RELOAD DICTIONARY test_dictionary_load_factor_nullable; +SELECT element_count FROM system.dictionaries WHERE database = currentDatabase() AND name = 'test_dictionary_load_factor_nullable'; +SELECT count() FROM test_table_nullable WHERE dictGet('test_dictionary_load_factor_nullable', 'value', key) != value; +DROP DICTIONARY test_dictionary_load_factor_nullable; + +DROP DICTIONARY IF EXISTS test_complex_dictionary_load_factor; +CREATE DICTIONARY test_complex_dictionary_load_factor +( + key_1 UInt64, + key_2 UInt64, + value UInt16 +) PRIMARY KEY key_1, key_2 +SOURCE(CLICKHOUSE(TABLE test_table_complex)) +LAYOUT(COMPLEX_KEY_HASHED(MAX_LOAD_FACTOR 0.90)) +LIFETIME(0); +SYSTEM RELOAD DICTIONARY test_complex_dictionary_load_factor; +SHOW CREATE test_complex_dictionary_load_factor; +SELECT element_count FROM system.dictionaries WHERE database = currentDatabase() and name = 'test_complex_dictionary_load_factor'; +SELECT count() FROM test_table_complex WHERE dictGet('test_complex_dictionary_load_factor', 'value', (key_1, key_2)) != value; +DROP DICTIONARY test_complex_dictionary_load_factor; + +DROP DICTIONARY IF EXISTS test_dictionary_load_factor_string; +CREATE DICTIONARY test_dictionary_load_factor_string +( + key String, + value UInt16 +) PRIMARY KEY key +SOURCE(CLICKHOUSE(TABLE test_table_string)) +LAYOUT(HASHED(MAX_LOAD_FACTOR 1)) +LIFETIME(0); +-- should fail because MAX_LOAD_FACTOR is 1 (the maximum allowed value is 0.99) +SYSTEM RELOAD DICTIONARY test_dictionary_load_factor_string; -- {
serverError BAD_ARGUMENTS } +DROP DICTIONARY test_dictionary_load_factor_string; + +DROP TABLE test_table; +DROP TABLE test_table_nullable; +DROP TABLE test_table_string; +DROP TABLE test_table_complex; diff --git a/tests/queries/0_stateless/02741_hashed_dictionary_load_factor.reference b/tests/queries/0_stateless/02741_hashed_dictionary_load_factor.reference new file mode 100644 index 00000000000..abe891cbb9b --- /dev/null +++ b/tests/queries/0_stateless/02741_hashed_dictionary_load_factor.reference @@ -0,0 +1,4 @@ +test_dictionary_hashed 1000000 0.4768 33558760 +test_dictionary_hashed_load_factor 1000000 0.9537 16781544 +test_dictionary_sparse_hashed 1000000 0.4768 20975848 +test_dictionary_sparse_hashed_load_factor 1000000 0.9537 10490088 diff --git a/tests/queries/0_stateless/02741_hashed_dictionary_load_factor.sql.j2 b/tests/queries/0_stateless/02741_hashed_dictionary_load_factor.sql.j2 new file mode 100644 index 00000000000..870acd54514 --- /dev/null +++ b/tests/queries/0_stateless/02741_hashed_dictionary_load_factor.sql.j2 @@ -0,0 +1,39 @@ +{# vi: ft=sql #} + +{% for layout in ["hashed", "sparse_hashed"] %} +DROP DICTIONARY IF EXISTS test_dictionary_{{layout}}; +DROP DICTIONARY IF EXISTS test_dictionary_{{layout}}_load_factor; +DROP TABLE IF EXISTS test_table; + +CREATE TABLE test_table +( + key UInt64, + value UInt16 +) ENGINE=Memory() AS SELECT number, number FROM numbers(1e6); + +CREATE DICTIONARY test_dictionary_{{layout}} +( + key UInt64, + value UInt16 +) PRIMARY KEY key +SOURCE(CLICKHOUSE(TABLE test_table)) +LAYOUT({{layout}}()) +LIFETIME(0); + +CREATE DICTIONARY test_dictionary_{{layout}}_load_factor +( + key UInt64, + value UInt16 +) PRIMARY KEY key +SOURCE(CLICKHOUSE(TABLE test_table)) +LAYOUT({{layout}}(MAX_LOAD_FACTOR 0.98)) +LIFETIME(0); + +SYSTEM RELOAD DICTIONARY test_dictionary_{{layout}}; +SYSTEM RELOAD DICTIONARY test_dictionary_{{layout}}_load_factor; +SELECT name, element_count, round(load_factor, 4), bytes_allocated FROM system.dictionaries WHERE database = currentDatabase() ORDER BY name; + +DROP DICTIONARY IF EXISTS test_dictionary_{{layout}}; +DROP DICTIONARY IF EXISTS test_dictionary_{{layout}}_load_factor; +DROP TABLE test_table; +{% endfor %} diff --git a/tests/queries/0_stateless/02750_settings_alias_tcp_protocol.python b/tests/queries/0_stateless/02750_settings_alias_tcp_protocol.python new file mode 100644 index 00000000000..768fb2144e3 --- /dev/null +++ b/tests/queries/0_stateless/02750_settings_alias_tcp_protocol.python @@ -0,0 +1,218 @@ +#!/usr/bin/env python3 + +import socket +import os +import uuid +import json + +CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1") +CLICKHOUSE_PORT = int(os.environ.get("CLICKHOUSE_PORT_TCP", "900000")) +CLICKHOUSE_DATABASE = os.environ.get("CLICKHOUSE_DATABASE", "default") + + +def writeVarUInt(x, ba): + for _ in range(0, 9): + byte = x & 0x7F + if x > 0x7F: + byte |= 0x80 + + ba.append(byte) + + x >>= 7 + if x == 0: + return + + +def writeStringBinary(s, ba): + b = bytes(s, "utf-8") + writeVarUInt(len(s), ba) + ba.extend(b) + + +def readStrict(s, size=1): + res = bytearray() + while size: + cur = s.recv(size) + # if not res: + # raise "Socket is closed" + size -= len(cur) + res.extend(cur) + + return res + + +def readUInt(s, size=1): + res = readStrict(s, size) + val = 0 + for i in range(len(res)): + val += res[i] << (i * 8) + return val + + +def readUInt8(s): + return readUInt(s) + + +def readUInt16(s): + return readUInt(s, 2) + + +def readUInt32(s): + return readUInt(s, 4) + + +def readUInt64(s): + 
return readUInt(s, 8) + + +def readVarUInt(s): + x = 0 + for i in range(9): + byte = readStrict(s)[0] + x |= (byte & 0x7F) << (7 * i) + + if not byte & 0x80: + return x + + return x + + +def readStringBinary(s): + size = readVarUInt(s) + s = readStrict(s, size) + return s.decode("utf-8") + + +def sendHello(s): + ba = bytearray() + writeVarUInt(0, ba) # Hello + writeStringBinary("simple native protocol", ba) + writeVarUInt(21, ba) + writeVarUInt(9, ba) + writeVarUInt(54449, ba) + writeStringBinary(CLICKHOUSE_DATABASE, ba) # database + writeStringBinary("default", ba) # user + writeStringBinary("", ba) # pwd + s.sendall(ba) + + +def receiveHello(s): + p_type = readVarUInt(s) + assert p_type == 0 # Hello + _server_name = readStringBinary(s) + _server_version_major = readVarUInt(s) + _server_version_minor = readVarUInt(s) + _server_revision = readVarUInt(s) + _server_timezone = readStringBinary(s) + _server_display_name = readStringBinary(s) + _server_version_patch = readVarUInt(s) + + +def serializeClientInfo(ba, query_id): + writeStringBinary("default", ba) # initial_user + writeStringBinary(query_id, ba) # initial_query_id + writeStringBinary("127.0.0.1:9000", ba) # initial_address + ba.extend([0] * 8) # initial_query_start_time_microseconds + ba.append(1) # TCP + writeStringBinary("os_user", ba) # os_user + writeStringBinary("client_hostname", ba) # client_hostname + writeStringBinary("client_name", ba) # client_name + writeVarUInt(21, ba) + writeVarUInt(9, ba) + writeVarUInt(54449, ba) + writeStringBinary("", ba) # quota_key + writeVarUInt(0, ba) # distributed_depth + writeVarUInt(1, ba) # client_version_patch + ba.append(0) # No telemetry + + +def sendQuery(s, query, settings): + ba = bytearray() + query_id = uuid.uuid4().hex + writeVarUInt(1, ba) # query + writeStringBinary(query_id, ba) + + ba.append(1) # INITIAL_QUERY + + # client info + serializeClientInfo(ba, query_id) + + # Settings + for key, value in settings.items(): + writeStringBinary(key, ba) + writeVarUInt(1, ba) # is_important + writeStringBinary(str(value), ba) + writeStringBinary("", ba) # End of settings + + writeStringBinary("", ba) # No interserver secret + writeVarUInt(2, ba) # Stage - Complete + ba.append(0) # No compression + writeStringBinary(query, ba) # query, finally + s.sendall(ba) + + +def serializeBlockInfo(ba): + writeVarUInt(1, ba) # 1 + ba.append(0) # is_overflows + writeVarUInt(2, ba) # 2 + writeVarUInt(0, ba) # 0 + ba.extend([0] * 4) # bucket_num + + +def sendEmptyBlock(s): + ba = bytearray() + writeVarUInt(2, ba) # Data + writeStringBinary("", ba) + serializeBlockInfo(ba) + writeVarUInt(0, ba) # rows + writeVarUInt(0, ba) # columns + s.sendall(ba) + + +def assertPacket(packet, expected): + assert packet == expected, "Got: {}, expected: {}".format(packet, expected) + + +def readResponse(s): + packet_type = readVarUInt(s) + if packet_type == 2: # Exception + raise RuntimeError(readException(s)) + + if packet_type == 1: # Data + return None + if packet_type == 3: # Progress + return None + if packet_type == 5: # End stream + return None + + raise RuntimeError("Unexpected packet: {}".format(packet_type)) + + +def readException(s): + code = readUInt32(s) + _name = readStringBinary(s) + text = readStringBinary(s) + readStringBinary(s) # trace + assertPacket(readUInt8(s), 0) # has_nested + return "code {}: {}".format(code, text.replace("DB::Exception:", "")) + + +def main(): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.settimeout(30) + s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) + 
sendHello(s) + receiveHello(s) + sendQuery(s, "select 1", {"replication_alter_partitions_sync": 1}) + # external tables + sendEmptyBlock(s) + + while readResponse(s) is not None: + pass + + s.close() + print("OK") + + +if __name__ == "__main__": + main() diff --git a/tests/queries/0_stateless/02750_settings_alias_tcp_protocol.reference b/tests/queries/0_stateless/02750_settings_alias_tcp_protocol.reference new file mode 100644 index 00000000000..d86bac9de59 --- /dev/null +++ b/tests/queries/0_stateless/02750_settings_alias_tcp_protocol.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/02750_settings_alias_tcp_protocol.sh b/tests/queries/0_stateless/02750_settings_alias_tcp_protocol.sh new file mode 100755 index 00000000000..35d685c1580 --- /dev/null +++ b/tests/queries/0_stateless/02750_settings_alias_tcp_protocol.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +# NOTE: this sh wrapper is required because of shell_config + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +python3 "$CURDIR"/02750_settings_alias_tcp_protocol.python diff --git a/tests/queries/0_stateless/02751_match_constant_needle.reference b/tests/queries/0_stateless/02751_match_constant_needle.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02751_match_constant_needle.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02751_match_constant_needle.sql b/tests/queries/0_stateless/02751_match_constant_needle.sql new file mode 100644 index 00000000000..71bdcc7cb0a --- /dev/null +++ b/tests/queries/0_stateless/02751_match_constant_needle.sql @@ -0,0 +1 @@ +select match('default/k8s1', '\\A(?:(?:[-0-9_a-z]+(?:\\.[-0-9_a-z]+)*)/k8s1)\\z'); diff --git a/tests/queries/0_stateless/02751_multiif_to_if_crash.reference b/tests/queries/0_stateless/02751_multiif_to_if_crash.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02751_multiif_to_if_crash.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02751_multiif_to_if_crash.sql b/tests/queries/0_stateless/02751_multiif_to_if_crash.sql new file mode 100644 index 00000000000..05233c2c4b9 --- /dev/null +++ b/tests/queries/0_stateless/02751_multiif_to_if_crash.sql @@ -0,0 +1 @@ +SELECT sum(A) FROM (SELECT multiIf(1, 1, NULL) as A); diff --git a/tests/queries/0_stateless/02751_multiquery_with_argument.reference b/tests/queries/0_stateless/02751_multiquery_with_argument.reference new file mode 100644 index 00000000000..33288ec5bcb --- /dev/null +++ b/tests/queries/0_stateless/02751_multiquery_with_argument.reference @@ -0,0 +1,24 @@ +100 +101 +102 +103 +Syntax error +Empty query +Empty query +BAD_ARGUMENTS +BAD_ARGUMENTS +BAD_ARGUMENTS +Bad arguments +Bad arguments +Bad arguments +Bad arguments +Bad arguments +Bad arguments +Bad arguments +BAD_ARGUMENTS +Bad arguments +BAD_ARGUMENTS +Bad arguments +Bad arguments +Bad arguments +Bad arguments diff --git a/tests/queries/0_stateless/02751_multiquery_with_argument.sh b/tests/queries/0_stateless/02751_multiquery_with_argument.sh new file mode 100755 index 00000000000..153004c752e --- /dev/null +++ b/tests/queries/0_stateless/02751_multiquery_with_argument.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL --multiquery "SELECT 100" +$CLICKHOUSE_LOCAL --multiquery "SELECT 101;" +$CLICKHOUSE_LOCAL --multiquery "SELECT 102;SELECT 103;" + +# Invalid SQL. +$CLICKHOUSE_LOCAL --multiquery "SELECT 200; S" 2>&1 | grep -o 'Syntax error' +$CLICKHOUSE_LOCAL --multiquery "; SELECT 201;" 2>&1 | grep -o 'Empty query' +$CLICKHOUSE_LOCAL --multiquery "; S; SELECT 202" 2>&1 | grep -o 'Empty query' + +# Error expectation cases. +# -n is prohibited +$CLICKHOUSE_LOCAL -n "SELECT 301" 2>&1 | grep -o 'BAD_ARGUMENTS' +$CLICKHOUSE_LOCAL -n "SELECT 302;" 2>&1 | grep -o 'BAD_ARGUMENTS' +$CLICKHOUSE_LOCAL -n "SELECT 304;SELECT 305;" 2>&1 | grep -o 'BAD_ARGUMENTS' +$CLICKHOUSE_LOCAL --multiquery --multiquery 2>&1 | grep -o 'Bad arguments' +$CLICKHOUSE_LOCAL -n --multiquery 2>&1 | grep -o 'Bad arguments' +$CLICKHOUSE_LOCAL --multiquery -n 2>&1 | grep -o 'Bad arguments' +$CLICKHOUSE_LOCAL --multiquery --multiquery "SELECT 306; SELECT 307;" 2>&1 | grep -o 'Bad arguments' +$CLICKHOUSE_LOCAL -n --multiquery "SELECT 307; SELECT 308;" 2>&1 | grep -o 'Bad arguments' +$CLICKHOUSE_LOCAL --multiquery "SELECT 309; SELECT 310;" --multiquery 2>&1 | grep -o 'Bad arguments' +$CLICKHOUSE_LOCAL --multiquery "SELECT 311;" --multiquery "SELECT 312;" 2>&1 | grep -o 'Bad arguments' +$CLICKHOUSE_LOCAL --multiquery "SELECT 313;" -n "SELECT 314;" 2>&1 | grep -o 'BAD_ARGUMENTS' +$CLICKHOUSE_LOCAL --multiquery "SELECT 315;" --query "SELECT 316;" 2>&1 | grep -o 'Bad arguments' +$CLICKHOUSE_LOCAL -n "SELECT 320" --query "SELECT 317;" 2>&1 | grep -o 'BAD_ARGUMENTS' +$CLICKHOUSE_LOCAL --query --multiquery --multiquery "SELECT 318;" 2>&1 | grep -o 'Bad arguments' +$CLICKHOUSE_LOCAL --query --multiquery "SELECT 319;" 2>&1 | grep -o 'Bad arguments' +$CLICKHOUSE_LOCAL --query -n "SELECT 400;" 2>&1 | grep -o 'Bad arguments' +$CLICKHOUSE_LOCAL --query -n --multiquery "SELECT 401;" 2>&1 | grep -o 'Bad arguments' \ No newline at end of file diff --git a/tests/queries/0_stateless/02751_parallel_replicas_bug_chunkinfo_not_set.reference b/tests/queries/0_stateless/02751_parallel_replicas_bug_chunkinfo_not_set.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02751_parallel_replicas_bug_chunkinfo_not_set.sql b/tests/queries/0_stateless/02751_parallel_replicas_bug_chunkinfo_not_set.sql new file mode 100644 index 00000000000..2ea2cecc7b5 --- /dev/null +++ b/tests/queries/0_stateless/02751_parallel_replicas_bug_chunkinfo_not_set.sql @@ -0,0 +1,43 @@ +CREATE TABLE join_inner_table__fuzz_1 +( + `id` UUID, + `key` Nullable(Date), + `number` Int64, + `value1` LowCardinality(String), + `value2` LowCardinality(String), + `time` Int128 +) +ENGINE = MergeTree +ORDER BY (id, number, key) +SETTINGS allow_nullable_key = 1; + +INSERT INTO join_inner_table__fuzz_1 SELECT + CAST('833c9e22-c245-4eb5-8745-117a9a1f26b1', 'UUID') AS id, + CAST(rowNumberInAllBlocks(), 'String') AS key, + * +FROM generateRandom('number Int64, value1 String, value2 String, time Int64', 1, 10, 2) +LIMIT 100; + +SET max_parallel_replicas = 3, prefer_localhost_replica = 1, use_hedged_requests = 0, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_parallel_reading_from_replicas = 1; + +-- SELECT query will write a Warning to the logs +SET send_logs_level='error'; + +SELECT + key, + value1, + value2, + toUInt64(min(time)) AS start_ts +FROM join_inner_table__fuzz_1 +PREWHERE (id = '833c9e22-c245-4eb5-8745-117a9a1f26b1') AND (number > 
toUInt64('1610517366120')) +GROUP BY + key, + value1, + value2 + WITH ROLLUP +ORDER BY + key ASC, + value1 ASC, + value2 ASC NULLS LAST +LIMIT 10 +FORMAT Null; diff --git a/tests/queries/0_stateless/02751_protobuf_ipv6.reference b/tests/queries/0_stateless/02751_protobuf_ipv6.reference new file mode 100644 index 00000000000..0318b49c77e --- /dev/null +++ b/tests/queries/0_stateless/02751_protobuf_ipv6.reference @@ -0,0 +1,2 @@ +::ffff:1.2.3.4 +::ffff:1.2.3.4 diff --git a/tests/queries/0_stateless/02751_protobuf_ipv6.sh b/tests/queries/0_stateless/02751_protobuf_ipv6.sh new file mode 100755 index 00000000000..f93963aa6c6 --- /dev/null +++ b/tests/queries/0_stateless/02751_protobuf_ipv6.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +SCHEMADIR=$CURDIR/format_schemas + + +echo -ne '\x12\x1a\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\x01\x02\x03\x04' | $CLICKHOUSE_LOCAL --input-format Protobuf --format_schema="$SCHEMADIR/02751_protobuf_ipv6:Message" --structure="ipv6_bytes IPv6" -q "select * from table" + +$CLICKHOUSE_LOCAL -q "select '::ffff:1.2.3.4'::IPv6 as ipv6_bytes format Protobuf settings format_schema = '$SCHEMADIR/02751_protobuf_ipv6:Message'" | $CLICKHOUSE_LOCAL --input-format Protobuf --format_schema="$SCHEMADIR/02751_protobuf_ipv6:Message" --structure="ipv6_bytes IPv6" -q "select * from table" + diff --git a/tests/queries/0_stateless/02751_query_log_test_partitions.reference b/tests/queries/0_stateless/02751_query_log_test_partitions.reference new file mode 100644 index 00000000000..5a9f2163c0e --- /dev/null +++ b/tests/queries/0_stateless/02751_query_log_test_partitions.reference @@ -0,0 +1,2 @@ +3 3 +02751_query_log_test_partitions.3 diff --git a/tests/queries/0_stateless/02751_query_log_test_partitions.sql b/tests/queries/0_stateless/02751_query_log_test_partitions.sql new file mode 100644 index 00000000000..be047d1a46e --- /dev/null +++ b/tests/queries/0_stateless/02751_query_log_test_partitions.sql @@ -0,0 +1,20 @@ +set log_queries=1; +set log_queries_min_type='QUERY_FINISH'; + +DROP TABLE IF EXISTS 02751_query_log_test_partitions; +CREATE TABLE 02751_query_log_test_partitions (a Int64, b Int64) ENGINE = MergeTree PARTITION BY a ORDER BY b; + +INSERT INTO 02751_query_log_test_partitions SELECT number, number FROM numbers(10); + +SELECT * FROM 02751_query_log_test_partitions WHERE a = 3; + +SYSTEM FLUSH LOGS; + +SELECT + -- Remove the leading prefix, which is the database name and changes between test runs.
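    -- Worked example (the database name below is hypothetical): if partitions[1]
    -- is 'test_abc123.02751_query_log_test_partitions.3', then
    -- splitByString('.', ...) yields ['test_abc123', '02751_query_log_test_partitions', '3'],
    -- arrayPopFront() drops the database name, and arrayStringConcat(..., '.')
    -- reassembles '02751_query_log_test_partitions.3', which is what the
    -- reference file expects.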
+ arrayStringConcat(arrayPopFront(splitByString('.', partitions[1])), '.') +FROM + system.query_log +WHERE + current_database=currentDatabase() and + query = 'SELECT * FROM 02751_query_log_test_partitions WHERE a = 3;' diff --git a/tests/queries/0_stateless/02751_text_formats_bad_nullable_parsing.reference b/tests/queries/0_stateless/02751_text_formats_bad_nullable_parsing.reference new file mode 100644 index 00000000000..65e15e19c8b --- /dev/null +++ b/tests/queries/0_stateless/02751_text_formats_bad_nullable_parsing.reference @@ -0,0 +1,3 @@ +1 \N +1 \N +1 \N diff --git a/tests/queries/0_stateless/02751_text_formats_bad_nullable_parsing.sh b/tests/queries/0_stateless/02751_text_formats_bad_nullable_parsing.sh new file mode 100755 index 00000000000..e51079071ec --- /dev/null +++ b/tests/queries/0_stateless/02751_text_formats_bad_nullable_parsing.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +# NOTE: this sh wrapper is required because of shell_config + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists test" +$CLICKHOUSE_CLIENT -q "create table test (x UInt32, y Nullable(UInt32)) engine=MergeTree order by x" +$CLICKHOUSE_CLIENT -q "select '1\t\\\N\n2\t\\\' format RawBLOB" | $CLICKHOUSE_CLIENT -q "insert into test settings input_format_allow_errors_num=1 format TSV" +$CLICKHOUSE_CLIENT -q "select '1,\\\N\n2,\\\' format RawBLOB" | $CLICKHOUSE_CLIENT -q "insert into test settings input_format_allow_errors_num=1 format CSV" +$CLICKHOUSE_CLIENT -q "select '1\tNULL\n2\tN' format RawBLOB" | $CLICKHOUSE_CLIENT -q "insert into test settings input_format_allow_errors_num=2, format_custom_escaping_rule='Quoted' format CustomSeparated" +$CLICKHOUSE_CLIENT -q "select * from test" +$CLICKHOUSE_CLIENT -q "drop table test"; diff --git a/tests/queries/0_stateless/02752_custom_separated_ignore_spaces_bug.reference b/tests/queries/0_stateless/02752_custom_separated_ignore_spaces_bug.reference new file mode 100644 index 00000000000..f52004e62a3 --- /dev/null +++ b/tests/queries/0_stateless/02752_custom_separated_ignore_spaces_bug.reference @@ -0,0 +1 @@ +unquoted_string diff --git a/tests/queries/0_stateless/02752_custom_separated_ignore_spaces_bug.sql b/tests/queries/0_stateless/02752_custom_separated_ignore_spaces_bug.sql new file mode 100644 index 00000000000..62047a704a7 --- /dev/null +++ b/tests/queries/0_stateless/02752_custom_separated_ignore_spaces_bug.sql @@ -0,0 +1 @@ +select * from format(CustomSeparatedIgnoreSpaces, 'x String', ' unquoted_string\n') settings format_custom_escaping_rule='CSV'; diff --git a/tests/queries/0_stateless/02752_space_function.reference b/tests/queries/0_stateless/02752_space_function.reference new file mode 100644 index 00000000000..d265a843ba9 --- /dev/null +++ b/tests/queries/0_stateless/02752_space_function.reference @@ -0,0 +1,86 @@ +const, uint + 3 + 3 + 3 + 3 +const, int + 3 + 3 + 3 + 3 +const, int, negative + 0 + 0 + 0 + 0 +negative tests +null +\N +const, uint, multiple + + + + + + + + + + + + + + + + +const int, multiple + + + + + + + + + + + + + + + + +non-const, uint + 3 + 2 + 1 + 0 + 12 + 10 + 4 + 5 + 4 + 21 + 9 + 7 + 56 + 20 + 5 + 7 +non-const, int + 3 + 2 + 1 + 0 + 12 + 10 + 4 + 5 + 0 + 0 + 0 + 0 + 56 + 20 + 5 + 7 diff --git a/tests/queries/0_stateless/02752_space_function.sql b/tests/queries/0_stateless/02752_space_function.sql new file mode 100644 index 00000000000..b12906927df --- /dev/null +++ 
b/tests/queries/0_stateless/02752_space_function.sql
@@ -0,0 +1,64 @@
+SELECT 'const, uint';
+SELECT space(3::UInt8), length(space(3::UInt8));
+SELECT space(3::UInt16), length(space(3::UInt16));
+SELECT space(3::UInt32), length(space(3::UInt32));
+SELECT space(3::UInt64), length(space(3::UInt64));
+SELECT 'const, int';
+SELECT space(3::Int8), length(space(3::Int8));
+SELECT space(3::Int16), length(space(3::Int16));
+SELECT space(3::Int32), length(space(3::Int32));
+SELECT space(3::Int64), length(space(3::Int64));
+
+SELECT 'const, int, negative';
+SELECT space(-3::Int8), length(space(-3::Int8));
+SELECT space(-3::Int16), length(space(-3::Int16));
+SELECT space(-3::Int32), length(space(-3::Int32));
+SELECT space(-3::Int64), length(space(-3::Int64));
+
+SELECT 'negative tests';
+SELECT space('abc'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+SELECT space(['abc']); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+SELECT space(('abc')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+SELECT space(30303030303030303030303030303030::UInt64); -- { serverError TOO_LARGE_STRING_SIZE }
+
+SELECT 'null';
+SELECT space(NULL);
+
+DROP TABLE IF EXISTS defaults;
+CREATE TABLE defaults
+(
+    u8 UInt8,
+    u16 UInt16,
+    u32 UInt32,
+    u64 UInt64,
+    i8 Int8,
+    i16 Int16,
+    i32 Int32,
+    i64 Int64
+) ENGINE = Memory();
+
+INSERT INTO defaults VALUES (3, 12, 4, 56, 3, 12, -4, 56) (2, 10, 21, 20, 2, 10, -21, 20) (1, 4, 9, 5, 1, 4, -9, 5) (0, 5, 7, 7, 0, 5, -7, 7);
+
+SELECT 'const, uint, multiple';
+SELECT space(30::UInt8) FROM defaults;
+SELECT space(30::UInt16) FROM defaults;
+SELECT space(30::UInt32) FROM defaults;
+SELECT space(30::UInt64) FROM defaults;
+SELECT 'const int, multiple';
+SELECT space(30::Int8) FROM defaults;
+SELECT space(30::Int16) FROM defaults;
+SELECT space(30::Int32) FROM defaults;
+SELECT space(30::Int64) FROM defaults;
+
+SELECT 'non-const, uint';
+SELECT space(u8), length(space(u8)) FROM defaults;
+SELECT space(u16), length(space(u16)) FROM defaults;
+SELECT space(u32), length(space(u32)) FROM defaults;
+SELECT space(u64), length(space(u64)) FROM defaults;
+SELECT 'non-const, int';
+SELECT space(i8), length(space(i8)) FROM defaults;
+SELECT space(i16), length(space(i16)) FROM defaults;
+SELECT space(i32), length(space(i32)) FROM defaults;
+SELECT space(i64), length(space(i64)) FROM defaults;
+
+DROP TABLE defaults;
diff --git a/tests/queries/0_stateless/02760_dictionaries_memory.reference b/tests/queries/0_stateless/02760_dictionaries_memory.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/02760_dictionaries_memory.sql.j2 b/tests/queries/0_stateless/02760_dictionaries_memory.sql.j2
new file mode 100644
index 00000000000..ea979506e07
--- /dev/null
+++ b/tests/queries/0_stateless/02760_dictionaries_memory.sql.j2
@@ -0,0 +1,32 @@
+-- Tags: long
+-- Tag long: in parallel runs it could exceed 60 seconds
+{# vim: ft=sql #}
+
+SET max_memory_usage=0;
+DROP DICTIONARY IF EXISTS dict;
+DROP TABLE IF EXISTS dict_data;
+
+CREATE TABLE dict_data (key UInt64, value UInt64) Engine=Memory();
+INSERT INTO dict_data SELECT number, number%10 FROM numbers(3_000_000);
+
+SET max_memory_usage='4Mi';
+{% for layout in [
+    'FLAT(INITIAL_ARRAY_SIZE 3_000_000 MAX_ARRAY_SIZE 3_000_000)',
+    'HASHED()',
+    'HASHED_ARRAY()',
+    'SPARSE_HASHED()',
+    'SPARSE_HASHED(SHARDS 2 /* shards are special, they use threads */)',
+] %}
+CREATE DICTIONARY dict (key UInt64, value UInt64) PRIMARY KEY key SOURCE(CLICKHOUSE(TABLE dict_data)) LIFETIME(0) LAYOUT({{layout}});
+SYSTEM RELOAD DICTIONARY dict;
+-- Assert that the dictionary takes more than 20MB of memory, so that each of
+-- the (at most two) shards gets at least 10MB; this makes the allocations
+-- large enough for the memory tracker to reliably hit the
+-- MEMORY_LIMIT_EXCEEDED error.
+SELECT throwIf(bytes_allocated < 20e6, 'Memory constraints not matched for layout {{layout}}') FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict' FORMAT Null;
+DROP DICTIONARY dict;
+
+CREATE DICTIONARY dict (key UInt64, value UInt64) PRIMARY KEY key SOURCE(CLICKHOUSE(TABLE dict_data)) LIFETIME(0) LAYOUT({{layout}});
+SELECT dictGet('dict', 'value', 1::UInt64) FORMAT Null;
+DROP DICTIONARY dict;
+{% endfor %}
diff --git a/tests/queries/0_stateless/02761_ddl_initial_query_id.reference b/tests/queries/0_stateless/02761_ddl_initial_query_id.reference
new file mode 100644
index 00000000000..5c6f448eed5
--- /dev/null
+++ b/tests/queries/0_stateless/02761_ddl_initial_query_id.reference
@@ -0,0 +1,5 @@
+default distributed_ddl_entry_format_version
+DROP TABLE IF EXISTS foo ON CLUSTER test_shard_localhost
+distributed_ddl_entry_format_version=PRESERVE_INITIAL_QUERY_ID_VERSION
+DROP TABLE IF EXISTS default.foo
+DROP TABLE IF EXISTS foo ON CLUSTER test_shard_localhost
diff --git a/tests/queries/0_stateless/02761_ddl_initial_query_id.sh b/tests/queries/0_stateless/02761_ddl_initial_query_id.sh
new file mode 100755
index 00000000000..e9a315b812b
--- /dev/null
+++ b/tests/queries/0_stateless/02761_ddl_initial_query_id.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+echo "default distributed_ddl_entry_format_version"
+query_id="$(random_str 10)"
+$CLICKHOUSE_CLIENT --query_id "$query_id" --distributed_ddl_output_mode=none -q "DROP TABLE IF EXISTS foo ON CLUSTER test_shard_localhost"
+$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS"
+$CLICKHOUSE_CLIENT -q "SELECT query FROM system.query_log WHERE initial_query_id = '$query_id' AND type != 'QueryStart'"
+
+echo "distributed_ddl_entry_format_version=PRESERVE_INITIAL_QUERY_ID_VERSION"
+PRESERVE_INITIAL_QUERY_ID_VERSION=5
+query_id="$(random_str 10)"
+# Check that serialization will not be broken by newlines in initial_query_id
+query_id+=$'\nfoo'
+$CLICKHOUSE_CLIENT --distributed_ddl_entry_format_version=$PRESERVE_INITIAL_QUERY_ID_VERSION --query_id "$query_id" --distributed_ddl_output_mode=none -q "DROP TABLE IF EXISTS foo ON CLUSTER test_shard_localhost"
+$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS"
+# - normalizeQuery() is required to strip out the DDL comment
+# - replace() is required to avoid non-deterministic behaviour of
+#   normalizeQuery(), which replaces an identifier with "?" only if it contains
+#   more than two digits.
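+#   Illustration (hypothetical names, following the rule above): a query against
+#   a randomized test database such as 'test_ab12cd34' may be normalized to
+#   'DROP TABLE IF EXISTS ?.foo' (the name contains more than two digits), while
+#   a name like 'test_ab' would be kept as-is; replacing currentDatabase() with
+#   'default' first makes the logged query deterministic.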
+$CLICKHOUSE_CLIENT -q "SELECT normalizeQuery(replace(query, currentDatabase(), 'default')) FROM system.query_log WHERE initial_query_id = '$query_id' AND type != 'QueryStart' ORDER BY event_time_microseconds" diff --git a/tests/queries/0_stateless/02762_replicated_database_no_args.reference b/tests/queries/0_stateless/02762_replicated_database_no_args.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02762_replicated_database_no_args.sql b/tests/queries/0_stateless/02762_replicated_database_no_args.sql new file mode 100644 index 00000000000..1409a059b02 --- /dev/null +++ b/tests/queries/0_stateless/02762_replicated_database_no_args.sql @@ -0,0 +1,4 @@ +-- Tags: no-parallel + +set allow_experimental_database_replicated=1; +create database replicated_db_no_args engine=Replicated; -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02763_jit_compare_functions_nan.reference b/tests/queries/0_stateless/02763_jit_compare_functions_nan.reference new file mode 100644 index 00000000000..0fc42c1b712 --- /dev/null +++ b/tests/queries/0_stateless/02763_jit_compare_functions_nan.reference @@ -0,0 +1,7 @@ +-- +0 +0 +0 +0 +0 +0 diff --git a/tests/queries/0_stateless/02763_jit_compare_functions_nan.sql b/tests/queries/0_stateless/02763_jit_compare_functions_nan.sql new file mode 100644 index 00000000000..61d165139d6 --- /dev/null +++ b/tests/queries/0_stateless/02763_jit_compare_functions_nan.sql @@ -0,0 +1,25 @@ +SET compile_expressions = 1; +SET min_count_to_compile_expression = 0; + +DROP TABLE IF EXISTS test_table_1; +DROP TABLE IF EXISTS test_table_2; + +CREATE TABLE test_table_1 (id UInt32) ENGINE = MergeTree ORDER BY (id); +create table test_table_2 (id UInt32) ENGINE = MergeTree ORDER BY (id); +INSERT INTO test_table_1 VALUES (2); +INSERT INTO test_table_2 VALUES (2); + +select t1.id, t2.id FROM test_table_1 AS t1 RIGHT JOIN test_table_2 AS t2 ON (t1.id = t2.id) +WHERE (acos(t2.id) <> atan(t1.id)) and (not (acos(t2.id) <> atan(t1.id))); + +DROP TABLE test_table_1; +DROP TABLE test_table_2; + +SELECT '--'; + +SELECT (acos(a) <> atan(b)) and (not (acos(a) <> atan(b))) r FROM (SELECT 2 a, 2 b); +SELECT (acos(a) <> atan(b)) and (not (acos(a) <> atan(b))) r FROM (SELECT 2 a, 2 b); +SELECT (acos(a) <> atan(b)) and (not (acos(a) <> atan(b))) r FROM (SELECT 2 a, 2 b); +SELECT (acos(a) <> atan(b)) and (not (acos(a) <> atan(b))) r FROM (SELECT 2 a, 2 b); +SELECT (acos(a) <> atan(b)) and (not (acos(a) <> atan(b))) r FROM (SELECT 2 a, 2 b); +SELECT (acos(a) <> atan(b)) and (not (acos(a) <> atan(b))) r FROM (SELECT 2 a, 2 b); diff --git a/tests/queries/0_stateless/02764_date_filter_predicate_optimizer.reference b/tests/queries/0_stateless/02764_date_filter_predicate_optimizer.reference new file mode 100644 index 00000000000..e5c608ddc1a --- /dev/null +++ b/tests/queries/0_stateless/02764_date_filter_predicate_optimizer.reference @@ -0,0 +1,39 @@ +SELECT value1 +FROM t +WHERE ((date1 >= \'1993-01-01\') AND (date1 <= \'1993-12-31\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +WHERE ((date1 < \'1993-01-01\') OR (date1 > \'1993-12-31\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +WHERE (date1 < \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +WHERE (date1 > \'1993-12-31\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +WHERE (date1 <= \'1993-12-31\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +WHERE (date1 >= \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +WHERE 
((date1 >= \'1993-01-01\') AND (date1 <= \'1997-12-31\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +WHERE (((date1 >= \'1993-01-01\') AND (date1 <= \'1993-12-31\')) OR ((date1 >= \'1994-01-01\') AND (date1 <= \'1994-12-31\'))) AND ((id >= 1) AND (id <= 3)) +SELECT + value1, + toYear(date1) AS year1 +FROM t +WHERE ((date1 >= \'1993-01-01\') AND (date1 <= \'1993-12-31\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +WHERE (date1 < \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +PREWHERE (date1 >= \'1993-01-01\') AND (date1 <= \'1993-12-31\') +WHERE ((date1 >= \'1993-01-01\') AND (date1 <= \'1993-12-31\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +WHERE ((id >= 1) AND (id <= 3)) AND ((date1 >= \'1993-01-01\') AND (date1 <= \'1993-12-31\')) diff --git a/tests/queries/0_stateless/02764_date_filter_predicate_optimizer.sql b/tests/queries/0_stateless/02764_date_filter_predicate_optimizer.sql new file mode 100644 index 00000000000..563468d4f82 --- /dev/null +++ b/tests/queries/0_stateless/02764_date_filter_predicate_optimizer.sql @@ -0,0 +1,17 @@ +DROP TABLE IF EXISTS t; +CREATE TABLE t (id UInt32, value1 String, date1 Date) ENGINE ReplacingMergeTree() ORDER BY id; + +EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) <> 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) < 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) > 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) <= 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) >= 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) BETWEEN 1993 AND 1997 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t WHERE (toYear(date1) = 1993 OR toYear(date1) = 1994) AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1, toYear(date1) as year1 FROM t WHERE year1 = 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t WHERE 1993 > toYear(date1) AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t PREWHERE toYear(date1) = 1993 WHERE id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t WHERE id BETWEEN 1 AND 3 HAVING toYear(date1) = 1993; + +DROP TABLE t; diff --git a/tests/queries/0_stateless/02764_parallel_replicas_plain_merge_tree.reference b/tests/queries/0_stateless/02764_parallel_replicas_plain_merge_tree.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02764_parallel_replicas_plain_merge_tree.sql b/tests/queries/0_stateless/02764_parallel_replicas_plain_merge_tree.sql new file mode 100644 index 00000000000..aaf68dfd300 --- /dev/null +++ b/tests/queries/0_stateless/02764_parallel_replicas_plain_merge_tree.sql @@ -0,0 +1,16 @@ +CREATE TABLE IF NOT EXISTS parallel_replicas_plain (x String) ENGINE=MergeTree() ORDER BY x; +INSERT INTO parallel_replicas_plain SELECT toString(number) FROM numbers(10); + +SET max_parallel_replicas=3, allow_experimental_parallel_reading_from_replicas=1, use_hedged_requests=0, cluster_for_parallel_replicas='parallel_replicas'; +SET send_logs_level='error'; +SET parallel_replicas_for_non_replicated_merge_tree = 0; + +SELECT x FROM parallel_replicas_plain LIMIT 1 FORMAT Null; +SELECT max(length(x)) FROM parallel_replicas_plain FORMAT Null; + +SET parallel_replicas_for_non_replicated_merge_tree = 1; + +SELECT x FROM 
parallel_replicas_plain LIMIT 1 FORMAT Null; +SELECT max(length(x)) FROM parallel_replicas_plain FORMAT Null; + +DROP TABLE IF EXISTS parallel_replicas_plain; diff --git a/tests/queries/0_stateless/02765_parallel_replicas_final_modifier.reference b/tests/queries/0_stateless/02765_parallel_replicas_final_modifier.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02765_parallel_replicas_final_modifier.sql b/tests/queries/0_stateless/02765_parallel_replicas_final_modifier.sql new file mode 100644 index 00000000000..f447051e1e5 --- /dev/null +++ b/tests/queries/0_stateless/02765_parallel_replicas_final_modifier.sql @@ -0,0 +1,14 @@ +CREATE TABLE IF NOT EXISTS parallel_replicas_final (x String) ENGINE=ReplacingMergeTree() ORDER BY x; + +INSERT INTO parallel_replicas_final SELECT toString(number) FROM numbers(10); + +SET max_parallel_replicas=3, allow_experimental_parallel_reading_from_replicas=1, use_hedged_requests=0, cluster_for_parallel_replicas='parallel_replicas'; +SET parallel_replicas_for_non_replicated_merge_tree = 1; + +SELECT * FROM parallel_replicas_final FINAL FORMAT Null; + +SET allow_experimental_parallel_reading_from_replicas=2; + +SELECT * FROM parallel_replicas_final FINAL FORMAT Null; -- { serverError SUPPORT_IS_DISABLED } + +DROP TABLE IF EXISTS parallel_replicas_final; diff --git a/tests/queries/0_stateless/02765_queries_with_subqueries_profile_events.reference b/tests/queries/0_stateless/02765_queries_with_subqueries_profile_events.reference new file mode 100644 index 00000000000..d21f914f0dc --- /dev/null +++ b/tests/queries/0_stateless/02765_queries_with_subqueries_profile_events.reference @@ -0,0 +1,24 @@ +view allow_experimental_analyzer InsertQuery SelectQuery InsertQueriesWithSubqueries SelectQueriesWithSubqueries QueriesWithSubqueries +1 0 1 0 1 2 3 +subquery allow_experimental_analyzer InsertQuery SelectQuery InsertQueriesWithSubqueries SelectQueriesWithSubqueries QueriesWithSubqueries +1 0 0 1 0 2 2 +CSE allow_experimental_analyzer InsertQuery SelectQuery InsertQueriesWithSubqueries SelectQueriesWithSubqueries QueriesWithSubqueries +1 0 0 1 0 2 2 +CSE_Multi allow_experimental_analyzer InsertQuery SelectQuery InsertQueriesWithSubqueries SelectQueriesWithSubqueries QueriesWithSubqueries +1 0 0 1 0 2 2 +CTE allow_experimental_analyzer InsertQuery SelectQuery InsertQueriesWithSubqueries SelectQueriesWithSubqueries QueriesWithSubqueries +1 0 0 1 0 2 2 +CTE_Multi allow_experimental_analyzer InsertQuery SelectQuery InsertQueriesWithSubqueries SelectQueriesWithSubqueries QueriesWithSubqueries +1 0 0 1 0 4 4 +view allow_experimental_analyzer InsertQuery SelectQuery InsertQueriesWithSubqueries SelectQueriesWithSubqueries QueriesWithSubqueries +1 1 1 0 1 3 4 +subquery allow_experimental_analyzer InsertQuery SelectQuery InsertQueriesWithSubqueries SelectQueriesWithSubqueries QueriesWithSubqueries +1 1 0 1 0 2 2 +CSE allow_experimental_analyzer InsertQuery SelectQuery InsertQueriesWithSubqueries SelectQueriesWithSubqueries QueriesWithSubqueries +1 1 0 1 0 2 2 +CSE_Multi allow_experimental_analyzer InsertQuery SelectQuery InsertQueriesWithSubqueries SelectQueriesWithSubqueries QueriesWithSubqueries +1 1 0 1 0 2 2 +CTE allow_experimental_analyzer InsertQuery SelectQuery InsertQueriesWithSubqueries SelectQueriesWithSubqueries QueriesWithSubqueries +1 1 0 1 0 2 2 +CTE_Multi allow_experimental_analyzer InsertQuery SelectQuery InsertQueriesWithSubqueries SelectQueriesWithSubqueries QueriesWithSubqueries +1 1 0 1 0 4 4 diff --git 
a/tests/queries/0_stateless/02765_queries_with_subqueries_profile_events.sh b/tests/queries/0_stateless/02765_queries_with_subqueries_profile_events.sh new file mode 100755 index 00000000000..cded0b28409 --- /dev/null +++ b/tests/queries/0_stateless/02765_queries_with_subqueries_profile_events.sh @@ -0,0 +1,120 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -n -q " + DROP TABLE IF EXISTS mv; + DROP TABLE IF EXISTS output; + DROP TABLE IF EXISTS input; + + CREATE TABLE input (key Int) Engine=Null; + CREATE TABLE output AS input Engine=Null; + CREATE MATERIALIZED VIEW mv TO output AS SELECT * FROM input; +" + +for allow_experimental_analyzer in 0 1; do + query_id="$(random_str 10)" + $CLICKHOUSE_CLIENT --allow_experimental_analyzer "$allow_experimental_analyzer" --query_id "$query_id" -q "INSERT INTO input SELECT * FROM numbers(1)" + $CLICKHOUSE_CLIENT -mn -q " + SYSTEM FLUSH LOGS; + SELECT + 1 view, + $allow_experimental_analyzer allow_experimental_analyzer, + ProfileEvents['InsertQuery'] InsertQuery, + ProfileEvents['SelectQuery'] SelectQuery, + ProfileEvents['InsertQueriesWithSubqueries'] InsertQueriesWithSubqueries, + -- FIXME: for analyzer it will have one more for sample block + ProfileEvents['SelectQueriesWithSubqueries'] SelectQueriesWithSubqueries, + ProfileEvents['QueriesWithSubqueries'] QueriesWithSubqueries + FROM system.query_log + WHERE current_database = currentDatabase() AND type = 'QueryFinish' AND query_id = '$query_id' + FORMAT TSVWithNames; + " + + query_id="$(random_str 10)" + $CLICKHOUSE_CLIENT --allow_experimental_analyzer "$allow_experimental_analyzer" --query_id "$query_id" -q "SELECT * FROM system.one WHERE dummy IN (SELECT * FROM system.one) FORMAT Null" + $CLICKHOUSE_CLIENT -mn -q " + SYSTEM FLUSH LOGS; + SELECT + 1 subquery, + $allow_experimental_analyzer allow_experimental_analyzer, + ProfileEvents['InsertQuery'] InsertQuery, + ProfileEvents['SelectQuery'] SelectQuery, + ProfileEvents['InsertQueriesWithSubqueries'] InsertQueriesWithSubqueries, + ProfileEvents['SelectQueriesWithSubqueries'] SelectQueriesWithSubqueries, + ProfileEvents['QueriesWithSubqueries'] QueriesWithSubqueries + FROM system.query_log + WHERE current_database = currentDatabase() AND type = 'QueryFinish' AND query_id = '$query_id' + FORMAT TSVWithNames; + " + + query_id="$(random_str 10)" + $CLICKHOUSE_CLIENT --allow_experimental_analyzer "$allow_experimental_analyzer" --query_id "$query_id" -q "WITH (SELECT * FROM system.one) AS x SELECT x FORMAT Null" + $CLICKHOUSE_CLIENT -mn -q " + SYSTEM FLUSH LOGS; + SELECT + 1 CSE, + $allow_experimental_analyzer allow_experimental_analyzer, + ProfileEvents['InsertQuery'] InsertQuery, + ProfileEvents['SelectQuery'] SelectQuery, + ProfileEvents['InsertQueriesWithSubqueries'] InsertQueriesWithSubqueries, + ProfileEvents['SelectQueriesWithSubqueries'] SelectQueriesWithSubqueries, + ProfileEvents['QueriesWithSubqueries'] QueriesWithSubqueries + FROM system.query_log + WHERE current_database = currentDatabase() AND type = 'QueryFinish' AND query_id = '$query_id' + FORMAT TSVWithNames; + " + + query_id="$(random_str 10)" + $CLICKHOUSE_CLIENT --allow_experimental_analyzer "$allow_experimental_analyzer" --query_id "$query_id" -q "WITH (SELECT * FROM system.one) AS x SELECT x, x FORMAT Null" + $CLICKHOUSE_CLIENT -mn -q " + SYSTEM FLUSH LOGS; + SELECT + 1 CSE_Multi, + $allow_experimental_analyzer allow_experimental_analyzer, + 
ProfileEvents['InsertQuery'] InsertQuery, + ProfileEvents['SelectQuery'] SelectQuery, + ProfileEvents['InsertQueriesWithSubqueries'] InsertQueriesWithSubqueries, + ProfileEvents['SelectQueriesWithSubqueries'] SelectQueriesWithSubqueries, + ProfileEvents['QueriesWithSubqueries'] QueriesWithSubqueries + FROM system.query_log + WHERE current_database = currentDatabase() AND type = 'QueryFinish' AND query_id = '$query_id' + FORMAT TSVWithNames; + " + + query_id="$(random_str 10)" + $CLICKHOUSE_CLIENT --allow_experimental_analyzer "$allow_experimental_analyzer" --query_id "$query_id" -q "WITH x AS (SELECT * FROM system.one) SELECT * FROM x FORMAT Null" + $CLICKHOUSE_CLIENT -mn -q " + SYSTEM FLUSH LOGS; + SELECT + 1 CTE, + $allow_experimental_analyzer allow_experimental_analyzer, + ProfileEvents['InsertQuery'] InsertQuery, + ProfileEvents['SelectQuery'] SelectQuery, + ProfileEvents['InsertQueriesWithSubqueries'] InsertQueriesWithSubqueries, + ProfileEvents['SelectQueriesWithSubqueries'] SelectQueriesWithSubqueries, + ProfileEvents['QueriesWithSubqueries'] QueriesWithSubqueries + FROM system.query_log + WHERE current_database = currentDatabase() AND type = 'QueryFinish' AND query_id = '$query_id' + FORMAT TSVWithNames; + " + + query_id="$(random_str 10)" + $CLICKHOUSE_CLIENT --allow_experimental_analyzer "$allow_experimental_analyzer" --query_id "$query_id" -q "WITH x AS (SELECT * FROM system.one) SELECT * FROM x UNION ALL SELECT * FROM x FORMAT Null" + $CLICKHOUSE_CLIENT -mn -q " + SYSTEM FLUSH LOGS; + SELECT + 1 CTE_Multi, + $allow_experimental_analyzer allow_experimental_analyzer, + ProfileEvents['InsertQuery'] InsertQuery, + ProfileEvents['SelectQuery'] SelectQuery, + ProfileEvents['InsertQueriesWithSubqueries'] InsertQueriesWithSubqueries, + ProfileEvents['SelectQueriesWithSubqueries'] SelectQueriesWithSubqueries, + ProfileEvents['QueriesWithSubqueries'] QueriesWithSubqueries + FROM system.query_log + WHERE current_database = currentDatabase() AND type = 'QueryFinish' AND query_id = '$query_id' + FORMAT TSVWithNames; + " +done diff --git a/tests/queries/0_stateless/02767_into_outfile_extensions_msan.reference b/tests/queries/0_stateless/02767_into_outfile_extensions_msan.reference new file mode 100644 index 00000000000..0c8b489c2fd --- /dev/null +++ b/tests/queries/0_stateless/02767_into_outfile_extensions_msan.reference @@ -0,0 +1,2 @@ +Expression ((Projection + Before ORDER BY)) + ReadFromStorage (SystemNumbers) diff --git a/tests/queries/0_stateless/02767_into_outfile_extensions_msan.sh b/tests/queries/0_stateless/02767_into_outfile_extensions_msan.sh new file mode 100755 index 00000000000..0c5767314d5 --- /dev/null +++ b/tests/queries/0_stateless/02767_into_outfile_extensions_msan.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +out="explain1.$CLICKHOUSE_TEST_UNIQUE_NAME.out" +# only EXPLAIN triggers the problem under MSan +$CLICKHOUSE_CLIENT --allow_experimental_analyzer=0 -q "explain select * from numbers(1) into outfile '$out'" +cat "$out" +rm -f "$out" diff --git a/tests/queries/0_stateless/02768_into_outfile_extensions_format.reference b/tests/queries/0_stateless/02768_into_outfile_extensions_format.reference new file mode 100644 index 00000000000..4ebc1da8865 --- /dev/null +++ b/tests/queries/0_stateless/02768_into_outfile_extensions_format.reference @@ -0,0 +1,20 @@ +SELECT * +FROM numbers(1) +INTO OUTFILE '/dev/null' +; + +SELECT * +FROM numbers(1) +INTO OUTFILE '/dev/null' AND STDOUT +; + +SELECT * +FROM numbers(1) +INTO OUTFILE '/dev/null' APPEND +; + +SELECT * +FROM numbers(1) +INTO OUTFILE '/dev/null' APPEND AND STDOUT +; + diff --git a/tests/queries/0_stateless/02768_into_outfile_extensions_format.sh b/tests/queries/0_stateless/02768_into_outfile_extensions_format.sh new file mode 100755 index 00000000000..756488076f9 --- /dev/null +++ b/tests/queries/0_stateless/02768_into_outfile_extensions_format.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +echo " +select * from numbers(1) into outfile '/dev/null'; +select * from numbers(1) into outfile '/dev/null' and stdout; +select * from numbers(1) into outfile '/dev/null' append; +select * from numbers(1) into outfile '/dev/null' append and stdout; +" | clickhouse-format -n diff --git a/tests/queries/0_stateless/02769_nan_equality_comparison.reference b/tests/queries/0_stateless/02769_nan_equality_comparison.reference new file mode 100644 index 00000000000..a8ba06cfce6 --- /dev/null +++ b/tests/queries/0_stateless/02769_nan_equality_comparison.reference @@ -0,0 +1,10 @@ +nan 0 0 0 0 +nan 0 0 0 0 +nan nan 0 0 0 0 +-- +-- +nan 1 1 1 1 +nan 1 1 1 1 +nan nan 1 1 1 1 +-- +nan diff --git a/tests/queries/0_stateless/02769_nan_equality_comparison.sql b/tests/queries/0_stateless/02769_nan_equality_comparison.sql new file mode 100644 index 00000000000..6cce19a2204 --- /dev/null +++ b/tests/queries/0_stateless/02769_nan_equality_comparison.sql @@ -0,0 +1,42 @@ +SET compile_expressions = 1; +SET min_count_to_compile_expression = 0; + +SELECT nan AS value, value = value, value = materialize(value), materialize(value) = value, materialize(value) = materialize(value); +SELECT cast(nan, 'Float32') AS value, value = value, value = materialize(value), materialize(value) = value, materialize(value) = materialize(value); +SELECT nan AS lhs, cast(nan, 'Float32') AS rhs, lhs = rhs, lhs = materialize(rhs), materialize(lhs) = rhs, materialize(lhs) = materialize(rhs); + +SELECT '--'; + +CREATE TABLE test_table +( + id UInt32, + value UInt32 +) ENGINE = MergeTree ORDER BY id; +INSERT INTO test_table VALUES (76, 57); + +SELECT value FROM (SELECT stddevSamp(id) AS value FROM test_table) as subquery +WHERE ((value = value) AND (NOT (value = value))); + +DROP TABLE test_table; + +SELECT '--'; + +SELECT nan AS value, value != value, value != materialize(value), materialize(value) != value, materialize(value) != materialize(value); +SELECT cast(nan, 'Float32') AS value, value != value, value != materialize(value), materialize(value) != value, materialize(value) != materialize(value); +SELECT nan AS lhs, cast(nan, 'Float32') AS rhs, lhs != rhs, lhs != materialize(rhs), materialize(lhs) != rhs, materialize(lhs) != materialize(rhs); + 
+SELECT '--'; + +CREATE TABLE test_table +( + id UInt32, + value_1 UInt32, + value_2 Float32 +) ENGINE = MergeTree ORDER BY id; +INSERT INTO test_table VALUES (12000, 36, 77.94); + +SELECT value +FROM (SELECT (corr(value_1, value_1) OVER test_window) AS value FROM test_table WINDOW test_window AS (PARTITION BY value_2 ORDER BY id ASC)) as subquery +WHERE not (not (value <> value)); + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02770_jit_aggregation_nullable_key_fix.reference b/tests/queries/0_stateless/02770_jit_aggregation_nullable_key_fix.reference new file mode 100644 index 00000000000..7d604c076f4 --- /dev/null +++ b/tests/queries/0_stateless/02770_jit_aggregation_nullable_key_fix.reference @@ -0,0 +1,4 @@ +1048576 +65411 +1048576 +65411 diff --git a/tests/queries/0_stateless/02770_jit_aggregation_nullable_key_fix.sql b/tests/queries/0_stateless/02770_jit_aggregation_nullable_key_fix.sql new file mode 100644 index 00000000000..e4ce789f4da --- /dev/null +++ b/tests/queries/0_stateless/02770_jit_aggregation_nullable_key_fix.sql @@ -0,0 +1,39 @@ +SET compile_aggregate_expressions = 1; +SET min_count_to_compile_aggregate_expression = 0; +SET group_by_use_nulls = 0; + +SELECT count() FROM +( + SELECT + count([NULL, NULL]), + count([2147483646, -2147483647, 3, 3]), + uniqExact(if(number >= 1048577, number, NULL), NULL) + FROM numbers(1048577) + GROUP BY if(number >= 2., number, NULL) +); + +SELECT count() FROM +( + SELECT count() + FROM numbers(65411) + GROUP BY if(number < 1, NULL, number) +); + +SET group_by_use_nulls = 1; + +SELECT count() FROM +( + SELECT + count([NULL, NULL]), + count([2147483646, -2147483647, 3, 3]), + uniqExact(if(number >= 1048577, number, NULL), NULL) + FROM numbers(1048577) + GROUP BY if(number >= 2., number, NULL) +); + +SELECT count() FROM +( + SELECT count() + FROM numbers(65411) + GROUP BY if(number < 1, NULL, number) +); diff --git a/tests/queries/0_stateless/add-test b/tests/queries/0_stateless/add-test index e8e68cf174e..dbff8212020 100755 --- a/tests/queries/0_stateless/add-test +++ b/tests/queries/0_stateless/add-test @@ -10,7 +10,7 @@ TESTS_PATH=$(dirname ${BASH_SOURCE[0]}) set -ue # shellcheck disable=SC2010 -LAST_TEST_NO=$(ls -1 ${TESTS_PATH} | grep -P -o '^\d+' | sort -nr | head -1) +LAST_TEST_NO=$(ls -1 ${TESTS_PATH} | grep -E -o '^[0-9]+' | sort -nr | head -1) # remove leading zeros, increment and add padding zeros to 5 digits NEW_TEST_NO=$(printf "%05d\n" $((10#$LAST_TEST_NO + 1))) diff --git a/tests/queries/0_stateless/data_parquet/02588_data.parquet b/tests/queries/0_stateless/data_parquet/02588_data.parquet new file mode 100644 index 00000000000..e00b869233d Binary files /dev/null and b/tests/queries/0_stateless/data_parquet/02588_data.parquet differ diff --git a/tests/queries/0_stateless/data_parquet/02716_data.parquet b/tests/queries/0_stateless/data_parquet/02716_data.parquet new file mode 100644 index 00000000000..21875ca286c Binary files /dev/null and b/tests/queries/0_stateless/data_parquet/02716_data.parquet differ diff --git a/tests/queries/0_stateless/data_parquet/02718_data.parquet b/tests/queries/0_stateless/data_parquet/02718_data.parquet new file mode 100644 index 00000000000..6a930689c83 Binary files /dev/null and b/tests/queries/0_stateless/data_parquet/02718_data.parquet differ diff --git a/tests/queries/0_stateless/data_parquet/02725_data.parquet b/tests/queries/0_stateless/data_parquet/02725_data.parquet new file mode 100644 index 00000000000..5b4842c9dbd Binary files /dev/null and 
b/tests/queries/0_stateless/data_parquet/02725_data.parquet differ diff --git a/tests/queries/0_stateless/data_parquet/datapage_v2.snappy.parquet.columns b/tests/queries/0_stateless/data_parquet/datapage_v2.snappy.parquet.columns index c6bb5057cc2..dc094bef8ed 100644 --- a/tests/queries/0_stateless/data_parquet/datapage_v2.snappy.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/datapage_v2.snappy.parquet.columns @@ -1 +1 @@ -`a` Nullable(String), `b` Array(Nullable(Int32)), `c` Nullable(Float64), `d` Nullable(UInt8), `e` Array(Nullable(Int32)) +`a` Nullable(String), `b` Nullable(Int32), `c` Nullable(Float64), `d` Nullable(UInt8), `e` Array(Nullable(Int32)) diff --git a/tests/queries/0_stateless/format_schemas/02566_ipv4_ipv6.capnp b/tests/queries/0_stateless/format_schemas/02566_ipv4_ipv6.capnp new file mode 100644 index 00000000000..f999043e2d2 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02566_ipv4_ipv6.capnp @@ -0,0 +1,6 @@ +@0xb6ecde1cd54a101d; + +struct Message { + ipv4 @0 :UInt32; + ipv6 @1 :Data; +} diff --git a/tests/queries/0_stateless/format_schemas/02705_big_numbers.capnp b/tests/queries/0_stateless/format_schemas/02705_big_numbers.capnp new file mode 100644 index 00000000000..67ef4637454 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02705_big_numbers.capnp @@ -0,0 +1,10 @@ +@0xdbb9ad1f14bf0b36; + +struct Message { + int128 @0 :Data; + uint128 @1 :Data; + int256 @2 :Data; + uint256 @3 :Data; + decimal128 @4 :Data; + decimal256 @5 :Data; +} diff --git a/tests/queries/0_stateless/format_schemas/02705_map.capnp b/tests/queries/0_stateless/format_schemas/02705_map.capnp new file mode 100644 index 00000000000..91f0125feeb --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02705_map.capnp @@ -0,0 +1,14 @@ +@0xdbb9ad1f14bf0b36; + +struct Message { + struct Map { + struct Entry { + key @0 : Text; + value @1 : UInt32; + } + + entries @0 : List(Entry); + } + + map @0 : Map; +} diff --git a/tests/queries/0_stateless/format_schemas/02707_schema.proto b/tests/queries/0_stateless/format_schemas/02707_schema.proto new file mode 100644 index 00000000000..afbc1f854b1 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02707_schema.proto @@ -0,0 +1,11 @@ +syntax = "proto3"; + +message Nested { + int32 a = 1; + string b = 2; + repeated int32 c = 3; +}; + +message Message { + Nested x = 1; +}; diff --git a/tests/queries/0_stateless/format_schemas/02710_schema.proto b/tests/queries/0_stateless/format_schemas/02710_schema.proto new file mode 100644 index 00000000000..d6bf0ad350a --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02710_schema.proto @@ -0,0 +1,11 @@ +syntax = "proto3"; + +message Message +{ + uint32 ipv4 = 1; + bytes ipv4_bytes = 2; + int64 ipv4_int64 = 3; + int32 date32 = 4; + bytes date32_bytes = 5; + int64 date32_int64 = 6; +} diff --git a/tests/queries/0_stateless/format_schemas/02751_protobuf_ipv6.proto b/tests/queries/0_stateless/format_schemas/02751_protobuf_ipv6.proto new file mode 100644 index 00000000000..8e6f115f2d7 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02751_protobuf_ipv6.proto @@ -0,0 +1,6 @@ +syntax = "proto3"; + +message Message +{ + bytes ipv6_bytes = 3; +} diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib index 7c7db48e078..1805b56f8dc 100755 --- a/tests/queries/0_stateless/replication.lib +++ b/tests/queries/0_stateless/replication.lib @@ -29,8 +29,10 @@ function try_sync_replicas() i=0 for t in "${tables_arr[@]}" do - # The 
size of log may be big, so increase timeout. - $CLICKHOUSE_CLIENT --receive_timeout $time_left -q "SYSTEM SYNC REPLICA $t" || ($CLICKHOUSE_CLIENT -q \ + # Do not start new merges (it can make SYNC a bit faster) + $CLICKHOUSE_CLIENT -q "ALTER TABLE $t MODIFY SETTING max_replicated_merges_in_queue=0" + + $CLICKHOUSE_CLIENT --receive_timeout $time_left -q "SYSTEM SYNC REPLICA $t STRICT" || ($CLICKHOUSE_CLIENT -q \ "select 'sync failed, queue:', * from system.replication_queue where database=currentDatabase() and table='$t' order by database, table, node_name" && exit 1) & pids[${i}]=$! i=$((i + 1)) @@ -79,9 +81,9 @@ function check_replication_consistency() # Trigger pullLogsToQueue(...) and updateMutations(...) on some replica to make it pull all mutations, so it will be possible to kill them some_table=$($CLICKHOUSE_CLIENT -q "SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$table_name_prefix%' ORDER BY rand() LIMIT 1") - $CLICKHOUSE_CLIENT --receive_timeout 3 -q "SYSTEM SYNC REPLICA $some_table" 1>/dev/null 2>/dev/null ||: + $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA $some_table PULL" 1>/dev/null 2>/dev/null ||: some_table=$($CLICKHOUSE_CLIENT -q "SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$table_name_prefix%' ORDER BY rand() LIMIT 1") - $CLICKHOUSE_CLIENT --receive_timeout 3 -q "SYSTEM SYNC REPLICA $some_table" 1>/dev/null 2>/dev/null ||: + $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA $some_table PULL" 1>/dev/null 2>/dev/null ||: # Forcefully cancel mutations to avoid waiting for them to finish ${CLICKHOUSE_CLIENT} -q "KILL MUTATION WHERE database=currentDatabase() AND table like '$table_name_prefix%'" > /dev/null diff --git a/tests/queries/1_stateful/00081_group_by_without_key_and_totals.sql b/tests/queries/1_stateful/00081_group_by_without_key_and_totals.sql index d9653f48552..03e9e0feb40 100644 --- a/tests/queries/1_stateful/00081_group_by_without_key_and_totals.sql +++ b/tests/queries/1_stateful/00081_group_by_without_key_and_totals.sql @@ -1,3 +1,5 @@ +SET allow_experimental_analyzer = 1; + SELECT count() AS c FROM test.hits WHERE CounterID = 1704509 WITH TOTALS SETTINGS totals_mode = 'before_having', max_rows_to_group_by = 100000, group_by_overflow_mode = 'any'; SELECT count() AS c FROM test.hits WHERE CounterID = 1704509 WITH TOTALS SETTINGS totals_mode = 'after_having_inclusive', max_rows_to_group_by = 100000, group_by_overflow_mode = 'any'; SELECT count() AS c FROM test.hits WHERE CounterID = 1704509 WITH TOTALS SETTINGS totals_mode = 'after_having_exclusive', max_rows_to_group_by = 100000, group_by_overflow_mode = 'any'; diff --git a/tests/queries/1_stateful/00151_replace_partition_with_different_granularity.reference b/tests/queries/1_stateful/00151_replace_partition_with_different_granularity.reference deleted file mode 100644 index 31d3e6d14da..00000000000 --- a/tests/queries/1_stateful/00151_replace_partition_with_different_granularity.reference +++ /dev/null @@ -1,3 +0,0 @@ -8873898 -8873899 -8873899 diff --git a/tests/queries/1_stateful/00151_replace_partition_with_different_granularity.sql b/tests/queries/1_stateful/00151_replace_partition_with_different_granularity.sql deleted file mode 100644 index c1a2001e2a5..00000000000 --- a/tests/queries/1_stateful/00151_replace_partition_with_different_granularity.sql +++ /dev/null @@ -1,41 +0,0 @@ --- Tags: no-tsan - -DROP TABLE IF EXISTS mixed_granularity_table; - -CREATE TABLE mixed_granularity_table (`WatchID` UInt64, `JavaEnable` UInt8, `Title` String, 
`GoodEvent` Int16, `EventTime` DateTime, `EventDate` Date, `CounterID` UInt32, `ClientIP` UInt32, `ClientIP6` FixedString(16), `RegionID` UInt32, `UserID` UInt64, `CounterClass` Int8, `OS` UInt8, `UserAgent` UInt8, `URL` String, `Referer` String, `URLDomain` String, `RefererDomain` String, `Refresh` UInt8, `IsRobot` UInt8, `RefererCategories` Array(UInt16), `URLCategories` Array(UInt16), `URLRegions` Array(UInt32), `RefererRegions` Array(UInt32), `ResolutionWidth` UInt16, `ResolutionHeight` UInt16, `ResolutionDepth` UInt8, `FlashMajor` UInt8, `FlashMinor` UInt8, `FlashMinor2` String, `NetMajor` UInt8, `NetMinor` UInt8, `UserAgentMajor` UInt16, `UserAgentMinor` FixedString(2), `CookieEnable` UInt8, `JavascriptEnable` UInt8, `IsMobile` UInt8, `MobilePhone` UInt8, `MobilePhoneModel` String, `Params` String, `IPNetworkID` UInt32, `TraficSourceID` Int8, `SearchEngineID` UInt16, `SearchPhrase` String, `AdvEngineID` UInt8, `IsArtifical` UInt8, `WindowClientWidth` UInt16, `WindowClientHeight` UInt16, `ClientTimeZone` Int16, `ClientEventTime` DateTime, `SilverlightVersion1` UInt8, `SilverlightVersion2` UInt8, `SilverlightVersion3` UInt32, `SilverlightVersion4` UInt16, `PageCharset` String, `CodeVersion` UInt32, `IsLink` UInt8, `IsDownload` UInt8, `IsNotBounce` UInt8, `FUniqID` UInt64, `HID` UInt32, `IsOldCounter` UInt8, `IsEvent` UInt8, `IsParameter` UInt8, `DontCountHits` UInt8, `WithHash` UInt8, `HitColor` FixedString(1), `UTCEventTime` DateTime, `Age` UInt8, `Sex` UInt8, `Income` UInt8, `Interests` UInt16, `Robotness` UInt8, `GeneralInterests` Array(UInt16), `RemoteIP` UInt32, `RemoteIP6` FixedString(16), `WindowName` Int32, `OpenerName` Int32, `HistoryLength` Int16, `BrowserLanguage` FixedString(2), `BrowserCountry` FixedString(2), `SocialNetwork` String, `SocialAction` String, `HTTPError` UInt16, `SendTiming` Int32, `DNSTiming` Int32, `ConnectTiming` Int32, `ResponseStartTiming` Int32, `ResponseEndTiming` Int32, `FetchTiming` Int32, `RedirectTiming` Int32, `DOMInteractiveTiming` Int32, `DOMContentLoadedTiming` Int32, `DOMCompleteTiming` Int32, `LoadEventStartTiming` Int32, `LoadEventEndTiming` Int32, `NSToDOMContentLoadedTiming` Int32, `FirstPaintTiming` Int32, `RedirectCount` Int8, `SocialSourceNetworkID` UInt8, `SocialSourcePage` String, `ParamPrice` Int64, `ParamOrderID` String, `ParamCurrency` FixedString(3), `ParamCurrencyID` UInt16, `GoalsReached` Array(UInt32), `OpenstatServiceName` String, `OpenstatCampaignID` String, `OpenstatAdID` String, `OpenstatSourceID` String, `UTMSource` String, `UTMMedium` String, `UTMCampaign` String, `UTMContent` String, `UTMTerm` String, `FromTag` String, `HasGCLID` UInt8, `RefererHash` UInt64, `URLHash` UInt64, `CLID` UInt32, `YCLID` UInt64, `ShareService` String, `ShareURL` String, `ShareTitle` String, `ParsedParams.Key1` Array(String), `ParsedParams.Key2` Array(String), `ParsedParams.Key3` Array(String), `ParsedParams.Key4` Array(String), `ParsedParams.Key5` Array(String), `ParsedParams.ValueDouble` Array(Float64), `IslandID` FixedString(16), `RequestNum` UInt32, `RequestTry` UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity=8192, enable_mixed_granularity_parts=1; -- same with hits, but enabled mixed granularity - -INSERT INTO mixed_granularity_table SELECT * FROM test.hits LIMIT 10; - -ALTER TABLE mixed_granularity_table REPLACE PARTITION 201403 FROM test.hits; - -SELECT COUNT() FROM mixed_granularity_table; - -INSERT INTO 
mixed_granularity_table SELECT * FROM test.hits LIMIT 1; - -SELECT COUNT() FROM mixed_granularity_table; - -OPTIMIZE TABLE mixed_granularity_table FINAL; - -SELECT COUNT() FROM mixed_granularity_table; - -CREATE TABLE non_mixed_granularity_non_adaptive_table (`WatchID` UInt64, `JavaEnable` UInt8, `Title` String, `GoodEvent` Int16, `EventTime` DateTime, `EventDate` Date, `CounterID` UInt32, `ClientIP` UInt32, `ClientIP6` FixedString(16), `RegionID` UInt32, `UserID` UInt64, `CounterClass` Int8, `OS` UInt8, `UserAgent` UInt8, `URL` String, `Referer` String, `URLDomain` String, `RefererDomain` String, `Refresh` UInt8, `IsRobot` UInt8, `RefererCategories` Array(UInt16), `URLCategories` Array(UInt16), `URLRegions` Array(UInt32), `RefererRegions` Array(UInt32), `ResolutionWidth` UInt16, `ResolutionHeight` UInt16, `ResolutionDepth` UInt8, `FlashMajor` UInt8, `FlashMinor` UInt8, `FlashMinor2` String, `NetMajor` UInt8, `NetMinor` UInt8, `UserAgentMajor` UInt16, `UserAgentMinor` FixedString(2), `CookieEnable` UInt8, `JavascriptEnable` UInt8, `IsMobile` UInt8, `MobilePhone` UInt8, `MobilePhoneModel` String, `Params` String, `IPNetworkID` UInt32, `TraficSourceID` Int8, `SearchEngineID` UInt16, `SearchPhrase` String, `AdvEngineID` UInt8, `IsArtifical` UInt8, `WindowClientWidth` UInt16, `WindowClientHeight` UInt16, `ClientTimeZone` Int16, `ClientEventTime` DateTime, `SilverlightVersion1` UInt8, `SilverlightVersion2` UInt8, `SilverlightVersion3` UInt32, `SilverlightVersion4` UInt16, `PageCharset` String, `CodeVersion` UInt32, `IsLink` UInt8, `IsDownload` UInt8, `IsNotBounce` UInt8, `FUniqID` UInt64, `HID` UInt32, `IsOldCounter` UInt8, `IsEvent` UInt8, `IsParameter` UInt8, `DontCountHits` UInt8, `WithHash` UInt8, `HitColor` FixedString(1), `UTCEventTime` DateTime, `Age` UInt8, `Sex` UInt8, `Income` UInt8, `Interests` UInt16, `Robotness` UInt8, `GeneralInterests` Array(UInt16), `RemoteIP` UInt32, `RemoteIP6` FixedString(16), `WindowName` Int32, `OpenerName` Int32, `HistoryLength` Int16, `BrowserLanguage` FixedString(2), `BrowserCountry` FixedString(2), `SocialNetwork` String, `SocialAction` String, `HTTPError` UInt16, `SendTiming` Int32, `DNSTiming` Int32, `ConnectTiming` Int32, `ResponseStartTiming` Int32, `ResponseEndTiming` Int32, `FetchTiming` Int32, `RedirectTiming` Int32, `DOMInteractiveTiming` Int32, `DOMContentLoadedTiming` Int32, `DOMCompleteTiming` Int32, `LoadEventStartTiming` Int32, `LoadEventEndTiming` Int32, `NSToDOMContentLoadedTiming` Int32, `FirstPaintTiming` Int32, `RedirectCount` Int8, `SocialSourceNetworkID` UInt8, `SocialSourcePage` String, `ParamPrice` Int64, `ParamOrderID` String, `ParamCurrency` FixedString(3), `ParamCurrencyID` UInt16, `GoalsReached` Array(UInt32), `OpenstatServiceName` String, `OpenstatCampaignID` String, `OpenstatAdID` String, `OpenstatSourceID` String, `UTMSource` String, `UTMMedium` String, `UTMCampaign` String, `UTMContent` String, `UTMTerm` String, `FromTag` String, `HasGCLID` UInt8, `RefererHash` UInt64, `URLHash` UInt64, `CLID` UInt32, `YCLID` UInt64, `ShareService` String, `ShareURL` String, `ShareTitle` String, `ParsedParams.Key1` Array(String), `ParsedParams.Key2` Array(String), `ParsedParams.Key3` Array(String), `ParsedParams.Key4` Array(String), `ParsedParams.Key5` Array(String), `ParsedParams.ValueDouble` Array(Float64), `IslandID` FixedString(16), `RequestNum` UInt32, `RequestTry` UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity=8192, 
index_granularity_bytes=0; -- same with hits, but enabled mixed granularity and fixed_granularity - -INSERT INTO non_mixed_granularity_non_adaptive_table SELECT * FROM test.hits LIMIT 10; - --- after optimize mixed_granularity_table will have .mrk2 parts -ALTER TABLE non_mixed_granularity_non_adaptive_table REPLACE PARTITION 201403 FROM mixed_granularity_table; -- { serverError 36 } - -DROP TABLE IF EXISTS non_mixed_granularity_non_adaptive_table; - - -DROP TABLE IF EXISTS mixed_granularity_strictly_non_adaptive_table; - -CREATE TABLE mixed_granularity_strictly_non_adaptive_table (`WatchID` UInt64, `JavaEnable` UInt8, `Title` String, `GoodEvent` Int16, `EventTime` DateTime, `EventDate` Date, `CounterID` UInt32, `ClientIP` UInt32, `ClientIP6` FixedString(16), `RegionID` UInt32, `UserID` UInt64, `CounterClass` Int8, `OS` UInt8, `UserAgent` UInt8, `URL` String, `Referer` String, `URLDomain` String, `RefererDomain` String, `Refresh` UInt8, `IsRobot` UInt8, `RefererCategories` Array(UInt16), `URLCategories` Array(UInt16), `URLRegions` Array(UInt32), `RefererRegions` Array(UInt32), `ResolutionWidth` UInt16, `ResolutionHeight` UInt16, `ResolutionDepth` UInt8, `FlashMajor` UInt8, `FlashMinor` UInt8, `FlashMinor2` String, `NetMajor` UInt8, `NetMinor` UInt8, `UserAgentMajor` UInt16, `UserAgentMinor` FixedString(2), `CookieEnable` UInt8, `JavascriptEnable` UInt8, `IsMobile` UInt8, `MobilePhone` UInt8, `MobilePhoneModel` String, `Params` String, `IPNetworkID` UInt32, `TraficSourceID` Int8, `SearchEngineID` UInt16, `SearchPhrase` String, `AdvEngineID` UInt8, `IsArtifical` UInt8, `WindowClientWidth` UInt16, `WindowClientHeight` UInt16, `ClientTimeZone` Int16, `ClientEventTime` DateTime, `SilverlightVersion1` UInt8, `SilverlightVersion2` UInt8, `SilverlightVersion3` UInt32, `SilverlightVersion4` UInt16, `PageCharset` String, `CodeVersion` UInt32, `IsLink` UInt8, `IsDownload` UInt8, `IsNotBounce` UInt8, `FUniqID` UInt64, `HID` UInt32, `IsOldCounter` UInt8, `IsEvent` UInt8, `IsParameter` UInt8, `DontCountHits` UInt8, `WithHash` UInt8, `HitColor` FixedString(1), `UTCEventTime` DateTime, `Age` UInt8, `Sex` UInt8, `Income` UInt8, `Interests` UInt16, `Robotness` UInt8, `GeneralInterests` Array(UInt16), `RemoteIP` UInt32, `RemoteIP6` FixedString(16), `WindowName` Int32, `OpenerName` Int32, `HistoryLength` Int16, `BrowserLanguage` FixedString(2), `BrowserCountry` FixedString(2), `SocialNetwork` String, `SocialAction` String, `HTTPError` UInt16, `SendTiming` Int32, `DNSTiming` Int32, `ConnectTiming` Int32, `ResponseStartTiming` Int32, `ResponseEndTiming` Int32, `FetchTiming` Int32, `RedirectTiming` Int32, `DOMInteractiveTiming` Int32, `DOMContentLoadedTiming` Int32, `DOMCompleteTiming` Int32, `LoadEventStartTiming` Int32, `LoadEventEndTiming` Int32, `NSToDOMContentLoadedTiming` Int32, `FirstPaintTiming` Int32, `RedirectCount` Int8, `SocialSourceNetworkID` UInt8, `SocialSourcePage` String, `ParamPrice` Int64, `ParamOrderID` String, `ParamCurrency` FixedString(3), `ParamCurrencyID` UInt16, `GoalsReached` Array(UInt32), `OpenstatServiceName` String, `OpenstatCampaignID` String, `OpenstatAdID` String, `OpenstatSourceID` String, `UTMSource` String, `UTMMedium` String, `UTMCampaign` String, `UTMContent` String, `UTMTerm` String, `FromTag` String, `HasGCLID` UInt8, `RefererHash` UInt64, `URLHash` UInt64, `CLID` UInt32, `YCLID` UInt64, `ShareService` String, `ShareURL` String, `ShareTitle` String, `ParsedParams.Key1` Array(String), `ParsedParams.Key2` Array(String), `ParsedParams.Key3` Array(String), `ParsedParams.Key4` 
Array(String), `ParsedParams.Key5` Array(String), `ParsedParams.ValueDouble` Array(Float64), `IslandID` FixedString(16), `RequestNum` UInt32, `RequestTry` UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity=8192, enable_mixed_granularity_parts=1, index_granularity_bytes=0; -- same with hits, but enabled mixed granularity and fixed_granularity - -INSERT INTO mixed_granularity_strictly_non_adaptive_table SELECT * FROM test.hits LIMIT 10; - -ALTER TABLE mixed_granularity_strictly_non_adaptive_table REPLACE PARTITION 201403 FROM mixed_granularity_table; -- { serverError 36 } - -DROP TABLE IF EXISTS mixed_granularity_table; - -DROP TABLE IF EXISTS mixed_granularity_strictly_non_adaptive_table; diff --git a/tests/queries/1_stateful/00172_early_constant_folding.reference b/tests/queries/1_stateful/00172_early_constant_folding.reference index da564dc694e..6b72183c066 100644 --- a/tests/queries/1_stateful/00172_early_constant_folding.reference +++ b/tests/queries/1_stateful/00172_early_constant_folding.reference @@ -1,6 +1,9 @@ (Expression) -ExpressionTransform - (ReadFromStorage) - AggregatingTransform - ExpressionTransform - SourceFromSingleChunk 0 → 1 +ExpressionTransform × 10 + (Aggregating) + Resize 1 → 10 + AggregatingTransform + (Expression) + ExpressionTransform + (ReadFromPreparedSource) + SourceFromSingleChunk 0 → 1 diff --git a/tests/queries/1_stateful/00172_early_constant_folding.sql b/tests/queries/1_stateful/00172_early_constant_folding.sql index b31e418b492..1ed7b8719b4 100644 --- a/tests/queries/1_stateful/00172_early_constant_folding.sql +++ b/tests/queries/1_stateful/00172_early_constant_folding.sql @@ -1,3 +1,4 @@ -- Tags: no-parallel-replicas +set max_threads=10; EXPLAIN PIPELINE SELECT count(JavaEnable) FROM test.hits WHERE WatchID = 1 OR Title = 'next' OR URL = 'prev' OR URL = '???' 
OR 1;
diff --git a/tests/queries/1_stateful/00173_group_by_use_nulls.reference b/tests/queries/1_stateful/00173_group_by_use_nulls.reference
index 02723bf14dd..e82b996ad3c 100644
--- a/tests/queries/1_stateful/00173_group_by_use_nulls.reference
+++ b/tests/queries/1_stateful/00173_group_by_use_nulls.reference
@@ -8,3 +8,25 @@
 59183 1336
 33010362 1336
 800784 1336
+-- { echoOn }
+set allow_experimental_analyzer = 1;
+SELECT
+    CounterID AS k,
+    quantileBFloat16(0.5)(ResolutionWidth)
+FROM remote('127.0.0.{1,2}', test, hits)
+GROUP BY k
+ORDER BY
+    count() DESC,
+    CounterID ASC
+LIMIT 10
+SETTINGS group_by_use_nulls = 1;
+1704509 1384
+732797 1336
+598875 1384
+792887 1336
+3807842 1336
+25703952 1336
+716829 1384
+59183 1336
+33010362 1336
+800784 1336
diff --git a/tests/queries/1_stateful/00173_group_by_use_nulls.sql b/tests/queries/1_stateful/00173_group_by_use_nulls.sql
index 7acacc4e579..8531e9efaf8 100644
--- a/tests/queries/1_stateful/00173_group_by_use_nulls.sql
+++ b/tests/queries/1_stateful/00173_group_by_use_nulls.sql
@@ -8,3 +8,28 @@ ORDER BY
 CounterID ASC
 LIMIT 10
 SETTINGS group_by_use_nulls = 1;
+
+SELECT
+    CounterID AS k,
+    quantileBFloat16(0.5)(ResolutionWidth)
+FROM test.hits
+GROUP BY k
+ORDER BY
+    count() DESC,
+    CounterID ASC
+LIMIT 10
+SETTINGS group_by_use_nulls = 1 FORMAT Null;
+
+-- { echoOn }
+set allow_experimental_analyzer = 1;
+
+SELECT
+    CounterID AS k,
+    quantileBFloat16(0.5)(ResolutionWidth)
+FROM remote('127.0.0.{1,2}', test, hits)
+GROUP BY k
+ORDER BY
+    count() DESC,
+    CounterID ASC
+LIMIT 10
+SETTINGS group_by_use_nulls = 1;
diff --git a/tests/queries/1_stateful/00174_distinct_in_order.reference b/tests/queries/1_stateful/00174_distinct_in_order.reference
index c0b2aadc20e..084efe0092c 100644
--- a/tests/queries/1_stateful/00174_distinct_in_order.reference
+++ b/tests/queries/1_stateful/00174_distinct_in_order.reference
@@ -1,3 +1,3 @@
 -- check that distinct with and w/o optimization produce the same result
--- DISTINCT colums are the same as in ORDER BY
--- DISTINCT colums has prefix in ORDER BY columns
+-- DISTINCT columns are the same as in ORDER BY
+-- DISTINCT columns have a prefix in ORDER BY columns
diff --git a/tests/queries/1_stateful/00174_distinct_in_order.sql b/tests/queries/1_stateful/00174_distinct_in_order.sql
index 08ab219c806..aac54d46181 100644
--- a/tests/queries/1_stateful/00174_distinct_in_order.sql
+++ b/tests/queries/1_stateful/00174_distinct_in_order.sql
@@ -3,7 +3,7 @@ select '-- check that distinct with and w/o optimization produce the same result
 drop table if exists distinct_in_order sync;
 drop table if exists ordinary_distinct sync;
-select '-- DISTINCT colums are the same as in ORDER BY';
+select '-- DISTINCT columns are the same as in ORDER BY';
 create table distinct_in_order (CounterID UInt32, EventDate Date) engine=MergeTree() order by (CounterID, EventDate);
 insert into distinct_in_order select distinct CounterID, EventDate from test.hits order by CounterID, EventDate settings optimize_distinct_in_order=1;
 create table ordinary_distinct (CounterID UInt32, EventDate Date) engine=MergeTree() order by (CounterID, EventDate);
@@ -13,7 +13,7 @@ select distinct * from distinct_in_order except select * from ordinary_distinct;
 drop table if exists distinct_in_order sync;
 drop table if exists ordinary_distinct sync;
-select '-- DISTINCT colums has prefix in ORDER BY columns';
+select '-- DISTINCT columns have a prefix in ORDER BY columns';
 create table distinct_in_order (CounterID UInt32, EventDate Date) engine=MergeTree() order by
(CounterID, EventDate); insert into distinct_in_order select distinct CounterID, EventDate from test.hits order by CounterID settings optimize_distinct_in_order=1; create table ordinary_distinct (CounterID UInt32, EventDate Date) engine=MergeTree() order by (CounterID, EventDate); diff --git a/tests/queries/1_stateful/00177_memory_bound_merging.reference b/tests/queries/1_stateful/00177_memory_bound_merging.reference index a96cfe2af9e..5689152d60a 100644 --- a/tests/queries/1_stateful/00177_memory_bound_merging.reference +++ b/tests/queries/1_stateful/00177_memory_bound_merging.reference @@ -10,10 +10,6 @@ http://auto.ru/chatay-baranta_bound-in-thankYou=ru/tver/zhanny 2014-03-18 http:/ http://auto.ru/chatay-baranta_bound-in-thankYou=ru/tver/zhanny 2014-03-19 http://auto.ru/chatay-baranta_bound-in-thankYou=ru/tver/zhanny http://auto.ru/chatay-baranta_bound-in-thankYou=ru/tver/zhanny 2014-03-20 http://auto.ru/chatay-baranta_bound-in-thankYou=ru/tver/zhanny 1 +SortingAggregatedTransform MergingAggregatedBucketTransform -FinishAggregatingInOrderTransform -SortingAggregatedForMemoryBoundMergingTransform -MergingAggregatedBucketTransform -FinishAggregatingInOrderTransform -AggregatingInOrderTransform -MergeTreeInOrder +GroupingAggregatedTransform diff --git a/tests/queries/1_stateful/00177_memory_bound_merging.sh b/tests/queries/1_stateful/00177_memory_bound_merging.sh index b01c62a7c92..008422be108 100755 --- a/tests/queries/1_stateful/00177_memory_bound_merging.sh +++ b/tests/queries/1_stateful/00177_memory_bound_merging.sh @@ -34,7 +34,7 @@ test1() { GROUP BY CounterID, URL, EventDate ORDER BY URL, EventDate LIMIT 5 OFFSET 10 - SETTINGS optimize_aggregation_in_order = 1, enable_memory_bound_merging_of_aggregation_results = 1, allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 3, use_hedged_requests = 0" + SETTINGS optimize_aggregation_in_order = 1, enable_memory_bound_merging_of_aggregation_results = 1, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, max_parallel_replicas = 3, use_hedged_requests = 0" check_replicas_read_in_order $query_id } @@ -51,7 +51,7 @@ test2() { GROUP BY URL, EventDate ORDER BY URL, EventDate LIMIT 5 OFFSET 10 - SETTINGS optimize_aggregation_in_order = 1, enable_memory_bound_merging_of_aggregation_results = 1, allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 3, use_hedged_requests = 0, query_plan_aggregation_in_order = 1" + SETTINGS optimize_aggregation_in_order = 1, enable_memory_bound_merging_of_aggregation_results = 1, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, max_parallel_replicas = 3, use_hedged_requests = 0, query_plan_aggregation_in_order = 1" check_replicas_read_in_order $query_id } @@ -67,7 +67,7 @@ test3() { FROM test.hits WHERE CounterID = 1704509 AND UserID = 4322253409885123546 GROUP BY URL, EventDate - SETTINGS optimize_aggregation_in_order = 1, enable_memory_bound_merging_of_aggregation_results = 1, allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 3, use_hedged_requests = 0 + SETTINGS optimize_aggregation_in_order = 1, enable_memory_bound_merging_of_aggregation_results = 1, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, max_parallel_replicas = 3, use_hedged_requests = 0 ) WHERE explain LIKE '%Aggr%Transform%' OR explain LIKE '%InOrder%'" } diff --git a/tests/queries/shell_config.sh 
b/tests/queries/shell_config.sh index 3006b74d3f9..ef70c82aefc 100644 --- a/tests/queries/shell_config.sh +++ b/tests/queries/shell_config.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# shellcheck disable=SC2120 # Don't check for ODR violation, since we may test shared build with ASAN export ASAN_OPTIONS=detect_odr_violation=0 @@ -136,12 +137,13 @@ function clickhouse_client_removed_host_parameter() function wait_for_queries_to_finish() { + local max_tries="${1:-20}" # Wait for all queries to finish (query may still be running if thread is killed by timeout) num_tries=0 while [[ $($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE current_database=currentDatabase() AND query NOT LIKE '%system.processes%'") -ne 0 ]]; do sleep 0.5; num_tries=$((num_tries+1)) - if [ $num_tries -eq 20 ]; then + if [ $num_tries -eq $max_tries ]; then $CLICKHOUSE_CLIENT -q "SELECT * FROM system.processes WHERE current_database=currentDatabase() AND query NOT LIKE '%system.processes%' FORMAT Vertical" break fi diff --git a/tests/sqllogic/connection.py b/tests/sqllogic/connection.py new file mode 100644 index 00000000000..a9976a7beca --- /dev/null +++ b/tests/sqllogic/connection.py @@ -0,0 +1,285 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +import datetime +import logging +import pyodbc +import sqlite3 +import traceback +import enum +import random +import string +from contextlib import contextmanager + +from exceptions import ProgramError + + +logger = logging.getLogger("connection") +logger.setLevel(logging.DEBUG) + + +class OdbcConnectingArgs: + def __init__(self, **kwargs): + self._kwargs = kwargs + + def __str__(self): + conn_str = ";".join( + ["{}={}".format(x, y) for x, y in self._kwargs.items() if y] + ) + return conn_str + + def update_database(self, database): + self._kwargs["Database"] = database + + @staticmethod + def create_from_kw( + dsn="", server="localhost", user="default", database="default", **kwargs + ): + conn_args = { + "DSN": dsn, + "Server": server, + "User": user, + "Database": database, + } + conn_args.update(kwargs) + return OdbcConnectingArgs(**conn_args) + + @staticmethod + def create_from_connection_string(conn_str): + args = OdbcConnectingArgs() + for kv in conn_str.split(";"): + if kv: + k, v = kv.split("=", 1) + args._kwargs[k] = v + return args + + +def _random_str(length=8): + alphabet = string.ascii_lowercase + string.digits + return "".join(random.SystemRandom().choice(alphabet) for _ in range(length)) + + +def default_clickhouse_odbc_conn_str(): + return str( + OdbcConnectingArgs.create_from_kw( + dsn="ClickHouse DSN (ANSI)", + ) + ) + + +class Engines(enum.Enum): + SQLITE = enum.auto() + ODBC = enum.auto() + + @staticmethod + def list(): + return list(map(lambda c: c.name.lower(), Engines)) + + +class KnownDBMS(str, enum.Enum): + sqlite = "sqlite" + clickhouse = "ClickHouse" + + +class ConnectionWrap(object): + def __init__(self, connection=None, factory=None, factory_kwargs=None): + self._factory = factory + self._factory_kwargs = factory_kwargs + self._connection = connection + + self.DBMS_NAME = None + self.DATABASE_NAME = None + self.USER_NAME = None + + @staticmethod + def create(connection): + return ConnectionWrap(connection=connection) + + @staticmethod + def create_form_factory(factory, factory_kwargs): + return ConnectionWrap( + factory=factory, factory_kwargs=factory_kwargs + ).reconnect() + + def can_reconnect(self): + return self._factory is not None + + def reconnect(self): + if self._connection is not None: + self._connection.close() + 
self._connection = self._factory(self._factory_kwargs) + return self + + def assert_can_reconnect(self): + assert self.can_reconnect(), f"no reconnect for: {self.DBMS_NAME}" + + def __getattr__(self, item): + return getattr(self._connection, item) + + def __enter__(self): + return self + + def drop_all_tables(self): + if self.DBMS_NAME == KnownDBMS.clickhouse.value: + list_query = ( + f"SELECT name FROM system.tables WHERE database='{self.DATABASE_NAME}'" + ) + elif self.DBMS_NAME == KnownDBMS.sqlite.value: + list_query = f"SELECT name FROM sqlite_master WHERE type='table'" + else: + logger.warning( + "unable to drop all tables for unknown database: %s", self.DBMS_NAME + ) + return + + list_result = execute_request(list_query, self) + logger.info("tables will be dropped: %s", list_result.get_result()) + for table_name in list_result.get_result(): + table_name = table_name[0] + execute_request(f"DROP TABLE {table_name}", self).assert_no_exception() + logger.debug("success drop table: %s", table_name) + + def _use_database(self, database="default"): + if self.DBMS_NAME == KnownDBMS.clickhouse.value: + logger.info("use test database: %s", database) + self._factory_kwargs.update_database(database) + self.reconnect() + self.DATABASE_NAME = database + + def use_random_database(self): + if self.DBMS_NAME == KnownDBMS.clickhouse.value: + database = f"test_{_random_str()}" + execute_request(f"CREATE DATABASE {database}", self).assert_no_exception() + self._use_database(database) + logger.info( + "currentDatabase : %s", + execute_request(f"SELECT currentDatabase()", self).get_result(), + ) + + @contextmanager + def with_one_test_scope(self): + try: + yield self + finally: + self.drop_all_tables() + + @contextmanager + def with_test_database_scope(self): + self.use_random_database() + try: + yield self + finally: + self._use_database() + + def __exit__(self, *args): + if hasattr(self._connection, "close"): + return self._connection.close() + + +def setup_connection(engine, conn_str=None, make_debug_request=True): + connection = None + + if isinstance(engine, str): + engine = Engines[engine.upper()] + + if engine == Engines.ODBC: + if conn_str is None: + raise ProgramError("conn_str has to be set up for ODBC connection") + + logger.debug("Drivers: %s", pyodbc.drivers()) + logger.debug("DataSources: %s", pyodbc.dataSources()) + logger.debug("Connection string: %s", conn_str) + + conn_args = OdbcConnectingArgs.create_from_connection_string(conn_str) + + connection = ConnectionWrap.create_form_factory( + factory=lambda args: pyodbc.connect(str(args)), + factory_kwargs=conn_args, + ) + connection.add_output_converter(pyodbc.SQL_UNKNOWN_TYPE, lambda x: None) + + connection.DBMS_NAME = connection.getinfo(pyodbc.SQL_DBMS_NAME) + connection.DATABASE_NAME = connection.getinfo(pyodbc.SQL_DATABASE_NAME) + connection.USER_NAME = connection.getinfo(pyodbc.SQL_USER_NAME) + + elif engine == Engines.SQLITE: + conn_str = conn_str if conn_str is not None else ":memory:" + connection = ConnectionWrap.create(sqlite3.connect(conn_str)) + + connection.DBMS_NAME = "sqlite" + connection.DATABASE_NAME = "main" + connection.USER_NAME = "default" + + logger.info( + "Connection info: DBMS name %s, database %s, user %s", + connection.DBMS_NAME, + connection.DATABASE_NAME, + connection.USER_NAME, + ) + + if make_debug_request: + request = "SELECT 1" + logger.debug("Make debug request to the connection: %s", request) + result = execute_request(request, connection) + logger.debug("Debug request returned: %s", result.get_result()) + 
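# The probe above makes a misconfigured DSN or driver fail during setup rather than in the middle of a test run.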
+ logger.debug("Connection is ok") + return connection + + +class ExecResult: + def __init__(self): + self._exception = None + self._result = None + self._description = None + + def as_exception(self, exc): + self._exception = exc + return self + + def get_result(self): + self.assert_no_exception() + return self._result + + def get_description(self): + self.assert_no_exception() + return self._description + + def as_ok(self, rows=None, description=None): + if rows is None: + self._result = True + return self + self._result = rows + self._description = description + return self + + def get_exception(self): + return self._exception + + def has_exception(self): + return self._exception is not None + + def assert_no_exception(self): + if self.has_exception(): + raise ProgramError( + f"request doesn't have a result set, it has the exception", + parent=self._exception, + ) + + +def execute_request(request, connection): + cursor = connection.cursor() + try: + cursor.execute(request) + if cursor.description: + logging.debug("request has a description %s", cursor.description) + rows = cursor.fetchall() + connection.commit() + return ExecResult().as_ok(rows=rows, description=cursor.description) + else: + logging.debug("request doesn't have a description") + connection.commit() + return ExecResult().as_ok() + except (pyodbc.Error, sqlite3.DatabaseError) as err: + return ExecResult().as_exception(err) + finally: + cursor.close() diff --git a/tests/sqllogic/exceptions.py b/tests/sqllogic/exceptions.py new file mode 100644 index 00000000000..30c8983d80f --- /dev/null +++ b/tests/sqllogic/exceptions.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +from enum import Enum + + +class Error(Exception): + def __init__( + self, + message, + file=None, + name=None, + pos=None, + request=None, + details=None, + *args, + **kwargs, + ): + super().__init__(message, *args, **kwargs) + self._file = file + self._name = name + self._pos = pos + self._request = request + self._details = details + + @property + def test_file(self): + return self._file + + @property + def test_name(self): + return self._name + + @property + def test_pos(self): + return self._pos + + @property + def request(self): + return self._request + + @property + def message(self): + return super().__str__() + + @property + def reason(self): + return ", ".join( + ( + str(x) + for x in [ + super().__str__(), + "details: {}".format(self._details) if self._details else "", + ] + if x + ) + ) + + def set_details(self, file=None, name=None, pos=None, request=None, details=None): + if file is not None: + self._file = file + if name is not None: + self._name = name + if pos is not None: + self._pos = pos + if pos is not None: + self._request = request + if request is not None: + self._request = request + if details is not None: + self._details = details + + def _at_file_and_pos(self): + if self._file is not None and self._pos is not None: + return f"at: [{self._file}:{self._pos}]" + if self._name is not None and self._pos is not None: + return f"at: [{self._name}:{self._pos}]" + return None + + +class ErrorWithParent(Error): + def __init__(self, message, parent=None, *args, **kwargs): + super().__init__(message, *args, **kwargs) + self._parent = parent + + def get_parent(self): + return self._parent + + @property + def reason(self): + return ", ".join( + ( + str(x) + for x in [ + super().reason, + "exception: {}".format(str(self._parent)) if self._parent else "", + ] + if x + ) + ) + + +class ProgramError(ErrorWithParent): + def 
__str__(self): + return self.reason + + +class DataResultDiffer(Error): + pass + + +class SchemeResultDiffer(Error): + pass + + +class StatementExecutionError(ErrorWithParent): + pass + + +class QueryExecutionError(ErrorWithParent): + pass + + +# The Success exceptions are control-flow signals: the runner raises them when a request finishes exactly as the test expects. +class StatementSuccess(Error): + def __init__(self, *args, **kwargs): + message = kwargs.pop("message", "success") + super().__init__(message, *args, **kwargs) + + +class QuerySuccess(Error): + def __init__(self, *args, **kwargs): + message = kwargs.pop("message", "success") + super().__init__(message, *args, **kwargs)
diff --git a/tests/sqllogic/runner.py b/tests/sqllogic/runner.py new file mode 100755 index 00000000000..1cf4c19c649 --- /dev/null +++ b/tests/sqllogic/runner.py @@ -0,0 +1,428 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import argparse +import enum +import os +import logging +import csv +import json +import multiprocessing +from functools import reduce +from deepdiff import DeepDiff + +from connection import setup_connection, Engines, default_clickhouse_odbc_conn_str +from test_runner import TestRunner, Status, RequestType + + +# logging._nameToLevel maps level names to values; compare against the name, not logging.NOTSET. +LEVEL_NAMES = [x.lower() for x in logging._nameToLevel.keys() if x != "NOTSET"] + + +def setup_logger(args): + logging.getLogger().setLevel(logging.NOTSET) + formatter = logging.Formatter( + fmt="%(asctime)s %(levelname)s %(name)s %(filename)s %(funcName)s:%(lineno)d - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) + + if args.log_file: + file_handler = logging.FileHandler(args.log_file) + file_handler.setLevel(args.log_level.upper()) + file_handler.setFormatter(formatter) + logging.getLogger().addHandler(file_handler) + else: + stream_handler = logging.StreamHandler() + stream_handler.setLevel(logging.INFO) + stream_handler.setFormatter(formatter) + logging.getLogger().addHandler(stream_handler) + + +def __write_check_status(status_row, out_dir): + # status_row is [status, description]; truncate only the description text. + if len(status_row[1]) > 140: + status_row[1] = status_row[1][0:135] + "..."
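+ # The file is opened in append mode below, so every stage contributes its own row.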
+ check_status_path = os.path.join(out_dir, "check_status.tsv") + with open(check_status_path, "a") as stream: + writer = csv.writer(stream, delimiter="\t", lineterminator="\n") + writer.writerow(status_row) + + +class TestNameGranularity(str, enum.Enum): + file = enum.auto() + request = enum.auto() + + +def __write_test_result( + reports, + out_dir, + mode_name, + granularity=TestNameGranularity.request, + only_errors=None, +): + all_stages = reports.keys() + test_results_path = os.path.join(out_dir, "test_results.tsv") + with open(test_results_path, "a") as stream: + writer = csv.writer(stream, delimiter="\t", lineterminator="\n") + for stage in all_stages: + report = reports[stage] + for test_report in report.tests.values(): + test_name_prefix = ( + f"sqllogic::{mode_name}::{stage}::{test_report.test_name}" + ) + + for request_status in test_report.requests.values(): + if request_status.status == Status.error or not only_errors: + test_name = test_name_prefix + if granularity == TestNameGranularity.request: + test_name += f"::{request_status.position}" + + test_status = "success" + if request_status.status == Status.error: + test_status = "FAIL" + + log_row = ( + f"position: {request_status.position}" + f", type: {request_status.request_type.name.lower()}" + f", request: '{request_status.request}'" + ) + if request_status.status == Status.error: + log_row += f", reason: '{request_status.reason}'" + + writer.writerow( + [ + test_name, + test_status, + 0, + log_row, + ] + ) + + +def statements_report(reports, out_dir, mode_name): + __write_test_result( + reports, + out_dir, + mode_name, + granularity=TestNameGranularity.file, + only_errors=True, + ) + + failed_stages = [] + for stage, report in reports.items(): + if report.stats.total.fail > 0: + failed_stages.append(stage) + + if len(failed_stages) == 0: + status_row = [ + "success", + f"All tests from {mode_name} are successful", + ] + __write_check_status(status_row, out_dir) + return + + stage = max(failed_stages, key=lambda x: reports[x].stats.total.fail) + stats = reports[stage].stats + status_row = [ + "error", + f"{stats.total.fail}/{stats.total.all} tests failed at {mode_name}::{stage}", + ] + __write_check_status(status_row, out_dir) + + +def _child_process(setup_kwargs, runner_kwargs, input_dir, output_dir, test): + with setup_connection(**setup_kwargs) as connection: + with connection.with_test_database_scope(): + runner = TestRunner(connection, **runner_kwargs) + runner.run_all_tests_from_file(test, input_dir) + runner.write_results_to_dir(output_dir) + return runner.report + + +def run_all_tests_in_parallel(setup_kwargs, runner_kwargs, input_dir, output_dir): + process_count = max(1, os.cpu_count() - 2) + with multiprocessing.Pool(process_count) as pool: + async_results = [ + pool.apply_async( + _child_process, + args=( + setup_kwargs, + runner_kwargs, + input_dir, + output_dir, + test, + ), + ) + for test in TestRunner.list_tests(input_dir) + ] + reports = [ar.get() for ar in async_results] + + report = reduce(lambda x, y: x.combine_with(y), reports) + report.write_report(output_dir) + return report + + +def as_kwargs(**kwargs): + return kwargs + + +def mode_check_statements(parser): + parser.add_argument("--input-dir", metavar="DIR", required=True) + parser.add_argument("--out-dir", metavar="DIR", required=True) + + def calle(args): + input_dir = os.path.realpath(args.input_dir) + out_dir = os.path.realpath(args.out_dir) + + if not os.path.exists(input_dir): + raise FileNotFoundError( + input_dir, f"check statements: no 
such file or directory {input_dir}" + ) + + if not os.path.isdir(input_dir): + raise NotADirectoryError( + input_dir, f"check statements: not a dir {input_dir}" + ) + + reports = dict() + + out_stages_dir = os.path.join(out_dir, f"{args.mode}-stages") + + complete_sqlite_dir = os.path.join(out_stages_dir, "complete-sqlite") + os.makedirs(complete_sqlite_dir, exist_ok=True) + + reports["complete-sqlite"] = run_all_tests_in_parallel( + setup_kwargs=as_kwargs( + engine=Engines.SQLITE, + ), + runner_kwargs=as_kwargs( + verify_mode=False, + skip_request_types=[RequestType.query], + stop_at_statement_error=True, + ), + input_dir=input_dir, + output_dir=complete_sqlite_dir, + ) + + verify_clickhouse_dir = os.path.join(out_stages_dir, "verify-clickhouse") + os.makedirs(verify_clickhouse_dir, exist_ok=True) + + reports["verify-clickhouse"] = run_all_tests_in_parallel( + setup_kwargs=as_kwargs( + engine=Engines.ODBC, + conn_str=default_clickhouse_odbc_conn_str(), + ), + runner_kwargs=as_kwargs( + verify_mode=True, + skip_request_types=[RequestType.query], + stop_at_statement_error=True, + ), + input_dir=complete_sqlite_dir, + output_dir=verify_clickhouse_dir, + ) + + statements_report(reports, out_dir, args.mode) + + parser.set_defaults(func=calle) + + +def make_actual_report(reports): + return {stage: report.get_map() for stage, report in reports.items()} + + +def write_actual_report(actual, out_dir): + with open(os.path.join(out_dir, "actual_report.json"), "w") as f: + f.write(json.dumps(actual)) + + +def read_canonic_report(input_dir): + file = os.path.join(input_dir, "canonic_report.json") + if not os.path.exists(file): + return {} + + with open(file, "r") as f: + data = f.read() + return json.loads(data) + + +def write_canonic_report(canonic, out_dir): + with open(os.path.join(out_dir, "canonic_report.json"), "w") as f: + f.write(json.dumps(canonic)) + + +def self_test_report(reports, input_dir, out_dir, mode_name): + actual = make_actual_report(reports) + write_actual_report(actual, out_dir) + + canonic = read_canonic_report(input_dir) + write_canonic_report(canonic, out_dir) + + status_row = [ + "success", + f"All statements from {mode_name} are successful", + ] + + failed_stages = {} + + for stage, actual_report in actual.items(): + actual_stats = actual_report["stats"] + + if stage not in canonic: + failed_stages[stage] = actual_stats.items() + continue + + canonic_report = canonic[stage] + canonic_stats = canonic_report["stats"] + + logging.debug("stage: %s, canonic: %s", stage, canonic_stats) + logging.debug("stage: %s, actual: %s", stage, actual_stats) + + diff = DeepDiff(actual_stats, canonic_stats) + if len(diff): + failed_stages[stage] = diff + logging.error("diff: %s", diff) + else: + logging.debug("diff: %s", diff) + + all_stages = actual.keys() + if len(failed_stages) > 0: + description = f"Failed {len(failed_stages)}/{len(all_stages)} from {mode_name}, stages: {','.join(failed_stages)}" + status_row = ["error", description] + + __write_check_status(status_row, out_dir) + + +def mode_self_test(parser): + parser.add_argument("--self-test-dir", metavar="DIR", required=True) + parser.add_argument("--out-dir", metavar="DIR", required=True) + + def calle(args): + self_test_dir = os.path.realpath(args.self_test_dir) + if not os.path.exists(self_test_dir): + raise FileNotFoundError( + self_test_dir, f"self test: no such file or directory {self_test_dir}" + ) + if not os.path.isdir(self_test_dir): + raise NotADirectoryError( + self_test_dir,
f"self test: not a dir {self_test_dir}" + ) + logging.debug("self test dir is: %s", self_test_dir) + + out_dir = os.path.realpath(args.out_dir) + if not os.path.exists(out_dir): + raise FileNotFoundError(out_dir, f"self test: dir not found {out_dir}") + if not os.path.isdir(out_dir): + raise NotADirectoryError(out_dir, f"self test: not a dir {out_dir}") + + reports = dict() + + out_stages_dir = os.path.join(out_dir, f"{args.mode}-stages") + + out_dir_sqlite_complete = os.path.join(out_stages_dir, "sqlite-complete") + os.makedirs(out_dir_sqlite_complete, exist_ok=True) + with setup_connection(Engines.SQLITE) as sqlite: + runner = TestRunner(sqlite) + runner.run_all_tests_from_dir(self_test_dir) + runner.write_results_to_dir(out_dir_sqlite_complete) + runner.write_report(out_dir_sqlite_complete) + reports["sqlite-complete"] = runner.report + + out_dir_sqlite_vs_sqlite = os.path.join(out_stages_dir, "sqlite-vs-sqlite") + os.makedirs(out_dir_sqlite_vs_sqlite, exist_ok=True) + with setup_connection(Engines.SQLITE) as sqlite: + runner = TestRunner(sqlite) + runner.with_verify_mode() + runner.run_all_tests_from_dir(out_dir_sqlite_complete) + runner.write_results_to_dir(out_dir_sqlite_vs_sqlite) + runner.write_report(out_dir_sqlite_vs_sqlite) + reports["sqlite-vs-sqlite"] = runner.report + + out_dir_clickhouse_complete = os.path.join( + out_stages_dir, "clickhouse-complete" + ) + os.makedirs(out_dir_clickhouse_complete, exist_ok=True) + with setup_connection( + Engines.ODBC, default_clickhouse_odbc_conn_str() + ) as clickhouse: + runner = TestRunner(clickhouse) + runner.run_all_tests_from_dir(self_test_dir) + runner.write_results_to_dir(out_dir_clickhouse_complete) + runner.write_report(out_dir_clickhouse_complete) + reports["clickhouse-complete"] = runner.report + + out_dir_clickhouse_vs_clickhouse = os.path.join( + out_stages_dir, "clickhouse-vs-clickhouse" + ) + os.makedirs(out_dir_clickhouse_vs_clickhouse, exist_ok=True) + with setup_connection( + Engines.ODBC, default_clickhouse_odbc_conn_str() + ) as clickhouse: + runner = TestRunner(clickhouse) + runner.with_verify_mode() + runner.run_all_tests_from_dir(out_dir_clickhouse_complete) + runner.write_results_to_dir(out_dir_clickhouse_vs_clickhouse) + runner.write_report(os.path.join(out_dir_clickhouse_vs_clickhouse)) + reports["clickhouse-vs-clickhouse"] = runner.report + + out_dir_sqlite_vs_clickhouse = os.path.join( + out_stages_dir, "sqlite-vs-clickhouse" + ) + os.makedirs(out_dir_sqlite_vs_clickhouse, exist_ok=True) + + reports["sqlite-vs-clickhouse"] = run_all_tests_in_parallel( + setup_kwargs=as_kwargs( + engine=Engines.ODBC, + conn_str=default_clickhouse_odbc_conn_str(), + ), + runner_kwargs=as_kwargs( + verify_mode=True, + ), + input_dir=out_dir_sqlite_complete, + output_dir=out_dir_sqlite_vs_clickhouse, + ) + + self_test_report(reports, self_test_dir, out_dir, args.mode) + + parser.set_defaults(func=calle) + + +def parse_args(): + parser = argparse.ArgumentParser( + description="This script runs sqllogic tests over database." + ) + + parser.add_argument("--log-file", help="write logs to the file", metavar="FILE") + parser.add_argument( + "--log-level", + help="define the log level for log file", + metavar="level", + choices=LEVEL_NAMES, + default="debug", + ) + + subparsers = parser.add_subparsers(dest="mode") + mode_check_statements( + subparsers.add_parser( + "statements-test", + help="Run all test. Check that all statements are passed", + ) + ) + mode_self_test( + subparsers.add_parser( + "self-test", + help="Run all test. 
Check that all statements are passed", + ) + ) + args = parser.parse_args() + if args.mode is None: + parser.print_help() + return args + + +def main(): + args = parse_args() + setup_logger(args) + if args.mode is not None: + args.func(args) + + +if __name__ == "__main__": + main() diff --git a/tests/sqllogic/self-test/canonic_report.json b/tests/sqllogic/self-test/canonic_report.json new file mode 100644 index 00000000000..0cd1aa4b43b --- /dev/null +++ b/tests/sqllogic/self-test/canonic_report.json @@ -0,0 +1 @@ +{"sqlite-complete": {"dbms_name": "sqlite", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 14, "fail": 4}, "total": {"success": 18, "fail": 4}}, "input_dir": "/clickhouse-tests/sqllogic/self-test", "output_dir": "/test_output/self-test/self-test-stages/sqlite-complete", "tests": {"test.test": {"test_name": "test.test", "test_file": "/clickhouse-tests/sqllogic/self-test/test.test", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 14, "fail": 4}, "total": {"success": 18, "fail": 4}}, "requests": {"5": {"status": "success", "position": 5, "request_type": "statement", "request": "CREATE TABLE t1(a INTEGER, b INTEGER)", "reason": "success"}, "9": {"status": "success", "position": 9, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(1,2)", "reason": "success"}, "12": {"status": "success", "position": 12, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(3,4)", "reason": "success"}, "15": {"status": "success", "position": 15, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(5,6)", "reason": "success"}, "18": {"status": "success", "position": 18, "request_type": "query", "request": "SELECT a, b FROM t1 ORDER BY 2,1", "reason": "success"}, "25": {"status": "error", "position": 25, "request_type": "query", "request": "SELECT a, c FROM t1 ORDER BY 2,1", "reason": "query execution failed with an exception, exception: no such column: c"}, "38": {"status": "success", "position": 38, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "success"}, "44": {"status": "error", "position": 44, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "query is expected to fail with different error, details: expected error: expect to fail in a different way, exception: no such column: c"}, "49": {"status": "success", "position": 49, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "55": {"status": "success", "position": 55, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "60": {"status": "success", "position": 60, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "64": {"status": "success", "position": 64, "request_type": "query", "request": "SELECT -1.0", "reason": "success"}, "70": {"status": "success", "position": 70, "request_type": "query", "request": "SELECT -1", "reason": "success"}, "76": {"status": "success", "position": 76, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "success"}, "82": {"status": "error", "position": 82, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "canonic and actual columns count differ, details: expected columns 1, actual columns 2"}, "88": {"status": "error", "position": 88, "request_type": "query", "request": "SELECT 1.0", "reason": "canonic and actual columns count differ, details: expected columns 2, actual columns 1"}, "94": {"status": "success", "position": 94, "request_type": "query", "request": "select a, b from t1 where a = b", 
"reason": "success"}, "99": {"status": "success", "position": 99, "request_type": "query", "request": "SELECT 1.0013", "reason": "success"}, "105": {"status": "success", "position": 105, "request_type": "query", "request": "SELECT NULL as a", "reason": "success"}, "116": {"status": "success", "position": 116, "request_type": "query", "request": "SELECT NULL", "reason": "success"}, "121": {"status": "success", "position": 121, "request_type": "query", "request": "SELECT 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15", "reason": "success"}, "126": {"status": "success", "position": 126, "request_type": "query", "request": "WITH RECURSIVE cnt(x) AS ( SELECT 1 UNION ALL SELECT x+1 FROM cnt LIMIT 20 ) SELECT x FROM cnt;", "reason": "success"}}}}}, "sqlite-vs-sqlite": {"dbms_name": "sqlite", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 15, "fail": 3}, "total": {"success": 19, "fail": 3}}, "input_dir": "/test_output/self-test/self-test-stages/sqlite-complete", "output_dir": "/test_output/self-test/self-test-stages/sqlite-vs-sqlite", "tests": {"test.test": {"test_name": "test.test", "test_file": "/test_output/self-test/self-test-stages/sqlite-complete/test.test", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 15, "fail": 3}, "total": {"success": 19, "fail": 3}}, "requests": {"5": {"status": "success", "position": 5, "request_type": "statement", "request": "CREATE TABLE t1(a INTEGER, b INTEGER)", "reason": "success"}, "9": {"status": "success", "position": 9, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(1,2)", "reason": "success"}, "12": {"status": "success", "position": 12, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(3,4)", "reason": "success"}, "15": {"status": "success", "position": 15, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(5,6)", "reason": "success"}, "18": {"status": "success", "position": 18, "request_type": "query", "request": "SELECT a, b FROM t1 ORDER BY 2,1", "reason": "success"}, "28": {"status": "success", "position": 28, "request_type": "query", "request": "SELECT a, c FROM t1 ORDER BY 2,1", "reason": "success"}, "42": {"status": "success", "position": 42, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "success"}, "48": {"status": "error", "position": 48, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "query is expected to fail with different error, details: expected error: expect to fail in a different way, exception: no such column: c"}, "54": {"status": "success", "position": 54, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "60": {"status": "success", "position": 60, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "66": {"status": "success", "position": 66, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "72": {"status": "success", "position": 72, "request_type": "query", "request": "SELECT -1.0", "reason": "success"}, "78": {"status": "success", "position": 78, "request_type": "query", "request": "SELECT -1", "reason": "success"}, "84": {"status": "success", "position": 84, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "success"}, "90": {"status": "error", "position": 90, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "canonic and actual columns count differ, details: expected columns 1, actual columns 2"}, "96": {"status": "error", "position": 96, "request_type": "query", "request": "SELECT 1.0", 
"reason": "canonic and actual columns count differ, details: expected columns 2, actual columns 1"}, "102": {"status": "success", "position": 102, "request_type": "query", "request": "select a, b from t1 where a = b", "reason": "success"}, "107": {"status": "success", "position": 107, "request_type": "query", "request": "SELECT 1.0013", "reason": "success"}, "113": {"status": "success", "position": 113, "request_type": "query", "request": "SELECT NULL as a", "reason": "success"}, "124": {"status": "success", "position": 124, "request_type": "query", "request": "SELECT NULL", "reason": "success"}, "129": {"status": "success", "position": 129, "request_type": "query", "request": "SELECT 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15", "reason": "success"}, "135": {"status": "success", "position": 135, "request_type": "query", "request": "WITH RECURSIVE cnt(x) AS ( SELECT 1 UNION ALL SELECT x+1 FROM cnt LIMIT 20 ) SELECT x FROM cnt;", "reason": "success"}}}}}, "clickhouse-complete": {"dbms_name": "ClickHouse", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 15, "fail": 4}, "total": {"success": 19, "fail": 4}}, "input_dir": "/clickhouse-tests/sqllogic/self-test", "output_dir": "/test_output/self-test/self-test-stages/clickhouse-complete", "tests": {"test.test": {"test_name": "test.test", "test_file": "/clickhouse-tests/sqllogic/self-test/test.test", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 15, "fail": 4}, "total": {"success": 19, "fail": 4}}, "requests": {"1": {"status": "success", "position": 1, "request_type": "statement", "request": "CREATE TABLE t1(a INTEGER, b INTEGER) ENGINE = MergeTree() PRIMARY KEY tuple()", "reason": "success"}, "9": {"status": "success", "position": 9, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(1,2)", "reason": "success"}, "12": {"status": "success", "position": 12, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(3,4)", "reason": "success"}, "15": {"status": "success", "position": 15, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(5,6)", "reason": "success"}, "18": {"status": "success", "position": 18, "request_type": "query", "request": "SELECT a, b FROM t1 ORDER BY 2,1", "reason": "success"}, "25": {"status": "error", "position": 25, "request_type": "query", "request": "SELECT a, c FROM t1 ORDER BY 2,1", "reason": "query execution failed with an exception, exception: ('HY000', \"[HY000] HTTP status code: 404\nReceived error:\nCode: 47. DB::Exception: Missing columns: 'c' while processing query: 'SELECT a, c FROM t1 ORDER BY c ASC, a ASC', required columns: 'a' 'c', maybe you meant: 'a'. (UNKNOWN_IDENTIFIER) (version 23.3.1.1654 (official build))\n\n (1) (SQLExecDirectW)\")"}, "32": {"status": "success", "position": 32, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "success"}, "44": {"status": "error", "position": 44, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "query is expected to fail with different error, details: expected error: expect to fail in a different way, exception: ('HY000', \"[HY000] HTTP status code: 404\nReceived error:\nCode: 47. DB::Exception: Missing columns: 'c' while processing query: 'SELECT a, c FROM t1', required columns: 'a' 'c', maybe you meant: 'a'. 
(UNKNOWN_IDENTIFIER) (version 23.3.1.1654 (official build))\n\n (1) (SQLExecDirectW)\")"}, "49": {"status": "success", "position": 49, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "55": {"status": "success", "position": 55, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "60": {"status": "success", "position": 60, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "64": {"status": "success", "position": 64, "request_type": "query", "request": "SELECT -1.0", "reason": "success"}, "70": {"status": "success", "position": 70, "request_type": "query", "request": "SELECT -1", "reason": "success"}, "76": {"status": "success", "position": 76, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "success"}, "82": {"status": "error", "position": 82, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "canonic and actual columns count differ, details: expected columns 1, actual columns 2"}, "88": {"status": "error", "position": 88, "request_type": "query", "request": "SELECT 1.0", "reason": "canonic and actual columns count differ, details: expected columns 2, actual columns 1"}, "94": {"status": "success", "position": 94, "request_type": "query", "request": "select a, b from t1 where a = b", "reason": "success"}, "99": {"status": "success", "position": 99, "request_type": "query", "request": "SELECT 1.0013", "reason": "success"}, "105": {"status": "success", "position": 105, "request_type": "query", "request": "SELECT NULL as a", "reason": "success"}, "110": {"status": "success", "position": 110, "request_type": "query", "request": "SELECT CAST(NULL AS Nullable(INTEGER))", "reason": "success"}, "116": {"status": "success", "position": 116, "request_type": "query", "request": "SELECT NULL", "reason": "success"}, "121": {"status": "success", "position": 121, "request_type": "query", "request": "SELECT 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15", "reason": "success"}, "139": {"status": "success", "position": 139, "request_type": "query", "request": "SELECT number+1 from system.numbers LIMIT 20", "reason": "success"}}}}}, "clickhouse-vs-clickhouse": {"dbms_name": "ClickHouse", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 16, "fail": 3}, "total": {"success": 20, "fail": 3}}, "input_dir": "/test_output/self-test/self-test-stages/clickhouse-complete", "output_dir": "/test_output/self-test/self-test-stages/clickhouse-vs-clickhouse", "tests": {"test.test": {"test_name": "test.test", "test_file": "/test_output/self-test/self-test-stages/clickhouse-complete/test.test", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 16, "fail": 3}, "total": {"success": 20, "fail": 3}}, "requests": {"1": {"status": "success", "position": 1, "request_type": "statement", "request": "CREATE TABLE t1(a INTEGER, b INTEGER) ENGINE = MergeTree() PRIMARY KEY tuple()", "reason": "success"}, "9": {"status": "success", "position": 9, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(1,2)", "reason": "success"}, "12": {"status": "success", "position": 12, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(3,4)", "reason": "success"}, "15": {"status": "success", "position": 15, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(5,6)", "reason": "success"}, "18": {"status": "success", "position": 18, "request_type": "query", "request": "SELECT a, b FROM t1 ORDER BY 2,1", "reason": "success"}, "28": {"status": "success", "position": 28, 
"request_type": "query", "request": "SELECT a, c FROM t1 ORDER BY 2,1", "reason": "success"}, "36": {"status": "success", "position": 36, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "success"}, "48": {"status": "error", "position": 48, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "query is expected to fail with different error, details: expected error: expect to fail in a different way, exception: ('HY000', \"[HY000] HTTP status code: 404\nReceived error:\nCode: 47. DB::Exception: Missing columns: 'c' while processing query: 'SELECT a, c FROM t1', required columns: 'a' 'c', maybe you meant: 'a'. (UNKNOWN_IDENTIFIER) (version 23.3.1.1654 (official build))\n\n (1) (SQLExecDirectW)\")"}, "54": {"status": "success", "position": 54, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "60": {"status": "success", "position": 60, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "66": {"status": "success", "position": 66, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "72": {"status": "success", "position": 72, "request_type": "query", "request": "SELECT -1.0", "reason": "success"}, "78": {"status": "success", "position": 78, "request_type": "query", "request": "SELECT -1", "reason": "success"}, "84": {"status": "success", "position": 84, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "success"}, "90": {"status": "error", "position": 90, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "canonic and actual columns count differ, details: expected columns 1, actual columns 2"}, "96": {"status": "error", "position": 96, "request_type": "query", "request": "SELECT 1.0", "reason": "canonic and actual columns count differ, details: expected columns 2, actual columns 1"}, "102": {"status": "success", "position": 102, "request_type": "query", "request": "select a, b from t1 where a = b", "reason": "success"}, "107": {"status": "success", "position": 107, "request_type": "query", "request": "SELECT 1.0013", "reason": "success"}, "113": {"status": "success", "position": 113, "request_type": "query", "request": "SELECT NULL as a", "reason": "success"}, "118": {"status": "success", "position": 118, "request_type": "query", "request": "SELECT CAST(NULL AS Nullable(INTEGER))", "reason": "success"}, "124": {"status": "success", "position": 124, "request_type": "query", "request": "SELECT NULL", "reason": "success"}, "129": {"status": "success", "position": 129, "request_type": "query", "request": "SELECT 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15", "reason": "success"}, "148": {"status": "success", "position": 148, "request_type": "query", "request": "SELECT number+1 from system.numbers LIMIT 20", "reason": "success"}}}}}, "sqlite-vs-clickhouse": {"dbms_name": "ClickHouse", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 13, "fail": 6}, "total": {"success": 17, "fail": 6}}, "input_dir": "/test_output/self-test/self-test-stages/sqlite-complete", "output_dir": "/test_output/self-test/self-test-stages/sqlite-vs-clickhouse", "tests": {"test.test": {"test_name": "test.test", "test_file": "/test_output/self-test/self-test-stages/sqlite-complete/test.test", "stats": {"statements": {"success": 4, "fail": 0}, "queries": {"success": 13, "fail": 6}, "total": {"success": 17, "fail": 6}}, "requests": {"1": {"status": "success", "position": 1, "request_type": "statement", "request": "CREATE TABLE t1(a INTEGER, b INTEGER) ENGINE = MergeTree() PRIMARY KEY 
tuple()", "reason": "success"}, "9": {"status": "success", "position": 9, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(1,2)", "reason": "success"}, "12": {"status": "success", "position": 12, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(3,4)", "reason": "success"}, "15": {"status": "success", "position": 15, "request_type": "statement", "request": "INSERT INTO t1(a,b) VALUES(5,6)", "reason": "success"}, "18": {"status": "success", "position": 18, "request_type": "query", "request": "SELECT a, b FROM t1 ORDER BY 2,1", "reason": "success"}, "28": {"status": "error", "position": 28, "request_type": "query", "request": "SELECT a, c FROM t1 ORDER BY 2,1", "reason": "canonic and actual results have different exceptions, details: canonic: query execution failed with an exception, original is: no such column: c, actual: query execution failed with an exception, original is: ('HY000', \"[HY000] HTTP status code: 404\nReceived error:\nCode: 47. DB::Exception: Missing columns: 'c' while processing query: 'SELECT a, c FROM t1 ORDER BY c ASC, a ASC', required columns: 'a' 'c', maybe you meant: 'a'. (UNKNOWN_IDENTIFIER) (version 23.3.1.1654 (official build))\n\n (1) (SQLExecDirectW)\")"}, "36": {"status": "success", "position": 36, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "success"}, "48": {"status": "error", "position": 48, "request_type": "query", "request": "SELECT a, c FROM t1", "reason": "query is expected to fail with different error, details: expected error: expect to fail in a different way, exception: ('HY000', \"[HY000] HTTP status code: 404\nReceived error:\nCode: 47. DB::Exception: Missing columns: 'c' while processing query: 'SELECT a, c FROM t1', required columns: 'a' 'c', maybe you meant: 'a'. (UNKNOWN_IDENTIFIER) (version 23.3.1.1654 (official build))\n\n (1) (SQLExecDirectW)\")"}, "54": {"status": "success", "position": 54, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "60": {"status": "success", "position": 60, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "66": {"status": "success", "position": 66, "request_type": "query", "request": "SELECT ''", "reason": "success"}, "72": {"status": "success", "position": 72, "request_type": "query", "request": "SELECT -1.0", "reason": "success"}, "78": {"status": "success", "position": 78, "request_type": "query", "request": "SELECT -1", "reason": "success"}, "84": {"status": "success", "position": 84, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "success"}, "90": {"status": "error", "position": 90, "request_type": "query", "request": "SELECT 1.0, 1", "reason": "canonic and actual columns count differ, details: expected columns 1, actual columns 2"}, "96": {"status": "error", "position": 96, "request_type": "query", "request": "SELECT 1.0", "reason": "canonic and actual columns count differ, details: expected columns 2, actual columns 1"}, "102": {"status": "success", "position": 102, "request_type": "query", "request": "select a, b from t1 where a = b", "reason": "success"}, "107": {"status": "success", "position": 107, "request_type": "query", "request": "SELECT 1.0013", "reason": "success"}, "113": {"status": "error", "position": 113, "request_type": "query", "request": "SELECT NULL as a", "reason": "actual result has exception and canonic result doesn't, details: actual: query execution failed with an exception, original is: ('ODBC SQL type 0 is not yet supported. 
column-index=0 type=0', 'HY106')"}, "118": {"status": "success", "position": 118, "request_type": "query", "request": "SELECT CAST(NULL AS Nullable(INTEGER))", "reason": "success"}, "124": {"status": "error", "position": 124, "request_type": "query", "request": "SELECT NULL", "reason": "actual result has exception and canonic result doesn't, details: actual: query execution failed with an exception, original is: ('ODBC SQL type 0 is not yet supported. column-index=0 type=0', 'HY106')"}, "129": {"status": "success", "position": 129, "request_type": "query", "request": "SELECT 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15", "reason": "success"}, "148": {"status": "success", "position": 148, "request_type": "query", "request": "SELECT number+1 from system.numbers LIMIT 20", "reason": "success"}}}}}} diff --git a/tests/sqllogic/self-test/test.test b/tests/sqllogic/self-test/test.test new file mode 100644 index 00000000000..85b27ed7d60 --- /dev/null +++ b/tests/sqllogic/self-test/test.test @@ -0,0 +1,145 @@ +onlyif ClickHouse +statement ok +CREATE TABLE t1(a INTEGER, b INTEGER) ENGINE = MergeTree() PRIMARY KEY tuple() + +skipif ClickHouse +statement ok +CREATE TABLE t1(a INTEGER, b INTEGER) + +statement ok +INSERT INTO t1(a,b) VALUES(1,2) + +statement ok +INSERT INTO t1(a,b) VALUES(3,4) + +statement ok +INSERT INTO t1(a,b) VALUES(5,6) + +# just ok request +query II nosort +SELECT a, b + FROM t1 + ORDER BY 2,1 +---- + +# will fail and write exception as a result +query II nosort +SELECT a, c + FROM t1 + ORDER BY 2,1 +---- + +# expect to fail +onlyif ClickHouse +query error UNKNOWN_IDENTIFIER +SELECT a, c FROM t1 +---- + +# expect to fail +onlyif sqlite +query error No such column +SELECT a, c FROM t1 +---- + +# expect to fail in a different way +query error expect to fail in a different way +SELECT a, c FROM t1 +---- + +# print empty as (empty) +query T nosort +SELECT '' +---- +(empty) + +# without result set +query T nosort +SELECT '' +---- + +# without result and saparator +query T nosort +SELECT '' + +# just ok with REAL +query R nosort +SELECT -1.0 +---- +-1.000 + +# just ok with signed +query I nosort +SELECT -1 +---- +-1 + +# just ok +query RI nosort +SELECT 1.0, 1 +---- +1.000 1 + +# mess with columns count +query R nosort +SELECT 1.0, 1 +---- +1.000 1 + +# mess with columns count +query RT nosort +SELECT 1.0 +---- +1.000 + +# empty result set +query II nosort +select a, b from t1 where a = b +---- + +# precise is 3 digits +query R nosort +SELECT 1.0013 +---- +1.001 + +query T nosort +SELECT NULL as a +---- +NULL + +onlyif ClickHouse +query I nosort +SELECT CAST(NULL AS Nullable(INTEGER)) +---- +NULL + +query T nosort +SELECT NULL +---- +NULL NULL + +# thish check how result hashing works +query IIIIIIIIIIIIIII nosort +SELECT 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +---- + +skipif ClickHouse +query I nosort +WITH RECURSIVE + cnt(x) AS ( + SELECT 1 + UNION ALL + SELECT x+1 FROM cnt + LIMIT 20 + ) +SELECT x FROM cnt; +---- +20 values hashing to 52c46dff81346ead02fcf6245c762b1a + +onlyif ClickHouse +query I nosort +SELECT number+1 from system.numbers LIMIT 20 +---- +20 values hashing to 52c46dff81346ead02fcf6245c762b1a + + diff --git a/tests/sqllogic/test_parser.py b/tests/sqllogic/test_parser.py new file mode 100755 index 00000000000..42adb83809f --- /dev/null +++ b/tests/sqllogic/test_parser.py @@ -0,0 +1,609 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import logging +import os + +from itertools import chain +from enum import Enum +from hashlib import md5 +from 
functools import reduce + +from exceptions import Error, ProgramError, ErrorWithParent, DataResultDiffer + + +logger = logging.getLogger("parser") +logger.setLevel(logging.DEBUG) + +CONDITION_SKIP = "skipif" +CONDITION_ONLY = "onlyif" + + +# TODO: replace assertions with raised exceptions +class TestFileFormatException(Error): + pass + + +class FileAndPos: + def __init__(self, file=None, pos=None): + self.file = file + self.pos = pos + + def __str__(self): + return f"{self.file}:{self.pos}" + + +def check_conditions(conditions, dbms_name): + # Group condition values by their token, e.g. {"skipif": ["ClickHouse"]}. + rules = {} + for rec in conditions: + key, val = rec + if key not in rules: + rules[key] = [] + rules[key].append(val) + if CONDITION_SKIP in rules: + if dbms_name in rules[CONDITION_SKIP]: + return False + if CONDITION_ONLY in rules: + if dbms_name not in rules[CONDITION_ONLY]: + return False + return True + + +class BlockType(Enum): + comments = 1 + control = 2 + statement = 3 + query = 4 + + +COMMENT_TOKENS = ["#"] +RESULT_SEPARATION_LINE = "----" +CONTROL_TOKENS = ["halt", "hash-threshold"] + +CONDITIONS_TOKENS = [CONDITION_SKIP, CONDITION_ONLY] +STATEMENT_TOKEN = "statement" +QUERY_TOKEN = "query" + + +ACCEPTABLE_TYPES = {type(""): "T", type(1): "I", type(0.001): "R"} + + +def _is_comment_line(tokens): + return tokens and tokens[0][0] in COMMENT_TOKENS + + +def _is_separation_line(tokens): + return tokens and tokens[0] == RESULT_SEPARATION_LINE + + +def _is_control_line(tokens): + return tokens and tokens[0] in CONTROL_TOKENS + + +def _is_conditional_line(tokens): + return tokens and tokens[0] in CONDITIONS_TOKENS + + +def _is_statement_line(tokens): + return tokens and tokens[0] == STATEMENT_TOKEN + + +def _is_query_line(tokens): + return tokens and tokens[0] == QUERY_TOKEN + + +class FileBlockBase: + def __init__(self, parser, start, end): + self._parser = parser + self._start = start + self._end = end + + def get_block_type(self): + pass + + def get_pos(self): + return self._start + 1 + + @staticmethod + def __parse_request(test_file, start, end): + request_end = start + while request_end < end: + tokens = test_file.get_tokens(request_end) + if not tokens or _is_separation_line(tokens): + break + request_end += 1 + request = test_file.get_tokens_from_lines(start, request_end) + logger.debug("slice request %s:%s end %s", start, request_end, end) + return " ".join(request), request_end + + @staticmethod + def __parse_result(test_file, start, end): + result_end = start + while result_end < end: + tokens = test_file.get_tokens(result_end) + if not tokens: + break + result_end += 1 + logger.debug("slice result %s:%s end %s", start, result_end, end) + result = test_file.get_tokens(start, result_end) + return result, result_end + + @staticmethod + def parse_block(parser, start, end): + file_pos = FileAndPos(parser.get_test_name(), start + 1) + logger.debug("%s start %s end %s", file_pos, start, end) + + block_type = BlockType.comments + conditions = [] + controls = [] + statement = None + query = None + request = [] + result_line = None + result = [] + + line = start + while line < end: + tokens = parser.get_tokens(line) + + if _is_comment_line(tokens): + pass + elif _is_conditional_line(tokens): + conditions.append(parser.get_tokens(line)) + + elif _is_control_line(tokens): + assert block_type in (BlockType.comments, BlockType.control) + block_type = BlockType.control + controls.append(parser.get_tokens(line)) + + elif _is_statement_line(tokens): + assert block_type in (BlockType.comments,) + block_type = BlockType.statement + statement = 
parser.get_tokens(line) + request, last_line = FileBlockBase.__parse_request( + parser, line + 1, end + ) + assert last_line == end + line = last_line + + elif _is_query_line(tokens): + assert block_type in (BlockType.comments,) + block_type = BlockType.query + query = parser.get_tokens(line) + request, last_line = FileBlockBase.__parse_request( + parser, line + 1, end + ) + result_line = last_line + line = last_line + if line == end: + break + tokens = parser.get_tokens(line) + assert _is_separation_line(tokens), f"last_line {last_line}, end {end}" + result, last_line = FileBlockBase.__parse_result(parser, line + 1, end) + assert last_line == end + line = last_line + line += 1 + + if block_type == BlockType.comments: + return FileBlockComments(parser, start, end) + + if block_type == BlockType.control: + return FileBlockControl(parser, start, end, conditions, controls) + + if block_type == BlockType.statement: + return FileBlockStatement( + parser, start, end, conditions, statement, request + ) + + if block_type == BlockType.query: + block = FileBlockQuery( + parser, start, end, conditions, query, request, result_line + ) + block.with_result(result) + return block + + def dump_to(self, output): + if output is None: + return + for line in range(self._start, self._end): + output.write(self._parser.get_line(line)) + output.write("\n") + + +class FileBlockComments(FileBlockBase): + def __init__(self, parser, start, end): + super().__init__(parser, start, end) + + def get_block_type(self): + return BlockType.comments + + +class FileBlockControl(FileBlockBase): + def __init__(self, parser, start, end, conditions, control): + super().__init__(parser, start, end) + self.conditions = conditions + self.control = control + + def get_block_type(self): + return BlockType.control + + def get_conditions(self): + return self.conditions + + +class FileBlockStatement(FileBlockBase): + def __init__(self, parser, start, end, conditions, statement, request): + super().__init__(parser, start, end) + self.conditions = conditions + self.statement = statement + self.request = request + + def get_block_type(self): + return BlockType.statement + + def get_request(self): + return self.request + + def get_conditions(self): + return self.conditions + + def get_statement(self): + return self.statement + + def expected_error(self): + return self.statement[1] == "error" + + +class FileBlockQuery(FileBlockBase): + def __init__(self, parser, start, end, conditions, query, request, result_line): + super().__init__(parser, start, end) + self.conditions = conditions + self.query = query + self.request = request + self.result = None + self.result_line = result_line + + def get_block_type(self): + return BlockType.query + + def get_request(self): + return self.request + + def get_conditions(self): + return self.conditions + + def get_query(self): + return self.query + + def expected_error(self): + return " ".join(self.query[2:]).lower() if self.query[1] == "error" else None + + def get_types(self): + if self.query[1] == "error": + raise TestFileFormatException( + "the query is expected to fail, there are no types" + ) + return self.query[1] + + def get_sort_mode(self): + return self.query[2] + + def get_result(self): + return self.result + + def with_result(self, result): + self.result = result + + def dump_to(self, output): + if output is None: + return + + for line in range(self._start, self.result_line): + output.write(self._parser.get_line(line)) + + if self.result is not None: + logger.debug("dump result %s", self.result) 
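+ # Emit the canonical '----' separator, then the rows this run actually produced.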
+ output.write("----\n") + for row in self.result: + output.write(" ".join(row) + "\n") + + output.write("\n") + + +class TestFileParser: + CONTROL_TOKENS = ["halt", "hash-threshold"] + CONDITIONS_TOKENS = [CONDITION_SKIP, CONDITION_ONLY] + STATEMENT_TOKEN = "statement" + QUERY_TOKEN = "query" + COMMENT_TOKEN = "#" + + DEFAULT_HASH_THRESHOLD = 8 + + def __init__(self, stream, test_name, test_file): + self._stream = stream + self._test_name = test_name + self._test_file = test_file + + self._lines = [] + self._raw_tokens = [] + self._tokens = [] + self._empty_lines = [] + + def get_test_name(self): + return self._test_name + + def get_test_file(self): + if self._test_file is not None: + return self._test_file + return self._test_name + + def get_line(self, line): + return self._lines[line] + + def get_tokens(self, start, end=None): + if end is None: + return self._tokens[start] + else: + return self._tokens[start:end] + + def get_tokens_from_lines(self, start, end): + return list(chain(*self._tokens[start:end])) + + def __load_file(self): + self._lines = self._stream.readlines() + + self._raw_tokens = [line.split() for line in self._lines] + assert len(self._lines) == len(self._raw_tokens) + + self._tokens = [] + for line in self._raw_tokens: + if self.COMMENT_TOKEN in line: + comment_starts_at = line.index(self.COMMENT_TOKEN) + self._tokens.append(line[0:comment_starts_at]) + else: + self._tokens.append(line) + + self._empty_lines = [i for i, x in enumerate(self._raw_tokens) if len(x) == 0] + + logger.debug( + "Test file %s loaded rows %s, empty rows %s", + self.get_test_file(), + len(self._lines), + len(self._empty_lines), + ) + + def __unload_file(self): + self._test_file = None + self._test_name = None + self._stream = None + self._lines = [] + self._raw_tokens = [] + self._tokens = [] + self._empty_lines = [] + + def _iterate_blocks(self): + prev = 0 + for i in self._empty_lines: + if prev != i: + yield FileBlockBase.parse_block(self, prev, i) + prev = i + 1 + + if prev != len(self._lines): + yield FileBlockBase.parse_block(self, prev, len(self._lines)) + + def test_blocks(self): + try: + self.__load_file() + yield from self._iterate_blocks() + finally: + self.__unload_file() + + +class QueryResult: + def __init__( + self, + rows=None, + values_count=None, + data_hash=None, + exception=None, + hash_threshold=0, + ): + self.rows = rows + self.values_count = values_count + self.data_hash = data_hash + self.exception = exception + self.hash_threshold = hash_threshold + self.hash_it() + logger.debug("created QueryResult %s", str(self)) + + def __str__(self): + params = ", ".join( + ( + str(x) + for x in [ + "rows: {}".format(self.rows) if self.rows else "", + "values_count: {}".format(self.values_count) + if self.values_count + else "", + "data_hash: {}".format(self.data_hash) if self.data_hash else "", + "exception: {}".format(self.exception) if self.exception else "", + "hash_threshold: {}".format(self.hash_threshold) + if self.hash_threshold + else "", + ] + if x + ) + ) + return "QueryResult({})".format(params) + + def __iter__(self): + if self.rows is not None: + if self.hash_threshold == 0: + return iter(self.rows) + if self.values_count <= self.hash_threshold: + return iter(self.rows) + if self.data_hash is not None: + return iter( + [["{} values hashing to {}".format(self.values_count, self.data_hash)]] + ) + if self.exception is not None: + return iter([["exception: {}".format(self.exception)]]) + raise ProgramError("Query result is empty", details="{}".format(self.__str__())) + + 
@staticmethod + def __value_count(rows): + return reduce(lambda a, b: a + len(b), rows, 0) + + @staticmethod + def parse_it(rows, hash_threshold): + logger.debug("parse result len: %s rows: %s", len(rows), rows) + if len(rows) == 1: + logger.debug("one row is %s", rows) + if len(rows[0]) > 0 and rows[0][0] == "exception:": + logging.debug("as exception") + message = " ".join(rows[0][1:]) + return QueryResult(exception=message) + if len(rows[0]) == 5 and " ".join(rows[0][1:4]) == "values hashing to": + logging.debug("as hashed data") + values_count = int(rows[0][0]) + data_hash = rows[0][4] + return QueryResult(data_hash=data_hash, values_count=values_count) + logger.debug("as data") + values_count = QueryResult.__value_count(rows) + return QueryResult( + rows=rows, values_count=values_count, hash_threshold=hash_threshold + ) + + @staticmethod + def __result_as_strings(rows, types): + res = [] + for row in rows: + res_row = [] + for c, t in zip(row, types): + if c is None: + res_row.append("NULL") + continue + + if t == "T": + if c == "": + res_row.append("(empty)") + else: + res_row.append(str(c)) + elif t == "I": + res_row.append(str(int(c))) + elif t == "R": + res_row.append(f"{c:.3f}") + + res.append(res_row) + return res + + @staticmethod + def __sort_result(rows, sort_mode): + if sort_mode == "nosort": + return rows + if sort_mode == "rowsort": + return sorted(rows) + if sort_mode == "valuesort": + values = list(chain(*rows)) + values.sort() + return [values] if values else [] + + @staticmethod + def __calculate_hash(rows): + md5_hash = md5() + for row in rows: + for value in row: + md5_hash.update(value.encode("ascii")) + return str(md5_hash.hexdigest()) + + @staticmethod + def make_it(rows, types, sort_mode, hash_threshold): + values_count = QueryResult.__value_count(rows) + as_string = QueryResult.__result_as_strings(rows, types) + as_sorted = QueryResult.__sort_result(as_string, sort_mode) + return QueryResult( + rows=as_sorted, values_count=values_count, hash_threshold=hash_threshold + ) + + def hash_it(self): + if self.rows is not None and self.data_hash is None: + self.data_hash = QueryResult.__calculate_hash(self.rows) + return self + + @staticmethod + def as_exception(e): + # do not print details to the test file + # but print original exception + if isinstance(e, ErrorWithParent): + message = "{}, original is: {}".format(e, e.get_parent()) + else: + message = "{}".format(e) + + return QueryResult(exception=message) + + @staticmethod + def assert_eq(canonic, actual): + if not isinstance(canonic, QueryResult): + raise ProgramError("NotImplemented") + + if not isinstance(actual, QueryResult): + raise ProgramError("NotImplemented") + + if canonic.exception is not None or actual.exception is not None: + if canonic.exception is not None and actual.exception is not None: + if canonic.exception != actual.exception: + raise DataResultDiffer( + "canonic and actual results have different exceptions", + details=f"canonic: {canonic.exception}, actual: {actual.exception}", + ) + else: + # exceptions are the same + return + elif canonic.exception is not None: + raise DataResultDiffer( + "canonic result has exception and actual result doesn't", + details=f"canonic: {canonic.exception}", + ) + else: + raise DataResultDiffer( + "actual result has exception and canonic result doesn't", + details=f"actual: {actual.exception}", + ) + + canonic.hash_it() + actual.hash_it() + + if canonic.data_hash is not None: + if actual.data_hash is None: + raise ProgramError("actual result has to have hash for 
data") + if canonic.values_count != actual.values_count: + raise DataResultDiffer( + "canonic and actual results have different value count", + details="canonic values count {}, actual {}".format( + canonic.values_count, actual.values_count + ), + ) + if canonic.data_hash != actual.data_hash: + raise DataResultDiffer( + "canonic and actual results have different hashes" + ) + return + + if canonic.rows is not None and actual.rows is not None: + if canonic.values_count != actual.values_count: + raise DataResultDiffer( + "canonic and actual results have different value count", + details="canonic values count {}, actual {}".format( + canonic.values_count, actual.values_count + ), + ) + if canonic.rows != actual.rows: + raise DataResultDiffer( + "canonic and actual results have different values" + ) + return + + raise ProgramError( + "Unable to compare results", + details="actual {}, canonic {}".format(actual, canonic), + ) diff --git a/tests/sqllogic/test_runner.py b/tests/sqllogic/test_runner.py new file mode 100644 index 00000000000..3df38e7fce5 --- /dev/null +++ b/tests/sqllogic/test_runner.py @@ -0,0 +1,584 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +import enum +import logging +import os +import traceback +import io +import json + +import test_parser +from exceptions import ( + Error, + ProgramError, + DataResultDiffer, + StatementExecutionError, + StatementSuccess, + QueryExecutionError, + QuerySuccess, + SchemeResultDiffer, +) +from connection import execute_request + + +logger = logging.getLogger("parser") +logger.setLevel(logging.DEBUG) + + +def _list_files(path): + logger.debug("list files in %s, type %s", path, type(path)) + + if not isinstance(path, str): + raise ProgramError("NotImplemented") + + if os.path.isfile(path): + yield path + else: + with os.scandir(path) as it: + for entry in it: + yield from _list_files(entry.path) + + +def _filter_files(suffix, files): + yield from (path for path in files if path.endswith(suffix)) + + +class RequestType(str, enum.Enum): + statement = enum.auto() + query = enum.auto() + + +class Status(str, enum.Enum): + success = "success" + error = "error" + + +class TestStatus: + def __init__(self): + self.status = None + self.file = None + self.position = None + self.request_type = None + self.request = None + self.reason = None + + def get_map(self): + return { + "status": self.status.name.lower(), + # "file": self.file, + "position": self.position, + "request_type": self.request_type.name.lower(), + "request": self.request, + "reason": self.reason, + } + + @staticmethod + def __from_error(err): + if isinstance(err, Error): + result = TestStatus() + result.name = err.test_name + result.file = err.test_file + result.position = err.test_pos + result.request = err.request + result.reason = err.reason + return result + raise ProgramError("NotImplemented") + + @staticmethod + def from_exception(ex): + result = TestStatus.__from_error(ex) + + if isinstance(ex, StatementSuccess): + result.status = Status.success + result.request_type = RequestType.statement + elif isinstance(ex, StatementExecutionError): + result.status = Status.error + result.request_type = RequestType.statement + elif isinstance(ex, QuerySuccess): + result.status = Status.success + result.request_type = RequestType.query + elif isinstance(ex, QueryExecutionError): + result.status = Status.error + result.request_type = RequestType.query + elif isinstance(ex, SchemeResultDiffer): + result.status = Status.error + result.request_type = RequestType.query + elif isinstance(ex, 
DataResultDiffer): + result.status = Status.error + result.request_type = RequestType.query + else: + raise ProgramError("NotImplemented", parent=ex) + + return result + + +class SimpleStats: + def __init__(self, general=None): + self._general = general + self._success = 0 + self._fail = 0 + + @property + def all(self): + return self._success + self.fail + + @property + def success(self): + return self._success + + @success.setter + def success(self, value): + if self._general is not None: + self._general.success += value - self._success + self._success = value + + @property + def fail(self): + return self._fail + + @fail.setter + def fail(self, value): + if self._general is not None: + self._general.fail += value - self._fail + self._fail = value + + def __repr__(self): + return str(self.get_map()) + + def update(self, status): + if not isinstance(status, TestStatus): + raise ProgramError("NotImplemented") + + if status.status == Status.error: + self.fail += 1 + else: + self.success += 1 + + def get_map(self): + result = dict() + result["success"] = self.success + result["fail"] = self.fail + return result + + def combine_with(self, right): + if not isinstance(right, SimpleStats): + raise ProgramError("NotImplemented") + self.success += right.success + self.fail += right.fail + + +class Stats: + def __init__(self): + self.total = SimpleStats() + self.statements = SimpleStats(self.total) + self.queries = SimpleStats(self.total) + + def __repr__(self): + return str(self.get_map()) + + def update(self, status): + if not isinstance(status, TestStatus): + raise ProgramError("NotImplemented") + + if status.request_type == RequestType.query: + choose = self.queries + else: + choose = self.statements + choose.update(status) + + def get_map(self): + result = dict() + result["statements"] = self.statements.get_map() + result["queries"] = self.queries.get_map() + result["total"] = self.total.get_map() + return result + + def combine_with(self, right): + if not isinstance(right, Stats): + raise ProgramError("NotImplemented") + self.statements.combine_with(right.statements) + self.queries.combine_with(right.queries) + + +class OneReport: + def __init__(self, test_name, test_file): + self.test_name = test_name + self.test_file = test_file + self.stats = Stats() + self.requests = dict() # type: dict(int, TestStatus) + + def update(self, status): + if not isinstance(status, TestStatus): + raise ProgramError("NotImplemented") + + self.stats.update(status) + self.requests[status.position] = status + + def __repr__(self): + return str(self.get_map()) + + def get_map(self): + result = dict() + result["test_name"] = self.test_name + result["test_file"] = self.test_file + result["stats"] = self.stats.get_map() + result["requests"] = dict() + requests = result["requests"] + for pos, status in self.requests.items(): + requests[pos] = status.get_map() + return result + + +class Report: + def __init__(self, dbms_name, input_dir=None): + self.dbms_name = dbms_name + self.stats = Stats() + self.tests = dict() # type: dict(str, OneReport) + self.input_dir = input_dir + self.output_dir = None + + def update(self, status): + if not isinstance(status, TestStatus): + raise ProgramError("NotImplemented") + + self.stats.update(status) + self.__get_file_report(status).update(status) + + def __get_file_report(self, status): + if status.name not in self.tests: + self.tests[status.name] = OneReport(status.name, status.file) + return self.tests[status.name] + + def __repr__(self): + return str(self.get_map()) + + def 
assign_result_dir(self, res_dir):
+        self.output_dir = res_dir
+
+    def get_map(self):
+        result = dict()
+        result["dbms_name"] = self.dbms_name
+        result["stats"] = self.stats.get_map()
+        result["input_dir"] = self.input_dir
+        if self.output_dir is not None:
+            result["output_dir"] = self.output_dir
+        result["tests"] = dict()
+        tests = result["tests"]
+        for test_name, one_report in self.tests.items():
+            tests.update({test_name: one_report.get_map()})
+        return result
+
+    def combine_with(self, right):
+        if not isinstance(right, Report):
+            raise ProgramError("NotImplemented")
+
+        if self.dbms_name != right.dbms_name:
+            raise ProgramError("reports are attached to the different databases")
+
+        if self.input_dir is None or right.input_dir is None:
+            raise ProgramError("can't compare input dirs")
+
+        if self.input_dir != right.input_dir:
+            raise ProgramError(
+                "can't combine reports, they are attached to the different input dirs"
+            )
+
+        for test_name in right.tests.keys():
+            if test_name in self.tests:
+                raise ProgramError(
+                    f"can't combine reports, they have intersect tests, {test_name}"
+                )
+
+        self.tests.update(right.tests)
+        self.stats.combine_with(right.stats)
+        return self
+
+    def write_report(self, report_dir):
+        report_path = os.path.join(report_dir, "report.json")
+        logger.info(f"create file {report_path}")
+        with open(report_path, "w") as stream:
+            stream.write(json.dumps(self.get_map(), indent=4))
+
+
+class TestRunner:
+    def __init__(
+        self,
+        connection,
+        verify_mode=None,
+        skip_request_types=None,
+        stop_at_statement_error=None,
+    ):
+        self.connection = connection
+        self.verify = False if verify_mode is None else verify_mode
+        self.skip_request_types = []
+        if skip_request_types is not None:
+            for req_type in skip_request_types:
+                self.with_skip(req_type)
+        self.stop_at_statement_error = (
+            False if stop_at_statement_error is None else stop_at_statement_error
+        )
+
+        self.dbms_name = connection.DBMS_NAME
+        self.report = None
+        self.results = None
+        self._input_dir = None
+
+    def with_verify_mode(self):
+        self.verify = True
+        return self
+
+    def with_completion_mode(self):
+        self.verify = False
+        return self
+
+    def with_skip(self, type_request):
+        if type_request == RequestType.query:
+            self.skip_request_types.append(test_parser.BlockType.query)
+        if type_request == RequestType.statement:
+            self.skip_request_types.append(test_parser.BlockType.statement)
+
+    def __statuses(self, parser, out_stream):
+        skip_rest = False
+
+        for block in parser.test_blocks():
+            test_file = parser.get_test_file()
+            test_name = parser.get_test_name()
+            position = block.get_pos()
+            name_pos = f"{test_name}:{position}"
+
+            clogger = logging.getLogger(f"parser at {name_pos}")
+
+            if skip_rest:
+                clogger.debug("Skip rest blocks")
+                block.dump_to(out_stream)
+                continue
+
+            if block.get_block_type() == test_parser.BlockType.comments:
+                clogger.debug("Skip comment block")
+                block.dump_to(out_stream)
+                continue
+
+            if block.get_block_type() == test_parser.BlockType.control:
+                clogger.debug("Skip control block %s", name_pos)
+                block.dump_to(out_stream)
+                continue
+
+            clogger.debug("Request <%s>", block.get_request())
+
+            cond_lines = block.get_conditions()
+            if not test_parser.check_conditions(cond_lines, self.dbms_name):
+                clogger.debug("Conditionally skip block for %s", self.dbms_name)
+                block.dump_to(out_stream)
+                continue
+
+            request = block.get_request()
+            exec_res = execute_request(request, self.connection)
+
+            if 
block.get_block_type() in self.skip_request_types: + clogger.debug("Runtime skip block for %s", self.dbms_name) + block.dump_to(out_stream) + continue + + if block.get_block_type() == test_parser.BlockType.statement: + try: + clogger.debug("this is statement") + if block.expected_error(): + clogger.debug("error is expected") + if not exec_res.has_exception(): + raise StatementExecutionError( + "statement request did not fail as expected" + ) + else: + clogger.debug("ok is expected") + if exec_res.has_exception(): + raise StatementExecutionError( + "statement failed with exception", + parent=exec_res.get_exception(), + ) + raise StatementSuccess() + except StatementSuccess as ok: + clogger.debug("statement is ok") + ok.set_details( + file=test_file, name=test_name, pos=position, request=request + ) + block.dump_to(out_stream) + yield TestStatus.from_exception(ok) + except StatementExecutionError as err: + err.set_details( + file=test_file, name=test_name, pos=position, request=request + ) + clogger.critical("Unable to execute statement, %s", err.reason) + block.dump_to(out_stream) + if self.stop_at_statement_error: + clogger.critical("Will skip the rest of the file") + skip_rest = True + yield TestStatus.from_exception(err) + + if block.get_block_type() == test_parser.BlockType.query: + try: + clogger.debug("this is query") + expected_error = block.expected_error() + if expected_error: + clogger.debug("error is expected %s", expected_error) + if exec_res.has_exception(): + e = exec_res.get_exception() + clogger.debug("had error %s", e) + message = str(e).lower() + if expected_error not in message: + clogger.debug("errors differed") + raise QueryExecutionError( + "query is expected to fail with different error", + details=f"expected error: {expected_error}", + parent=exec_res.get_exception(), + ) + else: + clogger.debug("errors matched") + raise QuerySuccess() + else: + clogger.debug("missed error") + raise QueryExecutionError( + "query is expected to fail with error", + details="expected error: {}".format(expected_error), + ) + else: + clogger.debug("success is expected") + if exec_res.has_exception(): + clogger.debug("had error") + if self.verify: + clogger.debug("verify mode") + canonic = test_parser.QueryResult.parse_it( + block.get_result(), 10 + ) + exception = QueryExecutionError( + "query execution failed with an exception", + parent=exec_res.get_exception(), + ) + actual = test_parser.QueryResult.as_exception(exception) + test_parser.QueryResult.assert_eq(canonic, actual) + block.with_result(actual) + raise QuerySuccess() + else: + clogger.debug("completion mode") + raise QueryExecutionError( + "query execution failed with an exception", + parent=exec_res.get_exception(), + ) + + canonic_types = block.get_types() + clogger.debug("canonic types %s", canonic_types) + + if len(exec_res.get_result()) > 0: + actual_columns_count = len(exec_res.get_result()[0]) + canonic_columns_count = len(canonic_types) + if canonic_columns_count != actual_columns_count: + raise SchemeResultDiffer( + "canonic and actual columns count differ", + details="expected columns {}, actual columns {}".format( + canonic_columns_count, actual_columns_count + ), + ) + + actual = test_parser.QueryResult.make_it( + exec_res.get_result(), canonic_types, block.get_sort_mode(), 10 + ) + + if self.verify: + clogger.debug("verify mode") + canonic = test_parser.QueryResult.parse_it( + block.get_result(), 10 + ) + test_parser.QueryResult.assert_eq(canonic, actual) + + block.with_result(actual) + raise QuerySuccess() + + 
except QuerySuccess as ok: + ok.set_details( + file=test_file, name=test_name, pos=position, request=request + ) + clogger.debug("query ok") + block.dump_to(out_stream) + yield TestStatus.from_exception(ok) + except Error as err: + err.set_details( + file=test_file, name=test_name, pos=position, request=request + ) + clogger.warning( + "Query has failed with exception: %s", + err.reason, + ) + block.with_result(test_parser.QueryResult.as_exception(err)) + block.dump_to(out_stream) + yield TestStatus.from_exception(err) + + def run_one_test(self, stream, test_name, test_file): + if self._input_dir is not None: + if not test_file.startswith(self._input_dir): + raise ProgramError( + f"that runner instance is attached to tests in dir {self._input_dir}" + f", can't run with file {test_file}" + ) + else: + self._input_dir = os.path.dirname(test_file) + + if self.report is None: + self.report = Report(self.dbms_name, self._input_dir) + + if self.results is None: + self.results = dict() + + with self.connection.with_one_test_scope(): + out_stream = io.StringIO() + self.results[test_name] = out_stream + + parser = test_parser.TestFileParser(stream, test_name, test_file) + for status in self.__statuses(parser, out_stream): + self.report.update(status) + + def _assert_input_dir(self, input_dir): + if self._input_dir is not None: + if self._input_dir != input_dir: + raise ProgramError( + f"that runner instance is attached to tests in dir {self._input_dir}" + f", can't run with {input_dir}" + ) + + def run_all_tests_from_file(self, test_file, input_dir=None): + self._assert_input_dir(input_dir) + self._input_dir = input_dir + if self._input_dir is None: + self._input_dir = os.path.dirname(test_file) + + test_name = os.path.relpath(test_file, start=self._input_dir) + logger.debug("open file %s", test_name) + with open(test_file, "r") as stream: + self.run_one_test(stream, test_name, test_file) + + def run_all_tests_from_dir(self, input_dir): + self._assert_input_dir(input_dir) + self._input_dir = input_dir + for file_path in TestRunner.list_tests(self._input_dir): + self.run_all_tests_from_file(file_path, self._input_dir) + + def write_results_to_dir(self, dir_path): + if not os.path.isdir(dir_path): + raise NotADirectoryError(dir_path) + + self.report.assign_result_dir(dir_path) + + for test_name, stream in self.results.items(): + test_file = os.path.join(dir_path, test_name) + logger.info(f"create file {test_file}") + result_dir = os.path.dirname(test_file) + os.makedirs(result_dir, exist_ok=True) + with open(test_file, "w") as output: + output.write(stream.getvalue()) + + def write_report(self, report_dir): + self.report.write_report(report_dir) + + @staticmethod + def list_tests(input_dir): + yield from _filter_files(".test", _list_files(input_dir)) diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index bb9d4c88fa1..47dd2fc9f2d 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -14,7 +14,7 @@ add_subdirectory (config-processor) add_subdirectory (report) # Not used in package -if (NOT DEFINED ENABLE_UTILS OR ENABLE_UTILS) +if (ENABLE_UTILS) add_subdirectory (compressor) add_subdirectory (corrector_utf8) add_subdirectory (zookeeper-cli) @@ -23,16 +23,8 @@ if (NOT DEFINED ENABLE_UTILS OR ENABLE_UTILS) add_subdirectory (wikistat-loader) add_subdirectory (check-marks) add_subdirectory (checksum-for-compressed-block) - add_subdirectory (wal-dump) add_subdirectory (check-mysql-binlog) add_subdirectory (keeper-bench) - - if (TARGET ch_contrib::nuraft) - add_subdirectory 
(keeper-data-dumper) - endif () - - # memcpy_jart.S contains position dependent code - if (NOT CMAKE_POSITION_INDEPENDENT_CODE AND OS_LINUX AND ARCH_AMD64) - add_subdirectory (memcpy-bench) - endif () + add_subdirectory (keeper-data-dumper) + add_subdirectory (memcpy-bench) endif () diff --git a/utils/changelog-simple/.gitignore b/utils/changelog-simple/.gitignore deleted file mode 100644 index 78caa68e38e..00000000000 --- a/utils/changelog-simple/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -*.txt -*.json diff --git a/utils/changelog-simple/README.md b/utils/changelog-simple/README.md deleted file mode 100644 index cd8f8da9b61..00000000000 --- a/utils/changelog-simple/README.md +++ /dev/null @@ -1,21 +0,0 @@ -## How To Generate Changelog - -Generate github token: -* https://github.com/settings/tokens - keep all checkboxes unchecked, no scopes need to be enabled. - -Dependencies: -``` -sudo apt-get install git curl jq python3 python3-fuzzywuzzy -``` - -Update information about tags: -``` -git fetch --tags -``` - -Usage example: - -``` -export GITHUB_USER=... GITHUB_TOKEN=ghp_... -./changelog.sh v21.5.6.6-stable v21.6.2.7-prestable -``` diff --git a/utils/changelog-simple/changelog.sh b/utils/changelog-simple/changelog.sh deleted file mode 100755 index 52817acfae4..00000000000 --- a/utils/changelog-simple/changelog.sh +++ /dev/null @@ -1,96 +0,0 @@ -#!/bin/bash -set -e - -script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" - -from="$1" -to="$2" -log_command=(git log "$from..$to" --first-parent) - -"${log_command[@]}" > "changelog-log.txt" - -# Check for diamond merges. -if "${log_command[@]}" --oneline --grep "Merge branch '" | grep '' -then - # DO NOT ADD automated handling of diamond merges to this script. - # It is an unsustainable way to work with git, and it MUST be visible. - echo Warning: suspected diamond merges above. - echo Some commits will be missed, review these manually. -fi - -# Search for PR numbers in commit messages. First variant is normal merge, and second -# variant is squashed. Next are some backport message variants. -find_prs=(sed -n "s/^.*merg[eding]*.*#\([[:digit:]]\+\).*$/\1/Ip; - s/^.*(#\([[:digit:]]\+\))$/\1/p; - s/^.*back[- ]*port[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip; - s/^.*cherry[- ]*pick[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip") - -# awk is to filter out small task numbers from different task tracker, which are -# referenced by documentation commits like '* DOCSUP-824: query log (#115)'. -"${find_prs[@]}" "changelog-log.txt" | sort -rn | uniq | awk '$0 > 1000 { print $0 }' > "changelog-prs.txt" - -echo "$(wc -l < "changelog-prs.txt") PRs added between $from and $to." -if [ $(wc -l < "changelog-prs.txt") -eq 0 ] ; then exit 0 ; fi - -function github_download() -{ - local url=${1} - local file=${2} - if ! [ -f "$file" ] - then - echo "curl -u \"$GITHUB_USER:***\" -sSf \"$url\" > \"$file\"" - - if ! curl -u "$GITHUB_USER:$GITHUB_TOKEN" \ - -sSf "$url" \ - > "$file" - then - >&2 echo "Failed to download '$url' to '$file'. Contents: '$(cat "$file")'." - rm "$file" - return 1 - fi - sleep 0.1 - fi -} - -rm changelog-prs-filtered.txt &> /dev/null ||: -for pr in $(cat "changelog-prs.txt") -do - # Download PR info from github. - file="pr$pr.json" - github_download "https://api.github.com/repos/ClickHouse/ClickHouse/pulls/$pr" "$file" || continue - - if ! [ "$pr" == "$(jq -r .number "$file")" ] - then - >&2 echo "Got wrong data for PR #$pr (please check and remove '$file')." - continue - fi - - # Filter out PRs by bots. 
- user_login=$(jq -r .user.login "$file") - - filter_bot=$(echo "$user_login" | grep -q "\[bot\]$" && echo "Skip." || echo "Ok." ||:) - filter_robot=$(echo "$user_login" | grep -q "robot-clickhouse" && echo "Skip." || echo "Ok." ||:) - - if [ "Skip." == "$filter_robot" ] || [ "Skip." == "$filter_bot" ] - then - continue - fi - - # Download author info from github. - user_id=$(jq -r .user.id "$file") - user_file="user$user_id.json" - github_download "$(jq -r .user.url "$file")" "$user_file" || continue - - if ! [ "$user_id" == "$(jq -r .id "$user_file")" ] - then - >&2 echo "Got wrong data for user #$user_id (please check and remove '$user_file')." - continue - fi - - echo "$pr" >> changelog-prs-filtered.txt -done - -echo "### ClickHouse release $to FIXME as compared to $from -" > changelog.md -"$script_dir/format-changelog.py" changelog-prs-filtered.txt >> changelog.md -cat changelog.md diff --git a/utils/changelog-simple/format-changelog.py b/utils/changelog-simple/format-changelog.py deleted file mode 100755 index d5e1518270e..00000000000 --- a/utils/changelog-simple/format-changelog.py +++ /dev/null @@ -1,164 +0,0 @@ -#!/usr/bin/python3 - -import argparse -import collections -import fuzzywuzzy.fuzz -import itertools -import json -import os -import re -import sys - -parser = argparse.ArgumentParser(description="Format changelog for given PRs.") -parser.add_argument( - "file", - metavar="FILE", - type=argparse.FileType("r", encoding="utf-8"), - nargs="?", - default=sys.stdin, - help="File with PR numbers, one per line.", -) -args = parser.parse_args() - -# This function mirrors the PR description checks in ClickhousePullRequestTrigger. -# Returns False if the PR should not be mentioned changelog. -def parse_one_pull_request(item): - description = item["body"] - # Don't skip empty lines because they delimit parts of description - lines = [ - line - for line in [ - x.strip() for x in (description.split("\n") if description else []) - ] - ] - lines = [re.sub(r"\s+", " ", l) for l in lines] - - category = "" - entry = "" - - if lines: - i = 0 - while i < len(lines): - if re.match(r"(?i).*change\s*log\s*category", lines[i]): - i += 1 - if i >= len(lines): - break - # Can have one empty line between header and the category itself. Filter it out. - if not lines[i]: - i += 1 - if i >= len(lines): - break - category = re.sub(r"^[-*\s]*", "", lines[i]) - i += 1 - - elif re.match(r"(?i).*change\s*log\s*entry", lines[i]): - i += 1 - # Can have one empty line between header and the entry itself. Filter it out. - if i < len(lines) and not lines[i]: - i += 1 - # All following lines until empty one are the changelog entry. - entry_lines = [] - while i < len(lines) and lines[i]: - entry_lines.append(lines[i]) - i += 1 - entry = " ".join(entry_lines) - else: - i += 1 - - if not category: - # Shouldn't happen, because description check in CI should catch such PRs. - # Fall through, so that it shows up in output and the user can fix it. - category = "NO CL CATEGORY" - - # Filter out the PR categories that are not for changelog. - if re.match( - r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)", - category, - ): - return False - - if not entry: - # Shouldn't happen, because description check in CI should catch such PRs. - category = "NO CL ENTRY" - entry = "NO CL ENTRY: '" + item["title"] + "'" - - entry = entry.strip() - if entry[-1] != ".": - entry += "." 
- - item["entry"] = entry - item["category"] = category - - return True - - -# This array gives the preferred category order, and is also used to -# normalize category names. -categories_preferred_order = [ - "Backward Incompatible Change", - "New Feature", - "Performance Improvement", - "Improvement", - "Bug Fix", - "Build/Testing/Packaging Improvement", - "Other", -] - -category_to_pr = collections.defaultdict(lambda: []) -users = {} -for line in args.file: - pr = json.loads(open(f"pr{line.strip()}.json").read()) - assert pr["number"] - if not parse_one_pull_request(pr): - continue - - assert pr["category"] - - # Normalize category name - for c in categories_preferred_order: - if fuzzywuzzy.fuzz.ratio(pr["category"].lower(), c.lower()) >= 90: - pr["category"] = c - break - - category_to_pr[pr["category"]].append(pr) - user_id = pr["user"]["id"] - users[user_id] = json.loads(open(f"user{user_id}.json").read()) - - -def print_category(category): - print(("#### " + category)) - print() - for pr in category_to_pr[category]: - user = users[pr["user"]["id"]] - user_name = user["name"] if user["name"] else user["login"] - - # Substitute issue links. - # 1) issue number w/o markdown link - pr["entry"] = re.sub( - r"([^[])#([0-9]{4,})", - r"\1[#\2](https://github.com/ClickHouse/ClickHouse/issues/\2)", - pr["entry"], - ) - # 2) issue URL w/o markdown link - pr["entry"] = re.sub( - r"([^(])https://github.com/ClickHouse/ClickHouse/issues/([0-9]{4,})", - r"\1[#\2](https://github.com/ClickHouse/ClickHouse/issues/\2)", - pr["entry"], - ) - - print( - f'* {pr["entry"]} [#{pr["number"]}]({pr["html_url"]}) ([{user_name}]({user["html_url"]})).' - ) - - print() - - -# Print categories in preferred order -for category in categories_preferred_order: - if category in category_to_pr: - print_category(category) - category_to_pr.pop(category) - -# Print the rest of the categories -for category in category_to_pr: - print_category(category) diff --git a/utils/changelog/changelog.py b/utils/changelog/changelog.py index 3b66b68193b..6b70952eced 100755 --- a/utils/changelog/changelog.py +++ b/utils/changelog/changelog.py @@ -126,8 +126,8 @@ def get_descriptions(prs: PullRequests) -> Dict[str, List[Description]]: def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter, - description="Generate a changelog in MD format between given tags. " - "It fetches all tags and unshallow the git repositore automatically", + description="Generate a changelog in Markdown format between given tags. " + "It fetches all tags and unshallow the git repository automatically", ) parser.add_argument( "-v", @@ -243,6 +243,14 @@ def generate_description(item: PullRequest, repo: Repository) -> Optional[Descri else: i += 1 + # Remove excessive bullets from the entry. + if re.match(r"^[\-\*] ", entry): + entry = entry[2:] + + # Better style. + if re.match(r"^[a-z]", entry): + entry = entry.capitalize() + if not category: # Shouldn't happen, because description check in CI should catch such PRs. # Fall through, so that it shows up in output and the user can fix it. 
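The entry normalization added in the hunk above is worth a quick illustration, especially because `str.capitalize()` does more than uppercase the first letter. A minimal standalone sketch (the `normalize_entry` helper is hypothetical and only mirrors the two added checks):

```python
import re


def normalize_entry(entry: str) -> str:
    # Remove an excessive leading bullet ("- " or "* "), as in the hunk above.
    if re.match(r"^[\-\*] ", entry):
        entry = entry[2:]
    # "Better style": capitalize a lowercase first letter. Note that
    # str.capitalize() also lowercases the rest of the string, so proper
    # nouns later in the entry (e.g. "MergeTree") would lose their casing.
    if re.match(r"^[a-z]", entry):
        entry = entry.capitalize()
    return entry


print(normalize_entry("- fix rare crash on shutdown"))  # Fix rare crash on shutdown
```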
@@ -256,6 +264,14 @@ def generate_description(item: PullRequest, repo: Repository) -> Optional[Descri category = "NOT FOR CHANGELOG / INSIGNIFICANT" return Description(item.number, item.user, item.html_url, item.title, category) + # Normalize bug fixes + if re.match( + r"(?i)bug\Wfix", + category, + ): + category = "Bug Fix (user-visible misbehavior in an official stable release)" + return Description(item.number, item.user, item.html_url, item.title, category) + # Filter out documentations changelog if re.match( r"(?i)doc", diff --git a/utils/check-mysql-binlog/main.cpp b/utils/check-mysql-binlog/main.cpp index 7dd387ba5be..68558340180 100644 --- a/utils/check-mysql-binlog/main.cpp +++ b/utils/check-mysql-binlog/main.cpp @@ -17,7 +17,8 @@ static DB::MySQLReplication::BinlogEventPtr parseSingleEventBody( std::shared_ptr & last_table_map_event, bool exist_checksum) { DB::MySQLReplication::BinlogEventPtr event; - DB::ReadBufferPtr limit_read_buffer = std::make_shared(payload, header.event_size - 19, false); + DB::ReadBufferPtr limit_read_buffer = std::make_shared(payload, header.event_size - 19, + /* trow_exception */ false, /* exact_limit */ std::nullopt); DB::ReadBufferPtr event_payload = std::make_shared(*limit_read_buffer, exist_checksum ? 4 : 0); switch (header.type) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 1ad7432a5bf..65844a4946e 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1,4 +1,4 @@ -personal_ws-1.1 en 484 +personal_ws-1.1 en 543 AArch ACLs AMQP @@ -24,7 +24,9 @@ CapnProto CentOS ClickHouse ClickHouse's +ClickableSquare CodeBlock +CodeLLDB Config ConnectionDetails Contrib @@ -52,13 +54,14 @@ HDDs Heredoc Homebrew Homebrew's +HorizontalDivide Hostname +INSERTed IPv IntN Integrations -invariants -JSONAsString JSONAsObject +JSONAsString JSONColumns JSONColumnsWithMetadata JSONCompact @@ -82,13 +85,17 @@ Jemalloc Jepsen KDevelop LGPL +LLDB +LLVM's LOCALTIME LOCALTIMESTAMP LibFuzzer LineAsString +LinksDeployment LowCardinality MEMTABLE MSan +MVCC MacOS Memcheck MemorySanitizer @@ -104,14 +111,17 @@ NULLIF NVME NYPD NuRaft -ObjectId -Ok OLAP OLTP +ObjectId +Observability +Ok +OpenSSL OpenSUSE OpenStack OpenTelemetry PAAMAYIM +ParquetMetadata Parsers Postgres Precompiled @@ -119,6 +129,7 @@ PrettyCompact PrettyCompactMonoBlock PrettyCompactNoEscapes PrettyCompactNoEscapesMonoBlock +PrettyJSONEachRow PrettyMonoBlock PrettyNoEscapes PrettyNoEscapesMonoBlock @@ -128,10 +139,10 @@ PrettySpaceNoEscapes PrettySpaceNoEscapesMonoBlock Protobuf ProtobufSingle +QEMU QTCreator QueryCacheHits QueryCacheMisses -QEMU RBAC RawBLOB RedHat @@ -141,8 +152,10 @@ RowBinaryWithNames RowBinaryWithNamesAndTypes Runtime SATA +SELECTs SERIALIZABLE SIMD +SLES SMALLINT SQLInsert SQLSTATE @@ -167,6 +180,8 @@ Testflows Tgz Toolset Tradeoff +Transactional +TwoColumnList UBSan UInt UIntN @@ -176,16 +191,18 @@ Updatable Util Valgrind Vectorized +VideoContainer +ViewAllLink VirtualBox Werror +WithNamesAndTypes Woboq WriteBuffer WriteBuffers -WithNamesAndTypes XCode YAML -Yasm YYYY +Yasm Zipkin ZooKeeper ZooKeeper's @@ -195,11 +212,13 @@ analytics anonymized ansi async +atomicity autogeneration autostart avro avx aws +backend backoff backticks benchmarking @@ -275,6 +294,7 @@ dmesg dont dragonbox durations +encodings endian enum exFAT @@ -286,6 +306,7 @@ flatbuffers fmtlib formatschema formatter +fsync fuzzer fuzzers gRPC @@ -309,6 +330,7 @@ instantiation 
integrational integrations interserver +invariants jdbc jemalloc json @@ -333,8 +355,8 @@ jsonstringseachrowwithprogress kafka kafkacat konsole -latencies laion +latencies lexicographically libFuzzer libc @@ -358,11 +380,13 @@ linearizability linearizable lineasstring linefeeds +lineorder linux llvm localhost macOS mariadb +mdadm miniselect msgpack msgpk @@ -373,13 +397,15 @@ mutex mysql mysqldump mysqljs +natively noop -nullable nullability +nullable num obfuscator odbc ok +openSUSE openldap opentelemetry overcommit @@ -407,6 +433,7 @@ prettycompact prettycompactmonoblock prettycompactnoescapes prettycompactnoescapesmonoblock +prettyjsoneachrow prettymonoblock prettynoescapes prettynoescapesmonoblock @@ -500,6 +527,7 @@ tokenization toml toolchain toolset +transactional transactionally tskv tsv @@ -532,6 +560,7 @@ xcode xml xz zLib +zLinux zkcopy zlib znodes diff --git a/utils/check-style/check-black b/utils/check-style/check-black index 141dcd1b406..33c463d1668 100755 --- a/utils/check-style/check-black +++ b/utils/check-style/check-black @@ -4,10 +4,22 @@ set -e # We check only our code, that's why we skip contrib GIT_ROOT=$(git rev-parse --show-cdup) -GIT_ROOT=${GIT_ROOT:-.} +GIT_ROOT=${GIT_ROOT:-./} tmp=$(mktemp) -# Find all *.py files in the repo except the contrib directory -find_cmd=(find "$GIT_ROOT" -name '*.py' -not -path "$GIT_ROOT/contrib/*") + +# Find all *.py, *.python files and executable files without extension +# that are determined as python scripts by 'file' util +# in the repo except the contrib directory. +find_cmd=( + find "$GIT_ROOT" -type f -not -path "${GIT_ROOT}contrib/*" + \( + -name '*.py' -or -name "*.python" -or + \( + -executable -not -name "*.*" -exec sh -c 'file {} | grep -q "Python script"' \; + \) + \) +) + if ! "${find_cmd[@]}" -exec black --check --diff {} + 1>"$tmp" 2>&1; then # Show the result only if some files need formatting cat "$tmp" @@ -16,4 +28,5 @@ if ! "${find_cmd[@]}" -exec black --check --diff {} + 1>"$tmp" 2>&1; then # Automatically add changed files to stage "${find_cmd[@]}" -exec git add -u {} + 1>/dev/null 2>&1 fi + rm "$tmp" diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 53165d14f96..afaf2ee6d48 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -13,7 +13,7 @@ # and then to run formatter only for the specified files. ROOT_PATH=$(git rev-parse --show-toplevel) -EXCLUDE_DIRS='build/|integration/|widechar_width/|glibc-compatibility/|poco/|memcpy/|consistent-hashing|benchmark|tests/' +EXCLUDE_DIRS='build/|integration/|widechar_width/|glibc-compatibility/|poco/|memcpy/|consistent-hashing|benchmark|tests/|utils/keeper-bench/example.yaml' # From [1]: # But since array_to_string_internal() in array.c still loops over array @@ -67,6 +67,7 @@ EXTERN_TYPES_EXCLUDES=( ProfileEvents::Counters ProfileEvents::end ProfileEvents::increment + ProfileEvents::incrementForLogMessage ProfileEvents::getName ProfileEvents::Type ProfileEvents::TypeEnum @@ -78,6 +79,7 @@ EXTERN_TYPES_EXCLUDES=( CurrentMetrics::add CurrentMetrics::sub + CurrentMetrics::get CurrentMetrics::set CurrentMetrics::end CurrentMetrics::Increment @@ -165,7 +167,7 @@ find $ROOT_PATH/tests/queries -iname '*fail*' | grep . && echo 'Tests should not be named with "fail" in their names. It makes looking at the results less convenient when you search for "fail" substring in browser.' 
# Queries to system.query_log/system.query_thread_log should have current_database = currentDatabase() condition -# NOTE: it is not that accuate, but at least something. +# NOTE: it is not that accurate, but at least something. tests_with_query_log=( $( find $ROOT_PATH/tests/queries -iname '*.sql' -or -iname '*.sh' -or -iname '*.py' -or -iname '*.j2' | grep -vP $EXCLUDE_DIRS | @@ -177,6 +179,17 @@ for test_case in "${tests_with_query_log[@]}"; do } || echo "Queries to system.query_log/system.query_thread_log does not have current_database = currentDatabase() condition in $test_case" done +# There shouldn't be large jumps between test numbers (since they should be consecutive) +max_diff=$( + find $ROOT_PATH/tests/queries -iname '*.sql' -or -iname '*.sh' -or -iname '*.py' -or -iname '*.j2' | + grep -oP '\d+\D+\K\d+' | sort -n -u | awk 's{print ($0-s) " diff " s " and " $0 }{s=$0}' | sort -n | tail -n 1 +) +max_diff_value=( $(echo $max_diff) ) +if [[ $max_diff_value -ge 100 ]]; +then + echo "Too big of a difference between test numbers: $max_diff" +fi + # Queries to: tables_with_database_column=( system.tables @@ -392,3 +405,9 @@ find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | # Check for existence of __init__.py files for i in "${ROOT_PATH}"/tests/integration/test_*; do FILE="${i}/__init__.py"; [ ! -f "${FILE}" ] && echo "${FILE} should exist for every integration test"; done + +# A small typo can lead to debug code in release builds, see https://github.com/ClickHouse/ClickHouse/pull/47647 +find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -l -F '#ifdef NDEBUG' | xargs -I@FILE awk '/#ifdef NDEBUG/ { inside = 1; dirty = 1 } /#endif/ { if (inside && dirty) { print "File @FILE has suspicious #ifdef NDEBUG, possibly confused with #ifndef NDEBUG" }; inside = 0 } /#else/ { dirty = 0 }' @FILE + +# If a user is doing dynamic or typeid cast with a pointer, and immediately dereferencing it, it is unsafe. +find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep --line-number -P '(dynamic|typeid)_cast<[^>]+\*>\([^\(\)]+\)->' | grep -P '.' && echo "It's suspicious when you are doing a dynamic_cast or typeid_cast with a pointer and immediately dereferencing it. Use references instead of pointers or check a pointer to nullptr." 
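The awk pipeline in the new test-number check above is terse; a rough Python equivalent of the same gap detection may make the intent clearer (a sketch only: it assumes the numeric prefixes are taken from the test file names passed as arguments, and `max_gap` is a made-up name):

```python
import re
import sys


def max_gap(test_names):
    # Largest gap between consecutive unique test numbers, as (diff, prev, curr).
    numbers = sorted(
        {int(m.group(0)) for name in test_names if (m := re.match(r"\d+", name))}
    )
    return max(
        ((b - a, a, b) for a, b in zip(numbers, numbers[1:])),
        default=(0, 0, 0),
    )


diff, prev, curr = max_gap(sys.argv[1:])
if diff >= 100:
    print(f"Too big of a difference between test numbers: {diff} diff {prev} and {curr}")
```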
diff --git a/utils/check-style/shellcheck-run.sh b/utils/check-style/shellcheck-run.sh
index c0063d4b191..bdb0f681c31 100755
--- a/utils/check-style/shellcheck-run.sh
+++ b/utils/check-style/shellcheck-run.sh
@@ -1,9 +1,14 @@
#!/usr/bin/env bash
ROOT_PATH=$(git rev-parse --show-toplevel)
-EXCLUDE_DIRS='build/|integration/|widechar_width/|glibc-compatibility/|memcpy/|consistent-hashing/|Parsers/New'
+NPROC=$(($(nproc) + 3))
# Check sh tests with Shellcheck
-(cd $ROOT_PATH/tests/queries/0_stateless/ && shellcheck --check-sourced --external-sources --severity info --exclude SC1071,SC2086,SC2016 *.sh ../1_stateful/*.sh)
+( cd "$ROOT_PATH/tests/queries/0_stateless/" && \
+  find "$ROOT_PATH/tests/queries/"{0_stateless,1_stateful} -name '*.sh' -print0 | \
+    xargs -0 -P "$NPROC" -n 20 shellcheck --check-sourced --external-sources --severity info --exclude SC1071,SC2086,SC2016
+)
# Check docker scripts with shellcheck
-find "$ROOT_PATH/docker" -executable -type f -exec file -F' ' --mime-type {} \; | awk -F' ' '$2==" text/x-shellscript" {print $1}' | grep -v "entrypoint.alpine.sh" | grep -v "compare.sh"| xargs shellcheck
-
+find "$ROOT_PATH/docker" -executable -type f -exec file -F' ' --mime-type {} \; | \
+    awk -F' ' '$2==" text/x-shellscript" {print $1}' | \
+    grep -v "compare.sh" | \
+    xargs -P "$NPROC" -n 20 shellcheck
diff --git a/utils/checksum-for-compressed-block/main.cpp b/utils/checksum-for-compressed-block/main.cpp
index 27a2154340e..4f9923e7638 100644
--- a/utils/checksum-for-compressed-block/main.cpp
+++ b/utils/checksum-for-compressed-block/main.cpp
@@ -2,7 +2,7 @@
#include
#include
#include
-#include
+#include

/** A tool to easily prove if "Checksum doesn't match: corrupted data"
diff --git a/utils/ci-slack-bot/ci-slack-bot.py b/utils/ci-slack-bot/ci-slack-bot.py
new file mode 100755
index 00000000000..6e694b4fdbd
--- /dev/null
+++ b/utils/ci-slack-bot/ci-slack-bot.py
@@ -0,0 +1,298 @@
+#!/usr/bin/env python3
+
+# A trivial stateless Slack bot that notifies about new broken tests in ClickHouse CI.
+# It checks what happened to our CI during the last check_period hours (1 hour) and notifies us in Slack if necessary.
+# This script should be executed once each check_period hours (1 hour).
+# It will post duplicate messages if you run it more often; it will lose some messages if you run it less often.
+#
+# You can run it locally with no arguments, it will work in a dry-run mode. Or you can set your own SLACK_URL_DEFAULT.
+# Feel free to add more checks, more details to messages, or better heuristics.
+# NOTE There's no deployment automation for now,
+# an AWS Lambda (slack-ci-bot-test lambda in CI-CD) has to be updated manually after changing this script.
+#
+# See also: https://aretestsgreenyet.com/
+
+import os
+import json
+import base64
+import random
+
+if os.environ.get("AWS_LAMBDA_ENV", "0") == "1":
+    # For AWS Lambda (python 3.7)
+    from botocore.vendored import requests
+else:
+    # For running locally
+    import requests
+
+DRY_RUN_MARK = ""
+
+MAX_FAILURES_DEFAULT = 40
+SLACK_URL_DEFAULT = DRY_RUN_MARK
+
+FLAKY_ALERT_PROBABILITY = 0.20
+
+MAX_TESTS_TO_REPORT = 4
+
+# Slack has a stupid limitation on message size, it splits long messages into multiple ones breaking formatting
+MESSAGE_LENGTH_LIMIT = 4000
+
+# Find tests that failed in master during the last check_period * 24 hours,
+# but did not fail during the last 2 weeks. Assuming these tests were broken recently.
+# Counts number of failures in check_period and check_period * 24 time windows +# to distinguish rare flaky tests from completely broken tests +NEW_BROKEN_TESTS_QUERY = """ +WITH + 1 AS check_period, + check_period * 24 AS extended_check_period, + now() as now +SELECT + test_name, + any(report_url), + countIf((check_start_time + check_duration_ms / 1000) < now - INTERVAL check_period HOUR) AS count_prev_periods, + countIf((check_start_time + check_duration_ms / 1000) >= now - INTERVAL check_period HOUR) AS count +FROM checks +WHERE 1 + AND check_start_time BETWEEN now - INTERVAL 1 WEEK AND now + AND (check_start_time + check_duration_ms / 1000) >= now - INTERVAL extended_check_period HOUR + AND pull_request_number = 0 + AND test_status LIKE 'F%' + AND check_status != 'success' + AND test_name NOT IN ( + SELECT test_name FROM checks WHERE 1 + AND check_start_time >= now - INTERVAL 1 MONTH + AND (check_start_time + check_duration_ms / 1000) BETWEEN now - INTERVAL 2 WEEK AND now - INTERVAL extended_check_period HOUR + AND pull_request_number = 0 + AND check_status != 'success' + AND test_status LIKE 'F%') + AND test_context_raw NOT LIKE '%CannotSendRequest%' and test_context_raw NOT LIKE '%Server does not respond to health check%' +GROUP BY test_name +ORDER BY (count_prev_periods + count) DESC +""" + +# Returns total number of failed checks during the last 24 hours +# and previous value of that metric (check_period hours ago) +COUNT_FAILURES_QUERY = """ +WITH + 1 AS check_period, + '%' AS check_name_pattern, + now() as now +SELECT + countIf((check_start_time + check_duration_ms / 1000) >= now - INTERVAL 24 HOUR) AS new_val, + countIf((check_start_time + check_duration_ms / 1000) <= now - INTERVAL check_period HOUR) AS prev_val +FROM checks +WHERE 1 + AND check_start_time >= now - INTERVAL 1 WEEK + AND (check_start_time + check_duration_ms / 1000) >= now - INTERVAL 24 + check_period HOUR + AND pull_request_number = 0 + AND test_status LIKE 'F%' + AND check_status != 'success' + AND check_name ILIKE check_name_pattern +""" + +# It shows all recent failures of the specified test (helps to find when it started) +ALL_RECENT_FAILURES_QUERY = """ +WITH + '{}' AS name_substr, + 90 AS interval_days, + ('Stateless tests (asan)', 'Stateless tests (address)', 'Stateless tests (address, actions)') AS backport_and_release_specific_checks +SELECT + toStartOfDay(check_start_time) AS d, + count(), + groupUniqArray(pull_request_number) AS prs, + any(report_url) +FROM checks +WHERE ((now() - toIntervalDay(interval_days)) <= check_start_time) AND (pull_request_number NOT IN ( + SELECT pull_request_number AS prn + FROM checks + WHERE (prn != 0) AND ((now() - toIntervalDay(interval_days)) <= check_start_time) AND (check_name IN (backport_and_release_specific_checks)) +)) AND (position(test_name, name_substr) > 0) AND (test_status IN ('FAIL', 'ERROR', 'FLAKY')) +GROUP BY d +ORDER BY d DESC +""" + +SLACK_MESSAGE_JSON = {"type": "mrkdwn", "text": None} + + +def get_play_url(query): + return ( + "https://play.clickhouse.com/play?user=play#" + + base64.b64encode(query.encode()).decode() + ) + + +def run_clickhouse_query(query): + url = "https://play.clickhouse.com/?user=play&query=" + requests.utils.quote(query) + res = requests.get(url) + if res.status_code != 200: + print("Failed to execute query: ", res.status_code, res.content) + raise Exception( + "Failed to execute query: {}: {}".format(res.status_code, res.content) + ) + + lines = res.text.strip().splitlines() + return [x.split("\t") for x in lines] + + +def 
split_broken_and_flaky_tests(failed_tests):
+    if not failed_tests:
+        return None
+
+    broken_tests = []
+    flaky_tests = []
+    for name, report, count_prev_str, count_str in failed_tests:
+        count_prev, count = int(count_prev_str), int(count_str)
+        if (2 <= count and count_prev < 2) or (count_prev == 1 and count == 1):
+            # It failed 2 times or more within extended time window, it's definitely broken.
+            # count_prev < 2 means that it was not reported as broken on previous runs
+            broken_tests.append([name, report])
+        elif 0 < count and count_prev == 0:
+            # It failed only once, can be a rare flaky test
+            flaky_tests.append([name, report])
+
+    return broken_tests, flaky_tests
+
+
+def format_failed_tests_list(failed_tests, failure_type):
+    if len(failed_tests) == 1:
+        res = "There is a new {} test:\n".format(failure_type)
+    else:
+        res = "There are {} new {} tests:\n".format(len(failed_tests), failure_type)
+
+    for name, report in failed_tests[:MAX_TESTS_TO_REPORT]:
+        cidb_url = get_play_url(ALL_RECENT_FAILURES_QUERY.format(name))
+        res += "- *{}* - <{}|Report> - <{}|CI DB> \n".format(
+            name, report, cidb_url
+        )
+
+    if MAX_TESTS_TO_REPORT < len(failed_tests):
+        res += "- and {} other tests... :this-is-fine-fire:".format(
+            len(failed_tests) - MAX_TESTS_TO_REPORT
+        )
+
+    return res
+
+
+def get_new_broken_tests_message(failed_tests):
+    if not failed_tests:
+        return None
+
+    broken_tests, flaky_tests = split_broken_and_flaky_tests(failed_tests)
+    if len(broken_tests) == 0 and len(flaky_tests) == 0:
+        return None
+
+    msg = ""
+    if len(broken_tests) > 0:
+        msg += format_failed_tests_list(broken_tests, "*BROKEN*")
+    elif random.random() > FLAKY_ALERT_PROBABILITY:
+        # When there are no broken tests, report flaky ones only with
+        # FLAKY_ALERT_PROBABILITY; fuzzer-like failures are always reported.
+        looks_like_fuzzer = [x[0].count(" ") > 2 for x in flaky_tests]
+        if not any(looks_like_fuzzer):
+            print("Will not report flaky tests to avoid noise: ", flaky_tests)
+            return None
+
+    if len(flaky_tests) > 0:
+        if len(msg) > 0:
+            msg += "\n"
+        msg += format_failed_tests_list(flaky_tests, "flaky")
+
+    return msg
+
+
+def get_too_many_failures_message_impl(failures_count):
+    MAX_FAILURES = int(os.environ.get("MAX_FAILURES", MAX_FAILURES_DEFAULT))
+    curr_failures = int(failures_count[0][0])
+    prev_failures = int(failures_count[0][1])
+    if curr_failures == 0 and prev_failures != 0:
+        return (
+            "Looks like CI is completely broken: there are *no failures* at all... 0_o"
+        )
+    if curr_failures < MAX_FAILURES:
+        return None
+    if prev_failures < MAX_FAILURES:
+        return ":alert: *CI is broken: there are {} failures during the last 24 hours*".format(
+            curr_failures
+        )
+    if curr_failures < prev_failures:
+        return None
+    if (curr_failures - prev_failures) / prev_failures < 0.2:
+        return None
+    return "CI is broken and it's getting worse: there are {} failures during the last 24 hours".format(
+        curr_failures
+    )
+
+
+def get_too_many_failures_message(failures_count):
+    msg = get_too_many_failures_message_impl(failures_count)
+    if msg:
+        msg += "\nSee https://aretestsgreenyet.com/"
+    return msg
+
+
+def split_slack_message(long_message):
+    lines = long_message.split("\n")
+    messages = []
+    curr_msg = ""
+    for line in lines:
+        if len(curr_msg) + len(line) < MESSAGE_LENGTH_LIMIT:
+            curr_msg += "\n"
+            curr_msg += line
+        else:
+            messages.append(curr_msg)
+            curr_msg = line
+    messages.append(curr_msg)
+    return messages
+
+
+def send_to_slack_impl(message):
+    SLACK_URL = os.environ.get("SLACK_URL", SLACK_URL_DEFAULT)
+    if SLACK_URL == DRY_RUN_MARK:
+        return
+
+    payload = SLACK_MESSAGE_JSON.copy()
+    payload["text"] = message
+    res = requests.post(SLACK_URL, json.dumps(payload))
+    if res.status_code != 200:
+        print("Failed to send a message to Slack: ", res.status_code, res.content)
+        raise Exception(
+            "Failed to send a message to Slack: {}: {}".format(
+                res.status_code, res.content
+            )
+        )
+
+
+def send_to_slack(message):
+    messages = split_slack_message(message)
+    for msg in messages:
+        send_to_slack_impl(msg)
+
+
+def query_and_alert_if_needed(query, get_message_func):
+    query_res = run_clickhouse_query(query)
+    print("Got result {} for query {}".format(query_res, query))
+    msg = get_message_func(query_res)
+    if msg is None:
+        return
+
+    msg += "\nCI DB query: <{}|link>".format(get_play_url(query))
+    print("Sending message to Slack:", msg)
+    send_to_slack(msg)
+
+
+def check_and_alert():
+    query_and_alert_if_needed(NEW_BROKEN_TESTS_QUERY, get_new_broken_tests_message)
+    query_and_alert_if_needed(COUNT_FAILURES_QUERY, get_too_many_failures_message)
+
+
+def lambda_handler(event, context):
+    try:
+        check_and_alert()
+        return {"statusCode": 200, "body": "OK"}
+    except Exception as e:
+        send_to_slack(
+            "I failed, please help me (see ClickHouse/utils/ci-slack-bot/ci-slack-bot.py): "
+            + str(e)
+        )
+        return {"statusCode": 200, "body": "FAIL"}
+
+
+if __name__ == "__main__":
+    check_and_alert()
diff --git a/utils/clickhouse-diagnostics/clickhouse-diagnostics b/utils/clickhouse-diagnostics/clickhouse-diagnostics
index cf65e4efbfb..5cacbf1d4d4 100755
--- a/utils/clickhouse-diagnostics/clickhouse-diagnostics
+++ b/utils/clickhouse-diagnostics/clickhouse-diagnostics
@@ -19,9 +19,9 @@ import tenacity
import xmltodict
import yaml

-SELECT_VERSION = r'SELECT version()'
+SELECT_VERSION = r"SELECT version()"

-SELECT_UPTIME = r'''
+SELECT_UPTIME = r"""
{% if version_ge('21.3') -%}
SELECT formatReadableTimeDelta(uptime())
{% else -%}
@@ -29,18 +29,18 @@ SELECT
toString(floor(uptime() / 3600 / 24)) || ' days ' ||
toString(floor(uptime() % (24 * 3600) / 3600, 1)) || ' hours'
{% endif -%}
-'''
+"""

SELECT_SYSTEM_TABLES = "SELECT name FROM system.tables WHERE database = 'system'"

-SELECT_DATABASE_ENGINES = r'''SELECT
+SELECT_DATABASE_ENGINES = r"""SELECT
    engine,
    count() "count"
FROM system.databases
GROUP BY engine
-'''
+"""

-SELECT_DATABASES = r'''SELECT
+SELECT_DATABASES = r"""SELECT
    name,
    engine,
    tables,
@@ -62,17 +62,17 @@ LEFT JOIN
) AS db_stats ON db.name = db_stats.database
ORDER BY 
bytes_on_disk DESC LIMIT 10 -''' +""" -SELECT_TABLE_ENGINES = r'''SELECT +SELECT_TABLE_ENGINES = r"""SELECT engine, count() "count" FROM system.tables WHERE database != 'system' GROUP BY engine -''' +""" -SELECT_DICTIONARIES = r'''SELECT +SELECT_DICTIONARIES = r"""SELECT source, type, status, @@ -80,13 +80,13 @@ SELECT_DICTIONARIES = r'''SELECT FROM system.dictionaries GROUP BY source, type, status ORDER BY status DESC, source -''' +""" SELECT_ACCESS = "SHOW ACCESS" SELECT_QUOTA_USAGE = "SHOW QUOTA" -SELECT_REPLICAS = r'''SELECT +SELECT_REPLICAS = r"""SELECT database, table, is_leader, @@ -98,9 +98,9 @@ SELECT_REPLICAS = r'''SELECT FROM system.replicas ORDER BY absolute_delay DESC LIMIT 10 -''' +""" -SELECT_REPLICATION_QUEUE = r'''SELECT +SELECT_REPLICATION_QUEUE = r"""SELECT database, table, replica_name, @@ -121,9 +121,9 @@ SELECT_REPLICATION_QUEUE = r'''SELECT FROM system.replication_queue ORDER BY create_time ASC LIMIT 20 -''' +""" -SELECT_REPLICATED_FETCHES = r'''SELECT +SELECT_REPLICATED_FETCHES = r"""SELECT database, table, round(elapsed, 1) "elapsed", @@ -140,9 +140,9 @@ SELECT_REPLICATED_FETCHES = r'''SELECT to_detached, thread_id FROM system.replicated_fetches -''' +""" -SELECT_PARTS_PER_TABLE = r'''SELECT +SELECT_PARTS_PER_TABLE = r"""SELECT database, table, count() "partitions", @@ -162,9 +162,9 @@ FROM GROUP BY database, table ORDER BY max_parts_per_partition DESC LIMIT 10 -''' +""" -SELECT_MERGES = r'''SELECT +SELECT_MERGES = r"""SELECT database, table, round(elapsed, 1) "elapsed", @@ -187,9 +187,9 @@ SELECT_MERGES = r'''SELECT formatReadableSize(memory_usage) "memory_usage" {% endif -%} FROM system.merges -''' +""" -SELECT_MUTATIONS = r'''SELECT +SELECT_MUTATIONS = r"""SELECT database, table, mutation_id, @@ -206,9 +206,9 @@ SELECT_MUTATIONS = r'''SELECT FROM system.mutations WHERE NOT is_done ORDER BY create_time DESC -''' +""" -SELECT_RECENT_DATA_PARTS = r'''SELECT +SELECT_RECENT_DATA_PARTS = r"""SELECT database, table, engine, @@ -242,9 +242,9 @@ SELECT_RECENT_DATA_PARTS = r'''SELECT FROM system.parts WHERE modification_time > now() - INTERVAL 3 MINUTE ORDER BY modification_time DESC -''' +""" -SELECT_DETACHED_DATA_PARTS = r'''SELECT +SELECT_DETACHED_DATA_PARTS = r"""SELECT database, table, partition_id, @@ -255,9 +255,9 @@ SELECT_DETACHED_DATA_PARTS = r'''SELECT max_block_number, level FROM system.detached_parts -''' +""" -SELECT_PROCESSES = r'''SELECT +SELECT_PROCESSES = r"""SELECT elapsed, query_id, {% if normalize_queries -%} @@ -285,9 +285,9 @@ SELECT_PROCESSES = r'''SELECT {% endif -%} FROM system.processes ORDER BY elapsed DESC -''' +""" -SELECT_TOP_QUERIES_BY_DURATION = r'''SELECT +SELECT_TOP_QUERIES_BY_DURATION = r"""SELECT type, query_start_time, query_duration_ms, @@ -339,9 +339,9 @@ WHERE type != 'QueryStart' AND event_time >= now() - INTERVAL 1 DAY ORDER BY query_duration_ms DESC LIMIT 10 -''' +""" -SELECT_TOP_QUERIES_BY_MEMORY_USAGE = r'''SELECT +SELECT_TOP_QUERIES_BY_MEMORY_USAGE = r"""SELECT type, query_start_time, query_duration_ms, @@ -393,9 +393,9 @@ WHERE type != 'QueryStart' AND event_time >= now() - INTERVAL 1 DAY ORDER BY memory_usage DESC LIMIT 10 -''' +""" -SELECT_FAILED_QUERIES = r'''SELECT +SELECT_FAILED_QUERIES = r"""SELECT type, query_start_time, query_duration_ms, @@ -448,9 +448,9 @@ WHERE type != 'QueryStart' AND exception != '' ORDER BY query_start_time DESC LIMIT 10 -''' +""" -SELECT_STACK_TRACES = r'''SELECT +SELECT_STACK_TRACES = r"""SELECT '\n' || arrayStringConcat( arrayMap( x, @@ -459,9 +459,9 @@ SELECT_STACK_TRACES = r'''SELECT 
arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\n') AS trace FROM system.stack_trace -''' +""" -SELECT_CRASH_LOG = r'''SELECT +SELECT_CRASH_LOG = r"""SELECT event_time, signal, thread_id, @@ -470,7 +470,7 @@ SELECT_CRASH_LOG = r'''SELECT version FROM system.crash_log ORDER BY event_time DESC -''' +""" def retry(exception_types, max_attempts=5, max_interval=5): @@ -481,7 +481,8 @@ def retry(exception_types, max_attempts=5, max_interval=5): retry=tenacity.retry_if_exception_type(exception_types), wait=tenacity.wait_random_exponential(multiplier=0.5, max=max_interval), stop=tenacity.stop_after_attempt(max_attempts), - reraise=True) + reraise=True, + ) class ClickhouseError(Exception): @@ -502,9 +503,9 @@ class ClickhouseClient: def __init__(self, *, host="localhost", port=8123, user="default", password): self._session = requests.Session() if user: - self._session.headers['X-ClickHouse-User'] = user - self._session.headers['X-ClickHouse-Key'] = password - self._url = f'http://{host}:{port}' + self._session.headers["X-ClickHouse-User"] = user + self._session.headers["X-ClickHouse-Key"] = password + self._url = f"http://{host}:{port}" self._timeout = 60 self._ch_version = None @@ -516,7 +517,16 @@ class ClickhouseClient: return self._ch_version @retry(requests.exceptions.ConnectionError) - def query(self, query, query_args=None, format=None, post_data=None, timeout=None, echo=False, dry_run=False): + def query( + self, + query, + query_args=None, + format=None, + post_data=None, + timeout=None, + echo=False, + dry_run=False, + ): """ Execute query. """ @@ -524,28 +534,30 @@ class ClickhouseClient: query = self.render_query(query, **query_args) if format: - query += f' FORMAT {format}' + query += f" FORMAT {format}" if timeout is None: timeout = self._timeout if echo: - print(sqlparse.format(query, reindent=True), '\n') + print(sqlparse.format(query, reindent=True), "\n") if dry_run: return None try: - response = self._session.post(self._url, - params={ - 'query': query, - }, - json=post_data, - timeout=timeout) + response = self._session.post( + self._url, + params={ + "query": query, + }, + json=post_data, + timeout=timeout, + ) response.raise_for_status() - if format in ('JSON', 'JSONCompact'): + if format in ("JSON", "JSONCompact"): return response.json() return response.text.strip() @@ -555,7 +567,9 @@ class ClickhouseClient: def render_query(self, query, **kwargs): env = jinja2.Environment() - env.globals['version_ge'] = lambda version: version_ge(self.clickhouse_version, version) + env.globals["version_ge"] = lambda version: version_ge( + self.clickhouse_version, version + ) template = env.from_string(query) return template.render(kwargs) @@ -578,11 +592,13 @@ class ClickhouseConfig: @classmethod def load(cls): - return ClickhouseConfig(cls._load_config('/var/lib/clickhouse/preprocessed_configs/config.xml')) + return ClickhouseConfig( + cls._load_config("/var/lib/clickhouse/preprocessed_configs/config.xml") + ) @staticmethod def _load_config(config_path): - with open(config_path, 'r') as file: + with open(config_path, "r") as file: return xmltodict.parse(file.read()) @classmethod @@ -591,8 +607,8 @@ class ClickhouseConfig: for key, value in list(config.items()): if isinstance(value, MutableMapping): cls._mask_secrets(config[key]) - elif key in ('password', 'secret_access_key', 'header', 'identity'): - config[key] = '*****' + elif key in ("password", "secret_access_key", "header", "identity"): + config[key] = "*****" class DiagnosticsData: @@ -603,53 +619,53 @@ class DiagnosticsData: 
def __init__(self, args): self.args = args self.host = args.host - self._sections = [{'section': None, 'data': {}}] + self._sections = [{"section": None, "data": {}}] def add_string(self, name, value, section=None): self._section(section)[name] = { - 'type': 'string', - 'value': value, + "type": "string", + "value": value, } def add_xml_document(self, name, document, section=None): self._section(section)[name] = { - 'type': 'xml', - 'value': document, + "type": "xml", + "value": document, } def add_query(self, name, query, result, section=None): self._section(section)[name] = { - 'type': 'query', - 'query': query, - 'result': result, + "type": "query", + "query": query, + "result": result, } def add_command(self, name, command, result, section=None): self._section(section)[name] = { - 'type': 'command', - 'command': command, - 'result': result, + "type": "command", + "command": command, + "result": result, } def dump(self, format): - if format.startswith('json'): + if format.startswith("json"): result = self._dump_json() - elif format.startswith('yaml'): + elif format.startswith("yaml"): result = self._dump_yaml() else: result = self._dump_wiki() - if format.endswith('.gz'): - compressor = gzip.GzipFile(mode='wb', fileobj=sys.stdout.buffer) + if format.endswith(".gz"): + compressor = gzip.GzipFile(mode="wb", fileobj=sys.stdout.buffer) compressor.write(result.encode()) else: print(result) def _section(self, name=None): - if self._sections[-1]['section'] != name: - self._sections.append({'section': name, 'data': {}}) + if self._sections[-1]["section"] != name: + self._sections.append({"section": name, "data": {}}) - return self._sections[-1]['data'] + return self._sections[-1]["data"] def _dump_json(self): """ @@ -669,85 +685,85 @@ class DiagnosticsData: """ def _write_title(buffer, value): - buffer.write(f'### {value}\n') + buffer.write(f"### {value}\n") def _write_subtitle(buffer, value): - buffer.write(f'#### {value}\n') + buffer.write(f"#### {value}\n") def _write_string_item(buffer, name, item): - value = item['value'] - if value != '': - value = f'**{value}**' - buffer.write(f'{name}: {value}\n') + value = item["value"] + if value != "": + value = f"**{value}**" + buffer.write(f"{name}: {value}\n") def _write_xml_item(buffer, section_name, name, item): if section_name: - buffer.write(f'##### {name}\n') + buffer.write(f"##### {name}\n") else: _write_subtitle(buffer, name) - _write_result(buffer, item['value'], format='XML') + _write_result(buffer, item["value"], format="XML") def _write_query_item(buffer, section_name, name, item): if section_name: - buffer.write(f'##### {name}\n') + buffer.write(f"##### {name}\n") else: _write_subtitle(buffer, name) - _write_query(buffer, item['query']) - _write_result(buffer, item['result']) + _write_query(buffer, item["query"]) + _write_result(buffer, item["result"]) def _write_command_item(buffer, section_name, name, item): if section_name: - buffer.write(f'##### {name}\n') + buffer.write(f"##### {name}\n") else: _write_subtitle(buffer, name) - _write_command(buffer, item['command']) - _write_result(buffer, item['result']) + _write_command(buffer, item["command"]) + _write_result(buffer, item["result"]) def _write_unknown_item(buffer, section_name, name, item): if section_name: - buffer.write(f'**{name}**\n') + buffer.write(f"**{name}**\n") else: _write_subtitle(buffer, name) json.dump(item, buffer, indent=2) def _write_query(buffer, query): - buffer.write('**query**\n') - buffer.write('```sql\n') + buffer.write("**query**\n") + 
buffer.write("```sql\n") buffer.write(query) - buffer.write('\n```\n') + buffer.write("\n```\n") def _write_command(buffer, command): - buffer.write('**command**\n') - buffer.write('```\n') + buffer.write("**command**\n") + buffer.write("```\n") buffer.write(command) - buffer.write('\n```\n') + buffer.write("\n```\n") def _write_result(buffer, result, format=None): - buffer.write('**result**\n') - buffer.write(f'```{format}\n' if format else '```\n') + buffer.write("**result**\n") + buffer.write(f"```{format}\n" if format else "```\n") buffer.write(result) - buffer.write('\n```\n') + buffer.write("\n```\n") buffer = io.StringIO() - _write_title(buffer, f'Diagnostics data for host {self.host}') + _write_title(buffer, f"Diagnostics data for host {self.host}") for section in self._sections: - section_name = section['section'] + section_name = section["section"] if section_name: _write_subtitle(buffer, section_name) - for name, item in section['data'].items(): - if item['type'] == 'string': + for name, item in section["data"].items(): + if item["type"] == "string": _write_string_item(buffer, name, item) - elif item['type'] == 'query': + elif item["type"] == "query": _write_query_item(buffer, section_name, name, item) - elif item['type'] == 'command': + elif item["type"] == "command": _write_command_item(buffer, section_name, name, item) - elif item['type'] == 'xml': + elif item["type"] == "xml": _write_xml_item(buffer, section_name, name, item) else: _write_unknown_item(buffer, section_name, name, item) @@ -760,126 +776,196 @@ def main(): Program entry point. """ args = parse_args() - timestamp = datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M:%S') - client = ClickhouseClient(host=args.host, port=args.port, user=args.user, password=args.password) + timestamp = datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S") + client = ClickhouseClient( + host=args.host, port=args.port, user=args.user, password=args.password + ) ch_config = ClickhouseConfig.load() version = client.clickhouse_version - system_tables = [row[0] for row in execute_query(client, SELECT_SYSTEM_TABLES, format='JSONCompact')['data']] + system_tables = [ + row[0] + for row in execute_query(client, SELECT_SYSTEM_TABLES, format="JSONCompact")[ + "data" + ] + ] diagnostics = DiagnosticsData(args) - diagnostics.add_string('Version', version) - diagnostics.add_string('Timestamp', timestamp) - diagnostics.add_string('Uptime', execute_query(client, SELECT_UPTIME)) + diagnostics.add_string("Version", version) + diagnostics.add_string("Timestamp", timestamp) + diagnostics.add_string("Uptime", execute_query(client, SELECT_UPTIME)) - diagnostics.add_xml_document('ClickHouse configuration', ch_config.dump()) + diagnostics.add_xml_document("ClickHouse configuration", ch_config.dump()) - if version_ge(version, '20.8'): - add_query(diagnostics, 'Access configuration', - client=client, - query=SELECT_ACCESS, - format='TSVRaw') - add_query(diagnostics, 'Quotas', - client=client, - query=SELECT_QUOTA_USAGE, - format='Vertical') + if version_ge(version, "20.8"): + add_query( + diagnostics, + "Access configuration", + client=client, + query=SELECT_ACCESS, + format="TSVRaw", + ) + add_query( + diagnostics, + "Quotas", + client=client, + query=SELECT_QUOTA_USAGE, + format="Vertical", + ) - add_query(diagnostics, 'Database engines', - client=client, - query=SELECT_DATABASE_ENGINES, - format='PrettyCompactNoEscapes', - section='Schema') - add_query(diagnostics, 'Databases (top 10 by size)', - client=client, - query=SELECT_DATABASES, - 
format='PrettyCompactNoEscapes', - section='Schema') - add_query(diagnostics, 'Table engines', - client=client, - query=SELECT_TABLE_ENGINES, - format='PrettyCompactNoEscapes', - section='Schema') - add_query(diagnostics, 'Dictionaries', - client=client, - query=SELECT_DICTIONARIES, - format='PrettyCompactNoEscapes', - section='Schema') + add_query( + diagnostics, + "Database engines", + client=client, + query=SELECT_DATABASE_ENGINES, + format="PrettyCompactNoEscapes", + section="Schema", + ) + add_query( + diagnostics, + "Databases (top 10 by size)", + client=client, + query=SELECT_DATABASES, + format="PrettyCompactNoEscapes", + section="Schema", + ) + add_query( + diagnostics, + "Table engines", + client=client, + query=SELECT_TABLE_ENGINES, + format="PrettyCompactNoEscapes", + section="Schema", + ) + add_query( + diagnostics, + "Dictionaries", + client=client, + query=SELECT_DICTIONARIES, + format="PrettyCompactNoEscapes", + section="Schema", + ) - add_query(diagnostics, 'Replicated tables (top 10 by absolute delay)', - client=client, - query=SELECT_REPLICAS, - format='PrettyCompactNoEscapes', - section='Replication') - add_query(diagnostics, 'Replication queue (top 20 oldest tasks)', - client=client, - query=SELECT_REPLICATION_QUEUE, - format='Vertical', - section='Replication') - if version_ge(version, '21.3'): - add_query(diagnostics, 'Replicated fetches', - client=client, - query=SELECT_REPLICATED_FETCHES, - format='Vertical', - section='Replication') + add_query( + diagnostics, + "Replicated tables (top 10 by absolute delay)", + client=client, + query=SELECT_REPLICAS, + format="PrettyCompactNoEscapes", + section="Replication", + ) + add_query( + diagnostics, + "Replication queue (top 20 oldest tasks)", + client=client, + query=SELECT_REPLICATION_QUEUE, + format="Vertical", + section="Replication", + ) + if version_ge(version, "21.3"): + add_query( + diagnostics, + "Replicated fetches", + client=client, + query=SELECT_REPLICATED_FETCHES, + format="Vertical", + section="Replication", + ) - add_query(diagnostics, 'Top 10 tables by max parts per partition', - client=client, - query=SELECT_PARTS_PER_TABLE, - format='PrettyCompactNoEscapes') - add_query(diagnostics, 'Merges in progress', - client=client, - query=SELECT_MERGES, - format='Vertical') - add_query(diagnostics, 'Mutations in progress', - client=client, - query=SELECT_MUTATIONS, - format='Vertical') - add_query(diagnostics, 'Recent data parts (modification time within last 3 minutes)', - client=client, - query=SELECT_RECENT_DATA_PARTS, - format='Vertical') + add_query( + diagnostics, + "Top 10 tables by max parts per partition", + client=client, + query=SELECT_PARTS_PER_TABLE, + format="PrettyCompactNoEscapes", + ) + add_query( + diagnostics, + "Merges in progress", + client=client, + query=SELECT_MERGES, + format="Vertical", + ) + add_query( + diagnostics, + "Mutations in progress", + client=client, + query=SELECT_MUTATIONS, + format="Vertical", + ) + add_query( + diagnostics, + "Recent data parts (modification time within last 3 minutes)", + client=client, + query=SELECT_RECENT_DATA_PARTS, + format="Vertical", + ) - add_query(diagnostics, 'system.detached_parts', - client=client, - query=SELECT_DETACHED_DATA_PARTS, - format='PrettyCompactNoEscapes', - section='Detached data') - add_command(diagnostics, 'Disk space usage', - command='du -sh -L -c /var/lib/clickhouse/data/*/*/detached/* | sort -rsh', - section='Detached data') + add_query( + diagnostics, + "system.detached_parts", + client=client, + 
query=SELECT_DETACHED_DATA_PARTS, + format="PrettyCompactNoEscapes", + section="Detached data", + ) + add_command( + diagnostics, + "Disk space usage", + command="du -sh -L -c /var/lib/clickhouse/data/*/*/detached/* | sort -rsh", + section="Detached data", + ) - add_query(diagnostics, 'Queries in progress (process list)', - client=client, - query=SELECT_PROCESSES, - format='Vertical', - section='Queries') - add_query(diagnostics, 'Top 10 queries by duration', - client=client, - query=SELECT_TOP_QUERIES_BY_DURATION, - format='Vertical', - section='Queries') - add_query(diagnostics, 'Top 10 queries by memory usage', - client=client, - query=SELECT_TOP_QUERIES_BY_MEMORY_USAGE, - format='Vertical', - section='Queries') - add_query(diagnostics, 'Last 10 failed queries', - client=client, - query=SELECT_FAILED_QUERIES, - format='Vertical', - section='Queries') + add_query( + diagnostics, + "Queries in progress (process list)", + client=client, + query=SELECT_PROCESSES, + format="Vertical", + section="Queries", + ) + add_query( + diagnostics, + "Top 10 queries by duration", + client=client, + query=SELECT_TOP_QUERIES_BY_DURATION, + format="Vertical", + section="Queries", + ) + add_query( + diagnostics, + "Top 10 queries by memory usage", + client=client, + query=SELECT_TOP_QUERIES_BY_MEMORY_USAGE, + format="Vertical", + section="Queries", + ) + add_query( + diagnostics, + "Last 10 failed queries", + client=client, + query=SELECT_FAILED_QUERIES, + format="Vertical", + section="Queries", + ) - add_query(diagnostics, 'Stack traces', - client=client, - query=SELECT_STACK_TRACES, - format='Vertical') + add_query( + diagnostics, + "Stack traces", + client=client, + query=SELECT_STACK_TRACES, + format="Vertical", + ) - if 'crash_log' in system_tables: - add_query(diagnostics, 'Crash log', - client=client, - query=SELECT_CRASH_LOG, - format='Vertical') + if "crash_log" in system_tables: + add_query( + diagnostics, + "Crash log", + client=client, + query=SELECT_CRASH_LOG, + format="Vertical", + ) - add_command(diagnostics, 'uname', 'uname -a') + add_command(diagnostics, "uname", "uname -a") diagnostics.dump(args.format) @@ -889,29 +975,34 @@ def parse_args(): Parse command-line arguments. 
""" parser = argparse.ArgumentParser() - parser.add_argument('--format', - choices=['json', 'yaml', 'json.gz', 'yaml.gz', 'wiki', 'wiki.gz'], - default='wiki') - parser.add_argument('--normalize-queries', - action='store_true', - default=False) - parser.add_argument('--host', dest="host", help="clickhouse host") - parser.add_argument('--port', dest="port", default=8123, help="clickhouse http port") - parser.add_argument('--user', dest="user", default="default", help="clickhouse user") - parser.add_argument('--password', dest="password", help="clickhouse password") + parser.add_argument( + "--format", + choices=["json", "yaml", "json.gz", "yaml.gz", "wiki", "wiki.gz"], + default="wiki", + ) + parser.add_argument("--normalize-queries", action="store_true", default=False) + parser.add_argument("--host", dest="host", help="clickhouse host") + parser.add_argument( + "--port", dest="port", default=8123, help="clickhouse http port" + ) + parser.add_argument( + "--user", dest="user", default="default", help="clickhouse user" + ) + parser.add_argument("--password", dest="password", help="clickhouse password") return parser.parse_args() def add_query(diagnostics, name, client, query, format, section=None): query_args = { - 'normalize_queries': diagnostics.args.normalize_queries, + "normalize_queries": diagnostics.args.normalize_queries, } query = client.render_query(query, **query_args) diagnostics.add_query( name=name, query=query, result=execute_query(client, query, render_query=False, format=format), - section=section) + section=section, + ) def execute_query(client, query, render_query=True, format=None): @@ -926,14 +1017,18 @@ def execute_query(client, query, render_query=True, format=None): def add_command(diagnostics, name, command, section=None): diagnostics.add_command( - name=name, - command=command, - result=execute_command(command), - section=section) + name=name, command=command, result=execute_command(command), section=section + ) def execute_command(command, input=None): - proc = subprocess.Popen(command, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + proc = subprocess.Popen( + command, + shell=True, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) if isinstance(input, str): input = input.encode() @@ -941,7 +1036,7 @@ def execute_command(command, input=None): stdout, stderr = proc.communicate(input=input) if proc.returncode: - return f'failed with exit code {proc.returncode}\n{stderr.decode()}' + return f"failed with exit code {proc.returncode}\n{stderr.decode()}" return stdout.decode() @@ -957,8 +1052,8 @@ def parse_version(version): """ Parse version string. 
""" - return [int(x) for x in version.strip().split('.') if x.isnumeric()] + return [int(x) for x in version.strip().split(".") if x.isnumeric()] -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/utils/data-lakes-importer.py b/utils/data-lakes-importer.py new file mode 100755 index 00000000000..aa03f15b1c0 --- /dev/null +++ b/utils/data-lakes-importer.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python3 + +import os +import sys +import pyspark +from delta import * # pip install delta-spark + +# Usage example: +# ./data-lakes-importer.py iceberg data.parquet result_path + + +def get_spark_for_iceberg(result_path): + builder = ( + pyspark.sql.SparkSession.builder.appName("spark_test") + .config( + "spark.jars.packages", + "org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.1.0", + ) + .config( + "spark.sql.catalog.spark_catalog", + "org.apache.iceberg.spark.SparkSessionCatalog", + ) + .config("spark.sql.catalog.local", "org.apache.iceberg.spark.SparkCatalog") + .config("spark.sql.catalog.spark_catalog.type", "hadoop") + .config("spark.sql.catalog.spark_catalog.warehouse", result_path) + .master("local") + ) + return builder.master("local").getOrCreate() + + +def get_spark_for_delta(): + builder = ( + pyspark.sql.SparkSession.builder.appName("spark_test") + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") + .config( + "spark.sql.catalog.spark_catalog", + "org.apache.spark.sql.delta.catalog.DeltaCatalog", + ) + .master("local") + ) + + return configure_spark_with_delta_pip(builder).master("local").getOrCreate() + + +def get_spark_for_hudi(): + builder = ( + pyspark.sql.SparkSession.builder.appName("spark_test") + .config( + "spark.jars.packages", + "org.apache.hudi:hudi-spark3.3-bundle_2.12:0.13.0", + ) + .config( + "org.apache.spark.sql.hudi.catalog.HoodieCatalog", + ) + .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + .config( + "spark.sql.catalog.local", "org.apache.spark.sql.hudi.catalog.HoodieCatalog" + ) + .config( + "spark.driver.memory", "20g" + ) # .config('spark.sql.extensions", "org.apache.spark.sql.hudi.HoodieSparkSessionExtension') + .master("local") + ) + return builder.master("local").getOrCreate() + + +def main(): + data_lake_name = str(sys.argv[1]).strip() + file_path = sys.argv[2] + result_path = sys.argv[3] + + if not file_path.startswith("/"): + print(f"Expected absolute path, got relative: {file_path}") + exit(1) + + if not result_path.startswith("/"): + print(f"Expected absolute path, got relative: {result_path}") + exit(1) + + spark = None + if data_lake_name == "iceberg": + spark = get_spark_for_iceberg(result_path) + spark.conf.set("spark.sql.debug.maxToStringFields", 100000) + spark.read.load(f"file://{file_path}").writeTo("iceberg_table").using( + "iceberg" + ).create() + elif data_lake_name == "delta": + spark = get_spark_for_delta() + spark.conf.set("spark.sql.debug.maxToStringFields", 100000) + spark.read.load(f"file://{file_path}").write.mode("overwrite").option( + "compression", "none" + ).format("delta").option("delta.columnMapping.mode", "name").save(result_path) + elif data_lake_name == "hudi": + spark = get_spark_for_hudi() + spark.conf.set("spark.sql.debug.maxToStringFields", 100000) + spark.read.load(f"file://{file_path}").write.mode("overwrite").option( + "compression", "none" + ).format("hudi").option("hoodie.table.name", "hudi").option( + "hoodie.datasource.write.partitionpath.field", "partitionpath" + ).option( + "hoodie.datasource.write.table.name", "hudi" + ).option( + 
"hoodie.datasource.write.recordkey.field", "ts" + ).option( + "hoodie.datasource.write.precombine.field", "ts" + ).option( + "hoodie.datasource.write.operation", "insert_overwrite" + ).save( + result_path + ) + else: + print( + f"Unknown data lake name {data_lake_name}. Support only: 'iceberg', 'delta'" + ) + exit(1) + + +if __name__ == "__main__": + main() diff --git a/utils/keeper-bench/CMakeLists.txt b/utils/keeper-bench/CMakeLists.txt index 2596be4addd..49ce2068246 100644 --- a/utils/keeper-bench/CMakeLists.txt +++ b/utils/keeper-bench/CMakeLists.txt @@ -1,2 +1,7 @@ +if (NOT TARGET ch_contrib::rapidjson) + message (${RECONFIGURE_MESSAGE_LEVEL} "Not building keeper-bench due to rapidjson is disabled") + return() +endif () + clickhouse_add_executable(keeper-bench Generator.cpp Runner.cpp Stats.cpp main.cpp) -target_link_libraries(keeper-bench PRIVATE clickhouse_common_zookeeper_no_log) +target_link_libraries(keeper-bench PRIVATE clickhouse_common_config_no_zookeeper_log ch_contrib::rapidjson) diff --git a/utils/keeper-bench/Generator.cpp b/utils/keeper-bench/Generator.cpp index 5d1d0f8a491..2212f7158ae 100644 --- a/utils/keeper-bench/Generator.cpp +++ b/utils/keeper-bench/Generator.cpp @@ -1,16 +1,18 @@ #include "Generator.h" +#include "Common/Exception.h" +#include "Common/ZooKeeper/ZooKeeperCommon.h" +#include #include #include +#include using namespace Coordination; using namespace zkutil; -namespace DB -{ -namespace ErrorCodes +namespace DB::ErrorCodes { extern const int LOGICAL_ERROR; -} + extern const int BAD_ARGUMENTS; } namespace @@ -38,16 +40,6 @@ std::string generateRandomString(size_t length) } } -std::string generateRandomPath(const std::string & prefix, size_t length) -{ - return std::filesystem::path(prefix) / generateRandomString(length); -} - -std::string generateRandomData(size_t size) -{ - return generateRandomString(size); -} - void removeRecursive(Coordination::ZooKeeper & zookeeper, const std::string & path) { namespace fs = std::filesystem; @@ -96,126 +88,629 @@ void removeRecursive(Coordination::ZooKeeper & zookeeper, const std::string & pa remove_future.get(); } - -void CreateRequestGenerator::startup(Coordination::ZooKeeper & zookeeper) +NumberGetter +NumberGetter::fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config, std::optional default_value) { - removeRecursive(zookeeper, path_prefix); + NumberGetter number_getter; - auto promise = std::make_shared>(); - auto future = promise->get_future(); - auto create_callback = [promise] (const CreateResponse & response) + if (!config.has(key) && default_value.has_value()) { - if (response.error != Coordination::Error::ZOK) - promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); - else - promise->set_value(); - }; - zookeeper.create(path_prefix, "", false, false, default_acls, create_callback); - future.get(); + number_getter.value = *default_value; + } + else if (config.has(key + ".min_value") && config.has(key + ".max_value")) + { + NumberRange range{.min_value = config.getUInt64(key + ".min_value"), .max_value = config.getUInt64(key + ".max_value")}; + if (range.max_value <= range.min_value) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Range is invalid for key {}: [{}, {}]", key, range.min_value, range.max_value); + number_getter.value = range; + } + else + { + number_getter.value = config.getUInt64(key); + } + + return number_getter; } -ZooKeeperRequestPtr CreateRequestGenerator::generate() +std::string NumberGetter::description() const { + 
if (const auto * number = std::get_if<uint64_t>(&value)) + return std::to_string(*number); + + const auto & range = std::get<NumberRange>(value); + return fmt::format("random value from range [{}, {}]", range.min_value, range.max_value); +} + +uint64_t NumberGetter::getNumber() const +{ + if (const auto * number = std::get_if<uint64_t>(&value)) + return *number; + + const auto & range = std::get<NumberRange>(value); + static pcg64 rng(randomSeed()); + return std::uniform_int_distribution<uint64_t>(range.min_value, range.max_value)(rng); +} + +StringGetter StringGetter::fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config) +{ + StringGetter string_getter; + if (config.has(key + ".random_string")) + string_getter.value + = NumberGetter::fromConfig(key + ".random_string.size", config); + else + string_getter.value = config.getString(key); + + return string_getter; +} + +void StringGetter::setString(std::string name) +{ + value = std::move(name); +} + +std::string StringGetter::getString() const +{ + if (const auto * string = std::get_if<std::string>(&value)) + return *string; + + const auto number_getter = std::get<NumberGetter>(value); + return generateRandomString(number_getter.getNumber()); +} + +std::string StringGetter::description() const +{ + if (const auto * string = std::get_if<std::string>(&value)) + return *string; + + const auto number_getter = std::get<NumberGetter>(value); + return fmt::format("random string with size of {}", number_getter.description()); +} + +bool StringGetter::isRandom() const +{ + return std::holds_alternative<NumberGetter>(value); +} + +PathGetter PathGetter::fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config) +{ + static constexpr std::string_view path_key_string = "path"; + + PathGetter path_getter; + Poco::Util::AbstractConfiguration::Keys path_keys; + config.keys(key, path_keys); + + for (const auto & path_key : path_keys) + { + if (!path_key.starts_with(path_key_string)) + continue; + + const auto current_path_key_string = key + "." + path_key; + const auto children_of_key = current_path_key_string + ".children_of"; + if (config.has(children_of_key)) + { + auto parent_node = config.getString(children_of_key); + if (parent_node.empty() || parent_node[0] != '/') + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Invalid path for request generator: '{}'", parent_node); + path_getter.parent_paths.push_back(std::move(parent_node)); + } + else + { + auto path = config.getString(key + "."
+ path_key); + + if (path.empty() || path[0] != '/') + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Invalid path for request generator: '{}'", path); + + path_getter.paths.push_back(std::move(path)); + } + } + + path_getter.path_picker = std::uniform_int_distribution<size_t>(0, path_getter.paths.size() - 1); + return path_getter; +} + +void PathGetter::initialize(Coordination::ZooKeeper & zookeeper) +{ + for (const auto & parent_path : parent_paths) + { + auto list_promise = std::make_shared<std::promise<ListResponse>>(); + auto list_future = list_promise->get_future(); + auto callback = [list_promise] (const ListResponse & response) + { + if (response.error != Coordination::Error::ZOK) + list_promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); + else + list_promise->set_value(response); + }; + zookeeper.list(parent_path, ListRequestType::ALL, std::move(callback), {}); + auto list_response = list_future.get(); + + for (const auto & child : list_response.names) + paths.push_back(std::filesystem::path(parent_path) / child); + } + + path_picker = std::uniform_int_distribution<size_t>(0, paths.size() - 1); + initialized = true; +} + +std::string PathGetter::getPath() const +{ + if (!initialized) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "PathGetter is not initialized"); + + if (paths.size() == 1) + return paths[0]; + + static pcg64 rng(randomSeed()); + return paths[path_picker(rng)]; +} + +std::string PathGetter::description() const +{ + std::string description; + for (const auto & path : parent_paths) + { + if (!description.empty()) + description += ", "; + description += fmt::format("children of {}", path); + } + + for (const auto & path : paths) + { + if (!description.empty()) + description += ", "; + description += path; + } + + return description; +} + +RequestGetter::RequestGetter(std::vector<RequestGeneratorPtr> request_generators_) + : request_generators(std::move(request_generators_)) +{} + +RequestGetter RequestGetter::fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config, bool for_multi) +{ + RequestGetter request_getter; + + Poco::Util::AbstractConfiguration::Keys generator_keys; + config.keys(key, generator_keys); + + bool use_weights = false; + size_t weight_sum = 0; + auto & generators = request_getter.request_generators; + for (const auto & generator_key : generator_keys) + { + RequestGeneratorPtr request_generator; + + if (generator_key.starts_with("create")) + request_generator = std::make_unique<CreateRequestGenerator>(); + else if (generator_key.starts_with("set")) + request_generator = std::make_unique<SetRequestGenerator>(); + else if (generator_key.starts_with("get")) + request_generator = std::make_unique<GetRequestGenerator>(); + else if (generator_key.starts_with("list")) + request_generator = std::make_unique<ListRequestGenerator>(); + else if (generator_key.starts_with("multi")) + { + if (for_multi) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Nested multi requests are not allowed"); + request_generator = std::make_unique<MultiRequestGenerator>(); + } + else + { + if (for_multi) + continue; + + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Unknown generator {}", key + "." + generator_key); + } + + request_generator->getFromConfig(key + "." + generator_key, config); + + auto weight = request_generator->getWeight(); + use_weights |= weight != 1; + weight_sum += weight; + + generators.push_back(std::move(request_generator)); + } + + if (generators.empty()) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "No request generators found in config for key '{}'", key); + + + size_t max_value = use_weights ? 
weight_sum - 1 : generators.size() - 1; + request_getter.request_generator_picker = std::uniform_int_distribution<size_t>(0, max_value); + + /// construct weight vector + if (use_weights) + { + auto & weights = request_getter.weights; + weights.reserve(generators.size()); + weights.push_back(generators[0]->getWeight() - 1); + + for (size_t i = 1; i < generators.size(); ++i) + weights.push_back(weights.back() + generators[i]->getWeight()); + } + + return request_getter; +} + +RequestGeneratorPtr RequestGetter::getRequestGenerator() const +{ + static pcg64 rng(randomSeed()); + + auto random_number = request_generator_picker(rng); + + if (weights.empty()) + return request_generators[random_number]; + + for (size_t i = 0; i < request_generators.size(); ++i) + { + if (random_number <= weights[i]) + return request_generators[i]; + } + + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Invalid number generated: {}", random_number); +} + +std::string RequestGetter::description() const +{ + std::string guard(30, '-'); + std::string description = guard; + + for (const auto & request_generator : request_generators) + description += fmt::format("\n{}\n", request_generator->description()); + return description + guard; +} + +void RequestGetter::startup(Coordination::ZooKeeper & zookeeper) +{ + for (const auto & request_generator : request_generators) + request_generator->startup(zookeeper); +} + +const std::vector<RequestGeneratorPtr> & RequestGetter::requestGenerators() const +{ + return request_generators; +} + +void RequestGenerator::getFromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config) +{ + if (config.has(key + ".weight")) + weight = config.getUInt64(key + ".weight"); + getFromConfigImpl(key, config); +} + +std::string RequestGenerator::description() +{ + std::string weight_string = weight == 1 ? "" : fmt::format("\n- weight: {}", weight); + return fmt::format("{}{}", descriptionImpl(), weight_string); +} + +Coordination::ZooKeeperRequestPtr RequestGenerator::generate(const Coordination::ACLs & acls) +{ + return generateImpl(acls); +} + +void RequestGenerator::startup(Coordination::ZooKeeper & zookeeper) +{ + startupImpl(zookeeper); +} + +size_t RequestGenerator::getWeight() const +{ + return weight; +} + +CreateRequestGenerator::CreateRequestGenerator() + : rng(randomSeed()) + , remove_picker(0, 1.0) +{} + +void CreateRequestGenerator::getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) +{ + parent_path = PathGetter::fromConfig(key, config); + + name = StringGetter(NumberGetter::fromConfig(key + ".name_length", config, 5)); + + if (config.has(key + ".data")) + data = StringGetter::fromConfig(key + ".data", config); + + if (config.has(key + ".remove_factor")) + remove_factor = config.getDouble(key + ".remove_factor"); +} + +std::string CreateRequestGenerator::descriptionImpl() +{ + std::string data_string + = data.has_value() ? fmt::format("data for created nodes: {}", data->description()) : "no data for created nodes"; + std::string remove_factor_string + = remove_factor.has_value() ? 
fmt::format("- remove factor: {}", *remove_factor) : "- without removes"; + return fmt::format( + "Create Request Generator\n" + "- parent path(s) for created nodes: {}\n" + "- name for created nodes: {}\n" + "- {}\n" + "{}", + parent_path.description(), + name.description(), + data_string, + remove_factor_string); +} + +void CreateRequestGenerator::startupImpl(Coordination::ZooKeeper & zookeeper) +{ + parent_path.initialize(zookeeper); +} + +Coordination::ZooKeeperRequestPtr CreateRequestGenerator::generateImpl(const Coordination::ACLs & acls) +{ + if (remove_factor.has_value() && !paths_created.empty() && remove_picker(rng) < *remove_factor) + { + auto request = std::make_shared(); + auto it = paths_created.begin(); + request->path = *it; + paths_created.erase(it); + return request; + } + auto request = std::make_shared(); - request->acls = default_acls; - size_t plength = 5; - if (path_length) - plength = *path_length; - auto path_candidate = generateRandomPath(path_prefix, plength); + request->acls = acls; + + std::string path_candidate = std::filesystem::path(parent_path.getPath()) / name.getString(); while (paths_created.contains(path_candidate)) - path_candidate = generateRandomPath(path_prefix, plength); + path_candidate = std::filesystem::path(parent_path.getPath()) / name.getString(); paths_created.insert(path_candidate); - request->path = path_candidate; - if (data_size) - request->data = generateRandomData(*data_size); + request->path = std::move(path_candidate); + + if (data) + request->data = data->getString(); return request; } - -void SetRequestGenerator::startup(Coordination::ZooKeeper & zookeeper) +void SetRequestGenerator::getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) { - removeRecursive(zookeeper, path_prefix); + path = PathGetter::fromConfig(key, config); - auto promise = std::make_shared>(); - auto future = promise->get_future(); - auto create_callback = [promise] (const CreateResponse & response) - { - if (response.error != Coordination::Error::ZOK) - promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); - else - promise->set_value(); - }; - zookeeper.create(path_prefix, "", false, false, default_acls, create_callback); - future.get(); + data = StringGetter::fromConfig(key + ".data", config); } -ZooKeeperRequestPtr SetRequestGenerator::generate() +std::string SetRequestGenerator::descriptionImpl() +{ + return fmt::format( + "Set Request Generator\n" + "- path(s) to set: {}\n" + "- data to set: {}", + path.description(), + data.description()); +} + +Coordination::ZooKeeperRequestPtr SetRequestGenerator::generateImpl(const Coordination::ACLs & /*acls*/) { auto request = std::make_shared(); - request->path = path_prefix; - request->data = generateRandomData(data_size); - + request->path = path.getPath(); + request->data = data.getString(); return request; } - -void GetRequestGenerator::startup(Coordination::ZooKeeper & zookeeper) +void SetRequestGenerator::startupImpl(Coordination::ZooKeeper & zookeeper) { - auto promise = std::make_shared>(); - auto future = promise->get_future(); - auto create_callback = [promise] (const CreateResponse & response) - { - if (response.error != Coordination::Error::ZOK) - promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); - else - promise->set_value(); - }; - zookeeper.create(path_prefix, "", false, false, default_acls, create_callback); - future.get(); - size_t total_nodes = 1; - if (num_nodes) - total_nodes = 
*num_nodes; - - for (size_t i = 0; i < total_nodes; ++i) - { - auto path = generateRandomPath(path_prefix, 5); - while (std::find(paths_to_get.begin(), paths_to_get.end(), path) != paths_to_get.end()) - path = generateRandomPath(path_prefix, 5); - - auto create_promise = std::make_shared>(); - auto create_future = create_promise->get_future(); - auto callback = [create_promise] (const CreateResponse & response) - { - if (response.error != Coordination::Error::ZOK) - create_promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); - else - create_promise->set_value(); - }; - std::string data; - if (nodes_data_size) - data = generateRandomString(*nodes_data_size); - - zookeeper.create(path, data, false, false, default_acls, callback); - create_future.get(); - paths_to_get.push_back(path); - } + path.initialize(zookeeper); } -Coordination::ZooKeeperRequestPtr GetRequestGenerator::generate() +void GetRequestGenerator::getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) +{ + path = PathGetter::fromConfig(key, config); +} + +std::string GetRequestGenerator::descriptionImpl() +{ + return fmt::format( + "Get Request Generator\n" + "- path(s) to get: {}", + path.description()); +} + +Coordination::ZooKeeperRequestPtr GetRequestGenerator::generateImpl(const Coordination::ACLs & /*acls*/) { auto request = std::make_shared(); - - size_t path_index = distribution(rng); - request->path = paths_to_get[path_index]; + request->path = path.getPath(); return request; } -void ListRequestGenerator::startup(Coordination::ZooKeeper & zookeeper) +void GetRequestGenerator::startupImpl(Coordination::ZooKeeper & zookeeper) { + path.initialize(zookeeper); +} + +void ListRequestGenerator::getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) +{ + path = PathGetter::fromConfig(key, config); +} + +std::string ListRequestGenerator::descriptionImpl() +{ + return fmt::format( + "List Request Generator\n" + "- path(s) to get: {}", + path.description()); +} + +Coordination::ZooKeeperRequestPtr ListRequestGenerator::generateImpl(const Coordination::ACLs & /*acls*/) +{ + auto request = std::make_shared(); + request->path = path.getPath(); + return request; +} + +void ListRequestGenerator::startupImpl(Coordination::ZooKeeper & zookeeper) +{ + path.initialize(zookeeper); +} + +void MultiRequestGenerator::getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) +{ + if (config.has(key + ".size")) + size = NumberGetter::fromConfig(key + ".size", config); + + request_getter = RequestGetter::fromConfig(key, config, /*for_multi*/ true); +}; + +std::string MultiRequestGenerator::descriptionImpl() +{ + std::string size_string = size.has_value() ? 
fmt::format("- number of requests: {}\n", size->description()) : ""; + return fmt::format( + "Multi Request Generator\n" + "{}" + "- requests:\n{}", + size_string, + request_getter.description()); +} + +Coordination::ZooKeeperRequestPtr MultiRequestGenerator::generateImpl(const Coordination::ACLs & acls) +{ + Coordination::Requests ops; + + if (size) + { + auto request_count = size->getNumber(); + + for (size_t i = 0; i < request_count; ++i) + ops.push_back(request_getter.getRequestGenerator()->generate(acls)); + } + else + { + for (const auto & request_generator : request_getter.requestGenerators()) + ops.push_back(request_generator->generate(acls)); + } + + return std::make_shared(ops, acls); +} + +void MultiRequestGenerator::startupImpl(Coordination::ZooKeeper & zookeeper) +{ + request_getter.startup(zookeeper); +} + +Generator::Generator(const Poco::Util::AbstractConfiguration & config) +{ + Coordination::ACL acl; + acl.permissions = Coordination::ACL::All; + acl.scheme = "world"; + acl.id = "anyone"; + default_acls.emplace_back(std::move(acl)); + + static const std::string generator_key = "generator"; + + std::cerr << "---- Parsing setup ---- " << std::endl; + static const std::string setup_key = generator_key + ".setup"; + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(setup_key, keys); + for (const auto & key : keys) + { + if (key.starts_with("node")) + { + auto node_key = setup_key + "." + key; + auto parsed_root_node = parseNode(node_key, config); + const auto node = root_nodes.emplace_back(parsed_root_node); + + if (config.has(node_key + ".repeat")) + { + if (!node->name.isRandom()) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Repeating node creation for key {}, but name is not randomly generated", node_key); + + auto repeat_count = config.getUInt64(node_key + ".repeat"); + node->repeat_count = repeat_count; + for (size_t i = 1; i < repeat_count; ++i) + root_nodes.emplace_back(node->clone()); + } + + std::cerr << "Tree to create:" << std::endl; + + node->dumpTree(); + std::cerr << std::endl; + } + } + std::cerr << "---- Done parsing data setup ----\n" << std::endl; + + std::cerr << "---- Collecting request generators ----" << std::endl; + static const std::string requests_key = generator_key + ".requests"; + request_getter = RequestGetter::fromConfig(requests_key, config); + std::cerr << request_getter.description() << std::endl; + std::cerr << "---- Done collecting request generators ----\n" << std::endl; +} + +std::shared_ptr Generator::parseNode(const std::string & key, const Poco::Util::AbstractConfiguration & config) +{ + auto node = std::make_shared(); + node->name = StringGetter::fromConfig(key + ".name", config); + + if (config.has(key + ".data")) + node->data = StringGetter::fromConfig(key + ".data", config); + + Poco::Util::AbstractConfiguration::Keys node_keys; + config.keys(key, node_keys); + + for (const auto & node_key : node_keys) + { + if (!node_key.starts_with("node")) + continue; + + const auto node_key_string = key + "." 
+ node_key; + auto child_node = parseNode(node_key_string, config); + node->children.push_back(child_node); + + if (config.has(node_key_string + ".repeat")) + { + if (!child_node->name.isRandom()) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Repeating node creation for key {}, but name is not randomly generated", node_key_string); + + auto repeat_count = config.getUInt64(node_key_string + ".repeat"); + child_node->repeat_count = repeat_count; + for (size_t i = 1; i < repeat_count; ++i) + node->children.push_back(child_node); + } + } + + return node; +} + +void Generator::Node::dumpTree(int level) const +{ + std::string data_string + = data.has_value() ? fmt::format("{}", data->description()) : "no data"; + + std::string repeat_count_string = repeat_count != 0 ? fmt::format(", repeated {} times", repeat_count) : ""; + + std::cerr << fmt::format("{}name: {}, data: {}{}", std::string(level, '\t'), name.description(), data_string, repeat_count_string) << std::endl; + + for (auto it = children.begin(); it != children.end();) + { + const auto & child = *it; + child->dumpTree(level + 1); + std::advance(it, child->repeat_count != 0 ? child->repeat_count : 1); + } +} + +std::shared_ptr Generator::Node::clone() const +{ + auto new_node = std::make_shared(); + new_node->name = name; + new_node->data = data; + new_node->repeat_count = repeat_count; + + // don't do deep copy of children because we will do clone only for root nodes + new_node->children = children; + + return new_node; +} + +void Generator::Node::createNode(Coordination::ZooKeeper & zookeeper, const std::string & parent_path, const Coordination::ACLs & acls) const +{ + auto path = std::filesystem::path(parent_path) / name.getString(); auto promise = std::make_shared>(); auto future = promise->get_future(); auto create_callback = [promise] (const CreateResponse & response) @@ -225,97 +720,47 @@ void ListRequestGenerator::startup(Coordination::ZooKeeper & zookeeper) else promise->set_value(); }; - zookeeper.create(path_prefix, "", false, false, default_acls, create_callback); + zookeeper.create(path, data ? 
data->getString() : "", false, false, acls, create_callback); future.get(); - size_t total_nodes = 1; - if (num_nodes) - total_nodes = *num_nodes; - - size_t path_length = 5; - if (paths_length) - path_length = *paths_length; - - for (size_t i = 0; i < total_nodes; ++i) - { - auto path = generateRandomPath(path_prefix, path_length); - - auto create_promise = std::make_shared>(); - auto create_future = create_promise->get_future(); - auto callback = [create_promise] (const CreateResponse & response) - { - if (response.error != Coordination::Error::ZOK) - create_promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); - else - create_promise->set_value(); - }; - zookeeper.create(path, "", false, false, default_acls, callback); - create_future.get(); - } + for (const auto & child : children) + child->createNode(zookeeper, path, acls); } -Coordination::ZooKeeperRequestPtr ListRequestGenerator::generate() +void Generator::startup(Coordination::ZooKeeper & zookeeper) { - auto request = std::make_shared(); - request->path = path_prefix; - return request; + std::cerr << "---- Creating test data ----" << std::endl; + for (const auto & node : root_nodes) + { + auto node_name = node->name.getString(); + node->name.setString(node_name); + + std::string root_path = std::filesystem::path("/") / node_name; + std::cerr << "Cleaning up " << root_path << std::endl; + removeRecursive(zookeeper, root_path); + + node->createNode(zookeeper, "/", default_acls); + } + std::cerr << "---- Created test data ----\n" << std::endl; + + std::cerr << "---- Initializing generators ----" << std::endl; + + request_getter.startup(zookeeper); } -std::unique_ptr getGenerator(const std::string & name) +Coordination::ZooKeeperRequestPtr Generator::generate() { - if (name == "create_no_data") - { - return std::make_unique(); - } - else if (name == "create_small_data") - { - return std::make_unique("/create_generator", 5, 32); - } - else if (name == "create_medium_data") - { - return std::make_unique("/create_generator", 5, 1024); - } - else if (name == "create_big_data") - { - return std::make_unique("/create_generator", 5, 512 * 1024); - } - else if (name == "get_no_data") - { - return std::make_unique("/get_generator", 10, 0); - } - else if (name == "get_small_data") - { - return std::make_unique("/get_generator", 10, 32); - } - else if (name == "get_medium_data") - { - return std::make_unique("/get_generator", 10, 1024); - } - else if (name == "get_big_data") - { - return std::make_unique("/get_generator", 10, 512 * 1024); - } - else if (name == "list_no_nodes") - { - return std::make_unique("/list_generator", 0, 1); - } - else if (name == "list_few_nodes") - { - return std::make_unique("/list_generator", 10, 5); - } - else if (name == "list_medium_nodes") - { - return std::make_unique("/list_generator", 1000, 5); - } - else if (name == "list_a_lot_nodes") - { - return std::make_unique("/list_generator", 100000, 5); - } - else if (name == "set_small_data") - { - return std::make_unique("/set_generator", 5); - } - - - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unknown generator {}", name); + return request_getter.getRequestGenerator()->generate(default_acls); +} + +void Generator::cleanup(Coordination::ZooKeeper & zookeeper) +{ + std::cerr << "---- Cleaning up test data ----" << std::endl; + for (const auto & node : root_nodes) + { + auto node_name = node->name.getString(); + std::string root_path = std::filesystem::path("/") / node_name; + std::cerr << "Cleaning up " << root_path << 
std::endl; + removeRecursive(zookeeper, root_path); + } } diff --git a/utils/keeper-bench/Generator.h b/utils/keeper-bench/Generator.h index 1ff01b25ed4..5b4c05b2d8b 100644 --- a/utils/keeper-bench/Generator.h +++ b/utils/keeper-bench/Generator.h @@ -6,121 +6,194 @@ #include #include #include +#include #include - -std::string generateRandomPath(const std::string & prefix, size_t length = 5); - -std::string generateRandomData(size_t size); - -class IGenerator +struct NumberGetter { -public: - IGenerator() + static NumberGetter fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config, std::optional default_value = std::nullopt); + uint64_t getNumber() const; + std::string description() const; +private: + struct NumberRange { - Coordination::ACL acl; - acl.permissions = Coordination::ACL::All; - acl.scheme = "world"; - acl.id = "anyone"; - default_acls.emplace_back(std::move(acl)); - } - virtual void startup(Coordination::ZooKeeper & /*zookeeper*/) {} - virtual Coordination::ZooKeeperRequestPtr generate() = 0; - - virtual ~IGenerator() = default; - - Coordination::ACLs default_acls; + uint64_t min_value; + uint64_t max_value; + }; + std::variant value; }; -class CreateRequestGenerator final : public IGenerator +struct StringGetter { -public: - explicit CreateRequestGenerator( - std::string path_prefix_ = "/create_generator", - std::optional path_length_ = std::nullopt, - std::optional data_size_ = std::nullopt) - : path_prefix(path_prefix_) - , path_length(path_length_) - , data_size(data_size_) + explicit StringGetter(NumberGetter number_getter) + : value(std::move(number_getter)) {} - void startup(Coordination::ZooKeeper & zookeeper) override; - Coordination::ZooKeeperRequestPtr generate() override; + StringGetter() = default; + static StringGetter fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config); + void setString(std::string name); + std::string getString() const; + std::string description() const; + bool isRandom() const; private: - std::string path_prefix; - std::optional path_length; - std::optional data_size; + std::variant value; +}; + +struct PathGetter +{ + static PathGetter fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config); + + std::string getPath() const; + std::string description() const; + + void initialize(Coordination::ZooKeeper & zookeeper); +private: + std::vector parent_paths; + + bool initialized = false; + + std::vector paths; + mutable std::uniform_int_distribution path_picker; +}; + +struct RequestGenerator +{ + virtual ~RequestGenerator() = default; + + void getFromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config); + + Coordination::ZooKeeperRequestPtr generate(const Coordination::ACLs & acls); + + std::string description(); + + void startup(Coordination::ZooKeeper & zookeeper); + + size_t getWeight() const; +private: + virtual void getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) = 0; + virtual std::string descriptionImpl() = 0; + virtual Coordination::ZooKeeperRequestPtr generateImpl(const Coordination::ACLs & acls) = 0; + virtual void startupImpl(Coordination::ZooKeeper &) {} + + size_t weight = 1; +}; + +using RequestGeneratorPtr = std::shared_ptr; + +struct CreateRequestGenerator final : public RequestGenerator +{ + CreateRequestGenerator(); +private: + void getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) override; + std::string descriptionImpl() 
override; + Coordination::ZooKeeperRequestPtr generateImpl(const Coordination::ACLs & acls) override; + void startupImpl(Coordination::ZooKeeper & zookeeper) override; + + PathGetter parent_path; + StringGetter name; + std::optional<StringGetter> data; + + std::optional<double> remove_factor; + pcg64 rng; + std::uniform_real_distribution<double> remove_picker; + std::unordered_set<std::string> paths_created; }; - -class GetRequestGenerator final : public IGenerator +struct SetRequestGenerator final : public RequestGenerator { -public: - explicit GetRequestGenerator( - std::string path_prefix_ = "/get_generator", - std::optional<uint64_t> num_nodes_ = std::nullopt, - std::optional<uint64_t> nodes_data_size_ = std::nullopt) - : path_prefix(path_prefix_) - , num_nodes(num_nodes_) - , nodes_data_size(nodes_data_size_) - , rng(randomSeed()) - , distribution(0, num_nodes ? *num_nodes - 1 : 0) - {} - - void startup(Coordination::ZooKeeper & zookeeper) override; - Coordination::ZooKeeperRequestPtr generate() override; - private: - std::string path_prefix; - std::optional<uint64_t> num_nodes; - std::optional<uint64_t> nodes_data_size; - std::vector<std::string> paths_to_get; + void getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) override; + std::string descriptionImpl() override; + Coordination::ZooKeeperRequestPtr generateImpl(const Coordination::ACLs & acls) override; + void startupImpl(Coordination::ZooKeeper & zookeeper) override; - pcg64 rng; - std::uniform_int_distribution<size_t> distribution; + PathGetter path; + StringGetter data; }; -class ListRequestGenerator final : public IGenerator +struct GetRequestGenerator final : public RequestGenerator { -public: - explicit ListRequestGenerator( - std::string path_prefix_ = "/list_generator", - std::optional<uint64_t> num_nodes_ = std::nullopt, - std::optional<uint64_t> paths_length_ = std::nullopt) - : path_prefix(path_prefix_) - , num_nodes(num_nodes_) - , paths_length(paths_length_) - {} - - void startup(Coordination::ZooKeeper & zookeeper) override; - Coordination::ZooKeeperRequestPtr generate() override; - private: - std::string path_prefix; - std::optional<uint64_t> num_nodes; - std::optional<uint64_t> paths_length; + void getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) override; + std::string descriptionImpl() override; + Coordination::ZooKeeperRequestPtr generateImpl(const Coordination::ACLs & acls) override; + void startupImpl(Coordination::ZooKeeper & zookeeper) override; + + PathGetter path; }; -class SetRequestGenerator final : public IGenerator +struct ListRequestGenerator final : public RequestGenerator { -public: - explicit SetRequestGenerator( - std::string path_prefix_ = "/set_generator", - uint64_t data_size_ = 5) - : path_prefix(path_prefix_) - , data_size(data_size_) - {} - - void startup(Coordination::ZooKeeper & zookeeper) override; - Coordination::ZooKeeperRequestPtr generate() override; - private: - std::string path_prefix; - uint64_t data_size; + void getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) override; + std::string descriptionImpl() override; + Coordination::ZooKeeperRequestPtr generateImpl(const Coordination::ACLs & acls) override; + void startupImpl(Coordination::ZooKeeper & zookeeper) override; + + PathGetter path; }; +struct RequestGetter +{ + explicit RequestGetter(std::vector<RequestGeneratorPtr> request_generators_); -std::unique_ptr<IGenerator> getGenerator(const std::string & name); + RequestGetter() = default; + + static RequestGetter fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config, bool for_multi = false); + 
+ RequestGeneratorPtr getRequestGenerator() const; + std::string description() const; + void startup(Coordination::ZooKeeper & zookeeper); + const std::vector<RequestGeneratorPtr> & requestGenerators() const; +private: + std::vector<RequestGeneratorPtr> request_generators; + std::vector<size_t> weights; + mutable std::uniform_int_distribution<size_t> request_generator_picker; +}; + +struct MultiRequestGenerator final : public RequestGenerator +{ +private: + void getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) override; + std::string descriptionImpl() override; + Coordination::ZooKeeperRequestPtr generateImpl(const Coordination::ACLs & acls) override; + void startupImpl(Coordination::ZooKeeper & zookeeper) override; + + std::optional<NumberGetter> size; + RequestGetter request_getter; +}; + +class Generator +{ +public: + explicit Generator(const Poco::Util::AbstractConfiguration & config); + + void startup(Coordination::ZooKeeper & zookeeper); + Coordination::ZooKeeperRequestPtr generate(); + void cleanup(Coordination::ZooKeeper & zookeeper); +private: + struct Node + { + StringGetter name; + std::optional<StringGetter> data; + std::vector<std::shared_ptr<Node>> children; + size_t repeat_count = 0; + + std::shared_ptr<Node> clone() const; + + void createNode(Coordination::ZooKeeper & zookeeper, const std::string & parent_path, const Coordination::ACLs & acls) const; + void dumpTree(int level = 0) const; + }; + + static std::shared_ptr<Node> parseNode(const std::string & key, const Poco::Util::AbstractConfiguration & config); + + std::uniform_int_distribution<size_t> request_picker; + std::vector<std::shared_ptr<Node>> root_nodes; + RequestGetter request_getter; + Coordination::ACLs default_acls; +}; + +std::optional getGenerator(const std::string & name); diff --git a/utils/keeper-bench/README.md b/utils/keeper-bench/README.md new file mode 100644 index 00000000000..8b498228799 --- /dev/null +++ b/utils/keeper-bench/README.md @@ -0,0 +1,317 @@ +# Keeper Bench + +Keeper Bench is a tool for benchmarking Keeper or any ZooKeeper-compatible system. + +To run it, call the following command from the build folder: + +``` +./utils/keeper-bench --config benchmark_config_file.yaml +``` + +## Configuration file + +Keeper Bench runs are configured with a YAML or XML file. +An example of a configuration file can be found in `./utils/keeper-bench/example.yaml`. + +### Table of contents +- [Special Types](#special-types) +- [General settings](#general-settings) +- [Connections](#connections) +- [Generator](#generator) +- [Output](#output) + + +## Special types + +### IntegerGetter + +Can be defined as a constant integer or as a random value from a range. + +```yaml +key: integer +key: + min_value: integer + max_value: integer +``` + +Example for a constant value: + +```yaml +some_key: 2 +``` + +Example for a random value from [10, 20]: + +```yaml +some_key: + min_value: 10 + max_value: 20 +``` + +### StringGetter + +Can be defined as a constant string or as a random string of some size. + +```yaml +key: string +key: + random_string: + size: IntegerGetter +``` + +Example for a constant value: +```yaml +some_key: "string" +``` + +Example for a random string with a random size from [10, 20]: +```yaml +some_key: + random_string: + size: + min_value: 10 + max_value: 20 +``` + + +### PathGetter + +If a section contains one or more `path` keys, all `path` keys are collected into a list. \ +Additionally, paths can be defined with the `children_of` key, which adds all children of the given path to the list.
+
+```yaml
+path: string
+path:
+  children_of: string
+```
+
+Example for defining a list of paths (`/path1`, `/path2` and children of `/path3`):
+
+```yaml
+main:
+  path:
+    - "/path1"
+    - "/path2"
+  path:
+    children_of: "/path3"
+```
+
+
+## General settings
+
+```yaml
+# number of parallel queries (default: 1)
+concurrency: integer
+
+# number of queries to be executed, set 0 to disable the limit (default: 0)
+iterations: integer
+
+# delay between intermediate reports in seconds, set 0 to disable reports (default: 1.0)
+report_delay: double
+
+# stop launching queries after the specified time limit, set 0 to disable the limit (default: 0)
+timelimit: double
+
+# continue testing even if a query fails (default: false)
+continue_on_errors: boolean
+```
+
+
+## Connections
+
+Connection definitions that will be used throughout the tests are defined under the `connections` key.
+
+The following configuration options can be defined under the `connections` key or for each specific connection. \
+If an option is defined under the `connections` key, it is used by default unless a specific connection overrides it.
+
+```yaml
+secure: boolean
+operation_timeout_ms: integer
+session_timeout_ms: integer
+connection_timeout_ms: integer
+```
+
+A specific connection can be defined with a plain string or with a detailed description.
+
+```yaml
+host: string
+connection:
+  host: string
+
+  # number of sessions to create for the host
+  sessions: integer
+  # any connection configuration defined above
+```
+
+Example defining 3 connections in total: 1 to `localhost:9181` and 2 to `localhost:9182`, all using secure connections:
+
+```yaml
+connections:
+  secure: true
+
+  host: "localhost:9181"
+  connection:
+    host: "localhost:9182"
+    sessions: 2
+```
+
+
+## Generator
+
+The main part of the benchmark is the generator itself, which creates the necessary nodes and defines how requests will be generated. \
+It is defined under the `generator` key.
+
+### Setup
+
+The setup, defined under the `setup` key, describes the nodes that are needed for the test.
+
+Each node is defined with a `node` key in the following format:
+
+```yaml
+node: StringGetter
+
+node:
+  name: StringGetter
+  data: StringGetter
+  repeat: integer
+  node: Node
+```
+
+If only a string is defined, a node with that name will be created. \
+Otherwise, a more detailed definition can be included to set the data or the children of the node. \
+If the `repeat` key is set, the node definition will be used multiple times. For a `repeat` key to be valid, the name of the node needs to be a random string.
+
+Example for a setup:
+
+```yaml
+generator:
+  setup:
+    node: "node1"
+    node:
+      name:
+        random_string:
+          size: 20
+      data: "somedata"
+      repeat: 4
+    node:
+      name:
+        random_string:
+          size: 10
+      repeat: 2
+```
+
+We will create node `/node1` with no data, plus 4 nodes with random names of size 20 and data set to `somedata`. \
+We will also create 2 nodes with no data and random names of size 10, all under the `/` node.
+
+### Requests
+
+While the benchmark is running, requests are generated continuously.
+
+The request generator is defined under the `requests` key. \
+For each request, a `weight` (default: 1) can be defined, which expresses a preference for that request relative to the others (see the sketch below).
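+
+For example, here is a minimal sketch of a weighted `requests` section (the paths below are illustrative placeholders, not taken from the shipped example config). With these weights, the `get` generator is picked roughly three times as often as the `set` generator, which keeps the default weight of 1:
+
+```yaml
+generator:
+  requests:
+    get:
+      weight: 3
+      path: "/some_node"
+    set:
+      path: "/some_node"
+      data: "somedata"
+```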
+
+#### `create`
+
+```yaml
+create:
+  # parent path for created nodes
+  path: string
+
+  # length of the name for the created node (default: 5)
+  name_length: IntegerGetter
+
+  # data for created nodes (default: "")
+  data: StringGetter
+
+  # value in range [0.0, 1.0) denoting how often a remove request should be generated compared to a create request (default: 0)
+  remove_factor: double
+```
+
+#### `set`
+
+```yaml
+set:
+  # paths on which we randomly set data
+  path: PathGetter
+
+  # data to set
+  data: StringGetter
+```
+
+#### `get`
+
+```yaml
+get:
+  # paths for which we randomly get data
+  path: PathGetter
+```
+
+#### `list`
+
+```yaml
+list:
+  # paths for which we randomly do a list request
+  path: PathGetter
+```
+
+#### `multi`
+
+```yaml
+multi:
+  # any request definition defined above can be added
+
+  # optional size for the multi request
+  size: IntegerGetter
+```
+
+A multi request definition can contain any of the request generator definitions described above. \
+If the `size` key is defined, we will randomly pick `size` requests from the defined request generators. \
+Any request generator can be given a higher pick probability by using the `weight` key. \
+If `size` is not defined, a multi request containing all of the defined request generators will always be generated. \
+Both write and read multi requests are supported.
+
+#### Example
+
+```yaml
+generator:
+  requests:
+    create:
+      path: "/test_create"
+      name_length:
+        min_value: 10
+        max_value: 20
+    multi:
+      weight: 20
+      size: 10
+      get:
+        path:
+          children_of: "/test_get1"
+      get:
+        weight: 2
+        path:
+          children_of: "/test_get2"
+```
+
+We defined a request generator that will generate either a `create` or a `multi` request. \
+Each `create` request will create a node under `/test_create` with a randomly generated name of a size from the range `[10, 20]`. \
+A `multi` request will be generated 20 times more often than a `create` request; with these weights, the probability of picking `multi` is 20/21, roughly 95%. \
+Each `multi` request will contain 10 requests, with approximately twice as many `get` requests for children of `/test_get2` as for children of `/test_get1`.
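+
+Putting the pieces together, a minimal end-to-end configuration might look like the following sketch (the host, paths, and values are placeholders; see `./utils/keeper-bench/example.yaml` for a fuller configuration):
+
+```yaml
+concurrency: 4
+iterations: 10000
+
+connections:
+  host: "localhost:9181"
+
+generator:
+  setup:
+    node: "bench"
+  requests:
+    create:
+      path: "/bench"
+      name_length: 10
+      remove_factor: 0.5
+```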
+
+
+## Output
+
+```yaml
+output:
+  # if defined, JSON output of results will be stored at the defined path
+  file: string
+  # or
+  file:
+    # if defined, JSON output of results will be stored at the defined path
+    path: string
+
+    # if set to true, a timestamp will be appended to the output file name (default: false)
+    with_timestamp: boolean
+
+  # if set to true, output will also be printed to stdout (default: false)
+  stdout: boolean
+```
diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index 2f3cf4b0620..f86d2b44dd7 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -1,15 +1,160 @@ #include "Runner.h" +#include -namespace DB +#include "Common/ZooKeeper/ZooKeeperCommon.h" +#include "Common/ZooKeeper/ZooKeeperConstants.h" +#include +#include +#include "IO/ReadBufferFromString.h" +#include +#include +#include + +namespace CurrentMetrics { + extern const Metric LocalThread; + extern const Metric LocalThreadActive; +} -namespace ErrorCodes +namespace DB::ErrorCodes { extern const int CANNOT_BLOCK_SIGNAL; + extern const int BAD_ARGUMENTS; } +Runner::Runner( + std::optional concurrency_, + const std::string & config_path, + const Strings & hosts_strings_, + std::optional max_time_, + std::optional delay_, + std::optional continue_on_error_, + std::optional max_iterations_) + : info(std::make_shared()) +{ + + DB::ConfigProcessor config_processor(config_path, true, false); + auto config = config_processor.loadConfig().configuration; + + generator.emplace(*config); + + if (!hosts_strings_.empty()) + { + for (const auto & host : hosts_strings_) + connection_infos.push_back({.host = host}); + } + else + { + if (!config) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "No config file or hosts defined"); + + parseHostsFromConfig(*config); + } + + std::cerr << "---- Run options ---- " << std::endl; + static constexpr uint64_t DEFAULT_CONCURRENCY = 1; + if (concurrency_) + concurrency = *concurrency_; + else + concurrency = config->getUInt64("concurrency", DEFAULT_CONCURRENCY); + std::cerr << "Concurrency: " << concurrency << std::endl; + + static constexpr uint64_t DEFAULT_ITERATIONS = 0; + if (max_iterations_) + max_iterations = *max_iterations_; + else + max_iterations = config->getUInt64("iterations", DEFAULT_ITERATIONS); + std::cerr << "Iterations: " << max_iterations << std::endl; + + static constexpr double DEFAULT_DELAY = 1.0; + if (delay_) + delay = *delay_; + else + delay = config->getDouble("report_delay", DEFAULT_DELAY); + std::cerr << "Report delay: " << delay << std::endl; + + static constexpr double DEFAULT_TIME_LIMIT = 0.0; + if (max_time_) + max_time = *max_time_; + else + max_time = config->getDouble("timelimit", DEFAULT_TIME_LIMIT); + std::cerr << "Time limit: " << max_time << std::endl; + + if (continue_on_error_) + continue_on_error = *continue_on_error_; + else + continue_on_error = config->getBool("continue_on_error", false); + std::cerr << "Continue on error: " << continue_on_error << std::endl; + + static const std::string output_key = "output"; + print_to_stdout = config->getBool(output_key + ".stdout", false); + std::cerr << "Printing output to stdout: " << print_to_stdout << std::endl; + + static const std::string output_file_key = output_key + ".file"; + if (config->has(output_file_key)) + { + if (config->has(output_file_key + ".path")) + { + file_output = config->getString(output_file_key + ".path"); + output_file_with_timestamp = config->getBool(output_file_key + ".with_timestamp"); + } + else + file_output =
config->getString(output_file_key); + + std::cerr << "Result file path: " << file_output->string() << std::endl; + } + + std::cerr << "---- Run options ----\n" << std::endl; + + pool.emplace(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, concurrency); + queue.emplace(concurrency); } +void Runner::parseHostsFromConfig(const Poco::Util::AbstractConfiguration & config) +{ + ConnectionInfo default_connection_info; + + const auto fill_connection_details = [&](const std::string & key, auto & connection_info) + { + if (config.has(key + ".secure")) + connection_info.secure = config.getBool(key + ".secure"); + + if (config.has(key + ".session_timeout_ms")) + connection_info.session_timeout_ms = config.getInt(key + ".session_timeout_ms"); + + if (config.has(key + ".operation_timeout_ms")) + connection_info.operation_timeout_ms = config.getInt(key + ".operation_timeout_ms"); + + if (config.has(key + ".connection_timeout_ms")) + connection_info.connection_timeout_ms = config.getInt(key + ".connection_timeout_ms"); + }; + + fill_connection_details("connections", default_connection_info); + + Poco::Util::AbstractConfiguration::Keys connections_keys; + config.keys("connections", connections_keys); + + for (const auto & key : connections_keys) + { + std::string connection_key = "connections." + key; + auto connection_info = default_connection_info; + if (key.starts_with("host")) + { + connection_info.host = config.getString(connection_key); + connection_infos.push_back(std::move(connection_info)); + } + else if (key.starts_with("connection") && key != "connection_timeout_ms") + { + connection_info.host = config.getString(connection_key + ".host"); + if (config.has(connection_key + ".sessions")) + connection_info.sessions = config.getUInt64(connection_key + ".sessions"); + + fill_connection_details(connection_key, connection_info); + + connection_infos.push_back(std::move(connection_info)); + } + } +} void Runner::thread(std::vector> zookeepers) { @@ -33,7 +178,7 @@ void Runner::thread(std::vector> zookee while (!extracted) { - extracted = queue.tryPop(request, 100); + extracted = queue->tryPop(request, 100); if (shutdown || (max_iterations && requests_executed >= max_iterations)) @@ -47,9 +192,35 @@ void Runner::thread(std::vector> zookee auto promise = std::make_shared>(); auto future = promise->get_future(); - Coordination::ResponseCallback callback = [promise](const Coordination::Response & response) + Coordination::ResponseCallback callback = [&request, promise](const Coordination::Response & response) { - if (response.error != Coordination::Error::ZOK) + bool set_exception = true; + + if (response.error == Coordination::Error::ZOK) + { + set_exception = false; + } + else if (response.error == Coordination::Error::ZNONODE) + { + /// remove can fail with ZNONODE because of different order of execution + /// of generated create and remove requests + /// this is okay for concurrent runs + if (dynamic_cast(&response)) + set_exception = false; + else if (const auto * multi_response = dynamic_cast(&response)) + { + const auto & responses = multi_response->responses; + size_t i = 0; + while (responses[i]->error != Coordination::Error::ZNONODE) + ++i; + + const auto & multi_request = dynamic_cast(*request); + if (dynamic_cast(&*multi_request.requests[i])) + set_exception = false; + } + } + + if (set_exception) promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); else promise->set_value(response.bytesSize()); @@ -62,14 +233,14 @@ void 
Runner::thread(std::vector> zookee try { auto response_size = future.get(); - double seconds = watch.elapsedSeconds(); + auto microseconds = watch.elapsedMicroseconds(); std::lock_guard lock(mutex); if (request->isReadRequest()) - info->addRead(seconds, 1, request->bytesSize() + response_size); + info->addRead(microseconds, 1, request->bytesSize() + response_size); else - info->addWrite(seconds, 1, request->bytesSize() + response_size); + info->addWrite(microseconds, 1, request->bytesSize() + response_size); } catch (...) { @@ -95,7 +266,7 @@ void Runner::thread(std::vector> zookee { try { - zookeepers = getConnections(); + zookeepers = refreshConnections(); break; } catch (...) @@ -110,13 +281,13 @@ void Runner::thread(std::vector> zookee } } -bool Runner::tryPushRequestInteractively(const Coordination::ZooKeeperRequestPtr & request, DB::InterruptListener & interrupt_listener) +bool Runner::tryPushRequestInteractively(Coordination::ZooKeeperRequestPtr && request, DB::InterruptListener & interrupt_listener) { bool inserted = false; while (!inserted) { - inserted = queue.tryPush(request, 100); + inserted = queue->tryPush(std::move(request), 100); if (shutdown) { @@ -126,13 +297,13 @@ bool Runner::tryPushRequestInteractively(const Coordination::ZooKeeperRequestPtr if (max_time > 0 && total_watch.elapsedSeconds() >= max_time) { - std::cout << "Stopping launch of queries. Requested time limit is exhausted.\n"; + std::cerr << "Stopping launch of queries. Requested time limit is exhausted.\n"; return false; } if (interrupt_listener.check()) { - std::cout << "Stopping launch of queries. SIGINT received." << std::endl; + std::cerr << "Stopping launch of queries. SIGINT received." << std::endl; return false; } @@ -141,7 +312,7 @@ bool Runner::tryPushRequestInteractively(const Coordination::ZooKeeperRequestPtr printNumberOfRequestsExecuted(requests_executed); std::lock_guard lock(mutex); - report(info, concurrency); + info->report(concurrency); delay_watch.restart(); } } @@ -152,23 +323,26 @@ bool Runner::tryPushRequestInteractively(const Coordination::ZooKeeperRequestPtr void Runner::runBenchmark() { - auto aux_connections = getConnections(); + createConnections(); std::cerr << "Preparing to run\n"; - generator->startup(*aux_connections[0]); + generator->startup(*connections[0]); std::cerr << "Prepared\n"; + + auto start_timestamp_ms = Poco::Timestamp().epochMicroseconds() / 1000; + try { for (size_t i = 0; i < concurrency; ++i) { - auto connections = getConnections(); - pool.scheduleOrThrowOnError([this, connections]() mutable { thread(connections); }); + auto thread_connections = connections; + pool->scheduleOrThrowOnError([this, connections = std::move(thread_connections)]() mutable { thread(connections); }); } } catch (...) 
{ shutdown = true; - pool.wait(); + pool->wait(); throw; } @@ -185,31 +359,102 @@ void Runner::runBenchmark() } } - pool.wait(); + pool->wait(); total_watch.stop(); printNumberOfRequestsExecuted(requests_executed); std::lock_guard lock(mutex); - report(info, concurrency); -} + info->report(concurrency); + DB::WriteBufferFromOwnString out; + info->writeJSON(out, concurrency, start_timestamp_ms); + auto output_string = std::move(out.str()); -std::vector> Runner::getConnections() -{ - std::vector> zookeepers; - for (const auto & host_string : hosts_strings) + if (print_to_stdout) + std::cout << output_string << std::endl; + + if (file_output) { - Coordination::ZooKeeper::Node node{Poco::Net::SocketAddress{host_string}, false}; - std::vector nodes; - nodes.push_back(node); - zkutil::ZooKeeperArgs args; - args.session_timeout_ms = 30000; - args.connection_timeout_ms = 1000; - args.operation_timeout_ms = 10000; - zookeepers.emplace_back(std::make_shared(nodes, args, nullptr)); + auto path = *file_output; + + if (output_file_with_timestamp) + { + auto filename = file_output->filename(); + filename = fmt::format("{}_{}{}", filename.stem().generic_string(), start_timestamp_ms, filename.extension().generic_string()); + path = file_output->parent_path() / filename; + } + + std::cerr << "Storing output to " << path << std::endl; + + DB::WriteBufferFromFile file_output_buffer(path); + DB::ReadBufferFromString read_buffer(output_string); + DB::copyData(read_buffer, file_output_buffer); } - - - return zookeepers; } + + +void Runner::createConnections() +{ + DB::EventNotifier::init(); + std::cerr << "---- Creating connections ---- " << std::endl; + for (size_t connection_info_idx = 0; connection_info_idx < connection_infos.size(); ++connection_info_idx) + { + const auto & connection_info = connection_infos[connection_info_idx]; + std::cerr << fmt::format("Creating {} session(s) for:\n" + "- host: {}\n" + "- secure: {}\n" + "- session timeout: {}ms\n" + "- operation timeout: {}ms\n" + "- connection timeout: {}ms", + connection_info.sessions, + connection_info.host, + connection_info.secure, + connection_info.session_timeout_ms, + connection_info.operation_timeout_ms, + connection_info.connection_timeout_ms) << std::endl; + + for (size_t session = 0; session < connection_info.sessions; ++session) + { + connections.emplace_back(getConnection(connection_info)); + connections_to_info_map[connections.size() - 1] = connection_info_idx; + } + } + std::cerr << "---- Done creating connections ----\n" << std::endl; +} + +std::shared_ptr Runner::getConnection(const ConnectionInfo & connection_info) +{ + Coordination::ZooKeeper::Node node{Poco::Net::SocketAddress{connection_info.host}, connection_info.secure}; + std::vector nodes; + nodes.push_back(node); + zkutil::ZooKeeperArgs args; + args.session_timeout_ms = connection_info.session_timeout_ms; + args.connection_timeout_ms = connection_info.operation_timeout_ms; + args.operation_timeout_ms = connection_info.connection_timeout_ms; + return std::make_shared(nodes, args, nullptr); +} + +std::vector> Runner::refreshConnections() +{ + std::lock_guard lock(connection_mutex); + for (size_t connection_idx = 0; connection_idx < connections.size(); ++connection_idx) + { + auto & connection = connections[connection_idx]; + if (connection->isExpired()) + { + const auto & connection_info = connection_infos[connections_to_info_map[connection_idx]]; + connection = getConnection(connection_info); + } + } + return connections; +} + +Runner::~Runner() +{ + queue->clearAndFinish(); + 
shutdown = true; + pool->wait(); + generator->cleanup(*connections[0]); +} + diff --git a/utils/keeper-bench/Runner.h b/utils/keeper-bench/Runner.h index 3976ac720eb..f899f1d538d 100644 --- a/utils/keeper-bench/Runner.h +++ b/utils/keeper-bench/Runner.h @@ -1,17 +1,21 @@ #pragma once +#include "Common/ZooKeeper/ZooKeeperConstants.h" #include #include "Generator.h" #include +#include #include #include #include -#include -#include #include +#include #include +#include #include "Stats.h" +#include + using Ports = std::vector; using Strings = std::vector; @@ -19,25 +23,13 @@ class Runner { public: Runner( - size_t concurrency_, - const std::string & generator_name, + std::optional concurrency_, + const std::string & config_path, const Strings & hosts_strings_, - double max_time_, - double delay_, - bool continue_on_error_, - size_t max_iterations_) - : concurrency(concurrency_) - , pool(concurrency) - , hosts_strings(hosts_strings_) - , generator(getGenerator(generator_name)) - , max_time(max_time_) - , delay(delay_) - , continue_on_error(continue_on_error_) - , max_iterations(max_iterations_) - , info(std::make_shared()) - , queue(concurrency) - { - } + std::optional max_time_, + std::optional delay_, + std::optional continue_on_error_, + std::optional max_iterations_); void thread(std::vector> zookeepers); @@ -46,18 +38,19 @@ public: std::cerr << "Requests executed: " << num << ".\n"; } - bool tryPushRequestInteractively(const Coordination::ZooKeeperRequestPtr & request, DB::InterruptListener & interrupt_listener); + bool tryPushRequestInteractively(Coordination::ZooKeeperRequestPtr && request, DB::InterruptListener & interrupt_listener); void runBenchmark(); - + ~Runner(); private: + void parseHostsFromConfig(const Poco::Util::AbstractConfiguration & config); size_t concurrency = 1; - ThreadPool pool; - Strings hosts_strings; - std::unique_ptr generator; + std::optional pool; + + std::optional generator; double max_time = 0; double delay = 1; bool continue_on_error = false; @@ -66,6 +59,9 @@ private: std::atomic shutdown = false; std::shared_ptr info; + bool print_to_stdout; + std::optional file_output; + bool output_file_with_timestamp; Stopwatch total_watch; Stopwatch delay_watch; @@ -73,7 +69,26 @@ private: std::mutex mutex; using Queue = ConcurrentBoundedQueue; - Queue queue; + std::optional queue; - std::vector> getConnections(); + struct ConnectionInfo + { + std::string host; + + bool secure = false; + int32_t session_timeout_ms = Coordination::DEFAULT_SESSION_TIMEOUT_MS; + int32_t connection_timeout_ms = Coordination::DEFAULT_CONNECTION_TIMEOUT_MS; + int32_t operation_timeout_ms = Coordination::DEFAULT_OPERATION_TIMEOUT_MS; + + size_t sessions = 1; + }; + + std::mutex connection_mutex; + std::vector connection_infos; + std::vector> connections; + std::unordered_map connections_to_info_map; + + void createConnections(); + std::shared_ptr getConnection(const ConnectionInfo & connection_info); + std::vector> refreshConnections(); }; diff --git a/utils/keeper-bench/Stats.cpp b/utils/keeper-bench/Stats.cpp index 1f8b02ed09d..3e7e92db713 100644 --- a/utils/keeper-bench/Stats.cpp +++ b/utils/keeper-bench/Stats.cpp @@ -1,67 +1,177 @@ #include "Stats.h" #include -void report(std::shared_ptr & info, size_t concurrency) +#include +#include +#include +#include + +void Stats::StatsCollector::add(uint64_t microseconds, size_t requests_inc, size_t bytes_inc) +{ + work_time += microseconds; + requests += requests_inc; + requests_bytes += bytes_inc; + sampler.insert(microseconds); +} + +void 
Stats::addRead(uint64_t microseconds, size_t requests_inc, size_t bytes_inc) +{ + read_collector.add(microseconds, requests_inc, bytes_inc); +} + +void Stats::addWrite(uint64_t microseconds, size_t requests_inc, size_t bytes_inc) +{ + write_collector.add(microseconds, requests_inc, bytes_inc); +} + +void Stats::StatsCollector::clear() +{ + requests = 0; + work_time = 0; + requests_bytes = 0; + sampler.clear(); +} + +void Stats::clear() +{ + read_collector.clear(); + write_collector.clear(); +} + +std::pair Stats::StatsCollector::getThroughput(size_t concurrency) +{ + assert(requests != 0); + double seconds = work_time / 1'000'000.0 / concurrency; + + return {requests / seconds, requests_bytes / seconds}; +} + +double Stats::StatsCollector::getPercentile(double percent) +{ + return sampler.quantileNearest(percent / 100.0) / 1000.0; +} + +void Stats::report(size_t concurrency) { std::cerr << "\n"; + const auto & read_requests = read_collector.requests; + const auto & write_requests = write_collector.requests; + /// Avoid zeros, nans or exceptions - if (0 == info->read_requests && 0 == info->write_requests) + if (0 == read_requests && 0 == write_requests) return; - double read_seconds = info->read_work_time / concurrency; - double write_seconds = info->write_work_time / concurrency; + auto [read_rps, read_bps] = read_collector.getThroughput(concurrency); + auto [write_rps, write_bps] = write_collector.getThroughput(concurrency); - std::cerr << "read requests " << info->read_requests << ", write requests " << info->write_requests << ", "; - if (info->errors) - { - std::cerr << "errors " << info->errors << ", "; - } - if (0 != info->read_requests) + std::cerr << "read requests " << read_requests << ", write requests " << write_requests << ", "; + if (errors) + std::cerr << "errors " << errors << ", "; + + if (0 != read_requests) { std::cerr - << "Read RPS: " << (info->read_requests / read_seconds) << ", " - << "Read MiB/s: " << (info->requests_read_bytes / read_seconds / 1048576); - if (0 != info->write_requests) + << "Read RPS: " << read_rps << ", " + << "Read MiB/s: " << read_bps / 1048576; + + if (0 != write_requests) std::cerr << ", "; } - if (0 != info->write_requests) + + if (0 != write_requests) { std::cerr - << "Write RPS: " << (info->write_requests / write_seconds) << ", " - << "Write MiB/s: " << (info->requests_write_bytes / write_seconds / 1048576) << ". " + << "Write RPS: " << write_rps << ", " + << "Write MiB/s: " << write_bps / 1048576 << ". 
" << "\n"; } std::cerr << "\n"; - auto print_percentile = [&](double percent, Stats::Sampler & sampler) + auto print_percentile = [&](double percent, Stats::StatsCollector & collector) { std::cerr << percent << "%\t\t"; - std::cerr << sampler.quantileNearest(percent / 100.0) << " sec.\t"; + std::cerr << collector.getPercentile(percent) << " msec.\t"; std::cerr << "\n"; }; - if (0 != info->read_requests) + const auto print_all_percentiles = [&](auto & collector) + { + for (int percent = 0; percent <= 90; percent += 10) + print_percentile(percent, collector); + + print_percentile(95, collector); + print_percentile(99, collector); + print_percentile(99.9, collector); + print_percentile(99.99, collector); + }; + + if (0 != read_requests) { std::cerr << "Read sampler:\n"; - for (int percent = 0; percent <= 90; percent += 10) - print_percentile(percent, info->read_sampler); - - print_percentile(95, info->read_sampler); - print_percentile(99, info->read_sampler); - print_percentile(99.9, info->read_sampler); - print_percentile(99.99, info->read_sampler); + print_all_percentiles(read_collector); } - if (0 != info->write_requests) + if (0 != write_requests) { std::cerr << "Write sampler:\n"; - for (int percent = 0; percent <= 90; percent += 10) - print_percentile(percent, info->write_sampler); - - print_percentile(95, info->write_sampler); - print_percentile(99, info->write_sampler); - print_percentile(99.9, info->write_sampler); - print_percentile(99.99, info->write_sampler); + print_all_percentiles(write_collector); } } + +void Stats::writeJSON(DB::WriteBuffer & out, size_t concurrency, int64_t start_timestamp) +{ + using namespace rapidjson; + Document results; + auto & allocator = results.GetAllocator(); + results.SetObject(); + + results.AddMember("timestamp", Value(start_timestamp), allocator); + + const auto get_results = [&](auto & collector) + { + Value specific_results(kObjectType); + + specific_results.AddMember("total_requests", Value(static_cast(collector.requests)), allocator); + + auto [rps, bps] = collector.getThroughput(concurrency); + specific_results.AddMember("requests_per_second", Value(rps), allocator); + specific_results.AddMember("bytes_per_second", Value(bps), allocator); + + Value percentiles(kArrayType); + + const auto add_percentile = [&](double percent) + { + Value percentile(kObjectType); + Value percent_key(fmt::format("{:.2f}", percent).c_str(), allocator); + percentile.AddMember(percent_key, Value(collector.getPercentile(percent)), allocator); + percentiles.PushBack(percentile, allocator); + }; + + for (int percent = 0; percent <= 90; percent += 10) + add_percentile(percent); + + add_percentile(95); + add_percentile(99); + add_percentile(99.9); + add_percentile(99.99); + + specific_results.AddMember("percentiles", percentiles, allocator); + + return specific_results; + }; + + if (read_collector.requests != 0) + results.AddMember("read_results", get_results(read_collector), results.GetAllocator()); + + if (write_collector.requests != 0) + results.AddMember("write_results", get_results(write_collector), results.GetAllocator()); + + StringBuffer strbuf; + strbuf.Clear(); + Writer writer(strbuf); + results.Accept(writer); + + const char * output_string = strbuf.GetString(); + out.write(output_string, strlen(output_string)); +} diff --git a/utils/keeper-bench/Stats.h b/utils/keeper-bench/Stats.h index 1b9a31bb734..bc50588e837 100644 --- a/utils/keeper-bench/Stats.h +++ b/utils/keeper-bench/Stats.h @@ -5,48 +5,38 @@ #include +#include + struct Stats { - std::atomic 
read_requests{0}; - std::atomic write_requests{0}; size_t errors = 0; - size_t requests_write_bytes = 0; - size_t requests_read_bytes = 0; - double read_work_time = 0; - double write_work_time = 0; using Sampler = ReservoirSampler; - Sampler read_sampler {1 << 16}; - Sampler write_sampler {1 << 16}; - - void addRead(double seconds, size_t requests_inc, size_t bytes_inc) + struct StatsCollector { - read_work_time += seconds; - read_requests += requests_inc; - requests_read_bytes += bytes_inc; - read_sampler.insert(seconds); - } + std::atomic requests{0}; + uint64_t requests_bytes = 0; + uint64_t work_time = 0; + Sampler sampler; - void addWrite(double seconds, size_t requests_inc, size_t bytes_inc) - { - write_work_time += seconds; - write_requests += requests_inc; - requests_write_bytes += bytes_inc; - write_sampler.insert(seconds); - } + /// requests/second, bytes/second + std::pair getThroughput(size_t concurrency); + double getPercentile(double percent); - void clear() - { - read_requests = 0; - write_requests = 0; - read_work_time = 0; - write_work_time = 0; - requests_read_bytes = 0; - requests_write_bytes = 0; - read_sampler.clear(); - write_sampler.clear(); - } + void add(uint64_t microseconds, size_t requests_inc, size_t bytes_inc); + void clear(); + }; + + StatsCollector read_collector; + StatsCollector write_collector; + + void addRead(uint64_t microseconds, size_t requests_inc, size_t bytes_inc); + void addWrite(uint64_t microseconds, size_t requests_inc, size_t bytes_inc); + + void clear(); + + void report(size_t concurrency); + void writeJSON(DB::WriteBuffer & out, size_t concurrency, int64_t start_timestamp); }; -void report(std::shared_ptr & info, size_t concurrency); diff --git a/utils/keeper-bench/example.yaml b/utils/keeper-bench/example.yaml new file mode 100644 index 00000000000..e800e923482 --- /dev/null +++ b/utils/keeper-bench/example.yaml @@ -0,0 +1,117 @@ +concurrency: 20 +iterations: 10000 +delay: 4 +timelimit: 300 +continue_on_errors: true + +connections: + operation_timeout_ms: 3000 + connection_timeout_ms: 40000 + + connection: + secure: false + operation_timeout_ms: 2000 + session_timeout_ms: 2000 + connection_timeout_ms: 50000 + host: "localhost:9181" + sessions: 1 + + host: "localhost:9181" + +generator: + setup: + node: + name: "test3" + node: + name: "test_create" + node: + name: "test4" + node: + name: "test" + data: "somedata" + node: + repeat: 4 + name: + random_string: + size: 15 + data: + random_string: + size: + min_value: 10 + max_value: 20 + node: + repeat: 2 + node: + repeat: 2 + name: + random_string: + size: 12 + name: + random_string: + size: 15 + data: + random_string: + size: + min_value: 10 + max_value: 20 + node: + name: "test2" + data: "somedata" + requests: + create: + path: "/test_create" + name_length: 10 + remove_factor: 0.5 + multi: + size: 20 + create: + path: "/test" + data: + random_string: + size: + min_value: 10 + max_value: 20 + remove_factor: 0.8 + set: + weight: 2 + path: + - "/test3" + - "/test4" + path: + children_of: "/test" + data: + random_string: + size: 10 + get: + path: + - "/test3" + - "/test4" + path: + children_of: "/test" + + multi: + weight: 10 + get: + path: + - "/test3" + - "/test4" + path: + children_of: "/test" + list: + path: + - "/test3" + path: + children_of: "/test" + + list: + path: + - "/test3" + - "/test4" + path: + children_of: "/test" + +output: + file: + path: "output.json" + with_timestamp: true + stdout: true diff --git a/utils/keeper-bench/main.cpp b/utils/keeper-bench/main.cpp index 
39af28e7580..0753d66850f 100644 --- a/utils/keeper-bench/main.cpp +++ b/utils/keeper-bench/main.cpp @@ -3,10 +3,24 @@ #include "Runner.h" #include "Stats.h" #include "Generator.h" +#include "Common/Exception.h" #include #include +#include -using namespace std; +namespace +{ + +template +std::optional valueToOptional(const boost::program_options::variable_value & value) +{ + if (value.empty()) + return std::nullopt; + + return value.as(); +} + +} int main(int argc, char *argv[]) { @@ -19,15 +33,14 @@ int main(int argc, char *argv[]) boost::program_options::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth()); desc.add_options() - ("help", "produce help message") - ("generator", value()->default_value("set_small_data"), "query to execute") - ("concurrency,c", value()->default_value(1), "number of parallel queries") - ("delay,d", value()->default_value(1), "delay between intermediate reports in seconds (set 0 to disable reports)") - ("iterations,i", value()->default_value(0), "amount of queries to be executed") - ("timelimit,t", value()->default_value(0.), "stop launch of queries after specified time limit") - ("hosts,h", value()->multitoken(), "") + ("help", "produce help message") + ("config", value()->default_value(""), "yaml/xml file containing configuration") + ("concurrency,c", value(), "number of parallel queries") + ("report-delay,d", value(), "delay between intermediate reports in seconds (set 0 to disable reports)") + ("iterations,i", value(), "amount of queries to be executed") + ("time-limit,t", value(), "stop launch of queries after specified time limit") + ("hosts,h", value()->multitoken()->default_value(Strings{}, ""), "") ("continue_on_errors", "continue testing even if a query fails") - ("reconnect", "establish new connection for every query") ; boost::program_options::variables_map options; @@ -41,15 +54,22 @@ int main(int argc, char *argv[]) return 1; } - Runner runner(options["concurrency"].as(), - options["generator"].as(), - options["hosts"].as(), - options["timelimit"].as(), - options["delay"].as(), - options.count("continue_on_errors"), - options["iterations"].as()); + Runner runner(valueToOptional(options["concurrency"]), + options["config"].as(), + options["hosts"].as(), + valueToOptional(options["time-limit"]), + valueToOptional(options["report-delay"]), + options.count("continue_on_errors") ? 
std::optional(true) : std::nullopt, + valueToOptional(options["iterations"])); - runner.runBenchmark(); + try + { + runner.runBenchmark(); + } + catch (const DB::Exception & e) + { + std::cout << "Got exception while trying to run benchmark: " << e.message() << std::endl; + } return 0; } diff --git a/utils/keeper-data-dumper/CMakeLists.txt b/utils/keeper-data-dumper/CMakeLists.txt index 1f55e50e68e..136d6f2268c 100644 --- a/utils/keeper-data-dumper/CMakeLists.txt +++ b/utils/keeper-data-dumper/CMakeLists.txt @@ -1,2 +1,7 @@ +if (NOT TARGET ch_contrib::nuraft) + message (WARNING "Not building keeper-data-dumper due to nuraft is disabled") + return () +endif () + clickhouse_add_executable(keeper-data-dumper main.cpp) target_link_libraries(keeper-data-dumper PRIVATE dbms) diff --git a/utils/keeper-overload/keeper-overload.py b/utils/keeper-overload/keeper-overload.py index bdb4563c713..0a059b10588 100755 --- a/utils/keeper-overload/keeper-overload.py +++ b/utils/keeper-overload/keeper-overload.py @@ -166,7 +166,7 @@ def main(args): keeper_bench_path = args.keeper_bench_path keepers = [] - for (port, server_id) in zip(PORTS, SERVER_IDS): + for port, server_id in zip(PORTS, SERVER_IDS): keepers.append( Keeper( keeper_binary_path, server_id, port, workdir, args.with_thread_fuzzer diff --git a/utils/list-licenses/list-licenses.sh b/utils/list-licenses/list-licenses.sh index db3eb5e59e8..dd23e6321c8 100755 --- a/utils/list-licenses/list-licenses.sh +++ b/utils/list-licenses/list-licenses.sh @@ -40,14 +40,21 @@ ls -1 -d ${LIBS_PATH}/*/ | ${GREP_CMD} -F -v -- '-cmake' | LC_ALL=C sort | while ${GREP_CMD} -q -i -F 'Altered source versions must be plainly marked as such' "$LIB_LICENSE" && ${GREP_CMD} -q -i -F 'This notice may not be removed or altered' "$LIB_LICENSE" && echo "zLib") || + (${GREP_CMD} -q -i -F 'This program, "bzip2", the associated library "libbzip2"' "$LIB_LICENSE" && + echo "bzip2") || (${GREP_CMD} -q -i -F 'Permission is hereby granted, free of charge, to any person' "$LIB_LICENSE" && - ${GREP_CMD} -q -i -F 'The above copyright notice and this permission notice shall be included' "$LIB_LICENSE" && + ${GREP_CMD} -q -i -F 'The above copyright notice and this permission notice shall be' "$LIB_LICENSE" && ${GREP_CMD} -q -i -F 'THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND' "$LIB_LICENSE" && echo "MIT") || + (${GREP_CMD} -q -F 'PostgreSQL' "$LIB_LICENSE" && + echo "PostgreSQL") || (${GREP_CMD} -q -i -F 'Permission to use, copy, modify, and distribute this software for any purpose' "$LIB_LICENSE" && ${GREP_CMD} -q -i -F 'the name of a copyright holder shall not' "$LIB_LICENSE" && ${GREP_CMD} -q -i -F 'THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND' "$LIB_LICENSE" && echo "MIT/curl") || + (${GREP_CMD} -q -i -F 'OpenLDAP Public License' "$LIB_LICENSE" && + ${GREP_CMD} -q -i -F 'Version 2.8' "$LIB_LICENSE" && + echo "OpenLDAP Version 2.8") || (${GREP_CMD} -q -i -F 'Redistributions of source code must retain the above copyright' "$LIB_LICENSE" && ${GREP_CMD} -q -i -F 'Redistributions in binary form must reproduce' "$LIB_LICENSE" && ${GREP_CMD} -q -i -F 'Neither the name' "$LIB_LICENSE" && @@ -55,6 +62,14 @@ ls -1 -d ${LIBS_PATH}/*/ | ${GREP_CMD} -F -v -- '-cmake' | LC_ALL=C sort | while (${GREP_CMD} -q -i -F 'Redistributions of source code must retain the above copyright' "$LIB_LICENSE" && ${GREP_CMD} -q -i -F 'Redistributions in binary form must reproduce' "$LIB_LICENSE" && echo "BSD 2-clause") || + (${GREP_CMD} -q -i -F 'Permission to use, copy, modify, and 
distribute this software' "$LIB_LICENSE" && + ${GREP_CMD} -q -i -F 'documentation for any purpose and without fee is hereby granted' "$LIB_LICENSE" && + ${GREP_CMD} -q -i -F 'the above copyright notice appear in all copies and that both that copyright' "$LIB_LICENSE" && + ${GREP_CMD} -q -i -F 'notice and this permission notice appear in supporting documentation' "$LIB_LICENSE" && + ${GREP_CMD} -q -i -F 'not be used in advertising or publicity pertaining' "$LIB_LICENSE" && + ${GREP_CMD} -q -i -F 'distribution of the software without specific, written prior permission' "$LIB_LICENSE" && + ${GREP_CMD} -q -i -F 'makes no representations about the suitability of this software' "$LIB_LICENSE" && + echo "HPND") || echo "Unknown") RELATIVE_PATH=$(echo "$LIB_LICENSE" | sed -r -e 's!^.+/contrib/!/contrib/!') diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 3814e94bf24..653a0cd5388 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,7 +1,23 @@ +v23.4.2.11-stable 2023-05-02 +v23.4.1.1943-stable 2023-04-27 +v23.3.2.37-lts 2023-04-22 +v23.3.1.2823-lts 2023-03-31 +v23.2.6.34-stable 2023-04-23 +v23.2.5.46-stable 2023-04-03 +v23.2.4.12-stable 2023-03-10 +v23.2.3.17-stable 2023-03-06 +v23.2.2.20-stable 2023-03-01 v23.2.1.2537-stable 2023-02-23 +v23.1.7.30-stable 2023-04-22 +v23.1.6.42-stable 2023-04-03 +v23.1.5.24-stable 2023-03-10 +v23.1.4.58-stable 2023-03-01 v23.1.3.5-stable 2023-02-03 v23.1.2.9-stable 2023-01-29 v23.1.1.3077-stable 2023-01-25 +v22.12.6.22-stable 2023-03-31 +v22.12.5.34-stable 2023-03-10 +v22.12.4.76-stable 2023-03-01 v22.12.3.5-stable 2023-01-10 v22.12.2.25-stable 2023-01-06 v22.12.1.1752-stable 2022-12-15 @@ -25,6 +41,10 @@ v22.9.4.32-stable 2022-10-26 v22.9.3.18-stable 2022-09-30 v22.9.2.7-stable 2022-09-23 v22.9.1.2603-stable 2022-09-22 +v22.8.17.17-lts 2023-04-22 +v22.8.16.32-lts 2023-04-04 +v22.8.15.23-lts 2023-03-10 +v22.8.14.53-lts 2023-02-27 v22.8.13.20-lts 2023-01-29 v22.8.12.45-lts 2023-01-10 v22.8.11.15-lts 2022-12-08 @@ -63,6 +83,7 @@ v22.4.5.9-stable 2022-05-06 v22.4.4.7-stable 2022-04-29 v22.4.3.3-stable 2022-04-26 v22.4.2.1-stable 2022-04-22 +v22.3.20.29-lts 2023-03-31 v22.3.19.6-lts 2023-02-27 v22.3.18.37-lts 2023-02-15 v22.3.17.13-lts 2023-01-12 diff --git a/utils/memcpy-bench/CMakeLists.txt b/utils/memcpy-bench/CMakeLists.txt index 460a06ba851..c0b0b8a589d 100644 --- a/utils/memcpy-bench/CMakeLists.txt +++ b/utils/memcpy-bench/CMakeLists.txt @@ -1,3 +1,9 @@ +# memcpy_jart.S contains position dependent code +if (CMAKE_POSITION_INDEPENDENT_CODE OR NOT OS_LINUX OR NOT ARCH_AMD64) + message (WARNING "Not building memcpy-bench due to PIC enabled or not Linux or not x86") + return () +endif () + enable_language(ASM) clickhouse_add_executable (memcpy-bench diff --git a/utils/memcpy-bench/memcpy-bench.cpp b/utils/memcpy-bench/memcpy-bench.cpp index 8b75164eb60..bb571200d07 100644 --- a/utils/memcpy-bench/memcpy-bench.cpp +++ b/utils/memcpy-bench/memcpy-bench.cpp @@ -1,6 +1,4 @@ -#ifdef HAS_RESERVED_IDENTIFIER #pragma clang diagnostic ignored "-Wreserved-identifier" -#endif #include #include diff --git a/utils/s3tools/s3uploader b/utils/s3tools/s3uploader index 33db76f57f4..d53661614c0 100755 --- a/utils/s3tools/s3uploader +++ b/utils/s3tools/s3uploader @@ -28,39 +28,48 @@ class S3API(object): bucket = self.connection.get_bucket(bucket_name) key = bucket.initiate_multipart_upload(s3_path) logging.info("Will upload to s3 path %s", s3_path) - chunksize = 1024 * 1024 * 1024 # 1 GB + 
chunksize = 1024 * 1024 * 1024 # 1 GB filesize = os.stat(file_path).st_size logging.info("File size is %s", filesize) chunkcount = int(math.ceil(filesize / chunksize)) def call_back(x, y): print("Uploaded {}/{} bytes".format(x, y)) + try: for i in range(chunkcount + 1): logging.info("Uploading chunk %s of %s", i, chunkcount + 1) offset = chunksize * i bytes_size = min(chunksize, filesize - offset) - with open(file_path, 'r') as fp: + with open(file_path, "r") as fp: fp.seek(offset) - key.upload_part_from_file(fp=fp, part_num=i+1, - size=bytes_size, cb=call_back, - num_cb=100) + key.upload_part_from_file( + fp=fp, part_num=i + 1, size=bytes_size, cb=call_back, num_cb=100 + ) key.complete_upload() except Exception as ex: key.cancel_upload() raise ex logging.info("Contents were set") return "https://{bucket}.{mds_url}/{path}".format( - bucket=bucket_name, mds_url=self.mds_url, path=s3_path) + bucket=bucket_name, mds_url=self.mds_url, path=s3_path + ) def set_file_contents(self, bucket, local_file_path, s3_file_path): key = Key(bucket) key.key = s3_file_path file_size = os.stat(local_file_path).st_size - logging.info("Uploading file `%s` to `%s`. Size is %s", local_file_path, s3_file_path, file_size) + logging.info( + "Uploading file `%s` to `%s`. Size is %s", + local_file_path, + s3_file_path, + file_size, + ) + def call_back(x, y): print("Uploaded {}/{} bytes".format(x, y)) + key.set_contents_from_filename(local_file_path, cb=call_back) def upload_data_for_static_files_disk(self, bucket_name, directory_path, s3_path): @@ -74,12 +83,14 @@ class S3API(object): path = root.split(os.sep) for file in files: local_file_path = os.path.join(root, file) - s3_file = local_file_path[len(directory_path) + 1:] + s3_file = local_file_path[len(directory_path) + 1 :] s3_file_path = os.path.join(s3_path, s3_file) self.set_file_contents(bucket, local_file_path, s3_file_path) logging.info("Uploading finished") - return "https://{bucket}.{mds_url}/{path}".format(bucket=bucket_name, mds_url=self.mds_url, path=s3_path) + return "https://{bucket}.{mds_url}/{path}".format( + bucket=bucket_name, mds_url=self.mds_url, path=s3_path + ) def list_bucket_keys(self, bucket_name): bucket = self.connection.get_bucket(bucket_name) @@ -91,100 +102,119 @@ class S3API(object): bucket.get_all_keys() for obj in bucket.get_all_keys(): if obj.key.startswith(folder_path): - print('Removing ' + obj.key) + print("Removing " + obj.key) obj.delete() -def make_tar_file_for_table(clickhouse_data_path, db_name, table_name, - tmp_prefix): - - relative_data_path = os.path.join('data', db_name, table_name) - relative_meta_path = os.path.join('metadata', db_name, table_name + '.sql') +def make_tar_file_for_table(clickhouse_data_path, db_name, table_name, tmp_prefix): + relative_data_path = os.path.join("data", db_name, table_name) + relative_meta_path = os.path.join("metadata", db_name, table_name + ".sql") path_to_data = os.path.join(clickhouse_data_path, relative_data_path) path_to_metadata = os.path.join(clickhouse_data_path, relative_meta_path) - temporary_file_name = tmp_prefix + '/{tname}.tar'.format(tname=table_name) + temporary_file_name = tmp_prefix + "/{tname}.tar".format(tname=table_name) with tarfile.open(temporary_file_name, "w") as bundle: bundle.add(path_to_data, arcname=relative_data_path) bundle.add(path_to_metadata, arcname=relative_meta_path) return temporary_file_name -USAGE_EXAMPLES = ''' +USAGE_EXAMPLES = """ examples: \t./s3uploader --dataset-name some_ds --access-key-id XXX --secret-access-key YYY --clickhouse-data-path 
/opt/clickhouse/ --table-name default.some_tbl --bucket-name some-bucket \t./s3uploader --dataset-name some_ds --access-key-id XXX --secret-access-key YYY --file-path some_ds.tsv.xz --bucket-name some-bucket --s3-path /path/to/ -''' +""" if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") parser = argparse.ArgumentParser( description="Simple tool for uploading datasets to clickhouse S3", - usage='%(prog)s [options] {}'.format(USAGE_EXAMPLES)) - parser.add_argument('--s3-api-url', default='s3.amazonaws.com') - parser.add_argument('--s3-common-url', default='s3.amazonaws.com') - parser.add_argument('--bucket-name', default='clickhouse-datasets') - parser.add_argument('--dataset-name', required=True, - help='Name of dataset, will be used in uploaded path') - parser.add_argument('--access-key-id', required=True) - parser.add_argument('--secret-access-key', required=True) - parser.add_argument('--clickhouse-data-path', - default='/var/lib/clickhouse/', - help='Path to clickhouse database on filesystem') - parser.add_argument('--s3-path', help='Path in s3, where to upload file') - parser.add_argument('--tmp-prefix', default='/tmp', - help='Prefix to store temporary downloaded file') + usage="%(prog)s [options] {}".format(USAGE_EXAMPLES), + ) + parser.add_argument("--s3-api-url", default="s3.amazonaws.com") + parser.add_argument("--s3-common-url", default="s3.amazonaws.com") + parser.add_argument("--bucket-name", default="clickhouse-datasets") + parser.add_argument( + "--dataset-name", + required=True, + help="Name of dataset, will be used in uploaded path", + ) + parser.add_argument("--access-key-id", required=True) + parser.add_argument("--secret-access-key", required=True) + parser.add_argument( + "--clickhouse-data-path", + default="/var/lib/clickhouse/", + help="Path to clickhouse database on filesystem", + ) + parser.add_argument("--s3-path", help="Path in s3, where to upload file") + parser.add_argument( + "--tmp-prefix", default="/tmp", help="Prefix to store temporary downloaded file" + ) data_group = parser.add_mutually_exclusive_group(required=True) - table_name_argument = data_group.add_argument('--table-name', - help='Name of table with database, if you are uploading partitions') - data_group.add_argument('--file-path', - help='Name of file, if you are uploading') - data_group.add_argument('--directory-path', help='Path to directory with files to upload') - data_group.add_argument('--list-directory', help='List s3 directory by --directory-path') - data_group.add_argument('--remove-directory', help='Remove s3 directory by --directory-path') + table_name_argument = data_group.add_argument( + "--table-name", + help="Name of table with database, if you are uploading partitions", + ) + data_group.add_argument("--file-path", help="Name of file, if you are uploading") + data_group.add_argument( + "--directory-path", help="Path to directory with files to upload" + ) + data_group.add_argument( + "--list-directory", help="List s3 directory by --directory-path" + ) + data_group.add_argument( + "--remove-directory", help="Remove s3 directory by --directory-path" + ) args = parser.parse_args() if args.table_name is not None and args.clickhouse_data_path is None: - raise argparse.ArgumentError(table_name_argument, - "You should specify --clickhouse-data-path to upload --table") + raise argparse.ArgumentError( + table_name_argument, + "You should specify 
--clickhouse-data-path to upload --table", + ) s3_conn = S3API( - args.access_key_id, args.secret_access_key, - args.s3_api_url, args.s3_common_url) + args.access_key_id, args.secret_access_key, args.s3_api_url, args.s3_common_url + ) - file_path = '' + file_path = "" directory_path = args.directory_path s3_path = args.s3_path if args.list_directory: s3_conn.list_bucket_keys(args.bucket_name) elif args.remove_directory: - print('Removing s3 path: ' + args.remove_directory) + print("Removing s3 path: " + args.remove_directory) s3_conn.remove_folder_from_bucket(args.bucket_name, args.remove_directory) elif args.directory_path is not None: - url = s3_conn.upload_data_for_static_files_disk(args.bucket_name, directory_path, s3_path) + url = s3_conn.upload_data_for_static_files_disk( + args.bucket_name, directory_path, s3_path + ) logging.info("Data uploaded: %s", url) else: - if args.table_name is not None: - if '.' not in args.table_name: - db_name = 'default' + if "." not in args.table_name: + db_name = "default" else: - db_name, table_name = args.table_name.split('.') + db_name, table_name = args.table_name.split(".") file_path = make_tar_file_for_table( - args.clickhouse_data_path, db_name, table_name, args.tmp_prefix) + args.clickhouse_data_path, db_name, table_name, args.tmp_prefix + ) else: file_path = args.file_path - if 'tsv' in file_path: + if "tsv" in file_path: s3_path = os.path.join( - args.dataset_name, 'tsv', os.path.basename(file_path)) + args.dataset_name, "tsv", os.path.basename(file_path) + ) if args.table_name is not None: s3_path = os.path.join( - args.dataset_name, 'partitions', os.path.basename(file_path)) + args.dataset_name, "partitions", os.path.basename(file_path) + ) elif args.s3_path is not None: s3_path = os.path.join( - args.dataset_name, args.s3_path, os.path.basename(file_path)) + args.dataset_name, args.s3_path, os.path.basename(file_path) + ) else: raise Exception("Don't know s3-path to upload") diff --git a/utils/security-generator/generate_security.py b/utils/security-generator/generate_security.py index d25612e8bc6..83180ccce1c 100755 --- a/utils/security-generator/generate_security.py +++ b/utils/security-generator/generate_security.py @@ -48,17 +48,20 @@ A public disclosure date is negotiated by the ClickHouse maintainers and the bug """ -def generate_supported_versions(): +def generate_supported_versions() -> str: with open(VERSIONS_FILE, "r", encoding="utf-8") as fd: versions = [line.split(maxsplit=1)[0][1:] for line in fd.readlines()] # The versions in VERSIONS_FILE are ordered ascending, so the first one is # the greatest one. 
We may have supported versions in the previous year - unsupported_year = int(versions[0].split(".", maxsplit=1)[0]) - 2 - # 3 supported versions - supported = [] # type: List[str] - # 2 LTS versions, one of them could be in supported + greatest_year = int(versions[0].split(".", maxsplit=1)[0]) + unsupported_year = greatest_year - 2 + # 3 regular versions + regular = [] # type: List[str] + max_regular = 3 + # 2 LTS versions, one of them could be in regular lts = [] # type: List[str] + max_lts = 2 # The rest are unsupported unsupported = [] # type: List[str] table = [ @@ -69,18 +72,21 @@ def generate_supported_versions(): year = int(version.split(".")[0]) month = int(version.split(".")[1]) version = f"{year}.{month}" - if version in supported or version in lts: + to_append = "" + if version in regular or version in lts: continue - if len(supported) < 3: - supported.append(version) - if len(lts) < 2 and month in [3, 8]: - # The version can be LTS as well - lts.append(version) - table.append(f"| {version} | ✔️ |") - continue - if len(lts) < 2 and month in [3, 8]: + if len(regular) < max_regular: + regular.append(version) + to_append = f"| {version} | ✔️ |" + if len(lts) < max_lts and month in [3, 8]: lts.append(version) - table.append(f"| {version} | ✔️ |") + to_append = f"| {version} | ✔️ |" + if to_append: + if len(regular) == max_regular and len(lts) == max_lts: + # if we reached the max number of supported versions, the rest + # are unsupported, so year.* will be used + unsupported_year = min(greatest_year - 1, year) + table.append(to_append) continue if year <= unsupported_year: # The whole year is unsupported @@ -92,7 +98,7 @@ def generate_supported_versions(): return "\n".join(table) + "\n" -def main(): +def main() -> None: print(HEADER) print(generate_supported_versions()) print(FOOTER) diff --git a/utils/self-extracting-executable/decompressor.cpp b/utils/self-extracting-executable/decompressor.cpp index 37fbd043814..d41b9b1ebe1 100644 --- a/utils/self-extracting-executable/decompressor.cpp +++ b/utils/self-extracting-executable/decompressor.cpp @@ -168,6 +168,10 @@ int decompress(char * input, char * output, off_t start, off_t end, size_t max_n return 0; } +bool isSudo() +{ + return geteuid() == 0; +} /// Read data about files and decompress them.
int decompressFiles(int input_fd, char * path, char * name, bool & have_compressed_analoge, bool & has_exec, char * decompressed_suffix, uint64_t * decompressed_umask) @@ -220,6 +224,8 @@ int decompressFiles(int input_fd, char * path, char * name, bool & have_compress return 1; } + bool is_sudo = isSudo(); + FileData file_info; /// Decompress files with appropriate file names for (size_t i = 0; i < le64toh(metadata.number_of_files); ++i) @@ -319,6 +325,9 @@ int decompressFiles(int input_fd, char * path, char * name, bool & have_compress perror("fsync"); if (0 != close(output_fd)) perror("close"); + + if (is_sudo) + chown(file_name, info_in.st_uid, info_in.st_gid); } if (0 != munmap(input, info_in.st_size)) @@ -414,6 +423,13 @@ int main(int/* argc*/, char* argv[]) else name = file_path; + struct stat input_info; + if (0 != stat(self, &input_info)) + { + perror("stat"); + return 1; + } + #if !defined(OS_DARWIN) && !defined(OS_FREEBSD) /// get inode of this executable uint64_t inode = getInode(self); @@ -441,13 +457,6 @@ int main(int/* argc*/, char* argv[]) return 1; } - struct stat input_info; - if (0 != stat(self, &input_info)) - { - perror("stat"); - return 1; - } - /// inconsistency in WSL1 Ubuntu - inode reported in /proc/self/maps is a 64bit to /// 32bit conversion of input_info.st_ino if (input_info.st_ino & 0xFFFFFFFF00000000 && !(inode & 0xFFFFFFFF00000000)) @@ -532,6 +541,9 @@ int main(int/* argc*/, char* argv[]) return 1; } + if (isSudo()) + chown(static_cast(self), input_info.st_uid, input_info.st_gid); + if (has_exec) { #if !defined(OS_DARWIN) && !defined(OS_FREEBSD) diff --git a/utils/test_history/test-history b/utils/test_history/test-history index fdd6c36e9dc..5f031af1d3a 100755 --- a/utils/test_history/test-history +++ b/utils/test_history/test-history @@ -11,13 +11,14 @@ from termcolor import colored import sys COLORMAP = { - "success": colored("success", 'green'), - "failure": colored("failure", 'red'), - "error": colored("error", 'red'), - "pending": colored("pending", 'yellow'), - "not run": colored("not run", 'white'), + "success": colored("success", "green"), + "failure": colored("failure", "red"), + "error": colored("error", "red"), + "pending": colored("pending", "yellow"), + "not run": colored("not run", "white"), } + def _filter_statuses(statuses): """ Squash statuses to latest state @@ -69,7 +70,7 @@ if __name__ == "__main__": date_since = datetime.datetime.strptime(args.since, "%Y-%m-%d %H:%M:%S") gh = Github(args.token) - repo = gh.get_repo('ClickHouse/ClickHouse') + repo = gh.get_repo("ClickHouse/ClickHouse") commits = get_commits(repo, date_since) longest_header = [] @@ -101,6 +102,6 @@ if __name__ == "__main__": result_data.append(current_result) if sys.stdout.isatty(): - longest_header = [colored(h, 'white', attrs=['bold']) for h in longest_header] + longest_header = [colored(h, "white", attrs=["bold"]) for h in longest_header] print(tabulate.tabulate(result_data, headers=longest_header, tablefmt="grid")) diff --git a/utils/tests-visualizer/index.html b/utils/tests-visualizer/index.html index 11b2d6504e4..b2db5dbed33 100644 --- a/utils/tests-visualizer/index.html +++ b/utils/tests-visualizer/index.html @@ -20,9 +20,7 @@ width: 130px; display: block; margin: 30px auto; - -webkit-animation: spin 2s ease-in-out infinite; - -moz-animation: spin 2s ease-in-out infinite; - animation: spin 2s ease-in-out infinite; + animation: spin 10s ease-in-out infinite; } h1 { @@ -45,16 +43,9 @@ cursor: pointer; } - @-moz-keyframes spin { - 100% { -moz-transform: rotate(360deg); 
} - } - - @-webkit-keyframes spin { - 100% { -webkit-transform: rotate(360deg); } - } - @keyframes spin { - 100% { transform:rotate(360deg); } + 50% { transform:scale(150%); } + 100% { transform:scale(100%); } } @@ -67,33 +58,26 @@